Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
[~shefty/rdma-dev.git] / fs / nfs / dir.c
index 0fac7fea18efe669a40656303fb4ab4cc46bb610..07ac3847e562b54c26efd30c9a9eabab468c309d 100644 (file)
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
-#include "nfs4_fs.h"
 #include "delegation.h"
 #include "iostat.h"
 #include "internal.h"
+#include "fscache.h"
 
 /* #define NFS_DEBUG_VERBOSE 1 */
 
@@ -55,6 +56,7 @@ static int nfs_rename(struct inode *, struct dentry *,
                      struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, int);
 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
+static int nfs_readdir_clear_array(struct page*, gfp_t);
 
 const struct file_operations nfs_dir_operations = {
        .llseek         = nfs_llseek_dir,
@@ -80,6 +82,10 @@ const struct inode_operations nfs_dir_inode_operations = {
        .setattr        = nfs_setattr,
 };
 
+const struct address_space_operations nfs_dir_addr_space_ops = {
+       .releasepage = nfs_readdir_clear_array, /* kfree()s the name strings held in the page's cache array */
+};
+
 #ifdef CONFIG_NFS_V3
 const struct inode_operations nfs3_dir_inode_operations = {
        .create         = nfs_create,
@@ -104,8 +110,9 @@ const struct inode_operations nfs3_dir_inode_operations = {
 #ifdef CONFIG_NFS_V4
 
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
+static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
 const struct inode_operations nfs4_dir_inode_operations = {
-       .create         = nfs_create,
+       .create         = nfs_open_create,
        .lookup         = nfs_atomic_lookup,
        .link           = nfs_link,
        .unlink         = nfs_unlink,
@@ -150,51 +157,197 @@ nfs_opendir(struct inode *inode, struct file *filp)
        return res;
 }
 
-typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int);
+struct nfs_cache_array_entry {
+       u64 cookie;             /* copied from entry->prev_cookie by nfs_readdir_add_to_array() */
+       u64 ino;                /* copied from entry->ino */
+       struct qstr string;     /* private copy of the name (see nfs_readdir_make_qstr()) */
+};
+
+struct nfs_cache_array {
+       unsigned int size;      /* number of entries currently stored in array[] */
+       int eof_index;          /* index of the entry flagged eof; -1 while none is known */
+       u64 last_cookie;        /* entry->cookie of the most recently added entry */
+       struct nfs_cache_array_entry array[0];  /* entries; fills the rest of the page */
+};
+
+#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry)) /* entries that fit in one page after the header */
+
+typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 typedef struct {
        struct file     *file;
        struct page     *page;
        unsigned long   page_index;
-       __be32          *ptr;
        u64             *dir_cookie;
        loff_t          current_index;
-       struct nfs_entry *entry;
        decode_dirent_t decode;
-       int             plus;
+
        unsigned long   timestamp;
        unsigned long   gencount;
-       int             timestamp_valid;
+       unsigned int    cache_entry_index;      /* index in the cache array found by the search helpers */
+       unsigned int    plus:1;                 /* passed to the readdir and decode calls */
+       unsigned int    eof:1;                  /* set once the end of the directory has been seen */
 } nfs_readdir_descriptor_t;
 
-/* Now we cache directories properly, by stuffing the dirent
- * data directly in the page cache.
- *
- * Inode invalidation due to refresh etc. takes care of
- * _everything_, no sloppy entry flushing logic, no extraneous
- * copying, network direct to page cache, the way it was meant
- * to be.
- *
- * NOTE: Dirent information verification is done always by the
- *      page-in of the RPC reply, nowhere else, this simplies
- *      things substantially.
+/*
+ * The caller is responsible for calling nfs_readdir_release_array(page)
  */
 static
-int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
+struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
+{
+       if (page == NULL)
+               return ERR_PTR(-EIO);
+       return (struct nfs_cache_array *)kmap(page);
+}
+
+/* Undo the kmap() done by nfs_readdir_get_array() */
+static void nfs_readdir_release_array(struct page *page)
+{
+       kunmap(page);
+}
+
+/*
+ * Free the name strings that nfs_readdir_add_to_array() duplicated into
+ * the cache array stored in @page.
+ *
+ * Used as the ->releasepage hook of nfs_dir_addr_space_ops; @mask is
+ * unused.  Always returns 0.
+ */
+static
+int nfs_readdir_clear_array(struct page *page, gfp_t mask)
+{
+       struct nfs_cache_array *array = nfs_readdir_get_array(page);
+       unsigned int i;
+
+       /* nfs_readdir_get_array() returns ERR_PTR(-EIO) for a NULL page */
+       if (IS_ERR(array))
+               return 0;
+       for (i = 0; i < array->size; i++)
+               kfree(array->array[i].string.name);
+       nfs_readdir_release_array(page);
+       return 0;
+}
+
+/*
+ * Fill @string with a private copy of the @len bytes at @name plus the
+ * matching hash.  Returns 0 on success, -ENOMEM if the copy fails.
+ *
+ * The caller is responsible for freeing string->name; for strings stored
+ * by nfs_readdir_add_to_array() that is done in nfs_readdir_clear_array().
+ */
+static
+int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
+{
+       string->len = len;
+       string->name = kmemdup(name, len, GFP_KERNEL);
+       if (string->name != NULL) {
+               string->hash = full_name_hash(name, len);
+               return 0;
+       }
+       return -ENOMEM;
+}
+
+/*
+ * Append @entry to the cache array held in @page.
+ *
+ * Returns 0 on success, -EIO when the array already holds
+ * MAX_READDIR_ARRAY entries, or the error from mapping the page or
+ * duplicating the name.
+ */
+static
+int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
+{
+       struct nfs_cache_array *array = nfs_readdir_get_array(page);
+       struct nfs_cache_array_entry *slot;
+       int err;
+
+       if (IS_ERR(array))
+               return PTR_ERR(array);
+
+       if (array->size >= MAX_READDIR_ARRAY) {
+               err = -EIO;
+               goto out;
+       }
+
+       slot = &array->array[array->size];
+       slot->cookie = entry->prev_cookie;
+       slot->ino = entry->ino;
+       err = nfs_readdir_make_qstr(&slot->string, entry->name, entry->len);
+       if (err != 0)
+               goto out;
+
+       /* Only account for the slot once the name copy has succeeded */
+       array->last_cookie = entry->cookie;
+       if (entry->eof == 1)
+               array->eof_index = array->size;
+       array->size++;
+out:
+       nfs_readdir_release_array(page);
+       return err;
+}
+
+/*
+ * Locate the entry at file position desc->file->f_pos inside @array,
+ * where desc->current_index is the position of the array's first entry.
+ * nfs_readdir_search_array() uses this when *desc->dir_cookie is 0.
+ *
+ * Returns 0 and records the entry's cookie and index in @desc when the
+ * position falls inside this array, -EAGAIN when it lies beyond this
+ * array and no end-of-directory entry is recorded here, and -EBADCOOKIE
+ * (with desc->eof set) otherwise.
+ */
+static
+int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+{
+       loff_t diff = desc->file->f_pos - desc->current_index;
+       unsigned int index;
+
+       if (diff < 0)
+               goto out_eof;
+       if (diff >= array->size) {
+               /*
+                * eof_index is -1 while no end-of-directory entry is known,
+                * and 0 is a valid index (the final entry may be the first
+                * one in this array), so the test must be >= 0, not > 0.
+                */
+               if (array->eof_index >= 0)
+                       goto out_eof;
+               desc->current_index += array->size;
+               return -EAGAIN;
+       }
+
+       index = (unsigned int)diff;
+       *desc->dir_cookie = array->array[index].cookie;
+       desc->cache_entry_index = index;
+       if (index == array->eof_index)
+               desc->eof = 1;
+       return 0;
+out_eof:
+       desc->eof = 1;
+       return -EBADCOOKIE;
+}
+
+/*
+ * Scan @array for an entry whose cookie equals *desc->dir_cookie.
+ *
+ * Returns 0 and records the index in desc->cache_entry_index on a match.
+ * Without a match the result is -EBADCOOKIE if the end-of-directory entry
+ * was passed during the scan (desc->eof is set in that case, also when a
+ * later match is found), and -EAGAIN otherwise.
+ */
+static
+int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+{
+       int result = -EAGAIN;
+       int idx = 0;
+
+       while (idx < array->size) {
+               if (idx == array->eof_index) {
+                       desc->eof = 1;
+                       result = -EBADCOOKIE;
+               }
+               if (array->array[idx].cookie == *desc->dir_cookie) {
+                       desc->cache_entry_index = idx;
+                       return 0;
+               }
+               idx++;
+       }
+
+       return result;
+}
+
+/*
+ * Search the cache array in desc->page: by file position when
+ * *desc->dir_cookie is 0, by cookie otherwise.
+ *
+ * Returns the result of the chosen search, -EBADCOOKIE when
+ * desc->dir_cookie is NULL, or the error from mapping the page.
+ */
+static
+int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
+{
+       struct nfs_cache_array *array;
+       int ret;
+
+       if (!desc->dir_cookie)
+               return -EBADCOOKIE;
+
+       array = nfs_readdir_get_array(desc->page);
+       if (IS_ERR(array))
+               return PTR_ERR(array);
+
+       ret = (*desc->dir_cookie == 0) ?
+               nfs_readdir_search_for_pos(array, desc) :
+               nfs_readdir_search_for_cookie(array, desc);
+
+       nfs_readdir_release_array(desc->page);
+       return ret;
+}
+
+/* Fill a page with xdr information before transferring to the cache page */
+static
+int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
+                       struct nfs_entry *entry, struct file *file, struct inode *inode)
 {
-       struct file     *file = desc->file;
-       struct inode    *inode = file->f_path.dentry->d_inode;
        struct rpc_cred *cred = nfs_file_cred(file);
        unsigned long   timestamp, gencount;
        int             error;
 
-       dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
-                       __func__, (long long)desc->entry->cookie,
-                       page->index);
-
  again:
        timestamp = jiffies;
        gencount = nfs_inc_attr_generation_counter();
-       error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page,
+       error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages,
                                          NFS_SERVER(inode)->dtsize, desc->plus);
        if (error < 0) {
                /* We requested READDIRPLUS, but the server doesn't grok it */
@@ -208,190 +361,292 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
        }
        desc->timestamp = timestamp;
        desc->gencount = gencount;
-       desc->timestamp_valid = 1;
-       SetPageUptodate(page);
-       /* Ensure consistent page alignment of the data.
-        * Note: assumes we have exclusive access to this mapping either
-        *       through inode->i_mutex or some other mechanism.
-        */
-       if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
-               /* Should never happen */
-               nfs_zap_mapping(inode, inode->i_mapping);
-       }
-       unlock_page(page);
-       return 0;
- error:
-       unlock_page(page);
-       return -EIO;
+error:
+       return error;
 }
 
-static inline
-int dir_decode(nfs_readdir_descriptor_t *desc)
+/* Decode the next directory entry from @stream into @entry and stamp its attributes */
+static
+int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
 {
-       __be32  *p = desc->ptr;
-       p = desc->decode(p, desc->entry, desc->plus);
+       struct inode *dir = desc->file->f_path.dentry->d_inode;
+       __be32 *p = desc->decode(stream, entry, NFS_SERVER(dir), desc->plus);
+
        if (IS_ERR(p))
                return PTR_ERR(p);
-       desc->ptr = p;
-       if (desc->timestamp_valid) {
-               desc->entry->fattr->time_start = desc->timestamp;
-               desc->entry->fattr->gencount = desc->gencount;
-       } else
-               desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
+
+       /* Tag the attributes with the time/generation recorded for the READDIR call */
+       entry->fattr->time_start = desc->timestamp;
+       entry->fattr->gencount = desc->gencount;
        return 0;
 }
 
-static inline
-void dir_page_release(nfs_readdir_descriptor_t *desc)
+/*
+ * Return 1 when @dentry already refers to the file described by @entry,
+ * i.e. it has an inode whose NFS file handle has the same size and the
+ * same bytes as entry->fh; return 0 otherwise.
+ */
+static
+int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 {
-       kunmap(desc->page);
-       page_cache_release(desc->page);
-       desc->page = NULL;
-       desc->ptr = NULL;
+       struct nfs_inode *node;
+       if (dentry->d_inode == NULL)
+               goto different;
+       node = NFS_I(dentry->d_inode);
+       if (node->fh.size != entry->fh->size)
+               goto different;
+       /*
+        * File handles are opaque binary data and may contain NUL bytes;
+        * strncmp() would stop at the first NUL and report a false match,
+        * so compare the full length with memcmp().
+        */
+       if (memcmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
+               goto different;
+       return 1;
+different:
+       return 0;
 }
 
-/*
- * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the next entry with cookie '*desc->dir_cookie'.
- *
- * If the end of the buffer has been reached, return -EAGAIN, if not,
- * return the offset within the buffer of the next entry to be
- * read.
- */
-static inline
-int find_dirent(nfs_readdir_descriptor_t *desc)
+static
+void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 {
-       struct nfs_entry *entry = desc->entry;
-       int             loop_count = 0,
-                       status;
+       struct qstr filename = {
+               .len = entry->len,
+               .name = entry->name,
+       };
+       struct dentry *dentry;
+       struct dentry *alias;
+       struct inode *dir = parent->d_inode;
+       struct inode *inode;
 
-       while((status = dir_decode(desc)) == 0) {
-               dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
-                               __func__, (unsigned long long)entry->cookie);
-               if (entry->prev_cookie == *desc->dir_cookie)
-                       break;
-               if (loop_count++ > 200) {
-                       loop_count = 0;
-                       schedule();
+       if (filename.name[0] == '.') {  /* nothing is done for "." and ".." */
+               if (filename.len == 1)
+                       return;
+               if (filename.len == 2 && filename.name[1] == '.')
+                       return;
+       }
+       filename.hash = full_name_hash(filename.name, filename.len);
+
+       dentry = d_lookup(parent, &filename);
+       if (dentry != NULL) {
+               if (nfs_same_file(dentry, entry)) {     /* existing dentry matches: just refresh its inode */
+                       nfs_refresh_inode(dentry->d_inode, entry->fattr);
+                       goto out;
+               } else {        /* name now refers to a different file: drop the old dentry */
+                       d_drop(dentry);
+                       dput(dentry);
                }
        }
-       return status;
+
+       dentry = d_alloc(parent, &filename);
+       if (dentry == NULL)
+               return;
+
+       dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+       inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
+       if (IS_ERR(inode))
+               goto out;
+
+       alias = d_materialise_unique(dentry, inode);
+       if (IS_ERR(alias))
+               goto out;
+       else if (alias) {       /* a non-NULL alias is returned: set the verifier on it instead */
+               nfs_set_verifier(alias, nfs_save_change_attribute(dir));
+               dput(alias);
+       } else
+               nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+
+out:
+       dput(dentry);   /* drops the reference from d_lookup() or d_alloc() above */
+}
+
+/*
+ * Perform conversion from xdr to cache array.
+ *
+ * Decodes directory entries from the @buflen bytes at @xdr_page and
+ * appends each one to the cache array in @page, priming the dcache for
+ * every stored entry when desc->plus is set.  Decoding stops at the first
+ * decode error, at the first entry that cannot be stored, or once an
+ * entry flagged eof has been handled.
+ */
+static
+void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
+                               void *xdr_page, struct page *page, unsigned int buflen)
+{
+       struct xdr_stream stream;
+       struct xdr_buf buf;
+       __be32 *ptr = xdr_page;
+       int status;
+       struct nfs_cache_array *array;
+
+       /* Describe the buffer as a single head segment with no pages or tail */
+       buf.head->iov_base = xdr_page;
+       buf.head->iov_len = buflen;
+       buf.tail->iov_len = 0;
+       buf.page_base = 0;
+       buf.page_len = 0;
+       buf.buflen = buf.head->iov_len;
+       buf.len = buf.head->iov_len;
+
+       xdr_init_decode(&stream, &buf, ptr);
+
+
+       do {
+               status = xdr_decode(desc, entry, &stream);
+               if (status != 0)
+                       break;
+
+               /*
+                * nfs_readdir_add_to_array() reports failure as a negative
+                * errno (-EIO when the array is full, -ENOMEM, ...), so any
+                * non-zero result must end the loop.  Comparing against -1
+                * never matched those codes and let decoding carry on after
+                * an entry had been dropped.
+                */
+               if (nfs_readdir_add_to_array(entry, page) != 0)
+                       break;
+               if (desc->plus == 1)
+                       nfs_prime_dcache(desc->file->f_path.dentry, entry);
+       } while (!entry->eof);
+
+       /* The decoder hit the end of the listing: mark the last stored entry as eof */
+       if (status == -EBADCOOKIE && entry->eof) {
+               array = nfs_readdir_get_array(page);
+               array->eof_index = array->size - 1;
+               status = 0;
+               nfs_readdir_release_array(page);
+       }
+}
+
+/* Drop one reference on each of the first @npages pages in @pages */
+static
+void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages)
+{
+       unsigned int idx = 0;
+
+       while (idx < npages) {
+               put_page(pages[idx]);
+               idx++;
+       }
+}
+
+/* Tear down a buffer set up by nfs_readdir_large_page(): unmap @ptr, then release @pages */
+static void nfs_readdir_free_large_page(void *ptr, struct page **pages,
+               unsigned int npages)
+{
+       vm_unmap_ram(ptr, npages);
+       nfs_readdir_free_pagearray(pages, npages);
+}
 
 /*
- * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the entry at offset 'desc->file->f_pos'.
- *
- * If the end of the buffer has been reached, return -EAGAIN, if not,
- * return the offset within the buffer of the next entry to be
- * read.
+ * nfs_readdir_large_page will allocate pages that must be freed with a call
+ * to nfs_readdir_free_large_page
  */
-static inline
-int find_dirent_index(nfs_readdir_descriptor_t *desc)
+static
+void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
 {
-       struct nfs_entry *entry = desc->entry;
-       int             loop_count = 0,
-                       status;
+       void *ptr;
+       unsigned int i;
+
+       /* Allocate the backing pages one at a time */
+       for (i = 0; i < npages; i++) {
+               struct page *new_page = alloc_page(GFP_KERNEL);
+               if (!new_page)
+                       goto out_freepages;
+               pages[i] = new_page;
+       }
 
-       for(;;) {
-               status = dir_decode(desc);
-               if (status)
-                       break;
+       /* Map them into one contiguous kernel virtual range */
+       ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
+       if (ptr != NULL && !IS_ERR(ptr))
+               return ptr;
+out_freepages:
+       /* i counts the pages allocated so far (all of them if the mapping failed) */
+       nfs_readdir_free_pagearray(pages, i);
+       return NULL;
+}
+
+static
+int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
+{
+       struct page *pages[NFS_MAX_READDIR_PAGES];
+       void *pages_ptr = NULL;
+       struct nfs_entry entry;
+       struct file     *file = desc->file;
+       struct nfs_cache_array *array;
+       int status = 0;
+       unsigned int array_size = ARRAY_SIZE(pages);
+
+       entry.prev_cookie = 0;
+       entry.cookie = *desc->dir_cookie;
+       entry.eof = 0;
+       entry.fh = nfs_alloc_fhandle();
+       entry.fattr = nfs_alloc_fattr();
+       if (entry.fh == NULL || entry.fattr == NULL)
+               goto out;
 
-               dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
-                               (unsigned long long)entry->cookie, desc->current_index);
+       array = nfs_readdir_get_array(page);
+       memset(array, 0, sizeof(struct nfs_cache_array));
+       array->eof_index = -1;
 
-               if (desc->file->f_pos == desc->current_index) {
-                       *desc->dir_cookie = entry->cookie;
+       pages_ptr = nfs_readdir_large_page(pages, array_size);
+       if (!pages_ptr)
+               goto out_release_array;
+       do {
+               status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
+
+               if (status < 0)
                        break;
-               }
-               desc->current_index++;
-               if (loop_count++ > 200) {
-                       loop_count = 0;
-                       schedule();
-               }
-       }
+               nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
+       } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
+
+       nfs_readdir_free_large_page(pages_ptr, pages, array_size);
+out_release_array:
+       nfs_readdir_release_array(page);
+out:
+       nfs_free_fattr(entry.fattr);
+       nfs_free_fhandle(entry.fh);
        return status;
 }
 
 /*
- * Find the given page, and call find_dirent() or find_dirent_index in
- * order to try to return the next entry.
+ * Now we cache directories properly, by converting xdr information
+ * to an array that can be used for lookups later.  This results in
+ * fewer cache pages, since we can store more information on each page.
+ * We only need to convert from xdr once so future lookups are much simpler
  */
-static inline
-int find_dirent_page(nfs_readdir_descriptor_t *desc)
+static
+int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 {
        struct inode    *inode = desc->file->f_path.dentry->d_inode;
-       struct page     *page;
-       int             status;
+       int             ret;
 
-       dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
-                       __func__, desc->page_index,
-                       (long long) *desc->dir_cookie);
+       /*
+        * Keep the real error code instead of collapsing everything into
+        * -EIO: nfs_readdir() tests for specific values such as -ETOOSMALL.
+        */
+       ret = nfs_readdir_xdr_to_array(desc, page, inode);
+       if (ret < 0)
+               goto error;
+       SetPageUptodate(page);
 
-       /* If we find the page in the page_cache, we cannot be sure
-        * how fresh the data is, so we will ignore readdir_plus attributes.
-        */
-       desc->timestamp_valid = 0;
-       page = read_cache_page(inode->i_mapping, desc->page_index,
-                              (filler_t *)nfs_readdir_filler, desc);
-       if (IS_ERR(page)) {
-               status = PTR_ERR(page);
-               goto out;
+       /* Invalidate every cached page after this one */
+       if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
+               /* Should never happen */
+               nfs_zap_mapping(inode, inode->i_mapping);
        }
+       unlock_page(page);
+       return 0;
+ error:
+       unlock_page(page);
+       return ret;
+}
 
-       /* NOTE: Someone else may have changed the READDIRPLUS flag */
-       desc->page = page;
-       desc->ptr = kmap(page);         /* matching kunmap in nfs_do_filldir */
-       if (*desc->dir_cookie != 0)
-               status = find_dirent(desc);
-       else
-               status = find_dirent_index(desc);
-       if (status < 0)
-               dir_page_release(desc);
- out:
-       dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
-       return status;
+/* Drop the reference held on desc->page and forget the pointer */
+static void cache_page_release(nfs_readdir_descriptor_t *desc)
+{
+       page_cache_release(desc->page);
+       desc->page = NULL;
+}
+
+/*
+ * Fetch page desc->page_index of the directory's mapping, passing
+ * nfs_readdir_filler() to read_cache_page() as the filler.
+ * On failure desc->eof is set and the ERR_PTR() is returned.
+ */
+static
+struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
+{
+       struct inode *dir = desc->file->f_path.dentry->d_inode;
+       struct page *page = read_cache_page(dir->i_mapping, desc->page_index,
+                       (filler_t *)nfs_readdir_filler, desc);
+
+       if (IS_ERR(page))
+               desc->eof = 1;
+       return page;
+}
 
 /*
- * Recurse through the page cache pages, and return a
- * filled nfs_entry structure of the next directory entry if possible.
- *
- * The target for the search is '*desc->dir_cookie' if non-0,
- * 'desc->file->f_pos' otherwise
+ * Returns 0 if desc->dir_cookie was found on page desc->page_index
  */
+static
+int find_cache_page(nfs_readdir_descriptor_t *desc)
+{
+       int err;
+
+       desc->page = get_cache_page(desc);
+       if (IS_ERR(desc->page))
+               return PTR_ERR(desc->page);
+
+       /* The page stays referenced only when the search succeeded */
+       err = nfs_readdir_search_array(desc);
+       if (err != 0)
+               cache_page_release(desc);
+       return err;
+}
+
+/* Search successive cache pages, starting at desc->page_index, until find_cache_page() returns something other than -EAGAIN */
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 {
-       int             loop_count = 0;
-       int             res;
-
-       /* Always search-by-index from the beginning of the cache */
-       if (*desc->dir_cookie == 0) {
-               dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
-                               (long long)desc->file->f_pos);
-               desc->page_index = 0;
-               desc->entry->cookie = desc->entry->prev_cookie = 0;
-               desc->entry->eof = 0;
-               desc->current_index = 0;
-       } else
-               dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
-                               (unsigned long long)*desc->dir_cookie);
+       int res = -EAGAIN;
 
-       for (;;) {
-               res = find_dirent_page(desc);
+       while (1) {
+               res = find_cache_page(desc);
                if (res != -EAGAIN)
                        break;
-               /* Align to beginning of next page */
-               desc->page_index ++;
-               if (loop_count++ > 200) {
-                       loop_count = 0;
-                       schedule();
-               }
+               desc->page_index++;     /* -EAGAIN: not on this page, try the next one */
        }
-
-       dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res);
        return res;
 }
 
@@ -400,8 +655,6 @@ static inline unsigned int dt_type(struct inode *inode)
        return (inode->i_mode >> 12) & 15;
 }
 
-static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
-
 /*
  * Once we've found the start of the dirent within a page: fill 'er up...
  */
@@ -410,49 +663,36 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
                   filldir_t filldir)
 {
        struct file     *file = desc->file;
-       struct nfs_entry *entry = desc->entry;
-       struct dentry   *dentry = NULL;
-       u64             fileid;
-       int             loop_count = 0,
-                       res;
-
-       dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
-                       (unsigned long long)entry->cookie);
-
-       for(;;) {
-               unsigned d_type = DT_UNKNOWN;
-               /* Note: entry->prev_cookie contains the cookie for
-                *       retrieving the current dirent on the server */
-               fileid = entry->ino;
-
-               /* Get a dentry if we have one */
-               if (dentry != NULL)
-                       dput(dentry);
-               dentry = nfs_readdir_lookup(desc);
+       int i = 0;
+       int res = 0;
+       struct nfs_cache_array *array = NULL;
+       unsigned int d_type = DT_UNKNOWN;
+       struct dentry *dentry = NULL;
 
-               /* Use readdirplus info */
-               if (dentry != NULL && dentry->d_inode != NULL) {
-                       d_type = dt_type(dentry->d_inode);
-                       fileid = NFS_FILEID(dentry->d_inode);
-               }
+       array = nfs_readdir_get_array(desc->page);
 
-               res = filldir(dirent, entry->name, entry->len, 
-                             file->f_pos, nfs_compat_user_ino64(fileid),
-                             d_type);
+       for (i = desc->cache_entry_index; i < array->size; i++) {
+               d_type = DT_UNKNOWN;
+
+               res = filldir(dirent, array->array[i].string.name,
+                       array->array[i].string.len, file->f_pos,
+                       nfs_compat_user_ino64(array->array[i].ino), d_type);
                if (res < 0)
                        break;
                file->f_pos++;
-               *desc->dir_cookie = entry->cookie;
-               if (dir_decode(desc) != 0) {
-                       desc->page_index ++;
+               desc->cache_entry_index = i;
+               if (i < (array->size-1))
+                       *desc->dir_cookie = array->array[i+1].cookie;
+               else
+                       *desc->dir_cookie = array->last_cookie;
+               if (i == array->eof_index) {
+                       desc->eof = 1;
                        break;
                }
-               if (loop_count++ > 200) {
-                       loop_count = 0;
-                       schedule();
-               }
        }
-       dir_page_release(desc);
+
+       nfs_readdir_release_array(desc->page);
+       cache_page_release(desc);
        if (dentry != NULL)
                dput(dentry);
        dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
@@ -476,12 +716,9 @@ static inline
 int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
                     filldir_t filldir)
 {
-       struct file     *file = desc->file;
-       struct inode    *inode = file->f_path.dentry->d_inode;
-       struct rpc_cred *cred = nfs_file_cred(file);
        struct page     *page = NULL;
        int             status;
-       unsigned long   timestamp, gencount;
+       struct inode *inode = desc->file->f_path.dentry->d_inode;
 
        dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
                        (unsigned long long)*desc->dir_cookie);
@@ -491,38 +728,22 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
                status = -ENOMEM;
                goto out;
        }
-       timestamp = jiffies;
-       gencount = nfs_inc_attr_generation_counter();
-       status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
-                                               *desc->dir_cookie, page,
-                                               NFS_SERVER(inode)->dtsize,
-                                               desc->plus);
-       desc->page = page;
-       desc->ptr = kmap(page);         /* matching kunmap in nfs_do_filldir */
-       if (status >= 0) {
-               desc->timestamp = timestamp;
-               desc->gencount = gencount;
-               desc->timestamp_valid = 1;
-               if ((status = dir_decode(desc)) == 0)
-                       desc->entry->prev_cookie = *desc->dir_cookie;
-       } else
+
+       if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
                status = -EIO;
-       if (status < 0)
                goto out_release;
+       }
 
+       desc->page_index = 0;
+       desc->page = page;
        status = nfs_do_filldir(desc, dirent, filldir);
 
-       /* Reset read descriptor so it searches the page cache from
-        * the start upon the next call to readdir_search_pagecache() */
-       desc->page_index = 0;
-       desc->entry->cookie = desc->entry->prev_cookie = 0;
-       desc->entry->eof = 0;
  out:
        dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
                        __func__, status);
        return status;
  out_release:
-       dir_page_release(desc);
+       cache_page_release(desc);
        goto out;
 }
 
@@ -536,7 +757,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        struct inode    *inode = dentry->d_inode;
        nfs_readdir_descriptor_t my_desc,
                        *desc = &my_desc;
-       struct nfs_entry my_entry;
        int res = -ENOMEM;
 
        dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -557,26 +777,17 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        desc->decode = NFS_PROTO(inode)->decode_dirent;
        desc->plus = NFS_USE_READDIRPLUS(inode);
 
-       my_entry.cookie = my_entry.prev_cookie = 0;
-       my_entry.eof = 0;
-       my_entry.fh = nfs_alloc_fhandle();
-       my_entry.fattr = nfs_alloc_fattr();
-       if (my_entry.fh == NULL || my_entry.fattr == NULL)
-               goto out_alloc_failed;
-
-       desc->entry = &my_entry;
-
        nfs_block_sillyrename(dentry);
        res = nfs_revalidate_mapping(inode, filp->f_mapping);
        if (res < 0)
                goto out;
 
-       while(!desc->entry->eof) {
+       while (desc->eof != 1) {
                res = readdir_search_pagecache(desc);
 
                if (res == -EBADCOOKIE) {
                        /* This means either end of directory */
-                       if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) {
+                       if (*desc->dir_cookie && desc->eof == 0) {
                                /* Or that the server has 'lost' a cookie */
                                res = uncached_readdir(desc, dirent, filldir);
                                if (res >= 0)
@@ -588,8 +799,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                if (res == -ETOOSMALL && desc->plus) {
                        clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
                        nfs_zap_caches(inode);
+                       desc->page_index = 0;
                        desc->plus = 0;
-                       desc->entry->eof = 0;
+                       desc->eof = 0;
                        continue;
                }
                if (res < 0)
@@ -605,9 +817,6 @@ out:
        nfs_unblock_sillyrename(dentry);
        if (res > 0)
                res = 0;
-out_alloc_failed:
-       nfs_free_fattr(my_entry.fattr);
-       nfs_free_fhandle(my_entry.fh);
        dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
                        dentry->d_parent->d_name.name, dentry->d_name.name,
                        res);
@@ -1029,10 +1238,63 @@ static int is_atomic_open(struct nameidata *nd)
        return 1;
 }
 
+static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd)
+{      /*
+        * Allocate an nfs_open_context for @dentry using the open intent
+        * carried in @nd: the vfsmount comes from nd->path.mnt and the open
+        * mode is the intent's flags masked down to
+        * FMODE_READ | FMODE_WRITE | FMODE_EXEC.
+        *
+        * Returns the new context on success, the error pointer produced by
+        * rpc_lookup_cred() if the credential lookup fails, or
+        * ERR_PTR(-ENOMEM) if alloc_nfs_open_context() returns NULL.
+        */
+       struct path path = {
+               .mnt = nd->path.mnt,
+               .dentry = dentry,
+       };
+       struct nfs_open_context *ctx;
+       struct rpc_cred *cred;
+       fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
+
+       cred = rpc_lookup_cred();
+       if (IS_ERR(cred))
+               return ERR_CAST(cred);
+       ctx = alloc_nfs_open_context(&path, cred, fmode);
+       put_rpccred(cred);      /* released here whether or not the allocation succeeded */
+       if (ctx == NULL)
+               return ERR_PTR(-ENOMEM);
+       return ctx;
+}
+
+static int do_open(struct inode *inode, struct file *filp)
+{      /*
+        * Open callback handed to lookup_instantiate_filp() by
+        * nfs_intent_set_file().  It only calls
+        * nfs_fscache_set_inode_cookie() for the inode/file pair and
+        * always returns 0.
+        */
+       nfs_fscache_set_inode_cookie(inode, filp);
+       return 0;
+}
+
+static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
+{      /*
+        * Instantiate the struct file for the open intent in @nd on
+        * ctx->path.dentry and, on success, attach @ctx to that file.
+        *
+        * put_nfs_open_context(@ctx) is called on every path, so the
+        * caller must not put @ctx again after calling this function.
+        *
+        * Returns 0 on success, or the negative error from nfs_may_open()
+        * or lookup_instantiate_filp().
+        */
+       struct file *filp;
+       int ret = 0;
+
+       /* If the open_intent is for execute, we have an extra check to make */
+       if (ctx->mode & FMODE_EXEC) {
+               ret = nfs_may_open(ctx->path.dentry->d_inode,
+                               ctx->cred,
+                               nd->intent.open.flags);
+               if (ret < 0)
+                       goto out;
+       }
+       filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open);
+       if (IS_ERR(filp))
+               ret = PTR_ERR(filp);
+       else
+               nfs_file_set_open_context(filp, ctx);   /* presumably takes its own reference - confirm */
+out:
+       put_nfs_open_context(ctx);      /* reached on success and on both failure paths */
+       return ret;
+}
+
 static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+       struct nfs_open_context *ctx;
+       struct iattr attr;
        struct dentry *res = NULL;
-       int error;
+       struct inode *inode;
+       int open_flags;
+       int err;
 
        dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
                        dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1054,13 +1316,32 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
                goto out;
        }
 
+       ctx = nameidata_to_nfs_open_context(dentry, nd);
+       res = ERR_CAST(ctx);
+       if (IS_ERR(ctx))
+               goto out;
+
+       open_flags = nd->intent.open.flags;
+       if (nd->flags & LOOKUP_CREATE) {
+               attr.ia_mode = nd->intent.open.create_mode;
+               attr.ia_valid = ATTR_MODE;
+               if (!IS_POSIXACL(dir))
+                       attr.ia_mode &= ~current_umask();
+       } else {
+               open_flags &= ~(O_EXCL | O_CREAT);
+               attr.ia_valid = 0;
+       }
+
        /* Open the file on the server */
-       res = nfs4_atomic_open(dir, dentry, nd);
-       if (IS_ERR(res)) {
-               error = PTR_ERR(res);
-               switch (error) {
+       nfs_block_sillyrename(dentry->d_parent);
+       inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
+       if (IS_ERR(inode)) {
+               nfs_unblock_sillyrename(dentry->d_parent);
+               put_nfs_open_context(ctx);
+               switch (PTR_ERR(inode)) {
                        /* Make a negative dentry */
                        case -ENOENT:
+                               d_add(dentry, NULL);
                                res = NULL;
                                goto out;
                        /* This turned out not to be a regular file */
@@ -1072,11 +1353,25 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
                                        goto no_open;
                        /* case -EINVAL: */
                        default:
+                               res = ERR_CAST(inode);
                                goto out;
                }
-       } else if (res != NULL)
+       }
+       res = d_add_unique(dentry, inode);
+       nfs_unblock_sillyrename(dentry->d_parent);
+       if (res != NULL) {
+               dput(ctx->path.dentry);
+               ctx->path.dentry = dget(res);
                dentry = res;
+       }
+       err = nfs_intent_set_file(nd, ctx);
+       if (err < 0) {
+               if (res != NULL)
+                       dput(res);
+               return ERR_PTR(err);
+       }
 out:
+       nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
        return res;
 no_open:
        return nfs_lookup(dir, dentry, nd);
@@ -1087,12 +1382,15 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
        struct dentry *parent = NULL;
        struct inode *inode = dentry->d_inode;
        struct inode *dir;
+       struct nfs_open_context *ctx;
        int openflags, ret = 0;
 
        if (!is_atomic_open(nd) || d_mountpoint(dentry))
                goto no_open;
+
        parent = dget_parent(dentry);
        dir = parent->d_inode;
+
        /* We can't create new files in nfs_open_revalidate(), so we
         * optimize away revalidation of negative dentries.
         */
@@ -1112,99 +1410,96 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
        /* We can't create new files, or truncate existing ones here */
        openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
 
+       ctx = nameidata_to_nfs_open_context(dentry, nd);
+       ret = PTR_ERR(ctx);
+       if (IS_ERR(ctx))
+               goto out;
        /*
         * Note: we're not holding inode->i_mutex and so may be racing with
         * operations that change the directory. We therefore save the
         * change attribute *before* we do the RPC call.
         */
-       ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
+       inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL);
+       if (IS_ERR(inode)) {
+               ret = PTR_ERR(inode);
+               switch (ret) {
+               case -EPERM:
+               case -EACCES:
+               case -EDQUOT:
+               case -ENOSPC:
+               case -EROFS:
+                       goto out_put_ctx;
+               default:
+                       goto out_drop;
+               }
+       }
+       iput(inode);
+       if (inode != dentry->d_inode)
+               goto out_drop;
+
+       nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+       ret = nfs_intent_set_file(nd, ctx);
+       if (ret >= 0)
+               ret = 1;
 out:
        dput(parent);
-       if (!ret)
-               d_drop(dentry);
        return ret;
+out_drop:
+       d_drop(dentry);
+       ret = 0;
+out_put_ctx:
+       put_nfs_open_context(ctx);
+       goto out;
+
 no_open_dput:
        dput(parent);
 no_open:
        return nfs_lookup_revalidate(dentry, nd);
 }
-#endif /* CONFIG_NFSV4 */
 
-static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
+static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
+               struct nameidata *nd)
 {
-       struct dentry *parent = desc->file->f_path.dentry;
-       struct inode *dir = parent->d_inode;
-       struct nfs_entry *entry = desc->entry;
-       struct dentry *dentry, *alias;
-       struct qstr name = {
-               .name = entry->name,
-               .len = entry->len,
-       };
-       struct inode *inode;
-       unsigned long verf = nfs_save_change_attribute(dir);
+       struct nfs_open_context *ctx = NULL;
+       struct iattr attr;
+       int error;
+       int open_flags = 0;     /*
+        * ->create() handler used by nfs4_dir_inode_operations.
+        *
+        * If @nd has LOOKUP_CREATE set, the intent's open flags and a newly
+        * built open context are passed to the protocol ->create() call and,
+        * on success, the context is attached to the intent's file through
+        * nfs_intent_set_file().  Otherwise ->create() is called with
+        * open_flags == 0 and a NULL context.
+        *
+        * Returns 0 on success or a negative error.  The dentry is
+        * d_drop()ped when building the context or ->create() fails, but not
+        * when nfs_intent_set_file() fails.
+        *
+        * NOTE(review): @nd is dereferenced unconditionally below; confirm
+        * that no caller can reach this handler with a NULL nameidata.
+        */
 
-       switch (name.len) {
-               case 2:
-                       if (name.name[0] == '.' && name.name[1] == '.')
-                               return dget_parent(parent);
-                       break;
-               case 1:
-                       if (name.name[0] == '.')
-                               return dget(parent);
-       }
+       dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
-       spin_lock(&dir->i_lock);
-       if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) {
-               spin_unlock(&dir->i_lock);
-               return NULL;
-       }
-       spin_unlock(&dir->i_lock);
+       attr.ia_mode = mode;
+       attr.ia_valid = ATTR_MODE;
 
-       name.hash = full_name_hash(name.name, name.len);
-       dentry = d_lookup(parent, &name);
-       if (dentry != NULL) {
-               /* Is this a positive dentry that matches the readdir info? */
-               if (dentry->d_inode != NULL &&
-                               (NFS_FILEID(dentry->d_inode) == entry->ino ||
-                               d_mountpoint(dentry))) {
-                       if (!desc->plus || entry->fh->size == 0)
-                               return dentry;
-                       if (nfs_compare_fh(NFS_FH(dentry->d_inode),
-                                               entry->fh) == 0)
-                               goto out_renew;
-               }
-               /* No, so d_drop to allow one to be created */
-               d_drop(dentry);
-               dput(dentry);
-       }
-       if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
-               return NULL;
-       if (name.len > NFS_SERVER(dir)->namelen)
-               return NULL;
-       /* Note: caller is already holding the dir->i_mutex! */
-       dentry = d_alloc(parent, &name);
-       if (dentry == NULL)
-               return NULL;
-       dentry->d_op = NFS_PROTO(dir)->dentry_ops;
-       inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
-       if (IS_ERR(inode)) {
-               dput(dentry);
-               return NULL;
-       }
+       if ((nd->flags & LOOKUP_CREATE) != 0) {
+               open_flags = nd->intent.open.flags;
 
-       alias = d_materialise_unique(dentry, inode);
-       if (alias != NULL) {
-               dput(dentry);
-               if (IS_ERR(alias))
-                       return NULL;
-               dentry = alias;
+               ctx = nameidata_to_nfs_open_context(dentry, nd);
+               error = PTR_ERR(ctx);
+               if (IS_ERR(ctx))
+                       goto out_err_drop;      /* no context to release on this path */
        }
 
-out_renew:
-       nfs_set_verifier(dentry, verf);
-       return dentry;
+       error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
+       if (error != 0)
+               goto out_put_ctx;
+       if (ctx != NULL) {
+               error = nfs_intent_set_file(nd, ctx);
+               if (error < 0)
+                       goto out_err;   /* nfs_intent_set_file() has already put ctx */
+       }
+       return 0;
+out_put_ctx:
+       if (ctx != NULL)
+               put_nfs_open_context(ctx);
+out_err_drop:
+       d_drop(dentry);
+out_err:
+       return error;
 }
 
+#endif /* CONFIG_NFS_V4 */
+
 /*
  * Code common to create, mkdir, and mknod.
  */
@@ -1258,7 +1553,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 {
        struct iattr attr;
        int error;
-       int open_flags = 0;
 
        dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
                        dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1266,10 +1560,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
        attr.ia_mode = mode;
        attr.ia_valid = ATTR_MODE;
 
-       if ((nd->flags & LOOKUP_CREATE) != 0)
-               open_flags = nd->intent.open.flags;
-
-       error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
+       error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL);
        if (error != 0)
                goto out_err;
        return 0;
@@ -1351,76 +1642,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
        return error;
 }
 
-static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
-{
-       static unsigned int sillycounter;
-       const int      fileidsize  = sizeof(NFS_FILEID(dentry->d_inode))*2;
-       const int      countersize = sizeof(sillycounter)*2;
-       const int      slen        = sizeof(".nfs")+fileidsize+countersize-1;
-       char           silly[slen+1];
-       struct qstr    qsilly;
-       struct dentry *sdentry;
-       int            error = -EIO;
-
-       dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name, 
-               atomic_read(&dentry->d_count));
-       nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
-
-       /*
-        * We don't allow a dentry to be silly-renamed twice.
-        */
-       error = -EBUSY;
-       if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
-               goto out;
-
-       sprintf(silly, ".nfs%*.*Lx",
-               fileidsize, fileidsize,
-               (unsigned long long)NFS_FILEID(dentry->d_inode));
-
-       /* Return delegation in anticipation of the rename */
-       nfs_inode_return_delegation(dentry->d_inode);
-
-       sdentry = NULL;
-       do {
-               char *suffix = silly + slen - countersize;
-
-               dput(sdentry);
-               sillycounter++;
-               sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
-
-               dfprintk(VFS, "NFS: trying to rename %s to %s\n",
-                               dentry->d_name.name, silly);
-               
-               sdentry = lookup_one_len(silly, dentry->d_parent, slen);
-               /*
-                * N.B. Better to return EBUSY here ... it could be
-                * dangerous to delete the file while it's in use.
-                */
-               if (IS_ERR(sdentry))
-                       goto out;
-       } while(sdentry->d_inode != NULL); /* need negative lookup */
-
-       qsilly.name = silly;
-       qsilly.len  = strlen(silly);
-       if (dentry->d_inode) {
-               error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
-                               dir, &qsilly);
-               nfs_mark_for_revalidate(dentry->d_inode);
-       } else
-               error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
-                               dir, &qsilly);
-       if (!error) {
-               nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-               d_move(dentry, sdentry);
-               error = nfs_async_unlink(dir, dentry);
-               /* If we return 0 we don't unlink */
-       }
-       dput(sdentry);
-out:
-       return error;
-}
-
 /*
  * Remove a file after making sure there are no pending writes,
  * and after checking that the file has only one user. 
@@ -1711,14 +1932,14 @@ static void nfs_access_free_list(struct list_head *head)
 int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
        LIST_HEAD(head);
-       struct nfs_inode *nfsi;
+       struct nfs_inode *nfsi, *next;
        struct nfs_access_entry *cache;
 
        if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
                return (nr_to_scan == 0) ? 0 : -1;
 
        spin_lock(&nfs_access_lru_lock);
-       list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+       list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
                struct inode *inode;
 
                if (nr_to_scan-- == 0)