Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...
[~shefty/rdma-dev.git] / fs / cifs / file.c
index c23fbd8..a8d8b58 100644 (file)
@@ -238,6 +238,23 @@ out:
        return rc;
 }
 
+static bool
+cifs_has_mand_locks(struct cifsInodeInfo *cinode)
+{
+       struct cifs_fid_locks *cur;
+       bool has_locks = false;
+
+       down_read(&cinode->lock_sem);
+       list_for_each_entry(cur, &cinode->llist, llist) {
+               if (!list_empty(&cur->locks)) {
+                       has_locks = true;
+                       break;
+               }
+       }
+       up_read(&cinode->lock_sem);
+       return has_locks;
+}
+
 struct cifsFileInfo *
 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
@@ -248,6 +265,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
+       struct TCP_Server_Info *server = tcon->ses->server;
 
        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
@@ -276,12 +294,22 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
 
+       /*
+        * If the server returned a read oplock and we have mandatory brlocks,
+        * set oplock level to None.
+        */
+       if (oplock == server->vals->oplock_read &&
+                                               cifs_has_mand_locks(cinode)) {
+               cFYI(1, "Reset oplock val from read to None due to mand locks");
+               oplock = 0;
+       }
+
        spin_lock(&cifs_file_list_lock);
-       if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE)
+       if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);
 
-       tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
+       server->ops->set_fid(cfile, fid, oplock);
 
        list_add(&cfile->tlist, &tcon->openFileList);
        /* if readable file instance put first in list*/
@@ -1422,6 +1450,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
+       struct inode *inode = cfile->dentry->d_inode;
 
        if (posix_lck) {
                int posix_lock_type;
@@ -1459,6 +1488,21 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
                if (!rc)
                        goto out;
 
+               /*
+                * Windows 7 server can delay breaking lease from read to None
+                * if we set a byte-range lock on a file - break it explicitly
+                * before sending the lock to the server to be sure the next
+                * read won't conflict with non-overlapted locks due to
+                * pagereading.
+                */
+               if (!CIFS_I(inode)->clientCanCacheAll &&
+                                       CIFS_I(inode)->clientCanCacheRead) {
+                       cifs_invalidate_mapping(inode);
+                       cFYI(1, "Set no oplock for inode=%p due to mand locks",
+                            inode);
+                       CIFS_I(inode)->clientCanCacheRead = false;
+               }
+
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 1, 0, wait_flag);
                if (rc) {
@@ -2103,15 +2147,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
        } else {
                rc = copied;
                pos += copied;
-               /*
-                * When we use strict cache mode and cifs_strict_writev was run
-                * with level II oplock (indicated by leave_pages_clean field of
-                * CIFS_I(inode)), we can leave pages clean - cifs_strict_writev
-                * sent the data to the server itself.
-                */
-               if (!CIFS_I(inode)->leave_pages_clean ||
-                   !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO))
-                       set_page_dirty(page);
+               set_page_dirty(page);
        }
 
        if (rc > 0) {
@@ -2462,8 +2498,8 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
 }
 
 static ssize_t
-cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov,
-                     unsigned long nr_segs, loff_t pos, bool cache_ex)
+cifs_writev(struct kiocb *iocb, const struct iovec *iov,
+           unsigned long nr_segs, loff_t pos)
 {
        struct file *file = iocb->ki_filp;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
@@ -2485,12 +2521,8 @@ cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov,
                                     server->vals->exclusive_lock_type, NULL,
                                     CIFS_WRITE_OP)) {
                mutex_lock(&inode->i_mutex);
-               if (!cache_ex)
-                       cinode->leave_pages_clean = true;
                rc = __generic_file_aio_write(iocb, iov, nr_segs,
-                                             &iocb->ki_pos);
-               if (!cache_ex)
-                       cinode->leave_pages_clean = false;
+                                              &iocb->ki_pos);
                mutex_unlock(&inode->i_mutex);
        }
 
@@ -2517,60 +2549,32 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
                                                iocb->ki_filp->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
-       ssize_t written, written2;
-       /*
-        * We need to store clientCanCacheAll here to prevent race
-        * conditions - this value can be changed during an execution
-        * of generic_file_aio_write. For CIFS it can be changed from
-        * true to false only, but for SMB2 it can be changed both from
-        * true to false and vice versa. So, we can end up with a data
-        * stored in the cache, not marked dirty and not sent to the
-        * server if this value changes its state from false to true
-        * after cifs_write_end.
-        */
-       bool cache_ex = cinode->clientCanCacheAll;
-       bool cache_read = cinode->clientCanCacheRead;
-       int rc;
-       loff_t saved_pos;
+       ssize_t written;
 
-       if (cache_ex) {
+       if (cinode->clientCanCacheAll) {
                if (cap_unix(tcon->ses) &&
-                   ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) &&
-                   (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(
-                                               tcon->fsUnixInfo.Capability)))
+               (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
+                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                        return generic_file_aio_write(iocb, iov, nr_segs, pos);
-               return cifs_pagecache_writev(iocb, iov, nr_segs, pos, cache_ex);
+               return cifs_writev(iocb, iov, nr_segs, pos);
        }
-
        /*
-        * For files without exclusive oplock in strict cache mode we need to
-        * write the data to the server exactly from the pos to pos+len-1 rather
-        * than flush all affected pages because it may cause a error with
-        * mandatory locks on these pages but not on the region from pos to
-        * ppos+len-1.
+        * For non-oplocked files in strict cache mode we need to write the data
+        * to the server exactly from the pos to pos+len-1 rather than flush all
+        * affected pages because it may cause a error with mandatory locks on
+        * these pages but not on the region from pos to ppos+len-1.
         */
        written = cifs_user_writev(iocb, iov, nr_segs, pos);
-       if (!cache_read || written <= 0)
-               return written;
-
-       saved_pos = iocb->ki_pos;
-       iocb->ki_pos = pos;
-       /* we have a read oplock - need to store a data in the page cache */
-       if (cap_unix(tcon->ses) &&
-           ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) &&
-           (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(
-                                       tcon->fsUnixInfo.Capability)))
-               written2 = generic_file_aio_write(iocb, iov, nr_segs, pos);
-       else
-               written2 = cifs_pagecache_writev(iocb, iov, nr_segs, pos,
-                                                cache_ex);
-       /* errors occured during writing - invalidate the page cache */
-       if (written2 < 0) {
-               rc = cifs_invalidate_mapping(inode);
-               if (rc)
-                       written = (ssize_t)rc;
-               else
-                       iocb->ki_pos = saved_pos;
+       if (written > 0 && cinode->clientCanCacheRead) {
+               /*
+                * Windows 7 server can delay breaking level2 oplock if a write
+                * request comes - break it on the client to prevent reading
+                * an old data.
+                */
+               cifs_invalidate_mapping(inode);
+               cFYI(1, "Set no oplock for inode=%p after a write operation",
+                    inode);
+               cinode->clientCanCacheRead = false;
        }
        return written;
 }
@@ -3577,6 +3581,13 @@ void cifs_oplock_break(struct work_struct *work)
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
 
+       if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
+                                               cifs_has_mand_locks(cinode)) {
+               cFYI(1, "Reset oplock to None for inode=%p due to mand locks",
+                    inode);
+               cinode->clientCanCacheRead = false;
+       }
+
        if (inode && S_ISREG(inode->i_mode)) {
                if (cinode->clientCanCacheRead)
                        break_lease(inode, O_RDONLY);