Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...
[~shefty/rdma-dev.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46 static inline int cifs_convert_flags(unsigned int flags)
47 {
48         if ((flags & O_ACCMODE) == O_RDONLY)
49                 return GENERIC_READ;
50         else if ((flags & O_ACCMODE) == O_WRONLY)
51                 return GENERIC_WRITE;
52         else if ((flags & O_ACCMODE) == O_RDWR) {
53                 /* GENERIC_ALL is too much permission to request
54                    can cause unnecessary access denied on create */
55                 /* return GENERIC_ALL; */
56                 return (GENERIC_READ | GENERIC_WRITE);
57         }
58
59         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
61                 FILE_READ_DATA);
62 }
63
64 static u32 cifs_posix_convert_flags(unsigned int flags)
65 {
66         u32 posix_flags = 0;
67
68         if ((flags & O_ACCMODE) == O_RDONLY)
69                 posix_flags = SMB_O_RDONLY;
70         else if ((flags & O_ACCMODE) == O_WRONLY)
71                 posix_flags = SMB_O_WRONLY;
72         else if ((flags & O_ACCMODE) == O_RDWR)
73                 posix_flags = SMB_O_RDWR;
74
75         if (flags & O_CREAT)
76                 posix_flags |= SMB_O_CREAT;
77         if (flags & O_EXCL)
78                 posix_flags |= SMB_O_EXCL;
79         if (flags & O_TRUNC)
80                 posix_flags |= SMB_O_TRUNC;
81         /* be safe and imply O_SYNC for O_DSYNC */
82         if (flags & O_DSYNC)
83                 posix_flags |= SMB_O_SYNC;
84         if (flags & O_DIRECTORY)
85                 posix_flags |= SMB_O_DIRECTORY;
86         if (flags & O_NOFOLLOW)
87                 posix_flags |= SMB_O_NOFOLLOW;
88         if (flags & O_DIRECT)
89                 posix_flags |= SMB_O_DIRECT;
90
91         return posix_flags;
92 }
93
94 static inline int cifs_get_disposition(unsigned int flags)
95 {
96         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97                 return FILE_CREATE;
98         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99                 return FILE_OVERWRITE_IF;
100         else if ((flags & O_CREAT) == O_CREAT)
101                 return FILE_OPEN_IF;
102         else if ((flags & O_TRUNC) == O_TRUNC)
103                 return FILE_OVERWRITE;
104         else
105                 return FILE_OPEN;
106 }
107
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109                         struct super_block *sb, int mode, unsigned int f_flags,
110                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
111 {
112         int rc;
113         FILE_UNIX_BASIC_INFO *presp_data;
114         __u32 posix_flags = 0;
115         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116         struct cifs_fattr fattr;
117         struct tcon_link *tlink;
118         struct cifs_tcon *tcon;
119
120         cFYI(1, "posix open %s", full_path);
121
122         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123         if (presp_data == NULL)
124                 return -ENOMEM;
125
126         tlink = cifs_sb_tlink(cifs_sb);
127         if (IS_ERR(tlink)) {
128                 rc = PTR_ERR(tlink);
129                 goto posix_open_ret;
130         }
131
132         tcon = tlink_tcon(tlink);
133         mode &= ~current_umask();
134
135         posix_flags = cifs_posix_convert_flags(f_flags);
136         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137                              poplock, full_path, cifs_sb->local_nls,
138                              cifs_sb->mnt_cifs_flags &
139                                         CIFS_MOUNT_MAP_SPECIAL_CHR);
140         cifs_put_tlink(tlink);
141
142         if (rc)
143                 goto posix_open_ret;
144
145         if (presp_data->Type == cpu_to_le32(-1))
146                 goto posix_open_ret; /* open ok, caller does qpathinfo */
147
148         if (!pinode)
149                 goto posix_open_ret; /* caller does not need info */
150
151         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
152
153         /* get new inode and set it up */
154         if (*pinode == NULL) {
155                 cifs_fill_uniqueid(sb, &fattr);
156                 *pinode = cifs_iget(sb, &fattr);
157                 if (!*pinode) {
158                         rc = -ENOMEM;
159                         goto posix_open_ret;
160                 }
161         } else {
162                 cifs_fattr_to_inode(*pinode, &fattr);
163         }
164
165 posix_open_ret:
166         kfree(presp_data);
167         return rc;
168 }
169
170 static int
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
173              struct cifs_fid *fid, unsigned int xid)
174 {
175         int rc;
176         int desired_access;
177         int disposition;
178         int create_options = CREATE_NOT_DIR;
179         FILE_ALL_INFO *buf;
180         struct TCP_Server_Info *server = tcon->ses->server;
181
182         if (!server->ops->open)
183                 return -ENOSYS;
184
185         desired_access = cifs_convert_flags(f_flags);
186
187 /*********************************************************************
188  *  open flag mapping table:
189  *
190  *      POSIX Flag            CIFS Disposition
191  *      ----------            ----------------
192  *      O_CREAT               FILE_OPEN_IF
193  *      O_CREAT | O_EXCL      FILE_CREATE
194  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
195  *      O_TRUNC               FILE_OVERWRITE
196  *      none of the above     FILE_OPEN
197  *
198  *      Note that there is not a direct match between disposition
199  *      FILE_SUPERSEDE (ie create whether or not file exists although
200  *      O_CREAT | O_TRUNC is similar but truncates the existing
201  *      file rather than creating a new file as FILE_SUPERSEDE does
202  *      (which uses the attributes / metadata passed in on open call)
203  *?
204  *?  O_SYNC is a reasonable match to CIFS writethrough flag
205  *?  and the read write flags match reasonably.  O_LARGEFILE
206  *?  is irrelevant because largefile support is always used
207  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
208  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
209  *********************************************************************/
210
211         disposition = cifs_get_disposition(f_flags);
212
213         /* BB pass O_SYNC flag through on file attributes .. BB */
214
215         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
216         if (!buf)
217                 return -ENOMEM;
218
219         if (backup_cred(cifs_sb))
220                 create_options |= CREATE_OPEN_BACKUP_INTENT;
221
222         rc = server->ops->open(xid, tcon, full_path, disposition,
223                                desired_access, create_options, fid, oplock, buf,
224                                cifs_sb);
225
226         if (rc)
227                 goto out;
228
229         if (tcon->unix_ext)
230                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
231                                               xid);
232         else
233                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
234                                          xid, &fid->netfid);
235
236 out:
237         kfree(buf);
238         return rc;
239 }
240
241 static bool
242 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
243 {
244         struct cifs_fid_locks *cur;
245         bool has_locks = false;
246
247         down_read(&cinode->lock_sem);
248         list_for_each_entry(cur, &cinode->llist, llist) {
249                 if (!list_empty(&cur->locks)) {
250                         has_locks = true;
251                         break;
252                 }
253         }
254         up_read(&cinode->lock_sem);
255         return has_locks;
256 }
257
258 struct cifsFileInfo *
259 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
260                   struct tcon_link *tlink, __u32 oplock)
261 {
262         struct dentry *dentry = file->f_path.dentry;
263         struct inode *inode = dentry->d_inode;
264         struct cifsInodeInfo *cinode = CIFS_I(inode);
265         struct cifsFileInfo *cfile;
266         struct cifs_fid_locks *fdlocks;
267         struct cifs_tcon *tcon = tlink_tcon(tlink);
268         struct TCP_Server_Info *server = tcon->ses->server;
269
270         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
271         if (cfile == NULL)
272                 return cfile;
273
274         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
275         if (!fdlocks) {
276                 kfree(cfile);
277                 return NULL;
278         }
279
280         INIT_LIST_HEAD(&fdlocks->locks);
281         fdlocks->cfile = cfile;
282         cfile->llist = fdlocks;
283         down_write(&cinode->lock_sem);
284         list_add(&fdlocks->llist, &cinode->llist);
285         up_write(&cinode->lock_sem);
286
287         cfile->count = 1;
288         cfile->pid = current->tgid;
289         cfile->uid = current_fsuid();
290         cfile->dentry = dget(dentry);
291         cfile->f_flags = file->f_flags;
292         cfile->invalidHandle = false;
293         cfile->tlink = cifs_get_tlink(tlink);
294         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
295         mutex_init(&cfile->fh_mutex);
296
297         /*
298          * If the server returned a read oplock and we have mandatory brlocks,
299          * set oplock level to None.
300          */
301         if (oplock == server->vals->oplock_read &&
302                                                 cifs_has_mand_locks(cinode)) {
303                 cFYI(1, "Reset oplock val from read to None due to mand locks");
304                 oplock = 0;
305         }
306
307         spin_lock(&cifs_file_list_lock);
308         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
309                 oplock = fid->pending_open->oplock;
310         list_del(&fid->pending_open->olist);
311
312         server->ops->set_fid(cfile, fid, oplock);
313
314         list_add(&cfile->tlist, &tcon->openFileList);
315         /* if readable file instance put first in list*/
316         if (file->f_mode & FMODE_READ)
317                 list_add(&cfile->flist, &cinode->openFileList);
318         else
319                 list_add_tail(&cfile->flist, &cinode->openFileList);
320         spin_unlock(&cifs_file_list_lock);
321
322         file->private_data = cfile;
323         return cfile;
324 }
325
326 struct cifsFileInfo *
327 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
328 {
329         spin_lock(&cifs_file_list_lock);
330         cifsFileInfo_get_locked(cifs_file);
331         spin_unlock(&cifs_file_list_lock);
332         return cifs_file;
333 }
334
335 /*
336  * Release a reference on the file private data. This may involve closing
337  * the filehandle out on the server. Must be called without holding
338  * cifs_file_list_lock.
339  */
340 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
341 {
342         struct inode *inode = cifs_file->dentry->d_inode;
343         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
344         struct TCP_Server_Info *server = tcon->ses->server;
345         struct cifsInodeInfo *cifsi = CIFS_I(inode);
346         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
347         struct cifsLockInfo *li, *tmp;
348         struct cifs_fid fid;
349         struct cifs_pending_open open;
350
351         spin_lock(&cifs_file_list_lock);
352         if (--cifs_file->count > 0) {
353                 spin_unlock(&cifs_file_list_lock);
354                 return;
355         }
356
357         if (server->ops->get_lease_key)
358                 server->ops->get_lease_key(inode, &fid);
359
360         /* store open in pending opens to make sure we don't miss lease break */
361         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
362
363         /* remove it from the lists */
364         list_del(&cifs_file->flist);
365         list_del(&cifs_file->tlist);
366
367         if (list_empty(&cifsi->openFileList)) {
368                 cFYI(1, "closing last open instance for inode %p",
369                         cifs_file->dentry->d_inode);
370                 /*
371                  * In strict cache mode we need invalidate mapping on the last
372                  * close  because it may cause a error when we open this file
373                  * again and get at least level II oplock.
374                  */
375                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
376                         CIFS_I(inode)->invalid_mapping = true;
377                 cifs_set_oplock_level(cifsi, 0);
378         }
379         spin_unlock(&cifs_file_list_lock);
380
381         cancel_work_sync(&cifs_file->oplock_break);
382
383         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
384                 struct TCP_Server_Info *server = tcon->ses->server;
385                 unsigned int xid;
386
387                 xid = get_xid();
388                 if (server->ops->close)
389                         server->ops->close(xid, tcon, &cifs_file->fid);
390                 _free_xid(xid);
391         }
392
393         cifs_del_pending_open(&open);
394
395         /*
396          * Delete any outstanding lock records. We'll lose them when the file
397          * is closed anyway.
398          */
399         down_write(&cifsi->lock_sem);
400         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
401                 list_del(&li->llist);
402                 cifs_del_lock_waiters(li);
403                 kfree(li);
404         }
405         list_del(&cifs_file->llist->llist);
406         kfree(cifs_file->llist);
407         up_write(&cifsi->lock_sem);
408
409         cifs_put_tlink(cifs_file->tlink);
410         dput(cifs_file->dentry);
411         kfree(cifs_file);
412 }
413
414 int cifs_open(struct inode *inode, struct file *file)
415
416 {
417         int rc = -EACCES;
418         unsigned int xid;
419         __u32 oplock;
420         struct cifs_sb_info *cifs_sb;
421         struct TCP_Server_Info *server;
422         struct cifs_tcon *tcon;
423         struct tcon_link *tlink;
424         struct cifsFileInfo *cfile = NULL;
425         char *full_path = NULL;
426         bool posix_open_ok = false;
427         struct cifs_fid fid;
428         struct cifs_pending_open open;
429
430         xid = get_xid();
431
432         cifs_sb = CIFS_SB(inode->i_sb);
433         tlink = cifs_sb_tlink(cifs_sb);
434         if (IS_ERR(tlink)) {
435                 free_xid(xid);
436                 return PTR_ERR(tlink);
437         }
438         tcon = tlink_tcon(tlink);
439         server = tcon->ses->server;
440
441         full_path = build_path_from_dentry(file->f_path.dentry);
442         if (full_path == NULL) {
443                 rc = -ENOMEM;
444                 goto out;
445         }
446
447         cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
448                  inode, file->f_flags, full_path);
449
450         if (server->oplocks)
451                 oplock = REQ_OPLOCK;
452         else
453                 oplock = 0;
454
455         if (!tcon->broken_posix_open && tcon->unix_ext &&
456             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
457                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
458                 /* can not refresh inode info since size could be stale */
459                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
460                                 cifs_sb->mnt_file_mode /* ignored */,
461                                 file->f_flags, &oplock, &fid.netfid, xid);
462                 if (rc == 0) {
463                         cFYI(1, "posix open succeeded");
464                         posix_open_ok = true;
465                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
466                         if (tcon->ses->serverNOS)
467                                 cERROR(1, "server %s of type %s returned"
468                                            " unexpected error on SMB posix open"
469                                            ", disabling posix open support."
470                                            " Check if server update available.",
471                                            tcon->ses->serverName,
472                                            tcon->ses->serverNOS);
473                         tcon->broken_posix_open = true;
474                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
475                          (rc != -EOPNOTSUPP)) /* path not found or net err */
476                         goto out;
477                 /*
478                  * Else fallthrough to retry open the old way on network i/o
479                  * or DFS errors.
480                  */
481         }
482
483         if (server->ops->get_lease_key)
484                 server->ops->get_lease_key(inode, &fid);
485
486         cifs_add_pending_open(&fid, tlink, &open);
487
488         if (!posix_open_ok) {
489                 if (server->ops->get_lease_key)
490                         server->ops->get_lease_key(inode, &fid);
491
492                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
493                                   file->f_flags, &oplock, &fid, xid);
494                 if (rc) {
495                         cifs_del_pending_open(&open);
496                         goto out;
497                 }
498         }
499
500         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
501         if (cfile == NULL) {
502                 if (server->ops->close)
503                         server->ops->close(xid, tcon, &fid);
504                 cifs_del_pending_open(&open);
505                 rc = -ENOMEM;
506                 goto out;
507         }
508
509         cifs_fscache_set_inode_cookie(inode, file);
510
511         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
512                 /*
513                  * Time to set mode which we can not set earlier due to
514                  * problems creating new read-only files.
515                  */
516                 struct cifs_unix_set_info_args args = {
517                         .mode   = inode->i_mode,
518                         .uid    = INVALID_UID, /* no change */
519                         .gid    = INVALID_GID, /* no change */
520                         .ctime  = NO_CHANGE_64,
521                         .atime  = NO_CHANGE_64,
522                         .mtime  = NO_CHANGE_64,
523                         .device = 0,
524                 };
525                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
526                                        cfile->pid);
527         }
528
529 out:
530         kfree(full_path);
531         free_xid(xid);
532         cifs_put_tlink(tlink);
533         return rc;
534 }
535
536 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
537
538 /*
539  * Try to reacquire byte range locks that were released when session
540  * to server was lost.
541  */
542 static int
543 cifs_relock_file(struct cifsFileInfo *cfile)
544 {
545         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
546         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
547         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
548         int rc = 0;
549
550         /* we are going to update can_cache_brlcks here - need a write access */
551         down_write(&cinode->lock_sem);
552         if (cinode->can_cache_brlcks) {
553                 /* can cache locks - no need to push them */
554                 up_write(&cinode->lock_sem);
555                 return rc;
556         }
557
558         if (cap_unix(tcon->ses) &&
559             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
560             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
561                 rc = cifs_push_posix_locks(cfile);
562         else
563                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
564
565         up_write(&cinode->lock_sem);
566         return rc;
567 }
568
569 static int
570 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
571 {
572         int rc = -EACCES;
573         unsigned int xid;
574         __u32 oplock;
575         struct cifs_sb_info *cifs_sb;
576         struct cifs_tcon *tcon;
577         struct TCP_Server_Info *server;
578         struct cifsInodeInfo *cinode;
579         struct inode *inode;
580         char *full_path = NULL;
581         int desired_access;
582         int disposition = FILE_OPEN;
583         int create_options = CREATE_NOT_DIR;
584         struct cifs_fid fid;
585
586         xid = get_xid();
587         mutex_lock(&cfile->fh_mutex);
588         if (!cfile->invalidHandle) {
589                 mutex_unlock(&cfile->fh_mutex);
590                 rc = 0;
591                 free_xid(xid);
592                 return rc;
593         }
594
595         inode = cfile->dentry->d_inode;
596         cifs_sb = CIFS_SB(inode->i_sb);
597         tcon = tlink_tcon(cfile->tlink);
598         server = tcon->ses->server;
599
600         /*
601          * Can not grab rename sem here because various ops, including those
602          * that already have the rename sem can end up causing writepage to get
603          * called and if the server was down that means we end up here, and we
604          * can never tell if the caller already has the rename_sem.
605          */
606         full_path = build_path_from_dentry(cfile->dentry);
607         if (full_path == NULL) {
608                 rc = -ENOMEM;
609                 mutex_unlock(&cfile->fh_mutex);
610                 free_xid(xid);
611                 return rc;
612         }
613
614         cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
615              full_path);
616
617         if (tcon->ses->server->oplocks)
618                 oplock = REQ_OPLOCK;
619         else
620                 oplock = 0;
621
622         if (tcon->unix_ext && cap_unix(tcon->ses) &&
623             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
624                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
625                 /*
626                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
627                  * original open. Must mask them off for a reopen.
628                  */
629                 unsigned int oflags = cfile->f_flags &
630                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
631
632                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
633                                      cifs_sb->mnt_file_mode /* ignored */,
634                                      oflags, &oplock, &fid.netfid, xid);
635                 if (rc == 0) {
636                         cFYI(1, "posix reopen succeeded");
637                         goto reopen_success;
638                 }
639                 /*
640                  * fallthrough to retry open the old way on errors, especially
641                  * in the reconnect path it is important to retry hard
642                  */
643         }
644
645         desired_access = cifs_convert_flags(cfile->f_flags);
646
647         if (backup_cred(cifs_sb))
648                 create_options |= CREATE_OPEN_BACKUP_INTENT;
649
650         if (server->ops->get_lease_key)
651                 server->ops->get_lease_key(inode, &fid);
652
653         /*
654          * Can not refresh inode by passing in file_info buf to be returned by
655          * CIFSSMBOpen and then calling get_inode_info with returned buf since
656          * file might have write behind data that needs to be flushed and server
657          * version of file size can be stale. If we knew for sure that inode was
658          * not dirty locally we could do this.
659          */
660         rc = server->ops->open(xid, tcon, full_path, disposition,
661                                desired_access, create_options, &fid, &oplock,
662                                NULL, cifs_sb);
663         if (rc) {
664                 mutex_unlock(&cfile->fh_mutex);
665                 cFYI(1, "cifs_reopen returned 0x%x", rc);
666                 cFYI(1, "oplock: %d", oplock);
667                 goto reopen_error_exit;
668         }
669
670 reopen_success:
671         cfile->invalidHandle = false;
672         mutex_unlock(&cfile->fh_mutex);
673         cinode = CIFS_I(inode);
674
675         if (can_flush) {
676                 rc = filemap_write_and_wait(inode->i_mapping);
677                 mapping_set_error(inode->i_mapping, rc);
678
679                 if (tcon->unix_ext)
680                         rc = cifs_get_inode_info_unix(&inode, full_path,
681                                                       inode->i_sb, xid);
682                 else
683                         rc = cifs_get_inode_info(&inode, full_path, NULL,
684                                                  inode->i_sb, xid, NULL);
685         }
686         /*
687          * Else we are writing out data to server already and could deadlock if
688          * we tried to flush data, and since we do not know if we have data that
689          * would invalidate the current end of file on the server we can not go
690          * to the server to get the new inode info.
691          */
692
693         server->ops->set_fid(cfile, &fid, oplock);
694         cifs_relock_file(cfile);
695
696 reopen_error_exit:
697         kfree(full_path);
698         free_xid(xid);
699         return rc;
700 }
701
702 int cifs_close(struct inode *inode, struct file *file)
703 {
704         if (file->private_data != NULL) {
705                 cifsFileInfo_put(file->private_data);
706                 file->private_data = NULL;
707         }
708
709         /* return code from the ->release op is always ignored */
710         return 0;
711 }
712
713 int cifs_closedir(struct inode *inode, struct file *file)
714 {
715         int rc = 0;
716         unsigned int xid;
717         struct cifsFileInfo *cfile = file->private_data;
718         struct cifs_tcon *tcon;
719         struct TCP_Server_Info *server;
720         char *buf;
721
722         cFYI(1, "Closedir inode = 0x%p", inode);
723
724         if (cfile == NULL)
725                 return rc;
726
727         xid = get_xid();
728         tcon = tlink_tcon(cfile->tlink);
729         server = tcon->ses->server;
730
731         cFYI(1, "Freeing private data in close dir");
732         spin_lock(&cifs_file_list_lock);
733         if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
734                 cfile->invalidHandle = true;
735                 spin_unlock(&cifs_file_list_lock);
736                 if (server->ops->close_dir)
737                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
738                 else
739                         rc = -ENOSYS;
740                 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
741                 /* not much we can do if it fails anyway, ignore rc */
742                 rc = 0;
743         } else
744                 spin_unlock(&cifs_file_list_lock);
745
746         buf = cfile->srch_inf.ntwrk_buf_start;
747         if (buf) {
748                 cFYI(1, "closedir free smb buf in srch struct");
749                 cfile->srch_inf.ntwrk_buf_start = NULL;
750                 if (cfile->srch_inf.smallBuf)
751                         cifs_small_buf_release(buf);
752                 else
753                         cifs_buf_release(buf);
754         }
755
756         cifs_put_tlink(cfile->tlink);
757         kfree(file->private_data);
758         file->private_data = NULL;
759         /* BB can we lock the filestruct while this is going on? */
760         free_xid(xid);
761         return rc;
762 }
763
764 static struct cifsLockInfo *
765 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
766 {
767         struct cifsLockInfo *lock =
768                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
769         if (!lock)
770                 return lock;
771         lock->offset = offset;
772         lock->length = length;
773         lock->type = type;
774         lock->pid = current->tgid;
775         INIT_LIST_HEAD(&lock->blist);
776         init_waitqueue_head(&lock->block_q);
777         return lock;
778 }
779
780 void
781 cifs_del_lock_waiters(struct cifsLockInfo *lock)
782 {
783         struct cifsLockInfo *li, *tmp;
784         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
785                 list_del_init(&li->blist);
786                 wake_up(&li->block_q);
787         }
788 }
789
790 #define CIFS_LOCK_OP    0
791 #define CIFS_READ_OP    1
792 #define CIFS_WRITE_OP   2
793
794 /* @rw_check : 0 - no op, 1 - read, 2 - write */
795 static bool
796 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
797                             __u64 length, __u8 type, struct cifsFileInfo *cfile,
798                             struct cifsLockInfo **conf_lock, int rw_check)
799 {
800         struct cifsLockInfo *li;
801         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
802         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
803
804         list_for_each_entry(li, &fdlocks->locks, llist) {
805                 if (offset + length <= li->offset ||
806                     offset >= li->offset + li->length)
807                         continue;
808                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
809                     server->ops->compare_fids(cfile, cur_cfile)) {
810                         /* shared lock prevents write op through the same fid */
811                         if (!(li->type & server->vals->shared_lock_type) ||
812                             rw_check != CIFS_WRITE_OP)
813                                 continue;
814                 }
815                 if ((type & server->vals->shared_lock_type) &&
816                     ((server->ops->compare_fids(cfile, cur_cfile) &&
817                      current->tgid == li->pid) || type == li->type))
818                         continue;
819                 if (conf_lock)
820                         *conf_lock = li;
821                 return true;
822         }
823         return false;
824 }
825
826 bool
827 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
828                         __u8 type, struct cifsLockInfo **conf_lock,
829                         int rw_check)
830 {
831         bool rc = false;
832         struct cifs_fid_locks *cur;
833         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
834
835         list_for_each_entry(cur, &cinode->llist, llist) {
836                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
837                                                  cfile, conf_lock, rw_check);
838                 if (rc)
839                         break;
840         }
841
842         return rc;
843 }
844
845 /*
846  * Check if there is another lock that prevents us to set the lock (mandatory
847  * style). If such a lock exists, update the flock structure with its
848  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
849  * or leave it the same if we can't. Returns 0 if we don't need to request to
850  * the server or 1 otherwise.
851  */
852 static int
853 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
854                __u8 type, struct file_lock *flock)
855 {
856         int rc = 0;
857         struct cifsLockInfo *conf_lock;
858         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
859         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
860         bool exist;
861
862         down_read(&cinode->lock_sem);
863
864         exist = cifs_find_lock_conflict(cfile, offset, length, type,
865                                         &conf_lock, CIFS_LOCK_OP);
866         if (exist) {
867                 flock->fl_start = conf_lock->offset;
868                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
869                 flock->fl_pid = conf_lock->pid;
870                 if (conf_lock->type & server->vals->shared_lock_type)
871                         flock->fl_type = F_RDLCK;
872                 else
873                         flock->fl_type = F_WRLCK;
874         } else if (!cinode->can_cache_brlcks)
875                 rc = 1;
876         else
877                 flock->fl_type = F_UNLCK;
878
879         up_read(&cinode->lock_sem);
880         return rc;
881 }
882
883 static void
884 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
885 {
886         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
887         down_write(&cinode->lock_sem);
888         list_add_tail(&lock->llist, &cfile->llist->locks);
889         up_write(&cinode->lock_sem);
890 }
891
892 /*
893  * Set the byte-range lock (mandatory style). Returns:
894  * 1) 0, if we set the lock and don't need to request to the server;
895  * 2) 1, if no locks prevent us but we need to request to the server;
896  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
897  */
898 static int
899 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
900                  bool wait)
901 {
902         struct cifsLockInfo *conf_lock;
903         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
904         bool exist;
905         int rc = 0;
906
907 try_again:
908         exist = false;
909         down_write(&cinode->lock_sem);
910
911         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
912                                         lock->type, &conf_lock, CIFS_LOCK_OP);
913         if (!exist && cinode->can_cache_brlcks) {
914                 list_add_tail(&lock->llist, &cfile->llist->locks);
915                 up_write(&cinode->lock_sem);
916                 return rc;
917         }
918
919         if (!exist)
920                 rc = 1;
921         else if (!wait)
922                 rc = -EACCES;
923         else {
924                 list_add_tail(&lock->blist, &conf_lock->blist);
925                 up_write(&cinode->lock_sem);
926                 rc = wait_event_interruptible(lock->block_q,
927                                         (lock->blist.prev == &lock->blist) &&
928                                         (lock->blist.next == &lock->blist));
929                 if (!rc)
930                         goto try_again;
931                 down_write(&cinode->lock_sem);
932                 list_del_init(&lock->blist);
933         }
934
935         up_write(&cinode->lock_sem);
936         return rc;
937 }
938
939 /*
940  * Check if there is another lock that prevents us to set the lock (posix
941  * style). If such a lock exists, update the flock structure with its
942  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
943  * or leave it the same if we can't. Returns 0 if we don't need to request to
944  * the server or 1 otherwise.
945  */
946 static int
947 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
948 {
949         int rc = 0;
950         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
951         unsigned char saved_type = flock->fl_type;
952
953         if ((flock->fl_flags & FL_POSIX) == 0)
954                 return 1;
955
956         down_read(&cinode->lock_sem);
957         posix_test_lock(file, flock);
958
959         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
960                 flock->fl_type = saved_type;
961                 rc = 1;
962         }
963
964         up_read(&cinode->lock_sem);
965         return rc;
966 }
967
968 /*
969  * Set the byte-range lock (posix style). Returns:
970  * 1) 0, if we set the lock and don't need to request to the server;
971  * 2) 1, if we need to request to the server;
972  * 3) <0, if the error occurs while setting the lock.
973  */
974 static int
975 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
976 {
977         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
978         int rc = 1;
979
980         if ((flock->fl_flags & FL_POSIX) == 0)
981                 return rc;
982
983 try_again:
984         down_write(&cinode->lock_sem);
985         if (!cinode->can_cache_brlcks) {
986                 up_write(&cinode->lock_sem);
987                 return rc;
988         }
989
990         rc = posix_lock_file(file, flock, NULL);
991         up_write(&cinode->lock_sem);
992         if (rc == FILE_LOCK_DEFERRED) {
993                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
994                 if (!rc)
995                         goto try_again;
996                 locks_delete_block(flock);
997         }
998         return rc;
999 }
1000
1001 int
1002 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1003 {
1004         unsigned int xid;
1005         int rc = 0, stored_rc;
1006         struct cifsLockInfo *li, *tmp;
1007         struct cifs_tcon *tcon;
1008         unsigned int num, max_num, max_buf;
1009         LOCKING_ANDX_RANGE *buf, *cur;
1010         int types[] = {LOCKING_ANDX_LARGE_FILES,
1011                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1012         int i;
1013
1014         xid = get_xid();
1015         tcon = tlink_tcon(cfile->tlink);
1016
1017         /*
1018          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1019          * and check it for zero before using.
1020          */
1021         max_buf = tcon->ses->server->maxBuf;
1022         if (!max_buf) {
1023                 free_xid(xid);
1024                 return -EINVAL;
1025         }
1026
1027         max_num = (max_buf - sizeof(struct smb_hdr)) /
1028                                                 sizeof(LOCKING_ANDX_RANGE);
1029         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1030         if (!buf) {
1031                 free_xid(xid);
1032                 return -ENOMEM;
1033         }
1034
1035         for (i = 0; i < 2; i++) {
1036                 cur = buf;
1037                 num = 0;
1038                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1039                         if (li->type != types[i])
1040                                 continue;
1041                         cur->Pid = cpu_to_le16(li->pid);
1042                         cur->LengthLow = cpu_to_le32((u32)li->length);
1043                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1044                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1045                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1046                         if (++num == max_num) {
1047                                 stored_rc = cifs_lockv(xid, tcon,
1048                                                        cfile->fid.netfid,
1049                                                        (__u8)li->type, 0, num,
1050                                                        buf);
1051                                 if (stored_rc)
1052                                         rc = stored_rc;
1053                                 cur = buf;
1054                                 num = 0;
1055                         } else
1056                                 cur++;
1057                 }
1058
1059                 if (num) {
1060                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1061                                                (__u8)types[i], 0, num, buf);
1062                         if (stored_rc)
1063                                 rc = stored_rc;
1064                 }
1065         }
1066
1067         kfree(buf);
1068         free_xid(xid);
1069         return rc;
1070 }
1071
1072 /* copied from fs/locks.c with a name change */
1073 #define cifs_for_each_lock(inode, lockp) \
1074         for (lockp = &inode->i_flock; *lockp != NULL; \
1075              lockp = &(*lockp)->fl_next)
1076
1077 struct lock_to_push {
1078         struct list_head llist;
1079         __u64 offset;
1080         __u64 length;
1081         __u32 pid;
1082         __u16 netfid;
1083         __u8 type;
1084 };
1085
1086 static int
1087 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1088 {
1089         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1090         struct file_lock *flock, **before;
1091         unsigned int count = 0, i = 0;
1092         int rc = 0, xid, type;
1093         struct list_head locks_to_send, *el;
1094         struct lock_to_push *lck, *tmp;
1095         __u64 length;
1096
1097         xid = get_xid();
1098
1099         lock_flocks();
1100         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1101                 if ((*before)->fl_flags & FL_POSIX)
1102                         count++;
1103         }
1104         unlock_flocks();
1105
1106         INIT_LIST_HEAD(&locks_to_send);
1107
1108         /*
1109          * Allocating count locks is enough because no FL_POSIX locks can be
1110          * added to the list while we are holding cinode->lock_sem that
1111          * protects locking operations of this inode.
1112          */
1113         for (; i < count; i++) {
1114                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1115                 if (!lck) {
1116                         rc = -ENOMEM;
1117                         goto err_out;
1118                 }
1119                 list_add_tail(&lck->llist, &locks_to_send);
1120         }
1121
1122         el = locks_to_send.next;
1123         lock_flocks();
1124         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1125                 flock = *before;
1126                 if ((flock->fl_flags & FL_POSIX) == 0)
1127                         continue;
1128                 if (el == &locks_to_send) {
1129                         /*
1130                          * The list ended. We don't have enough allocated
1131                          * structures - something is really wrong.
1132                          */
1133                         cERROR(1, "Can't push all brlocks!");
1134                         break;
1135                 }
1136                 length = 1 + flock->fl_end - flock->fl_start;
1137                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1138                         type = CIFS_RDLCK;
1139                 else
1140                         type = CIFS_WRLCK;
1141                 lck = list_entry(el, struct lock_to_push, llist);
1142                 lck->pid = flock->fl_pid;
1143                 lck->netfid = cfile->fid.netfid;
1144                 lck->length = length;
1145                 lck->type = type;
1146                 lck->offset = flock->fl_start;
1147                 el = el->next;
1148         }
1149         unlock_flocks();
1150
1151         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1152                 int stored_rc;
1153
1154                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1155                                              lck->offset, lck->length, NULL,
1156                                              lck->type, 0);
1157                 if (stored_rc)
1158                         rc = stored_rc;
1159                 list_del(&lck->llist);
1160                 kfree(lck);
1161         }
1162
1163 out:
1164         free_xid(xid);
1165         return rc;
1166 err_out:
1167         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1168                 list_del(&lck->llist);
1169                 kfree(lck);
1170         }
1171         goto out;
1172 }
1173
1174 static int
1175 cifs_push_locks(struct cifsFileInfo *cfile)
1176 {
1177         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1178         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1179         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1180         int rc = 0;
1181
1182         /* we are going to update can_cache_brlcks here - need a write access */
1183         down_write(&cinode->lock_sem);
1184         if (!cinode->can_cache_brlcks) {
1185                 up_write(&cinode->lock_sem);
1186                 return rc;
1187         }
1188
1189         if (cap_unix(tcon->ses) &&
1190             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1191             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1192                 rc = cifs_push_posix_locks(cfile);
1193         else
1194                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1195
1196         cinode->can_cache_brlcks = false;
1197         up_write(&cinode->lock_sem);
1198         return rc;
1199 }
1200
1201 static void
1202 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1203                 bool *wait_flag, struct TCP_Server_Info *server)
1204 {
1205         if (flock->fl_flags & FL_POSIX)
1206                 cFYI(1, "Posix");
1207         if (flock->fl_flags & FL_FLOCK)
1208                 cFYI(1, "Flock");
1209         if (flock->fl_flags & FL_SLEEP) {
1210                 cFYI(1, "Blocking lock");
1211                 *wait_flag = true;
1212         }
1213         if (flock->fl_flags & FL_ACCESS)
1214                 cFYI(1, "Process suspended by mandatory locking - "
1215                         "not implemented yet");
1216         if (flock->fl_flags & FL_LEASE)
1217                 cFYI(1, "Lease on file - not implemented yet");
1218         if (flock->fl_flags &
1219             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1220                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1221                 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1222
1223         *type = server->vals->large_lock_type;
1224         if (flock->fl_type == F_WRLCK) {
1225                 cFYI(1, "F_WRLCK ");
1226                 *type |= server->vals->exclusive_lock_type;
1227                 *lock = 1;
1228         } else if (flock->fl_type == F_UNLCK) {
1229                 cFYI(1, "F_UNLCK");
1230                 *type |= server->vals->unlock_lock_type;
1231                 *unlock = 1;
1232                 /* Check if unlock includes more than one lock range */
1233         } else if (flock->fl_type == F_RDLCK) {
1234                 cFYI(1, "F_RDLCK");
1235                 *type |= server->vals->shared_lock_type;
1236                 *lock = 1;
1237         } else if (flock->fl_type == F_EXLCK) {
1238                 cFYI(1, "F_EXLCK");
1239                 *type |= server->vals->exclusive_lock_type;
1240                 *lock = 1;
1241         } else if (flock->fl_type == F_SHLCK) {
1242                 cFYI(1, "F_SHLCK");
1243                 *type |= server->vals->shared_lock_type;
1244                 *lock = 1;
1245         } else
1246                 cFYI(1, "Unknown type of lock");
1247 }
1248
1249 static int
1250 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1251            bool wait_flag, bool posix_lck, unsigned int xid)
1252 {
1253         int rc = 0;
1254         __u64 length = 1 + flock->fl_end - flock->fl_start;
1255         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1256         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1257         struct TCP_Server_Info *server = tcon->ses->server;
1258         __u16 netfid = cfile->fid.netfid;
1259
1260         if (posix_lck) {
1261                 int posix_lock_type;
1262
1263                 rc = cifs_posix_lock_test(file, flock);
1264                 if (!rc)
1265                         return rc;
1266
1267                 if (type & server->vals->shared_lock_type)
1268                         posix_lock_type = CIFS_RDLCK;
1269                 else
1270                         posix_lock_type = CIFS_WRLCK;
1271                 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1272                                       flock->fl_start, length, flock,
1273                                       posix_lock_type, wait_flag);
1274                 return rc;
1275         }
1276
1277         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1278         if (!rc)
1279                 return rc;
1280
1281         /* BB we could chain these into one lock request BB */
1282         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1283                                     1, 0, false);
1284         if (rc == 0) {
1285                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1286                                             type, 0, 1, false);
1287                 flock->fl_type = F_UNLCK;
1288                 if (rc != 0)
1289                         cERROR(1, "Error unlocking previously locked "
1290                                   "range %d during test of lock", rc);
1291                 return 0;
1292         }
1293
1294         if (type & server->vals->shared_lock_type) {
1295                 flock->fl_type = F_WRLCK;
1296                 return 0;
1297         }
1298
1299         type &= ~server->vals->exclusive_lock_type;
1300
1301         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1302                                     type | server->vals->shared_lock_type,
1303                                     1, 0, false);
1304         if (rc == 0) {
1305                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1306                         type | server->vals->shared_lock_type, 0, 1, false);
1307                 flock->fl_type = F_RDLCK;
1308                 if (rc != 0)
1309                         cERROR(1, "Error unlocking previously locked "
1310                                   "range %d during test of lock", rc);
1311         } else
1312                 flock->fl_type = F_WRLCK;
1313
1314         return 0;
1315 }
1316
1317 void
1318 cifs_move_llist(struct list_head *source, struct list_head *dest)
1319 {
1320         struct list_head *li, *tmp;
1321         list_for_each_safe(li, tmp, source)
1322                 list_move(li, dest);
1323 }
1324
1325 void
1326 cifs_free_llist(struct list_head *llist)
1327 {
1328         struct cifsLockInfo *li, *tmp;
1329         list_for_each_entry_safe(li, tmp, llist, llist) {
1330                 cifs_del_lock_waiters(li);
1331                 list_del(&li->llist);
1332                 kfree(li);
1333         }
1334 }
1335
1336 int
1337 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1338                   unsigned int xid)
1339 {
1340         int rc = 0, stored_rc;
1341         int types[] = {LOCKING_ANDX_LARGE_FILES,
1342                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1343         unsigned int i;
1344         unsigned int max_num, num, max_buf;
1345         LOCKING_ANDX_RANGE *buf, *cur;
1346         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1347         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1348         struct cifsLockInfo *li, *tmp;
1349         __u64 length = 1 + flock->fl_end - flock->fl_start;
1350         struct list_head tmp_llist;
1351
1352         INIT_LIST_HEAD(&tmp_llist);
1353
1354         /*
1355          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1356          * and check it for zero before using.
1357          */
1358         max_buf = tcon->ses->server->maxBuf;
1359         if (!max_buf)
1360                 return -EINVAL;
1361
1362         max_num = (max_buf - sizeof(struct smb_hdr)) /
1363                                                 sizeof(LOCKING_ANDX_RANGE);
1364         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1365         if (!buf)
1366                 return -ENOMEM;
1367
1368         down_write(&cinode->lock_sem);
1369         for (i = 0; i < 2; i++) {
1370                 cur = buf;
1371                 num = 0;
1372                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1373                         if (flock->fl_start > li->offset ||
1374                             (flock->fl_start + length) <
1375                             (li->offset + li->length))
1376                                 continue;
1377                         if (current->tgid != li->pid)
1378                                 continue;
1379                         if (types[i] != li->type)
1380                                 continue;
1381                         if (cinode->can_cache_brlcks) {
1382                                 /*
1383                                  * We can cache brlock requests - simply remove
1384                                  * a lock from the file's list.
1385                                  */
1386                                 list_del(&li->llist);
1387                                 cifs_del_lock_waiters(li);
1388                                 kfree(li);
1389                                 continue;
1390                         }
1391                         cur->Pid = cpu_to_le16(li->pid);
1392                         cur->LengthLow = cpu_to_le32((u32)li->length);
1393                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1394                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1395                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1396                         /*
1397                          * We need to save a lock here to let us add it again to
1398                          * the file's list if the unlock range request fails on
1399                          * the server.
1400                          */
1401                         list_move(&li->llist, &tmp_llist);
1402                         if (++num == max_num) {
1403                                 stored_rc = cifs_lockv(xid, tcon,
1404                                                        cfile->fid.netfid,
1405                                                        li->type, num, 0, buf);
1406                                 if (stored_rc) {
1407                                         /*
1408                                          * We failed on the unlock range
1409                                          * request - add all locks from the tmp
1410                                          * list to the head of the file's list.
1411                                          */
1412                                         cifs_move_llist(&tmp_llist,
1413                                                         &cfile->llist->locks);
1414                                         rc = stored_rc;
1415                                 } else
1416                                         /*
1417                                          * The unlock range request succeed -
1418                                          * free the tmp list.
1419                                          */
1420                                         cifs_free_llist(&tmp_llist);
1421                                 cur = buf;
1422                                 num = 0;
1423                         } else
1424                                 cur++;
1425                 }
1426                 if (num) {
1427                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1428                                                types[i], num, 0, buf);
1429                         if (stored_rc) {
1430                                 cifs_move_llist(&tmp_llist,
1431                                                 &cfile->llist->locks);
1432                                 rc = stored_rc;
1433                         } else
1434                                 cifs_free_llist(&tmp_llist);
1435                 }
1436         }
1437
1438         up_write(&cinode->lock_sem);
1439         kfree(buf);
1440         return rc;
1441 }
1442
1443 static int
1444 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1445            bool wait_flag, bool posix_lck, int lock, int unlock,
1446            unsigned int xid)
1447 {
1448         int rc = 0;
1449         __u64 length = 1 + flock->fl_end - flock->fl_start;
1450         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1451         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1452         struct TCP_Server_Info *server = tcon->ses->server;
1453         struct inode *inode = cfile->dentry->d_inode;
1454
1455         if (posix_lck) {
1456                 int posix_lock_type;
1457
1458                 rc = cifs_posix_lock_set(file, flock);
1459                 if (!rc || rc < 0)
1460                         return rc;
1461
1462                 if (type & server->vals->shared_lock_type)
1463                         posix_lock_type = CIFS_RDLCK;
1464                 else
1465                         posix_lock_type = CIFS_WRLCK;
1466
1467                 if (unlock == 1)
1468                         posix_lock_type = CIFS_UNLCK;
1469
1470                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1471                                       current->tgid, flock->fl_start, length,
1472                                       NULL, posix_lock_type, wait_flag);
1473                 goto out;
1474         }
1475
1476         if (lock) {
1477                 struct cifsLockInfo *lock;
1478
1479                 lock = cifs_lock_init(flock->fl_start, length, type);
1480                 if (!lock)
1481                         return -ENOMEM;
1482
1483                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1484                 if (rc < 0) {
1485                         kfree(lock);
1486                         return rc;
1487                 }
1488                 if (!rc)
1489                         goto out;
1490
1491                 /*
1492                  * Windows 7 server can delay breaking lease from read to None
1493                  * if we set a byte-range lock on a file - break it explicitly
1494                  * before sending the lock to the server to be sure the next
1495                  * read won't conflict with non-overlapted locks due to
1496                  * pagereading.
1497                  */
1498                 if (!CIFS_I(inode)->clientCanCacheAll &&
1499                                         CIFS_I(inode)->clientCanCacheRead) {
1500                         cifs_invalidate_mapping(inode);
1501                         cFYI(1, "Set no oplock for inode=%p due to mand locks",
1502                              inode);
1503                         CIFS_I(inode)->clientCanCacheRead = false;
1504                 }
1505
1506                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1507                                             type, 1, 0, wait_flag);
1508                 if (rc) {
1509                         kfree(lock);
1510                         return rc;
1511                 }
1512
1513                 cifs_lock_add(cfile, lock);
1514         } else if (unlock)
1515                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1516
1517 out:
1518         if (flock->fl_flags & FL_POSIX)
1519                 posix_lock_file_wait(file, flock);
1520         return rc;
1521 }
1522
1523 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1524 {
1525         int rc, xid;
1526         int lock = 0, unlock = 0;
1527         bool wait_flag = false;
1528         bool posix_lck = false;
1529         struct cifs_sb_info *cifs_sb;
1530         struct cifs_tcon *tcon;
1531         struct cifsInodeInfo *cinode;
1532         struct cifsFileInfo *cfile;
1533         __u16 netfid;
1534         __u32 type;
1535
1536         rc = -EACCES;
1537         xid = get_xid();
1538
1539         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1540                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1541                 flock->fl_start, flock->fl_end);
1542
1543         cfile = (struct cifsFileInfo *)file->private_data;
1544         tcon = tlink_tcon(cfile->tlink);
1545
1546         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1547                         tcon->ses->server);
1548
1549         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1550         netfid = cfile->fid.netfid;
1551         cinode = CIFS_I(file->f_path.dentry->d_inode);
1552
1553         if (cap_unix(tcon->ses) &&
1554             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1555             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1556                 posix_lck = true;
1557         /*
1558          * BB add code here to normalize offset and length to account for
1559          * negative length which we can not accept over the wire.
1560          */
1561         if (IS_GETLK(cmd)) {
1562                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1563                 free_xid(xid);
1564                 return rc;
1565         }
1566
1567         if (!lock && !unlock) {
1568                 /*
1569                  * if no lock or unlock then nothing to do since we do not
1570                  * know what it is
1571                  */
1572                 free_xid(xid);
1573                 return -EOPNOTSUPP;
1574         }
1575
1576         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1577                         xid);
1578         free_xid(xid);
1579         return rc;
1580 }
1581
1582 /*
1583  * update the file size (if needed) after a write. Should be called with
1584  * the inode->i_lock held
1585  */
1586 void
1587 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1588                       unsigned int bytes_written)
1589 {
1590         loff_t end_of_write = offset + bytes_written;
1591
1592         if (end_of_write > cifsi->server_eof)
1593                 cifsi->server_eof = end_of_write;
1594 }
1595
1596 static ssize_t
1597 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1598            size_t write_size, loff_t *offset)
1599 {
1600         int rc = 0;
1601         unsigned int bytes_written = 0;
1602         unsigned int total_written;
1603         struct cifs_sb_info *cifs_sb;
1604         struct cifs_tcon *tcon;
1605         struct TCP_Server_Info *server;
1606         unsigned int xid;
1607         struct dentry *dentry = open_file->dentry;
1608         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1609         struct cifs_io_parms io_parms;
1610
1611         cifs_sb = CIFS_SB(dentry->d_sb);
1612
1613         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1614              *offset, dentry->d_name.name);
1615
1616         tcon = tlink_tcon(open_file->tlink);
1617         server = tcon->ses->server;
1618
1619         if (!server->ops->sync_write)
1620                 return -ENOSYS;
1621
1622         xid = get_xid();
1623
1624         for (total_written = 0; write_size > total_written;
1625              total_written += bytes_written) {
1626                 rc = -EAGAIN;
1627                 while (rc == -EAGAIN) {
1628                         struct kvec iov[2];
1629                         unsigned int len;
1630
1631                         if (open_file->invalidHandle) {
1632                                 /* we could deadlock if we called
1633                                    filemap_fdatawait from here so tell
1634                                    reopen_file not to flush data to
1635                                    server now */
1636                                 rc = cifs_reopen_file(open_file, false);
1637                                 if (rc != 0)
1638                                         break;
1639                         }
1640
1641                         len = min((size_t)cifs_sb->wsize,
1642                                   write_size - total_written);
1643                         /* iov[0] is reserved for smb header */
1644                         iov[1].iov_base = (char *)write_data + total_written;
1645                         iov[1].iov_len = len;
1646                         io_parms.pid = pid;
1647                         io_parms.tcon = tcon;
1648                         io_parms.offset = *offset;
1649                         io_parms.length = len;
1650                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1651                                                      &bytes_written, iov, 1);
1652                 }
1653                 if (rc || (bytes_written == 0)) {
1654                         if (total_written)
1655                                 break;
1656                         else {
1657                                 free_xid(xid);
1658                                 return rc;
1659                         }
1660                 } else {
1661                         spin_lock(&dentry->d_inode->i_lock);
1662                         cifs_update_eof(cifsi, *offset, bytes_written);
1663                         spin_unlock(&dentry->d_inode->i_lock);
1664                         *offset += bytes_written;
1665                 }
1666         }
1667
1668         cifs_stats_bytes_written(tcon, total_written);
1669
1670         if (total_written > 0) {
1671                 spin_lock(&dentry->d_inode->i_lock);
1672                 if (*offset > dentry->d_inode->i_size)
1673                         i_size_write(dentry->d_inode, *offset);
1674                 spin_unlock(&dentry->d_inode->i_lock);
1675         }
1676         mark_inode_dirty_sync(dentry->d_inode);
1677         free_xid(xid);
1678         return total_written;
1679 }
1680
1681 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1682                                         bool fsuid_only)
1683 {
1684         struct cifsFileInfo *open_file = NULL;
1685         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1686
1687         /* only filter by fsuid on multiuser mounts */
1688         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1689                 fsuid_only = false;
1690
1691         spin_lock(&cifs_file_list_lock);
1692         /* we could simply get the first_list_entry since write-only entries
1693            are always at the end of the list but since the first entry might
1694            have a close pending, we go through the whole list */
1695         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1696                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1697                         continue;
1698                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1699                         if (!open_file->invalidHandle) {
1700                                 /* found a good file */
1701                                 /* lock it so it will not be closed on us */
1702                                 cifsFileInfo_get_locked(open_file);
1703                                 spin_unlock(&cifs_file_list_lock);
1704                                 return open_file;
1705                         } /* else might as well continue, and look for
1706                              another, or simply have the caller reopen it
1707                              again rather than trying to fix this handle */
1708                 } else /* write only file */
1709                         break; /* write only files are last so must be done */
1710         }
1711         spin_unlock(&cifs_file_list_lock);
1712         return NULL;
1713 }
1714
1715 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1716                                         bool fsuid_only)
1717 {
1718         struct cifsFileInfo *open_file, *inv_file = NULL;
1719         struct cifs_sb_info *cifs_sb;
1720         bool any_available = false;
1721         int rc;
1722         unsigned int refind = 0;
1723
1724         /* Having a null inode here (because mapping->host was set to zero by
1725         the VFS or MM) should not happen but we had reports of on oops (due to
1726         it being zero) during stress testcases so we need to check for it */
1727
1728         if (cifs_inode == NULL) {
1729                 cERROR(1, "Null inode passed to cifs_writeable_file");
1730                 dump_stack();
1731                 return NULL;
1732         }
1733
1734         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1735
1736         /* only filter by fsuid on multiuser mounts */
1737         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1738                 fsuid_only = false;
1739
1740         spin_lock(&cifs_file_list_lock);
1741 refind_writable:
1742         if (refind > MAX_REOPEN_ATT) {
1743                 spin_unlock(&cifs_file_list_lock);
1744                 return NULL;
1745         }
1746         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1747                 if (!any_available && open_file->pid != current->tgid)
1748                         continue;
1749                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1750                         continue;
1751                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1752                         if (!open_file->invalidHandle) {
1753                                 /* found a good writable file */
1754                                 cifsFileInfo_get_locked(open_file);
1755                                 spin_unlock(&cifs_file_list_lock);
1756                                 return open_file;
1757                         } else {
1758                                 if (!inv_file)
1759                                         inv_file = open_file;
1760                         }
1761                 }
1762         }
1763         /* couldn't find useable FH with same pid, try any available */
1764         if (!any_available) {
1765                 any_available = true;
1766                 goto refind_writable;
1767         }
1768
1769         if (inv_file) {
1770                 any_available = false;
1771                 cifsFileInfo_get_locked(inv_file);
1772         }
1773
1774         spin_unlock(&cifs_file_list_lock);
1775
1776         if (inv_file) {
1777                 rc = cifs_reopen_file(inv_file, false);
1778                 if (!rc)
1779                         return inv_file;
1780                 else {
1781                         spin_lock(&cifs_file_list_lock);
1782                         list_move_tail(&inv_file->flist,
1783                                         &cifs_inode->openFileList);
1784                         spin_unlock(&cifs_file_list_lock);
1785                         cifsFileInfo_put(inv_file);
1786                         spin_lock(&cifs_file_list_lock);
1787                         ++refind;
1788                         goto refind_writable;
1789                 }
1790         }
1791
1792         return NULL;
1793 }
1794
1795 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1796 {
1797         struct address_space *mapping = page->mapping;
1798         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1799         char *write_data;
1800         int rc = -EFAULT;
1801         int bytes_written = 0;
1802         struct inode *inode;
1803         struct cifsFileInfo *open_file;
1804
1805         if (!mapping || !mapping->host)
1806                 return -EFAULT;
1807
1808         inode = page->mapping->host;
1809
1810         offset += (loff_t)from;
1811         write_data = kmap(page);
1812         write_data += from;
1813
1814         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1815                 kunmap(page);
1816                 return -EIO;
1817         }
1818
1819         /* racing with truncate? */
1820         if (offset > mapping->host->i_size) {
1821                 kunmap(page);
1822                 return 0; /* don't care */
1823         }
1824
1825         /* check to make sure that we are not extending the file */
1826         if (mapping->host->i_size - offset < (loff_t)to)
1827                 to = (unsigned)(mapping->host->i_size - offset);
1828
1829         open_file = find_writable_file(CIFS_I(mapping->host), false);
1830         if (open_file) {
1831                 bytes_written = cifs_write(open_file, open_file->pid,
1832                                            write_data, to - from, &offset);
1833                 cifsFileInfo_put(open_file);
1834                 /* Does mm or vfs already set times? */
1835                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1836                 if ((bytes_written > 0) && (offset))
1837                         rc = 0;
1838                 else if (bytes_written < 0)
1839                         rc = bytes_written;
1840         } else {
1841                 cFYI(1, "No writeable filehandles for inode");
1842                 rc = -EIO;
1843         }
1844
1845         kunmap(page);
1846         return rc;
1847 }
1848
/*
 * Write back dirty pages of @mapping as directed by @wbc.
 *
 * Dirty pages are gathered in runs of consecutive page indexes and each run
 * is handed to the server's async_writev operation as one cifs_writedata,
 * sized to at most wsize bytes. When wsize is smaller than a page the work
 * is delegated to generic_writepages() instead.
 *
 * Returns the value of rc after the last step performed: 0, -ENOMEM when a
 * cifs_writedata cannot be allocated, -EBADF when no writable handle is
 * found, or whatever async_writev returned for the last batch sent.
 */
static int cifs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
        bool done = false, scanned = false, range_whole = false;
        pgoff_t end, index;
        struct cifs_writedata *wdata;
        struct TCP_Server_Info *server;
        struct page *page;
        int rc = 0;

        /*
         * If wsize is smaller than the page cache size, default to writing
         * one page at a time via cifs_writepage
         */
        if (cifs_sb->wsize < PAGE_CACHE_SIZE)
                return generic_writepages(mapping, wbc);

        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = true;
                /* an explicit range is only walked once, never wrapped */
                scanned = true;
        }
retry:
        while (!done && index <= end) {
                unsigned int i, nr_pages, found_pages;
                pgoff_t next = 0, tofind;
                struct page **pages;

                /*
                 * Look for at most wsize worth of pages, and no more than
                 * are left between index and end.
                 */
                tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
                                end - index) + 1;

                wdata = cifs_writedata_alloc((unsigned int)tofind,
                                             cifs_writev_complete);
                if (!wdata) {
                        rc = -ENOMEM;
                        break;
                }

                /*
                 * find_get_pages_tag seems to return a max of 256 on each
                 * iteration, so we must call it several times in order to
                 * fill the array or the wsize is effectively limited to
                 * 256 * PAGE_CACHE_SIZE.
                 */
                found_pages = 0;
                pages = wdata->pages;
                do {
                        nr_pages = find_get_pages_tag(mapping, &index,
                                                        PAGECACHE_TAG_DIRTY,
                                                        tofind, pages);
                        found_pages += nr_pages;
                        tofind -= nr_pages;
                        pages += nr_pages;
                } while (nr_pages && tofind && index <= end);

                if (found_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        break;
                }

                /*
                 * From here on nr_pages counts the pages accepted into this
                 * batch; the loop stops at the first page that cannot be
                 * added.
                 */
                nr_pages = 0;
                for (i = 0; i < found_pages; i++) {
                        page = wdata->pages[i];
                        /*
                         * At this point we hold neither mapping->tree_lock nor
                         * lock on the page itself: the page may be truncated or
                         * invalidated (changing page->mapping to NULL), or even
                         * swizzled back from swapper_space to tmpfs file
                         * mapping
                         */

                        /*
                         * Block for the lock on the first page of the batch;
                         * for later pages just end the batch if the lock is
                         * not immediately available.
                         */
                        if (nr_pages == 0)
                                lock_page(page);
                        else if (!trylock_page(page))
                                break;

                        if (unlikely(page->mapping != mapping)) {
                                unlock_page(page);
                                break;
                        }

                        if (!wbc->range_cyclic && page->index > end) {
                                done = true;
                                unlock_page(page);
                                break;
                        }

                        if (next && (page->index != next)) {
                                /* Not next consecutive page */
                                unlock_page(page);
                                break;
                        }

                        if (wbc->sync_mode != WB_SYNC_NONE)
                                wait_on_page_writeback(page);

                        if (PageWriteback(page) ||
                                        !clear_page_dirty_for_io(page)) {
                                unlock_page(page);
                                break;
                        }

                        /*
                         * This actually clears the dirty bit in the radix tree.
                         * See cifs_writepage() for more commentary.
                         */
                        set_page_writeback(page);

                        /* page starts at or beyond EOF: nothing to send */
                        if (page_offset(page) >= i_size_read(mapping->host)) {
                                done = true;
                                unlock_page(page);
                                end_page_writeback(page);
                                break;
                        }

                        wdata->pages[i] = page;
                        next = page->index + 1;
                        ++nr_pages;
                }

                /* reset index to refind any pages skipped */
                if (nr_pages == 0)
                        index = wdata->pages[0]->index + 1;

                /* put any pages we aren't going to use */
                for (i = nr_pages; i < found_pages; i++) {
                        page_cache_release(wdata->pages[i]);
                        wdata->pages[i] = NULL;
                }

                /* nothing to write? */
                if (nr_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        continue;
                }

                wdata->sync_mode = wbc->sync_mode;
                wdata->nr_pages = nr_pages;
                wdata->offset = page_offset(wdata->pages[0]);
                wdata->pagesz = PAGE_CACHE_SIZE;
                /* the last page is only sent up to the current i_size */
                wdata->tailsz =
                        min(i_size_read(mapping->host) -
                            page_offset(wdata->pages[nr_pages - 1]),
                            (loff_t)PAGE_CACHE_SIZE);
                wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
                                        wdata->tailsz;

                /*
                 * Send the batch. Each attempt drops the handle used by the
                 * previous attempt and looks up a writable one again; the
                 * send is repeated on -EAGAIN only for WB_SYNC_ALL.
                 */
                do {
                        if (wdata->cfile != NULL)
                                cifsFileInfo_put(wdata->cfile);
                        wdata->cfile = find_writable_file(CIFS_I(mapping->host),
                                                          false);
                        if (!wdata->cfile) {
                                cERROR(1, "No writable handles for inode");
                                rc = -EBADF;
                                break;
                        }
                        wdata->pid = wdata->cfile->pid;
                        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
                        rc = server->ops->async_writev(wdata);
                } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);

                for (i = 0; i < nr_pages; ++i)
                        unlock_page(wdata->pages[i]);

                /* send failure -- clean up the mess */
                if (rc != 0) {
                        for (i = 0; i < nr_pages; ++i) {
                                /*
                                 * -EAGAIN: leave the page dirty for a later
                                 * pass; any other error is flagged on the
                                 * page itself.
                                 */
                                if (rc == -EAGAIN)
                                        redirty_page_for_writepage(wbc,
                                                           wdata->pages[i]);
                                else
                                        SetPageError(wdata->pages[i]);
                                end_page_writeback(wdata->pages[i]);
                                page_cache_release(wdata->pages[i]);
                        }
                        if (rc != -EAGAIN)
                                mapping_set_error(mapping, rc);
                }
                /* drop the reference this function holds on wdata */
                kref_put(&wdata->refcount, cifs_writedata_release);

                wbc->nr_to_write -= nr_pages;
                if (wbc->nr_to_write <= 0)
                        done = true;

                index = next;
        }

        if (!scanned && !done) {
                /*
                 * We hit the last page and there is more work to be done: wrap
                 * back to the start of the file
                 */
                scanned = true;
                index = 0;
                goto retry;
        }

        /* remember where to resume on the next cyclic writeback pass */
        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;

        return rc;
}
2058
2059 static int
2060 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2061 {
2062         int rc;
2063         unsigned int xid;
2064
2065         xid = get_xid();
2066 /* BB add check for wbc flags */
2067         page_cache_get(page);
2068         if (!PageUptodate(page))
2069                 cFYI(1, "ppw - page not up to date");
2070
2071         /*
2072          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2073          *
2074          * A writepage() implementation always needs to do either this,
2075          * or re-dirty the page with "redirty_page_for_writepage()" in
2076          * the case of a failure.
2077          *
2078          * Just unlocking the page will cause the radix tree tag-bits
2079          * to fail to update with the state of the page correctly.
2080          */
2081         set_page_writeback(page);
2082 retry_write:
2083         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2084         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2085                 goto retry_write;
2086         else if (rc == -EAGAIN)
2087                 redirty_page_for_writepage(wbc, page);
2088         else if (rc != 0)
2089                 SetPageError(page);
2090         else
2091                 SetPageUptodate(page);
2092         end_page_writeback(page);
2093         page_cache_release(page);
2094         free_xid(xid);
2095         return rc;
2096 }
2097
/*
 * Write a single page via cifs_writepage_locked() and then unlock it,
 * whatever the outcome of the write. Returns the write's result.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int rc;

        rc = cifs_writepage_locked(page, wbc);
        unlock_page(page);

        return rc;
}
2104
2105 static int cifs_write_end(struct file *file, struct address_space *mapping,
2106                         loff_t pos, unsigned len, unsigned copied,
2107                         struct page *page, void *fsdata)
2108 {
2109         int rc;
2110         struct inode *inode = mapping->host;
2111         struct cifsFileInfo *cfile = file->private_data;
2112         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2113         __u32 pid;
2114
2115         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2116                 pid = cfile->pid;
2117         else
2118                 pid = current->tgid;
2119
2120         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2121                  page, pos, copied);
2122
2123         if (PageChecked(page)) {
2124                 if (copied == len)
2125                         SetPageUptodate(page);
2126                 ClearPageChecked(page);
2127         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2128                 SetPageUptodate(page);
2129
2130         if (!PageUptodate(page)) {
2131                 char *page_data;
2132                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2133                 unsigned int xid;
2134
2135                 xid = get_xid();
2136                 /* this is probably better than directly calling
2137                    partialpage_write since in this function the file handle is
2138                    known which we might as well leverage */
2139                 /* BB check if anything else missing out of ppw
2140                    such as updating last write time */
2141                 page_data = kmap(page);
2142                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2143                 /* if (rc < 0) should we set writebehind rc? */
2144                 kunmap(page);
2145
2146                 free_xid(xid);
2147         } else {
2148                 rc = copied;
2149                 pos += copied;
2150                 set_page_dirty(page);
2151         }
2152
2153         if (rc > 0) {
2154                 spin_lock(&inode->i_lock);
2155                 if (pos > inode->i_size)
2156                         i_size_write(inode, pos);
2157                 spin_unlock(&inode->i_lock);
2158         }
2159
2160         unlock_page(page);
2161         page_cache_release(page);
2162
2163         return rc;
2164 }
2165
2166 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2167                       int datasync)
2168 {
2169         unsigned int xid;
2170         int rc = 0;
2171         struct cifs_tcon *tcon;
2172         struct TCP_Server_Info *server;
2173         struct cifsFileInfo *smbfile = file->private_data;
2174         struct inode *inode = file->f_path.dentry->d_inode;
2175         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2176
2177         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2178         if (rc)
2179                 return rc;
2180         mutex_lock(&inode->i_mutex);
2181
2182         xid = get_xid();
2183
2184         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2185                 file->f_path.dentry->d_name.name, datasync);
2186
2187         if (!CIFS_I(inode)->clientCanCacheRead) {
2188                 rc = cifs_invalidate_mapping(inode);
2189                 if (rc) {
2190                         cFYI(1, "rc: %d during invalidate phase", rc);
2191                         rc = 0; /* don't care about it in fsync */
2192                 }
2193         }
2194
2195         tcon = tlink_tcon(smbfile->tlink);
2196         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2197                 server = tcon->ses->server;
2198                 if (server->ops->flush)
2199                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2200                 else
2201                         rc = -ENOSYS;
2202         }
2203
2204         free_xid(xid);
2205         mutex_unlock(&inode->i_mutex);
2206         return rc;
2207 }
2208
2209 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2210 {
2211         unsigned int xid;
2212         int rc = 0;
2213         struct cifs_tcon *tcon;
2214         struct TCP_Server_Info *server;
2215         struct cifsFileInfo *smbfile = file->private_data;
2216         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2217         struct inode *inode = file->f_mapping->host;
2218
2219         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2220         if (rc)
2221                 return rc;
2222         mutex_lock(&inode->i_mutex);
2223
2224         xid = get_xid();
2225
2226         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2227                 file->f_path.dentry->d_name.name, datasync);
2228
2229         tcon = tlink_tcon(smbfile->tlink);
2230         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2231                 server = tcon->ses->server;
2232                 if (server->ops->flush)
2233                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2234                 else
2235                         rc = -ENOSYS;
2236         }
2237
2238         free_xid(xid);
2239         mutex_unlock(&inode->i_mutex);
2240         return rc;
2241 }
2242
2243 /*
2244  * As file closes, flush all cached write data for this inode checking
2245  * for write behind errors.
2246  */
2247 int cifs_flush(struct file *file, fl_owner_t id)
2248 {
2249         struct inode *inode = file->f_path.dentry->d_inode;
2250         int rc = 0;
2251
2252         if (file->f_mode & FMODE_WRITE)
2253                 rc = filemap_write_and_wait(inode->i_mapping);
2254
2255         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2256
2257         return rc;
2258 }
2259
2260 static int
2261 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2262 {
2263         int rc = 0;
2264         unsigned long i;
2265
2266         for (i = 0; i < num_pages; i++) {
2267                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2268                 if (!pages[i]) {
2269                         /*
2270                          * save number of pages we have already allocated and
2271                          * return with ENOMEM error
2272                          */
2273                         num_pages = i;
2274                         rc = -ENOMEM;
2275                         break;
2276                 }
2277         }
2278
2279         if (rc) {
2280                 for (i = 0; i < num_pages; i++)
2281                         put_page(pages[i]);
2282         }
2283         return rc;
2284 }
2285
2286 static inline
2287 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2288 {
2289         size_t num_pages;
2290         size_t clen;
2291
2292         clen = min_t(const size_t, len, wsize);
2293         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2294
2295         if (cur_len)
2296                 *cur_len = clen;
2297
2298         return num_pages;
2299 }
2300
2301 static void
2302 cifs_uncached_writev_complete(struct work_struct *work)
2303 {
2304         int i;
2305         struct cifs_writedata *wdata = container_of(work,
2306                                         struct cifs_writedata, work);
2307         struct inode *inode = wdata->cfile->dentry->d_inode;
2308         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2309
2310         spin_lock(&inode->i_lock);
2311         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2312         if (cifsi->server_eof > inode->i_size)
2313                 i_size_write(inode, cifsi->server_eof);
2314         spin_unlock(&inode->i_lock);
2315
2316         complete(&wdata->done);
2317
2318         if (wdata->result != -EAGAIN) {
2319                 for (i = 0; i < wdata->nr_pages; i++)
2320                         put_page(wdata->pages[i]);
2321         }
2322
2323         kref_put(&wdata->refcount, cifs_writedata_release);
2324 }
2325
2326 /* attempt to send write to server, retry on any -EAGAIN errors */
2327 static int
2328 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2329 {
2330         int rc;
2331         struct TCP_Server_Info *server;
2332
2333         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2334
2335         do {
2336                 if (wdata->cfile->invalidHandle) {
2337                         rc = cifs_reopen_file(wdata->cfile, false);
2338                         if (rc != 0)
2339                                 continue;
2340                 }
2341                 rc = server->ops->async_writev(wdata);
2342         } while (rc == -EAGAIN);
2343
2344         return rc;
2345 }
2346
2347 static ssize_t
2348 cifs_iovec_write(struct file *file, const struct iovec *iov,
2349                  unsigned long nr_segs, loff_t *poffset)
2350 {
2351         unsigned long nr_pages, i;
2352         size_t copied, len, cur_len;
2353         ssize_t total_written = 0;
2354         loff_t offset;
2355         struct iov_iter it;
2356         struct cifsFileInfo *open_file;
2357         struct cifs_tcon *tcon;
2358         struct cifs_sb_info *cifs_sb;
2359         struct cifs_writedata *wdata, *tmp;
2360         struct list_head wdata_list;
2361         int rc;
2362         pid_t pid;
2363
2364         len = iov_length(iov, nr_segs);
2365         if (!len)
2366                 return 0;
2367
2368         rc = generic_write_checks(file, poffset, &len, 0);
2369         if (rc)
2370                 return rc;
2371
2372         INIT_LIST_HEAD(&wdata_list);
2373         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2374         open_file = file->private_data;
2375         tcon = tlink_tcon(open_file->tlink);
2376
2377         if (!tcon->ses->server->ops->async_writev)
2378                 return -ENOSYS;
2379
2380         offset = *poffset;
2381
2382         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2383                 pid = open_file->pid;
2384         else
2385                 pid = current->tgid;
2386
2387         iov_iter_init(&it, iov, nr_segs, len, 0);
2388         do {
2389                 size_t save_len;
2390
2391                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2392                 wdata = cifs_writedata_alloc(nr_pages,
2393                                              cifs_uncached_writev_complete);
2394                 if (!wdata) {
2395                         rc = -ENOMEM;
2396                         break;
2397                 }
2398
2399                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2400                 if (rc) {
2401                         kfree(wdata);
2402                         break;
2403                 }
2404
2405                 save_len = cur_len;
2406                 for (i = 0; i < nr_pages; i++) {
2407                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2408                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2409                                                          0, copied);
2410                         cur_len -= copied;
2411                         iov_iter_advance(&it, copied);
2412                 }
2413                 cur_len = save_len - cur_len;
2414
2415                 wdata->sync_mode = WB_SYNC_ALL;
2416                 wdata->nr_pages = nr_pages;
2417                 wdata->offset = (__u64)offset;
2418                 wdata->cfile = cifsFileInfo_get(open_file);
2419                 wdata->pid = pid;
2420                 wdata->bytes = cur_len;
2421                 wdata->pagesz = PAGE_SIZE;
2422                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2423                 rc = cifs_uncached_retry_writev(wdata);
2424                 if (rc) {
2425                         kref_put(&wdata->refcount, cifs_writedata_release);
2426                         break;
2427                 }
2428
2429                 list_add_tail(&wdata->list, &wdata_list);
2430                 offset += cur_len;
2431                 len -= cur_len;
2432         } while (len > 0);
2433
2434         /*
2435          * If at least one write was successfully sent, then discard any rc
2436          * value from the later writes. If the other write succeeds, then
2437          * we'll end up returning whatever was written. If it fails, then
2438          * we'll get a new rc value from that.
2439          */
2440         if (!list_empty(&wdata_list))
2441                 rc = 0;
2442
2443         /*
2444          * Wait for and collect replies for any successful sends in order of
2445          * increasing offset. Once an error is hit or we get a fatal signal
2446          * while waiting, then return without waiting for any more replies.
2447          */
2448 restart_loop:
2449         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2450                 if (!rc) {
2451                         /* FIXME: freezable too? */
2452                         rc = wait_for_completion_killable(&wdata->done);
2453                         if (rc)
2454                                 rc = -EINTR;
2455                         else if (wdata->result)
2456                                 rc = wdata->result;
2457                         else
2458                                 total_written += wdata->bytes;
2459
2460                         /* resend call if it's a retryable error */
2461                         if (rc == -EAGAIN) {
2462                                 rc = cifs_uncached_retry_writev(wdata);
2463                                 goto restart_loop;
2464                         }
2465                 }
2466                 list_del_init(&wdata->list);
2467                 kref_put(&wdata->refcount, cifs_writedata_release);
2468         }
2469
2470         if (total_written > 0)
2471                 *poffset += total_written;
2472
2473         cifs_stats_bytes_written(tcon, total_written);
2474         return total_written ? total_written : (ssize_t)rc;
2475 }
2476
2477 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2478                                 unsigned long nr_segs, loff_t pos)
2479 {
2480         ssize_t written;
2481         struct inode *inode;
2482
2483         inode = iocb->ki_filp->f_path.dentry->d_inode;
2484
2485         /*
2486          * BB - optimize the way when signing is disabled. We can drop this
2487          * extra memory-to-memory copying and use iovec buffers for constructing
2488          * write request.
2489          */
2490
2491         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2492         if (written > 0) {
2493                 CIFS_I(inode)->invalid_mapping = true;
2494                 iocb->ki_pos = pos;
2495         }
2496
2497         return written;
2498 }
2499
2500 static ssize_t
2501 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2502             unsigned long nr_segs, loff_t pos)
2503 {
2504         struct file *file = iocb->ki_filp;
2505         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2506         struct inode *inode = file->f_mapping->host;
2507         struct cifsInodeInfo *cinode = CIFS_I(inode);
2508         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2509         ssize_t rc = -EACCES;
2510
2511         BUG_ON(iocb->ki_pos != pos);
2512
2513         sb_start_write(inode->i_sb);
2514
2515         /*
2516          * We need to hold the sem to be sure nobody modifies lock list
2517          * with a brlock that prevents writing.
2518          */
2519         down_read(&cinode->lock_sem);
2520         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2521                                      server->vals->exclusive_lock_type, NULL,
2522                                      CIFS_WRITE_OP)) {
2523                 mutex_lock(&inode->i_mutex);
2524                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2525                                                &iocb->ki_pos);
2526                 mutex_unlock(&inode->i_mutex);
2527         }
2528
2529         if (rc > 0 || rc == -EIOCBQUEUED) {
2530                 ssize_t err;
2531
2532                 err = generic_write_sync(file, pos, rc);
2533                 if (err < 0 && rc > 0)
2534                         rc = err;
2535         }
2536
2537         up_read(&cinode->lock_sem);
2538         sb_end_write(inode->i_sb);
2539         return rc;
2540 }
2541
2542 ssize_t
2543 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2544                    unsigned long nr_segs, loff_t pos)
2545 {
2546         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2547         struct cifsInodeInfo *cinode = CIFS_I(inode);
2548         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2549         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2550                                                 iocb->ki_filp->private_data;
2551         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2552         ssize_t written;
2553
2554         if (cinode->clientCanCacheAll) {
2555                 if (cap_unix(tcon->ses) &&
2556                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2557                     && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2558                         return generic_file_aio_write(iocb, iov, nr_segs, pos);
2559                 return cifs_writev(iocb, iov, nr_segs, pos);
2560         }
2561         /*
2562          * For non-oplocked files in strict cache mode we need to write the data
2563          * to the server exactly from the pos to pos+len-1 rather than flush all
2564          * affected pages because it may cause a error with mandatory locks on
2565          * these pages but not on the region from pos to ppos+len-1.
2566          */
2567         written = cifs_user_writev(iocb, iov, nr_segs, pos);
2568         if (written > 0 && cinode->clientCanCacheRead) {
2569                 /*
2570                  * Windows 7 server can delay breaking level2 oplock if a write
2571                  * request comes - break it on the client to prevent reading
2572                  * an old data.
2573                  */
2574                 cifs_invalidate_mapping(inode);
2575                 cFYI(1, "Set no oplock for inode=%p after a write operation",
2576                      inode);
2577                 cinode->clientCanCacheRead = false;
2578         }
2579         return written;
2580 }
2581
2582 static struct cifs_readdata *
2583 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2584 {
2585         struct cifs_readdata *rdata;
2586
2587         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2588                         GFP_KERNEL);
2589         if (rdata != NULL) {
2590                 kref_init(&rdata->refcount);
2591                 INIT_LIST_HEAD(&rdata->list);
2592                 init_completion(&rdata->done);
2593                 INIT_WORK(&rdata->work, complete);
2594         }
2595
2596         return rdata;
2597 }
2598
2599 void
2600 cifs_readdata_release(struct kref *refcount)
2601 {
2602         struct cifs_readdata *rdata = container_of(refcount,
2603                                         struct cifs_readdata, refcount);
2604
2605         if (rdata->cfile)
2606                 cifsFileInfo_put(rdata->cfile);
2607
2608         kfree(rdata);
2609 }
2610
2611 static int
2612 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2613 {
2614         int rc = 0;
2615         struct page *page;
2616         unsigned int i;
2617
2618         for (i = 0; i < nr_pages; i++) {
2619                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2620                 if (!page) {
2621                         rc = -ENOMEM;
2622                         break;
2623                 }
2624                 rdata->pages[i] = page;
2625         }
2626
2627         if (rc) {
2628                 for (i = 0; i < nr_pages; i++) {
2629                         put_page(rdata->pages[i]);
2630                         rdata->pages[i] = NULL;
2631                 }
2632         }
2633         return rc;
2634 }
2635
2636 static void
2637 cifs_uncached_readdata_release(struct kref *refcount)
2638 {
2639         struct cifs_readdata *rdata = container_of(refcount,
2640                                         struct cifs_readdata, refcount);
2641         unsigned int i;
2642
2643         for (i = 0; i < rdata->nr_pages; i++) {
2644                 put_page(rdata->pages[i]);
2645                 rdata->pages[i] = NULL;
2646         }
2647         cifs_readdata_release(refcount);
2648 }
2649
2650 static int
2651 cifs_retry_async_readv(struct cifs_readdata *rdata)
2652 {
2653         int rc;
2654         struct TCP_Server_Info *server;
2655
2656         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2657
2658         do {
2659                 if (rdata->cfile->invalidHandle) {
2660                         rc = cifs_reopen_file(rdata->cfile, true);
2661                         if (rc != 0)
2662                                 continue;
2663                 }
2664                 rc = server->ops->async_readv(rdata);
2665         } while (rc == -EAGAIN);
2666
2667         return rc;
2668 }
2669
2670 /**
2671  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2672  * @rdata:      the readdata response with list of pages holding data
2673  * @iov:        vector in which we should copy the data
2674  * @nr_segs:    number of segments in vector
2675  * @offset:     offset into file of the first iovec
2676  * @copied:     used to return the amount of data copied to the iov
2677  *
2678  * This function copies data from a list of pages in a readdata response into
2679  * an array of iovecs. It will first calculate where the data should go
2680  * based on the info in the readdata and then copy the data into that spot.
2681  */
2682 static ssize_t
2683 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2684                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2685 {
2686         int rc = 0;
2687         struct iov_iter ii;
2688         size_t pos = rdata->offset - offset;
2689         ssize_t remaining = rdata->bytes;
2690         unsigned char *pdata;
2691         unsigned int i;
2692
2693         /* set up iov_iter and advance to the correct offset */
2694         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2695         iov_iter_advance(&ii, pos);
2696
2697         *copied = 0;
2698         for (i = 0; i < rdata->nr_pages; i++) {
2699                 ssize_t copy;
2700                 struct page *page = rdata->pages[i];
2701
2702                 /* copy a whole page or whatever's left */
2703                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2704
2705                 /* ...but limit it to whatever space is left in the iov */
2706                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2707
2708                 /* go while there's data to be copied and no errors */
2709                 if (copy && !rc) {
2710                         pdata = kmap(page);
2711                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2712                                                 (int)copy);
2713                         kunmap(page);
2714                         if (!rc) {
2715                                 *copied += copy;
2716                                 remaining -= copy;
2717                                 iov_iter_advance(&ii, copy);
2718                         }
2719                 }
2720         }
2721
2722         return rc;
2723 }
2724
2725 static void
2726 cifs_uncached_readv_complete(struct work_struct *work)
2727 {
2728         struct cifs_readdata *rdata = container_of(work,
2729                                                 struct cifs_readdata, work);
2730
2731         complete(&rdata->done);
2732         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2733 }
2734
2735 static int
2736 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2737                         struct cifs_readdata *rdata, unsigned int len)
2738 {
2739         int total_read = 0, result = 0;
2740         unsigned int i;
2741         unsigned int nr_pages = rdata->nr_pages;
2742         struct kvec iov;
2743
2744         rdata->tailsz = PAGE_SIZE;
2745         for (i = 0; i < nr_pages; i++) {
2746                 struct page *page = rdata->pages[i];
2747
2748                 if (len >= PAGE_SIZE) {
2749                         /* enough data to fill the page */
2750                         iov.iov_base = kmap(page);
2751                         iov.iov_len = PAGE_SIZE;
2752                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2753                                 i, iov.iov_base, iov.iov_len);
2754                         len -= PAGE_SIZE;
2755                 } else if (len > 0) {
2756                         /* enough for partial page, fill and zero the rest */
2757                         iov.iov_base = kmap(page);
2758                         iov.iov_len = len;
2759                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2760                                 i, iov.iov_base, iov.iov_len);
2761                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2762                         rdata->tailsz = len;
2763                         len = 0;
2764                 } else {
2765                         /* no need to hold page hostage */
2766                         rdata->pages[i] = NULL;
2767                         rdata->nr_pages--;
2768                         put_page(page);
2769                         continue;
2770                 }
2771
2772                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2773                 kunmap(page);
2774                 if (result < 0)
2775                         break;
2776
2777                 total_read += result;
2778         }
2779
2780         return total_read > 0 ? total_read : result;
2781 }
2782
2783 static ssize_t
2784 cifs_iovec_read(struct file *file, const struct iovec *iov,
2785                  unsigned long nr_segs, loff_t *poffset)
2786 {
2787         ssize_t rc;
2788         size_t len, cur_len;
2789         ssize_t total_read = 0;
2790         loff_t offset = *poffset;
2791         unsigned int npages;
2792         struct cifs_sb_info *cifs_sb;
2793         struct cifs_tcon *tcon;
2794         struct cifsFileInfo *open_file;
2795         struct cifs_readdata *rdata, *tmp;
2796         struct list_head rdata_list;
2797         pid_t pid;
2798
2799         if (!nr_segs)
2800                 return 0;
2801
2802         len = iov_length(iov, nr_segs);
2803         if (!len)
2804                 return 0;
2805
2806         INIT_LIST_HEAD(&rdata_list);
2807         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2808         open_file = file->private_data;
2809         tcon = tlink_tcon(open_file->tlink);
2810
2811         if (!tcon->ses->server->ops->async_readv)
2812                 return -ENOSYS;
2813
2814         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2815                 pid = open_file->pid;
2816         else
2817                 pid = current->tgid;
2818
2819         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2820                 cFYI(1, "attempting read on write only file instance");
2821
2822         do {
2823                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2824                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2825
2826                 /* allocate a readdata struct */
2827                 rdata = cifs_readdata_alloc(npages,
2828                                             cifs_uncached_readv_complete);
2829                 if (!rdata) {
2830                         rc = -ENOMEM;
2831                         goto error;
2832                 }
2833
2834                 rc = cifs_read_allocate_pages(rdata, npages);
2835                 if (rc)
2836                         goto error;
2837
2838                 rdata->cfile = cifsFileInfo_get(open_file);
2839                 rdata->nr_pages = npages;
2840                 rdata->offset = offset;
2841                 rdata->bytes = cur_len;
2842                 rdata->pid = pid;
2843                 rdata->pagesz = PAGE_SIZE;
2844                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2845
2846                 rc = cifs_retry_async_readv(rdata);
2847 error:
2848                 if (rc) {
2849                         kref_put(&rdata->refcount,
2850                                  cifs_uncached_readdata_release);
2851                         break;
2852                 }
2853
2854                 list_add_tail(&rdata->list, &rdata_list);
2855                 offset += cur_len;
2856                 len -= cur_len;
2857         } while (len > 0);
2858
2859         /* if at least one read request send succeeded, then reset rc */
2860         if (!list_empty(&rdata_list))
2861                 rc = 0;
2862
2863         /* the loop below should proceed in the order of increasing offsets */
2864 restart_loop:
2865         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2866                 if (!rc) {
2867                         ssize_t copied;
2868
2869                         /* FIXME: freezable sleep too? */
2870                         rc = wait_for_completion_killable(&rdata->done);
2871                         if (rc)
2872                                 rc = -EINTR;
2873                         else if (rdata->result)
2874                                 rc = rdata->result;
2875                         else {
2876                                 rc = cifs_readdata_to_iov(rdata, iov,
2877                                                         nr_segs, *poffset,
2878                                                         &copied);
2879                                 total_read += copied;
2880                         }
2881
2882                         /* resend call if it's a retryable error */
2883                         if (rc == -EAGAIN) {
2884                                 rc = cifs_retry_async_readv(rdata);
2885                                 goto restart_loop;
2886                         }
2887                 }
2888                 list_del_init(&rdata->list);
2889                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2890         }
2891
2892         cifs_stats_bytes_read(tcon, total_read);
2893         *poffset += total_read;
2894
2895         /* mask nodata case */
2896         if (rc == -ENODATA)
2897                 rc = 0;
2898
2899         return total_read ? total_read : rc;
2900 }
2901
2902 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2903                                unsigned long nr_segs, loff_t pos)
2904 {
2905         ssize_t read;
2906
2907         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2908         if (read > 0)
2909                 iocb->ki_pos = pos;
2910
2911         return read;
2912 }
2913
2914 ssize_t
2915 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2916                   unsigned long nr_segs, loff_t pos)
2917 {
2918         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2919         struct cifsInodeInfo *cinode = CIFS_I(inode);
2920         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2921         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2922                                                 iocb->ki_filp->private_data;
2923         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2924         int rc = -EACCES;
2925
2926         /*
2927          * In strict cache mode we need to read from the server all the time
2928          * if we don't have level II oplock because the server can delay mtime
2929          * change - so we can't make a decision about inode invalidating.
2930          * And we can also fail with pagereading if there are mandatory locks
2931          * on pages affected by this read but not on the region from pos to
2932          * pos+len-1.
2933          */
2934         if (!cinode->clientCanCacheRead)
2935                 return cifs_user_readv(iocb, iov, nr_segs, pos);
2936
2937         if (cap_unix(tcon->ses) &&
2938             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2939             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2940                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2941
2942         /*
2943          * We need to hold the sem to be sure nobody modifies lock list
2944          * with a brlock that prevents reading.
2945          */
2946         down_read(&cinode->lock_sem);
2947         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2948                                      tcon->ses->server->vals->shared_lock_type,
2949                                      NULL, CIFS_READ_OP))
2950                 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2951         up_read(&cinode->lock_sem);
2952         return rc;
2953 }
2954
2955 static ssize_t
2956 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2957 {
2958         int rc = -EACCES;
2959         unsigned int bytes_read = 0;
2960         unsigned int total_read;
2961         unsigned int current_read_size;
2962         unsigned int rsize;
2963         struct cifs_sb_info *cifs_sb;
2964         struct cifs_tcon *tcon;
2965         struct TCP_Server_Info *server;
2966         unsigned int xid;
2967         char *cur_offset;
2968         struct cifsFileInfo *open_file;
2969         struct cifs_io_parms io_parms;
2970         int buf_type = CIFS_NO_BUFFER;
2971         __u32 pid;
2972
2973         xid = get_xid();
2974         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2975
2976         /* FIXME: set up handlers for larger reads and/or convert to async */
2977         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2978
2979         if (file->private_data == NULL) {
2980                 rc = -EBADF;
2981                 free_xid(xid);
2982                 return rc;
2983         }
2984         open_file = file->private_data;
2985         tcon = tlink_tcon(open_file->tlink);
2986         server = tcon->ses->server;
2987
2988         if (!server->ops->sync_read) {
2989                 free_xid(xid);
2990                 return -ENOSYS;
2991         }
2992
2993         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2994                 pid = open_file->pid;
2995         else
2996                 pid = current->tgid;
2997
2998         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2999                 cFYI(1, "attempting read on write only file instance");
3000
3001         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3002              total_read += bytes_read, cur_offset += bytes_read) {
3003                 current_read_size = min_t(uint, read_size - total_read, rsize);
3004                 /*
3005                  * For windows me and 9x we do not want to request more than it
3006                  * negotiated since it will refuse the read then.
3007                  */
3008                 if ((tcon->ses) && !(tcon->ses->capabilities &
3009                                 tcon->ses->server->vals->cap_large_files)) {
3010                         current_read_size = min_t(uint, current_read_size,
3011                                         CIFSMaxBufSize);
3012                 }
3013                 rc = -EAGAIN;
3014                 while (rc == -EAGAIN) {
3015                         if (open_file->invalidHandle) {
3016                                 rc = cifs_reopen_file(open_file, true);
3017                                 if (rc != 0)
3018                                         break;
3019                         }
3020                         io_parms.pid = pid;
3021                         io_parms.tcon = tcon;
3022                         io_parms.offset = *offset;
3023                         io_parms.length = current_read_size;
3024                         rc = server->ops->sync_read(xid, open_file, &io_parms,
3025                                                     &bytes_read, &cur_offset,
3026                                                     &buf_type);
3027                 }
3028                 if (rc || (bytes_read == 0)) {
3029                         if (total_read) {
3030                                 break;
3031                         } else {
3032                                 free_xid(xid);
3033                                 return rc;
3034                         }
3035                 } else {
3036                         cifs_stats_bytes_read(tcon, total_read);
3037                         *offset += bytes_read;
3038                 }
3039         }
3040         free_xid(xid);
3041         return total_read;
3042 }
3043
3044 /*
3045  * If the page is mmap'ed into a process' page tables, then we need to make
3046  * sure that it doesn't change while being written back.
3047  */
3048 static int
3049 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3050 {
3051         struct page *page = vmf->page;
3052
3053         lock_page(page);
3054         return VM_FAULT_LOCKED;
3055 }
3056
3057 static struct vm_operations_struct cifs_file_vm_ops = {
3058         .fault = filemap_fault,
3059         .page_mkwrite = cifs_page_mkwrite,
3060         .remap_pages = generic_file_remap_pages,
3061 };
3062
3063 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3064 {
3065         int rc, xid;
3066         struct inode *inode = file->f_path.dentry->d_inode;
3067
3068         xid = get_xid();
3069
3070         if (!CIFS_I(inode)->clientCanCacheRead) {
3071                 rc = cifs_invalidate_mapping(inode);
3072                 if (rc)
3073                         return rc;
3074         }
3075
3076         rc = generic_file_mmap(file, vma);
3077         if (rc == 0)
3078                 vma->vm_ops = &cifs_file_vm_ops;
3079         free_xid(xid);
3080         return rc;
3081 }
3082
3083 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3084 {
3085         int rc, xid;
3086
3087         xid = get_xid();
3088         rc = cifs_revalidate_file(file);
3089         if (rc) {
3090                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
3091                 free_xid(xid);
3092                 return rc;
3093         }
3094         rc = generic_file_mmap(file, vma);
3095         if (rc == 0)
3096                 vma->vm_ops = &cifs_file_vm_ops;
3097         free_xid(xid);
3098         return rc;
3099 }
3100
3101 static void
3102 cifs_readv_complete(struct work_struct *work)
3103 {
3104         unsigned int i;
3105         struct cifs_readdata *rdata = container_of(work,
3106                                                 struct cifs_readdata, work);
3107
3108         for (i = 0; i < rdata->nr_pages; i++) {
3109                 struct page *page = rdata->pages[i];
3110
3111                 lru_cache_add_file(page);
3112
3113                 if (rdata->result == 0) {
3114                         flush_dcache_page(page);
3115                         SetPageUptodate(page);
3116                 }
3117
3118                 unlock_page(page);
3119
3120                 if (rdata->result == 0)
3121                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3122
3123                 page_cache_release(page);
3124                 rdata->pages[i] = NULL;
3125         }
3126         kref_put(&rdata->refcount, cifs_readdata_release);
3127 }
3128
3129 static int
3130 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3131                         struct cifs_readdata *rdata, unsigned int len)
3132 {
3133         int total_read = 0, result = 0;
3134         unsigned int i;
3135         u64 eof;
3136         pgoff_t eof_index;
3137         unsigned int nr_pages = rdata->nr_pages;
3138         struct kvec iov;
3139
3140         /* determine the eof that the server (probably) has */
3141         eof = CIFS_I(rdata->mapping->host)->server_eof;
3142         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3143         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
3144
3145         rdata->tailsz = PAGE_CACHE_SIZE;
3146         for (i = 0; i < nr_pages; i++) {
3147                 struct page *page = rdata->pages[i];
3148
3149                 if (len >= PAGE_CACHE_SIZE) {
3150                         /* enough data to fill the page */
3151                         iov.iov_base = kmap(page);
3152                         iov.iov_len = PAGE_CACHE_SIZE;
3153                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3154                                 i, page->index, iov.iov_base, iov.iov_len);
3155                         len -= PAGE_CACHE_SIZE;
3156                 } else if (len > 0) {
3157                         /* enough for partial page, fill and zero the rest */
3158                         iov.iov_base = kmap(page);
3159                         iov.iov_len = len;
3160                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3161                                 i, page->index, iov.iov_base, iov.iov_len);
3162                         memset(iov.iov_base + len,
3163                                 '\0', PAGE_CACHE_SIZE - len);
3164                         rdata->tailsz = len;
3165                         len = 0;
3166                 } else if (page->index > eof_index) {
3167                         /*
3168                          * The VFS will not try to do readahead past the
3169                          * i_size, but it's possible that we have outstanding
3170                          * writes with gaps in the middle and the i_size hasn't
3171                          * caught up yet. Populate those with zeroed out pages
3172                          * to prevent the VFS from repeatedly attempting to
3173                          * fill them until the writes are flushed.
3174                          */
3175                         zero_user(page, 0, PAGE_CACHE_SIZE);
3176                         lru_cache_add_file(page);
3177                         flush_dcache_page(page);
3178                         SetPageUptodate(page);
3179                         unlock_page(page);
3180                         page_cache_release(page);
3181                         rdata->pages[i] = NULL;
3182                         rdata->nr_pages--;
3183                         continue;
3184                 } else {
3185                         /* no need to hold page hostage */
3186                         lru_cache_add_file(page);
3187                         unlock_page(page);
3188                         page_cache_release(page);
3189                         rdata->pages[i] = NULL;
3190                         rdata->nr_pages--;
3191                         continue;
3192                 }
3193
3194                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3195                 kunmap(page);
3196                 if (result < 0)
3197                         break;
3198
3199                 total_read += result;
3200         }
3201
3202         return total_read > 0 ? total_read : result;
3203 }
3204
3205 static int cifs_readpages(struct file *file, struct address_space *mapping,
3206         struct list_head *page_list, unsigned num_pages)
3207 {
3208         int rc;
3209         struct list_head tmplist;
3210         struct cifsFileInfo *open_file = file->private_data;
3211         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3212         unsigned int rsize = cifs_sb->rsize;
3213         pid_t pid;
3214
3215         /*
3216          * Give up immediately if rsize is too small to read an entire page.
3217          * The VFS will fall back to readpage. We should never reach this
3218          * point however since we set ra_pages to 0 when the rsize is smaller
3219          * than a cache page.
3220          */
3221         if (unlikely(rsize < PAGE_CACHE_SIZE))
3222                 return 0;
3223
3224         /*
3225          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3226          * immediately if the cookie is negative
3227          */
3228         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3229                                          &num_pages);
3230         if (rc == 0)
3231                 return rc;
3232
3233         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3234                 pid = open_file->pid;
3235         else
3236                 pid = current->tgid;
3237
3238         rc = 0;
3239         INIT_LIST_HEAD(&tmplist);
3240
3241         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3242                 mapping, num_pages);
3243
3244         /*
3245          * Start with the page at end of list and move it to private
3246          * list. Do the same with any following pages until we hit
3247          * the rsize limit, hit an index discontinuity, or run out of
3248          * pages. Issue the async read and then start the loop again
3249          * until the list is empty.
3250          *
3251          * Note that list order is important. The page_list is in
3252          * the order of declining indexes. When we put the pages in
3253          * the rdata->pages, then we want them in increasing order.
3254          */
3255         while (!list_empty(page_list)) {
3256                 unsigned int i;
3257                 unsigned int bytes = PAGE_CACHE_SIZE;
3258                 unsigned int expected_index;
3259                 unsigned int nr_pages = 1;
3260                 loff_t offset;
3261                 struct page *page, *tpage;
3262                 struct cifs_readdata *rdata;
3263
3264                 page = list_entry(page_list->prev, struct page, lru);
3265
3266                 /*
3267                  * Lock the page and put it in the cache. Since no one else
3268                  * should have access to this page, we're safe to simply set
3269                  * PG_locked without checking it first.
3270                  */
3271                 __set_page_locked(page);
3272                 rc = add_to_page_cache_locked(page, mapping,
3273                                               page->index, GFP_KERNEL);
3274
3275                 /* give up if we can't stick it in the cache */
3276                 if (rc) {
3277                         __clear_page_locked(page);
3278                         break;
3279                 }
3280
3281                 /* move first page to the tmplist */
3282                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3283                 list_move_tail(&page->lru, &tmplist);
3284
3285                 /* now try and add more pages onto the request */
3286                 expected_index = page->index + 1;
3287                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3288                         /* discontinuity ? */
3289                         if (page->index != expected_index)
3290                                 break;
3291
3292                         /* would this page push the read over the rsize? */
3293                         if (bytes + PAGE_CACHE_SIZE > rsize)
3294                                 break;
3295
3296                         __set_page_locked(page);
3297                         if (add_to_page_cache_locked(page, mapping,
3298