Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 28 Mar 2012 17:02:55 +0000 (10:02 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 28 Mar 2012 17:02:55 +0000 (10:02 -0700)
Pull ext4 updates for 3.4 from Ted Ts'o:
 "Ext4 commits for 3.3 merge window; mostly cleanups and bug fixes

  The changes to export dirty_writeback_interval are from Artem's s_dirt
  cleanup patch series.  The same is true of the change to remove the
  s_dirt helper functions which never got used by anyone in-tree.  I've
  run these changes by Al Viro, and am carrying them so that Artem can
  more easily fix up the rest of the file systems during the next merge
  window.  (Originally we had hoped to remove the use of s_dirt from
  ext4 during this merge window, but his patches had some bugs, so I
  ultimately ended up dropping them from the ext4 tree.)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (66 commits)
  vfs: remove unused superblock helpers
  mm: export dirty_writeback_interval
  ext4: remove useless s_dirt assignment
  ext4: write superblock only once on unmount
  ext4: do not mark superblock as dirty unnecessarily
  ext4: correct ext4_punch_hole return codes
  ext4: remove restrictive checks for EOFBLOCKS_FL
  ext4: always set then trimmed blocks count into len
  ext4: fix trimmed block count accunting
  ext4: fix start and len arguments handling in ext4_trim_fs()
  ext4: update s_free_{inodes,blocks}_count during online resize
  ext4: change some printk() calls to use ext4_msg() instead
  ext4: avoid output message interleaving in ext4_error_<foo>()
  ext4: remove trailing newlines from ext4_msg() and ext4_error() messages
  ext4: add no_printk argument validation, fix fallout
  ext4: remove redundant "EXT4-fs: " from uses of ext4_msg
  ext4: give more helpful error message in ext4_ext_rm_leaf()
  ext4: remove unused code from ext4_ext_map_blocks()
  ext4: rewrite punch hole to use ext4_ext_remove_space()
  jbd2: cleanup journal tail after transaction commit
  ...

1  2 
Documentation/filesystems/ext4.txt
fs/ext4/super.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
include/linux/fs.h
mm/page-writeback.c

index 8c10bf375c73d281c5b9e0919e23e1064ba8abc9,990219c58b1aa2b59d606db31c9bcd7ab6b1905c..1b7f9acbcbbe450c8d0f3f8a16d592275c61a5dd
@@@ -144,9 -144,6 +144,6 @@@ journal_async_commit       Commit block can b
                        mount the device. This will enable 'journal_checksum'
                        internally.
  
- journal=update                Update the ext4 file system's journal to the current
-                       format.
  journal_dev=devnum    When the external journal device's major/minor numbers
                        have changed, this option allows the user to specify
                        the new journal location.  The journal device is
@@@ -308,7 -305,7 +305,7 @@@ min_batch_time=usec        This parameter set
                        fast disks, at the cost of increasing latency.
  
  journal_ioprio=prio   The I/O priority (from 0 to 7, where 0 is the
 -                      highest priorty) which should be used for I/O
 +                      highest priority) which should be used for I/O
                        operations submitted by kjournald2 during a
                        commit operation.  This defaults to 3, which is
                        a slightly higher priority than the default I/O
@@@ -343,7 -340,7 +340,7 @@@ noinit_itable              Do not initialize any un
  init_itable=n         The lazy itable init code will wait n times the
                        number of milliseconds it took to zero out the
                        previous block group's inode table.  This
 -                      minimizes the impact on the systme performance
 +                      minimizes the impact on the system performance
                        while file system's inode table is being initialized.
  
  discard                       Controls whether ext4 should issue discard/TRIM
@@@ -356,11 -353,6 +353,6 @@@ nouid32                   Disables 32-bit UIDs and GIDs
                        interoperability  with  older kernels which only
                        store and expect 16-bit values.
  
- resize                        Allows to resize filesystem to the end of the last
-                       existing block group, further resize has to be done
-                       with resize2fs either online, or offline. It can be
-                       used only with conjunction with remount.
  block_validity                This options allows to enables/disables the in-kernel
  noblock_validity      facility for tracking filesystem metadata blocks
                        within internal data structures. This allows multi-
diff --combined fs/ext4/super.c
index 933900909ed0facb352e8d2c86c19a456a4fbb49,150840a4af38a73c20923d85cca33795c5613e74..ceebaf853beb74c7e139e63f46bf44975696566b
@@@ -62,6 -62,7 +62,7 @@@ static struct ext4_features *ext4_feat
  
  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
+ static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  static int ext4_commit_super(struct super_block *sb, int sync);
  static void ext4_mark_recovery_complete(struct super_block *sb,
                                        struct ext4_super_block *es);
@@@ -375,7 -376,7 +376,7 @@@ void ext4_journal_abort_handle(const ch
        if (is_handle_aborted(handle))
                return;
  
-       printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n",
+       printk(KERN_ERR "EXT4-fs: %s:%d: aborting transaction: %s in %s\n",
               caller, line, errstr, err_fn);
  
        jbd2_journal_abort_handle(handle);
@@@ -431,6 -432,22 +432,22 @@@ static int block_device_ejected(struct 
        return bdi->dev == NULL;
  }
  
+ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
+ {
+       struct super_block              *sb = journal->j_private;
+       struct ext4_sb_info             *sbi = EXT4_SB(sb);
+       int                             error = is_journal_aborted(journal);
+       struct ext4_journal_cb_entry    *jce, *tmp;
+       spin_lock(&sbi->s_md_lock);
+       list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) {
+               list_del_init(&jce->jce_list);
+               spin_unlock(&sbi->s_md_lock);
+               jce->jce_func(sb, jce, error);
+               spin_lock(&sbi->s_md_lock);
+       }
+       spin_unlock(&sbi->s_md_lock);
+ }
  
  /* Deal with the reporting of failure conditions on a filesystem such as
   * inconsistencies detected or read IO failures.
@@@ -498,11 -515,16 +515,16 @@@ void ext4_error_inode(struct inode *ino
        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
-       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
-              inode->i_sb->s_id, function, line, inode->i_ino);
        if (block)
-               printk(KERN_CONT "block %llu: ", block);
-       printk(KERN_CONT "comm %s: %pV\n", current->comm, &vaf);
+               printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+                      "inode #%lu: block %llu: comm %s: %pV\n",
+                      inode->i_sb->s_id, function, line, inode->i_ino,
+                      block, current->comm, &vaf);
+       else
+               printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
+                      "inode #%lu: comm %s: %pV\n",
+                      inode->i_sb->s_id, function, line, inode->i_ino,
+                      current->comm, &vaf);
        va_end(args);
  
        ext4_handle_error(inode->i_sb);
@@@ -524,15 -546,21 +546,21 @@@ void ext4_error_file(struct file *file
        path = d_path(&(file->f_path), pathname, sizeof(pathname));
        if (IS_ERR(path))
                path = "(unknown)";
-       printk(KERN_CRIT
-              "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
-              inode->i_sb->s_id, function, line, inode->i_ino);
-       if (block)
-               printk(KERN_CONT "block %llu: ", block);
        va_start(args, fmt);
        vaf.fmt = fmt;
        vaf.va = &args;
-       printk(KERN_CONT "comm %s: path %s: %pV\n", current->comm, path, &vaf);
+       if (block)
+               printk(KERN_CRIT
+                      "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+                      "block %llu: comm %s: path %s: %pV\n",
+                      inode->i_sb->s_id, function, line, inode->i_ino,
+                      block, current->comm, path, &vaf);
+       else
+               printk(KERN_CRIT
+                      "EXT4-fs error (device %s): %s:%d: inode #%lu: "
+                      "comm %s: path %s: %pV\n",
+                      inode->i_sb->s_id, function, line, inode->i_ino,
+                      current->comm, path, &vaf);
        va_end(args);
  
        ext4_handle_error(inode->i_sb);
@@@ -808,9 -836,6 +836,6 @@@ static void ext4_put_super(struct super
        destroy_workqueue(sbi->dio_unwritten_wq);
  
        lock_super(sb);
-       if (sb->s_dirt)
-               ext4_commit_super(sb, 1);
        if (sbi->s_journal) {
                err = jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
-               ext4_commit_super(sb, 1);
        }
+       if (sb->s_dirt || !(sb->s_flags & MS_RDONLY))
+               ext4_commit_super(sb, 1);
        if (sbi->s_proc) {
+               remove_proc_entry("options", sbi->s_proc);
                remove_proc_entry(sb->s_id, ext4_proc_root);
        }
        kobject_del(&sbi->s_kobj);
@@@ -990,180 -1018,6 +1018,6 @@@ void ext4_clear_inode(struct inode *ino
        }
  }
  
- static inline void ext4_show_quota_options(struct seq_file *seq,
-                                          struct super_block *sb)
- {
- #if defined(CONFIG_QUOTA)
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       if (sbi->s_jquota_fmt) {
-               char *fmtname = "";
-               switch (sbi->s_jquota_fmt) {
-               case QFMT_VFS_OLD:
-                       fmtname = "vfsold";
-                       break;
-               case QFMT_VFS_V0:
-                       fmtname = "vfsv0";
-                       break;
-               case QFMT_VFS_V1:
-                       fmtname = "vfsv1";
-                       break;
-               }
-               seq_printf(seq, ",jqfmt=%s", fmtname);
-       }
-       if (sbi->s_qf_names[USRQUOTA])
-               seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
-       if (sbi->s_qf_names[GRPQUOTA])
-               seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
-       if (test_opt(sb, USRQUOTA))
-               seq_puts(seq, ",usrquota");
-       if (test_opt(sb, GRPQUOTA))
-               seq_puts(seq, ",grpquota");
- #endif
- }
- /*
-  * Show an option if
-  *  - it's set to a non-default value OR
-  *  - if the per-sb default is different from the global default
-  */
- static int ext4_show_options(struct seq_file *seq, struct dentry *root)
- {
-       int def_errors;
-       unsigned long def_mount_opts;
-       struct super_block *sb = root->d_sb;
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_super_block *es = sbi->s_es;
-       def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
-       def_errors     = le16_to_cpu(es->s_errors);
-       if (sbi->s_sb_block != 1)
-               seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
-       if (test_opt(sb, MINIX_DF))
-               seq_puts(seq, ",minixdf");
-       if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
-               seq_puts(seq, ",grpid");
-       if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
-               seq_puts(seq, ",nogrpid");
-       if (sbi->s_resuid != EXT4_DEF_RESUID ||
-           le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
-               seq_printf(seq, ",resuid=%u", sbi->s_resuid);
-       }
-       if (sbi->s_resgid != EXT4_DEF_RESGID ||
-           le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
-               seq_printf(seq, ",resgid=%u", sbi->s_resgid);
-       }
-       if (test_opt(sb, ERRORS_RO)) {
-               if (def_errors == EXT4_ERRORS_PANIC ||
-                   def_errors == EXT4_ERRORS_CONTINUE) {
-                       seq_puts(seq, ",errors=remount-ro");
-               }
-       }
-       if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
-               seq_puts(seq, ",errors=continue");
-       if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
-               seq_puts(seq, ",errors=panic");
-       if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
-               seq_puts(seq, ",nouid32");
-       if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
-               seq_puts(seq, ",debug");
- #ifdef CONFIG_EXT4_FS_XATTR
-       if (test_opt(sb, XATTR_USER))
-               seq_puts(seq, ",user_xattr");
-       if (!test_opt(sb, XATTR_USER))
-               seq_puts(seq, ",nouser_xattr");
- #endif
- #ifdef CONFIG_EXT4_FS_POSIX_ACL
-       if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
-               seq_puts(seq, ",acl");
-       if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
-               seq_puts(seq, ",noacl");
- #endif
-       if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
-               seq_printf(seq, ",commit=%u",
-                          (unsigned) (sbi->s_commit_interval / HZ));
-       }
-       if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
-               seq_printf(seq, ",min_batch_time=%u",
-                          (unsigned) sbi->s_min_batch_time);
-       }
-       if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
-               seq_printf(seq, ",max_batch_time=%u",
-                          (unsigned) sbi->s_max_batch_time);
-       }
-       /*
-        * We're changing the default of barrier mount option, so
-        * let's always display its mount state so it's clear what its
-        * status is.
-        */
-       seq_puts(seq, ",barrier=");
-       seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
-       if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
-               seq_puts(seq, ",journal_async_commit");
-       else if (test_opt(sb, JOURNAL_CHECKSUM))
-               seq_puts(seq, ",journal_checksum");
-       if (test_opt(sb, I_VERSION))
-               seq_puts(seq, ",i_version");
-       if (!test_opt(sb, DELALLOC) &&
-           !(def_mount_opts & EXT4_DEFM_NODELALLOC))
-               seq_puts(seq, ",nodelalloc");
-       if (!test_opt(sb, MBLK_IO_SUBMIT))
-               seq_puts(seq, ",nomblk_io_submit");
-       if (sbi->s_stripe)
-               seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
-       /*
-        * journal mode get enabled in different ways
-        * So just print the value even if we didn't specify it
-        */
-       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
-               seq_puts(seq, ",data=journal");
-       else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
-               seq_puts(seq, ",data=ordered");
-       else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
-               seq_puts(seq, ",data=writeback");
-       if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
-               seq_printf(seq, ",inode_readahead_blks=%u",
-                          sbi->s_inode_readahead_blks);
-       if (test_opt(sb, DATA_ERR_ABORT))
-               seq_puts(seq, ",data_err=abort");
-       if (test_opt(sb, NO_AUTO_DA_ALLOC))
-               seq_puts(seq, ",noauto_da_alloc");
-       if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
-               seq_puts(seq, ",discard");
-       if (test_opt(sb, NOLOAD))
-               seq_puts(seq, ",norecovery");
-       if (test_opt(sb, DIOREAD_NOLOCK))
-               seq_puts(seq, ",dioread_nolock");
-       if (test_opt(sb, BLOCK_VALIDITY) &&
-           !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
-               seq_puts(seq, ",block_validity");
-       if (!test_opt(sb, INIT_INODE_TABLE))
-               seq_puts(seq, ",noinit_itable");
-       else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
-               seq_printf(seq, ",init_itable=%u",
-                          (unsigned) sbi->s_li_wait_mult);
-       ext4_show_quota_options(seq, sb);
-       return 0;
- }
  static struct inode *ext4_nfs_get_inode(struct super_block *sb,
                                        u64 ino, u32 generation)
  {
@@@ -1316,18 -1170,17 +1170,17 @@@ static const struct export_operations e
  enum {
        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
-       Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
+       Opt_nouid32, Opt_debug, Opt_removed,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
-       Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
+       Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
        Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
-       Opt_journal_update, Opt_journal_dev,
-       Opt_journal_checksum, Opt_journal_async_commit,
+       Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_data_err_abort, Opt_data_err_ignore,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
-       Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
-       Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
+       Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
+       Opt_usrquota, Opt_grpquota, Opt_i_version,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
        Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
@@@ -1350,20 -1203,19 +1203,19 @@@ static const match_table_t tokens = 
        {Opt_err_ro, "errors=remount-ro"},
        {Opt_nouid32, "nouid32"},
        {Opt_debug, "debug"},
-       {Opt_oldalloc, "oldalloc"},
-       {Opt_orlov, "orlov"},
+       {Opt_removed, "oldalloc"},
+       {Opt_removed, "orlov"},
        {Opt_user_xattr, "user_xattr"},
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
-       {Opt_noload, "noload"},
        {Opt_noload, "norecovery"},
-       {Opt_nobh, "nobh"},
-       {Opt_bh, "bh"},
+       {Opt_noload, "noload"},
+       {Opt_removed, "nobh"},
+       {Opt_removed, "bh"},
        {Opt_commit, "commit=%u"},
        {Opt_min_batch_time, "min_batch_time=%u"},
        {Opt_max_batch_time, "max_batch_time=%u"},
-       {Opt_journal_update, "journal=update"},
        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_journal_checksum, "journal_checksum"},
        {Opt_journal_async_commit, "journal_async_commit"},
        {Opt_nobarrier, "nobarrier"},
        {Opt_i_version, "i_version"},
        {Opt_stripe, "stripe=%u"},
-       {Opt_resize, "resize"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_mblk_io_submit, "mblk_io_submit"},
        {Opt_init_itable, "init_itable=%u"},
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
+       {Opt_removed, "check=none"},    /* mount option from ext2/3 */
+       {Opt_removed, "nocheck"},       /* mount option from ext2/3 */
+       {Opt_removed, "reservation"},   /* mount option from ext2/3 */
+       {Opt_removed, "noreservation"}, /* mount option from ext2/3 */
+       {Opt_removed, "journal=%u"},    /* mount option from ext2/3 */
        {Opt_err, NULL},
  };
  
@@@ -1496,420 -1352,273 +1352,273 @@@ static int clear_qf_name(struct super_b
  }
  #endif
  
- static int parse_options(char *options, struct super_block *sb,
-                        unsigned long *journal_devnum,
-                        unsigned int *journal_ioprio,
-                        ext4_fsblk_t *n_blocks_count, int is_remount)
- {
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       char *p;
-       substring_t args[MAX_OPT_ARGS];
-       int data_opt = 0;
-       int option;
+ #define MOPT_SET      0x0001
+ #define MOPT_CLEAR    0x0002
+ #define MOPT_NOSUPPORT        0x0004
+ #define MOPT_EXPLICIT 0x0008
+ #define MOPT_CLEAR_ERR        0x0010
+ #define MOPT_GTE0     0x0020
  #ifdef CONFIG_QUOTA
-       int qfmt;
+ #define MOPT_Q                0
+ #define MOPT_QFMT     0x0040
+ #else
+ #define MOPT_Q                MOPT_NOSUPPORT
+ #define MOPT_QFMT     MOPT_NOSUPPORT
  #endif
-       if (!options)
-               return 1;
-       while ((p = strsep(&options, ",")) != NULL) {
-               int token;
-               if (!*p)
-                       continue;
-               /*
-                * Initialize args struct so we know whether arg was
-                * found; some options take optional arguments.
-                */
-               args[0].to = args[0].from = NULL;
-               token = match_token(p, tokens, args);
-               switch (token) {
-               case Opt_bsd_df:
-                       ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-                       clear_opt(sb, MINIX_DF);
-                       break;
-               case Opt_minix_df:
-                       ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-                       set_opt(sb, MINIX_DF);
-                       break;
-               case Opt_grpid:
-                       ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-                       set_opt(sb, GRPID);
-                       break;
-               case Opt_nogrpid:
-                       ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
-                       clear_opt(sb, GRPID);
-                       break;
-               case Opt_resuid:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       sbi->s_resuid = option;
-                       break;
-               case Opt_resgid:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       sbi->s_resgid = option;
-                       break;
-               case Opt_sb:
-                       /* handled by get_sb_block() instead of here */
-                       /* *sb_block = match_int(&args[0]); */
-                       break;
-               case Opt_err_panic:
-                       clear_opt(sb, ERRORS_CONT);
-                       clear_opt(sb, ERRORS_RO);
-                       set_opt(sb, ERRORS_PANIC);
-                       break;
-               case Opt_err_ro:
-                       clear_opt(sb, ERRORS_CONT);
-                       clear_opt(sb, ERRORS_PANIC);
-                       set_opt(sb, ERRORS_RO);
-                       break;
-               case Opt_err_cont:
-                       clear_opt(sb, ERRORS_RO);
-                       clear_opt(sb, ERRORS_PANIC);
-                       set_opt(sb, ERRORS_CONT);
-                       break;
-               case Opt_nouid32:
-                       set_opt(sb, NO_UID32);
-                       break;
-               case Opt_debug:
-                       set_opt(sb, DEBUG);
-                       break;
-               case Opt_oldalloc:
-                       ext4_msg(sb, KERN_WARNING,
-                                "Ignoring deprecated oldalloc option");
-                       break;
-               case Opt_orlov:
-                       ext4_msg(sb, KERN_WARNING,
-                                "Ignoring deprecated orlov option");
-                       break;
+ #define MOPT_DATAJ    0x0080
+ static const struct mount_opts {
+       int     token;
+       int     mount_opt;
+       int     flags;
+ } ext4_mount_opts[] = {
+       {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
+       {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
+       {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
+       {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
+       {Opt_mblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_SET},
+       {Opt_nomblk_io_submit, EXT4_MOUNT_MBLK_IO_SUBMIT, MOPT_CLEAR},
+       {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
+       {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
+       {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_SET},
+       {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, MOPT_CLEAR},
+       {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
+       {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
+       {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_SET | MOPT_EXPLICIT},
+       {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_CLEAR | MOPT_EXPLICIT},
+       {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_SET},
+       {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
+                                   EXT4_MOUNT_JOURNAL_CHECKSUM), MOPT_SET},
+       {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_SET},
+       {Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR},
+       {Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
+       {Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
+       {Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_SET},
+       {Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_CLEAR},
+       {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
+       {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
+       {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
+       {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
+       {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
+       {Opt_commit, 0, MOPT_GTE0},
+       {Opt_max_batch_time, 0, MOPT_GTE0},
+       {Opt_min_batch_time, 0, MOPT_GTE0},
+       {Opt_inode_readahead_blks, 0, MOPT_GTE0},
+       {Opt_init_itable, 0, MOPT_GTE0},
+       {Opt_stripe, 0, MOPT_GTE0},
+       {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_DATAJ},
+       {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_DATAJ},
+       {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, MOPT_DATAJ},
  #ifdef CONFIG_EXT4_FS_XATTR
-               case Opt_user_xattr:
-                       set_opt(sb, XATTR_USER);
-                       break;
-               case Opt_nouser_xattr:
-                       clear_opt(sb, XATTR_USER);
-                       break;
+       {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
+       {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
  #else
-               case Opt_user_xattr:
-               case Opt_nouser_xattr:
-                       ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported");
-                       break;
+       {Opt_user_xattr, 0, MOPT_NOSUPPORT},
+       {Opt_nouser_xattr, 0, MOPT_NOSUPPORT},
  #endif
  #ifdef CONFIG_EXT4_FS_POSIX_ACL
-               case Opt_acl:
-                       set_opt(sb, POSIX_ACL);
-                       break;
-               case Opt_noacl:
-                       clear_opt(sb, POSIX_ACL);
-                       break;
+       {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
+       {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
  #else
-               case Opt_acl:
-               case Opt_noacl:
-                       ext4_msg(sb, KERN_ERR, "(no)acl options not supported");
-                       break;
+       {Opt_acl, 0, MOPT_NOSUPPORT},
+       {Opt_noacl, 0, MOPT_NOSUPPORT},
  #endif
-               case Opt_journal_update:
-                       /* @@@ FIXME */
-                       /* Eventually we will want to be able to create
-                          a journal file here.  For now, only allow the
-                          user to specify an existing inode to be the
-                          journal file. */
-                       if (is_remount) {
-                               ext4_msg(sb, KERN_ERR,
-                                        "Cannot specify journal on remount");
-                               return 0;
-                       }
-                       set_opt(sb, UPDATE_JOURNAL);
-                       break;
-               case Opt_journal_dev:
-                       if (is_remount) {
+       {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
+       {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
+       {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
+       {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
+                                                       MOPT_SET | MOPT_Q},
+       {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
+                                                       MOPT_SET | MOPT_Q},
+       {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
+                      EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q},
+       {Opt_usrjquota, 0, MOPT_Q},
+       {Opt_grpjquota, 0, MOPT_Q},
+       {Opt_offusrjquota, 0, MOPT_Q},
+       {Opt_offgrpjquota, 0, MOPT_Q},
+       {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
+       {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
+       {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
+       {Opt_err, 0, 0}
+ };
+ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
+                           substring_t *args, unsigned long *journal_devnum,
+                           unsigned int *journal_ioprio, int is_remount)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       const struct mount_opts *m;
+       int arg = 0;
+       if (args->from && match_int(args, &arg))
+               return -1;
+       switch (token) {
+       case Opt_noacl:
+       case Opt_nouser_xattr:
+               ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5");
+               break;
+       case Opt_sb:
+               return 1;       /* handled by get_sb_block() */
+       case Opt_removed:
+               ext4_msg(sb, KERN_WARNING,
+                        "Ignoring removed %s option", opt);
+               return 1;
+       case Opt_resuid:
+               sbi->s_resuid = arg;
+               return 1;
+       case Opt_resgid:
+               sbi->s_resgid = arg;
+               return 1;
+       case Opt_abort:
+               sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               return 1;
+       case Opt_i_version:
+               sb->s_flags |= MS_I_VERSION;
+               return 1;
+       case Opt_journal_dev:
+               if (is_remount) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Cannot specify journal on remount");
+                       return -1;
+               }
+               *journal_devnum = arg;
+               return 1;
+       case Opt_journal_ioprio:
+               if (arg < 0 || arg > 7)
+                       return -1;
+               *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
+               return 1;
+       }
+       for (m = ext4_mount_opts; m->token != Opt_err; m++) {
+               if (token != m->token)
+                       continue;
+               if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
+                       return -1;
+               if (m->flags & MOPT_EXPLICIT)
+                       set_opt2(sb, EXPLICIT_DELALLOC);
+               if (m->flags & MOPT_CLEAR_ERR)
+                       clear_opt(sb, ERRORS_MASK);
+               if (token == Opt_noquota && sb_any_quota_loaded(sb)) {
+                       ext4_msg(sb, KERN_ERR, "Cannot change quota "
+                                "options when quota turned on");
+                       return -1;
+               }
+               if (m->flags & MOPT_NOSUPPORT) {
+                       ext4_msg(sb, KERN_ERR, "%s option not supported", opt);
+               } else if (token == Opt_commit) {
+                       if (arg == 0)
+                               arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
+                       sbi->s_commit_interval = HZ * arg;
+               } else if (token == Opt_max_batch_time) {
+                       if (arg == 0)
+                               arg = EXT4_DEF_MAX_BATCH_TIME;
+                       sbi->s_max_batch_time = arg;
+               } else if (token == Opt_min_batch_time) {
+                       sbi->s_min_batch_time = arg;
+               } else if (token == Opt_inode_readahead_blks) {
+                       if (arg > (1 << 30))
+                               return -1;
+                       if (arg && !is_power_of_2(arg)) {
                                ext4_msg(sb, KERN_ERR,
-                                       "Cannot specify journal on remount");
-                               return 0;
+                                        "EXT4-fs: inode_readahead_blks"
+                                        " must be a power of 2");
+                               return -1;
                        }
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       *journal_devnum = option;
-                       break;
-               case Opt_journal_checksum:
-                       set_opt(sb, JOURNAL_CHECKSUM);
-                       break;
-               case Opt_journal_async_commit:
-                       set_opt(sb, JOURNAL_ASYNC_COMMIT);
-                       set_opt(sb, JOURNAL_CHECKSUM);
-                       break;
-               case Opt_noload:
-                       set_opt(sb, NOLOAD);
-                       break;
-               case Opt_commit:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       if (option < 0)
-                               return 0;
-                       if (option == 0)
-                               option = JBD2_DEFAULT_MAX_COMMIT_AGE;
-                       sbi->s_commit_interval = HZ * option;
-                       break;
-               case Opt_max_batch_time:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       if (option < 0)
-                               return 0;
-                       if (option == 0)
-                               option = EXT4_DEF_MAX_BATCH_TIME;
-                       sbi->s_max_batch_time = option;
-                       break;
-               case Opt_min_batch_time:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       if (option < 0)
-                               return 0;
-                       sbi->s_min_batch_time = option;
-                       break;
-               case Opt_data_journal:
-                       data_opt = EXT4_MOUNT_JOURNAL_DATA;
-                       goto datacheck;
-               case Opt_data_ordered:
-                       data_opt = EXT4_MOUNT_ORDERED_DATA;
-                       goto datacheck;
-               case Opt_data_writeback:
-                       data_opt = EXT4_MOUNT_WRITEBACK_DATA;
-               datacheck:
+                       sbi->s_inode_readahead_blks = arg;
+               } else if (token == Opt_init_itable) {
+                       set_opt(sb, INIT_INODE_TABLE);
+                       if (!args->from)
+                               arg = EXT4_DEF_LI_WAIT_MULT;
+                       sbi->s_li_wait_mult = arg;
+               } else if (token == Opt_stripe) {
+                       sbi->s_stripe = arg;
+               } else if (m->flags & MOPT_DATAJ) {
                        if (is_remount) {
                                if (!sbi->s_journal)
                                        ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option");
-                               else if (test_opt(sb, DATA_FLAGS) != data_opt) {
+                               else if (test_opt(sb, DATA_FLAGS) !=
+                                        m->mount_opt) {
                                        ext4_msg(sb, KERN_ERR,
-                                               "Cannot change data mode on remount");
-                                       return 0;
+                                        "Cannot change data mode on remount");
+                                       return -1;
                                }
                        } else {
                                clear_opt(sb, DATA_FLAGS);
-                               sbi->s_mount_opt |= data_opt;
+                               sbi->s_mount_opt |= m->mount_opt;
                        }
-                       break;
-               case Opt_data_err_abort:
-                       set_opt(sb, DATA_ERR_ABORT);
-                       break;
-               case Opt_data_err_ignore:
-                       clear_opt(sb, DATA_ERR_ABORT);
-                       break;
  #ifdef CONFIG_QUOTA
-               case Opt_usrjquota:
+               } else if (token == Opt_usrjquota) {
                        if (!set_qf_name(sb, USRQUOTA, &args[0]))
-                               return 0;
-                       break;
-               case Opt_grpjquota:
+                               return -1;
+               } else if (token == Opt_grpjquota) {
                        if (!set_qf_name(sb, GRPQUOTA, &args[0]))
-                               return 0;
-                       break;
-               case Opt_offusrjquota:
+                               return -1;
+               } else if (token == Opt_offusrjquota) {
                        if (!clear_qf_name(sb, USRQUOTA))
-                               return 0;
-                       break;
-               case Opt_offgrpjquota:
+                               return -1;
+               } else if (token == Opt_offgrpjquota) {
                        if (!clear_qf_name(sb, GRPQUOTA))
-                               return 0;
-                       break;
-               case Opt_jqfmt_vfsold:
-                       qfmt = QFMT_VFS_OLD;
-                       goto set_qf_format;
-               case Opt_jqfmt_vfsv0:
-                       qfmt = QFMT_VFS_V0;
-                       goto set_qf_format;
-               case Opt_jqfmt_vfsv1:
-                       qfmt = QFMT_VFS_V1;
- set_qf_format:
+                               return -1;
+               } else if (m->flags & MOPT_QFMT) {
                        if (sb_any_quota_loaded(sb) &&
-                           sbi->s_jquota_fmt != qfmt) {
-                               ext4_msg(sb, KERN_ERR, "Cannot change "
-                                       "journaled quota options when "
-                                       "quota turned on");
-                               return 0;
-                       }
-                       sbi->s_jquota_fmt = qfmt;
-                       break;
-               case Opt_quota:
-               case Opt_usrquota:
-                       set_opt(sb, QUOTA);
-                       set_opt(sb, USRQUOTA);
-                       break;
-               case Opt_grpquota:
-                       set_opt(sb, QUOTA);
-                       set_opt(sb, GRPQUOTA);
-                       break;
-               case Opt_noquota:
-                       if (sb_any_quota_loaded(sb)) {
-                               ext4_msg(sb, KERN_ERR, "Cannot change quota "
-                                       "options when quota turned on");
-                               return 0;
+                           sbi->s_jquota_fmt != m->mount_opt) {
+                               ext4_msg(sb, KERN_ERR, "Cannot "
+                                        "change journaled quota options "
+                                        "when quota turned on");
+                               return -1;
                        }
-                       clear_opt(sb, QUOTA);
-                       clear_opt(sb, USRQUOTA);
-                       clear_opt(sb, GRPQUOTA);
-                       break;
- #else
-               case Opt_quota:
-               case Opt_usrquota:
-               case Opt_grpquota:
-                       ext4_msg(sb, KERN_ERR,
-                               "quota options not supported");
-                       break;
-               case Opt_usrjquota:
-               case Opt_grpjquota:
-               case Opt_offusrjquota:
-               case Opt_offgrpjquota:
-               case Opt_jqfmt_vfsold:
-               case Opt_jqfmt_vfsv0:
-               case Opt_jqfmt_vfsv1:
-                       ext4_msg(sb, KERN_ERR,
-                               "journaled quota options not supported");
-                       break;
-               case Opt_noquota:
-                       break;
+                       sbi->s_jquota_fmt = m->mount_opt;
  #endif
-               case Opt_abort:
-                       sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
-                       break;
-               case Opt_nobarrier:
-                       clear_opt(sb, BARRIER);
-                       break;
-               case Opt_barrier:
-                       if (args[0].from) {
-                               if (match_int(&args[0], &option))
-                                       return 0;
-                       } else
-                               option = 1;     /* No argument, default to 1 */
-                       if (option)
-                               set_opt(sb, BARRIER);
-                       else
-                               clear_opt(sb, BARRIER);
-                       break;
-               case Opt_ignore:
-                       break;
-               case Opt_resize:
-                       if (!is_remount) {
-                               ext4_msg(sb, KERN_ERR,
-                                       "resize option only available "
-                                       "for remount");
-                               return 0;
-                       }
-                       if (match_int(&args[0], &option) != 0)
-                               return 0;
-                       *n_blocks_count = option;
-                       break;
-               case Opt_nobh:
-                       ext4_msg(sb, KERN_WARNING,
-                                "Ignoring deprecated nobh option");
-                       break;
-               case Opt_bh:
-                       ext4_msg(sb, KERN_WARNING,
-                                "Ignoring deprecated bh option");
-                       break;
-               case Opt_i_version:
-                       set_opt(sb, I_VERSION);
-                       sb->s_flags |= MS_I_VERSION;
-                       break;
-               case Opt_nodelalloc:
-                       clear_opt(sb, DELALLOC);
-                       clear_opt2(sb, EXPLICIT_DELALLOC);
-                       break;
-               case Opt_mblk_io_submit:
-                       set_opt(sb, MBLK_IO_SUBMIT);
-                       break;
-               case Opt_nomblk_io_submit:
-                       clear_opt(sb, MBLK_IO_SUBMIT);
-                       break;
-               case Opt_stripe:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       if (option < 0)
-                               return 0;
-                       sbi->s_stripe = option;
-                       break;
-               case Opt_delalloc:
-                       set_opt(sb, DELALLOC);
-                       set_opt2(sb, EXPLICIT_DELALLOC);
-                       break;
-               case Opt_block_validity:
-                       set_opt(sb, BLOCK_VALIDITY);
-                       break;
-               case Opt_noblock_validity:
-                       clear_opt(sb, BLOCK_VALIDITY);
-                       break;
-               case Opt_inode_readahead_blks:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       if (option < 0 || option > (1 << 30))
-                               return 0;
-                       if (option && !is_power_of_2(option)) {
-                               ext4_msg(sb, KERN_ERR,
-                                        "EXT4-fs: inode_readahead_blks"
-                                        " must be a power of 2");
-                               return 0;
+               } else {
+                       if (!args->from)
+                               arg = 1;
+                       if (m->flags & MOPT_CLEAR)
+                               arg = !arg;
+                       else if (unlikely(!(m->flags & MOPT_SET))) {
+                               ext4_msg(sb, KERN_WARNING,
+                                        "buggy handling of option %s", opt);
+                               WARN_ON(1);
+                               return -1;
                        }
-                       sbi->s_inode_readahead_blks = option;
-                       break;
-               case Opt_journal_ioprio:
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       if (option < 0 || option > 7)
-                               break;
-                       *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
-                                                           option);
-                       break;
-               case Opt_noauto_da_alloc:
-                       set_opt(sb, NO_AUTO_DA_ALLOC);
-                       break;
-               case Opt_auto_da_alloc:
-                       if (args[0].from) {
-                               if (match_int(&args[0], &option))
-                                       return 0;
-                       } else
-                               option = 1;     /* No argument, default to 1 */
-                       if (option)
-                               clear_opt(sb, NO_AUTO_DA_ALLOC);
+                       if (arg != 0)
+                               sbi->s_mount_opt |= m->mount_opt;
                        else
-                               set_opt(sb,NO_AUTO_DA_ALLOC);
-                       break;
-               case Opt_discard:
-                       set_opt(sb, DISCARD);
-                       break;
-               case Opt_nodiscard:
-                       clear_opt(sb, DISCARD);
-                       break;
-               case Opt_dioread_nolock:
-                       set_opt(sb, DIOREAD_NOLOCK);
-                       break;
-               case Opt_dioread_lock:
-                       clear_opt(sb, DIOREAD_NOLOCK);
-                       break;
-               case Opt_init_itable:
-                       set_opt(sb, INIT_INODE_TABLE);
-                       if (args[0].from) {
-                               if (match_int(&args[0], &option))
-                                       return 0;
-                       } else
-                               option = EXT4_DEF_LI_WAIT_MULT;
-                       if (option < 0)
-                               return 0;
-                       sbi->s_li_wait_mult = option;
-                       break;
-               case Opt_noinit_itable:
-                       clear_opt(sb, INIT_INODE_TABLE);
-                       break;
-               default:
-                       ext4_msg(sb, KERN_ERR,
-                              "Unrecognized mount option \"%s\" "
-                              "or missing value", p);
-                       return 0;
+                               sbi->s_mount_opt &= ~m->mount_opt;
                }
+               return 1;
+       }
+       ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" "
+                "or missing value", opt);
+       return -1;
+ }
+ static int parse_options(char *options, struct super_block *sb,
+                        unsigned long *journal_devnum,
+                        unsigned int *journal_ioprio,
+                        int is_remount)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       char *p;
+       substring_t args[MAX_OPT_ARGS];
+       int token;
+       if (!options)
+               return 1;
+       while ((p = strsep(&options, ",")) != NULL) {
+               if (!*p)
+                       continue;
+               /*
+                * Initialize args struct so we know whether arg was
+                * found; some options take optional arguments.
+                */
+               args[0].to = args[0].from = 0;
+               token = match_token(p, tokens, args);
+               if (handle_mount_opt(sb, p, token, args, journal_devnum,
+                                    journal_ioprio, is_remount) < 0)
+                       return 0;
        }
  #ifdef CONFIG_QUOTA
        if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
        return 1;
  }
  
+ /*
+  * Append the quota-related mount options of @sb to @seq.
+  *
+  * Output order: jqfmt= (only when s_jquota_fmt is non-zero), usrjquota=,
+  * grpjquota=, usrquota, grpquota.  Each item is preceded by a comma.
+  * Compiles to an empty function when CONFIG_QUOTA is not defined.
+  */
+ static inline void ext4_show_quota_options(struct seq_file *seq,
+                                          struct super_block *sb)
+ {
+ #ifdef CONFIG_QUOTA
+       struct ext4_sb_info *info = EXT4_SB(sb);
+       if (info->s_jquota_fmt) {
+               const char *name;
+               /* Unknown format ids are printed as an empty name. */
+               switch (info->s_jquota_fmt) {
+               case QFMT_VFS_OLD:
+                       name = "vfsold";
+                       break;
+               case QFMT_VFS_V0:
+                       name = "vfsv0";
+                       break;
+               case QFMT_VFS_V1:
+                       name = "vfsv1";
+                       break;
+               default:
+                       name = "";
+                       break;
+               }
+               seq_printf(seq, ",jqfmt=%s", name);
+       }
+       /* Journaled quota file names, user first and then group. */
+       if (info->s_qf_names[USRQUOTA] != NULL)
+               seq_printf(seq, ",usrjquota=%s", info->s_qf_names[USRQUOTA]);
+       if (info->s_qf_names[GRPQUOTA] != NULL)
+               seq_printf(seq, ",grpjquota=%s", info->s_qf_names[GRPQUOTA]);
+       /* Plain quota flags taken from the mount option bits. */
+       if (test_opt(sb, USRQUOTA))
+               seq_puts(seq, ",usrquota");
+       if (test_opt(sb, GRPQUOTA))
+               seq_puts(seq, ",grpquota");
+ #endif
+ }
+ /*
+  * Return the pattern string for @token from the tokens[] table.
+  *
+  * Only patterns without a '=' are considered.  If nothing matches, the
+  * walk stops on the Opt_err terminator and that entry's pattern is
+  * returned.
+  *
+  * The cursor is an ordinary automatic variable: a function-local static
+  * would be shared by every invocation, so overlapping calls could disturb
+  * each other's walk of the table.
+  */
+ static const char *token2str(int token)
+ {
+       const struct match_token *t;
+       for (t = tokens; t->token != Opt_err; t++)
+               if (t->token == token && !strchr(t->pattern, '='))
+                       break;
+       return t->pattern;
+ }
+ /*
+  * Print the mount options of @sb into @seq.
+  *
+  * Show an option if
+  *  - it's set to a non-default value OR
+  *  - if the per-sb default is different from the global default
+  *
+  * @nodefs selects the baseline and the layout.  When it is non-zero the
+  * flag options are compared against 0 instead of sbi->s_def_mount_opt,
+  * the separator is '\n' instead of ',', and every option whose test
+  * below starts with "nodefs ||" is printed unconditionally.
+  *
+  * Always returns 0.
+  */
+ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
+                             int nodefs)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+       int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt;
+       const struct mount_opts *m;
+       char sep = nodefs ? '\n' : ',';
+       /* Both helpers emit sep in front of the option text. */
+ #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
+ #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
+       if (sbi->s_sb_block != 1)
+               SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
+       /*
+        * Flag-style options come from the ext4_mount_opts table.  Entries
+        * with neither MOPT_SET nor MOPT_CLEAR, and entries carrying
+        * MOPT_CLEAR_ERR, are not printed from here.
+        */
+       for (m = ext4_mount_opts; m->token != Opt_err; m++) {
+               int want_set = m->flags & MOPT_SET;
+               if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
+                   (m->flags & MOPT_CLEAR_ERR))
+                       continue;
+               if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
+                       continue; /* skip if same as the default */
+               if ((want_set &&
+                    (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) ||
+                   (!want_set && (sbi->s_mount_opt & m->mount_opt)))
+                       continue; /* select Opt_noFoo vs Opt_Foo */
+               SEQ_OPTS_PRINT("%s", token2str(m->token));
+       }
+       /* Reserved uid/gid: compared with both the in-memory and on-disk value. */
+       if (nodefs || sbi->s_resuid != EXT4_DEF_RESUID ||
+           le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
+               SEQ_OPTS_PRINT("resuid=%u", sbi->s_resuid);
+       if (nodefs || sbi->s_resgid != EXT4_DEF_RESGID ||
+           le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
+               SEQ_OPTS_PRINT("resgid=%u", sbi->s_resgid);
+       /*
+        * NOTE(review): -1 presumably matches none of the EXT4_ERRORS_*
+        * values, so with nodefs the active errors= mode is always
+        * printed -- confirm against the constant definitions.
+        */
+       def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
+       if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
+               SEQ_OPTS_PUTS("errors=remount-ro");
+       if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
+               SEQ_OPTS_PUTS("errors=continue");
+       if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
+               SEQ_OPTS_PUTS("errors=panic");
+       /* s_commit_interval is divided by HZ before it is printed. */
+       if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
+               SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
+       if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
+               SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
+       if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
+               SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
+       if (sb->s_flags & MS_I_VERSION)
+               SEQ_OPTS_PUTS("i_version");
+       if (nodefs || sbi->s_stripe)
+               SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
+       /* data= is shown only when the data-mode bits differ from the baseline. */
+       if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) {
+               if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+                       SEQ_OPTS_PUTS("data=journal");
+               else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+                       SEQ_OPTS_PUTS("data=ordered");
+               else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+                       SEQ_OPTS_PUTS("data=writeback");
+       }
+       if (nodefs ||
+           sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
+               SEQ_OPTS_PRINT("inode_readahead_blks=%u",
+                              sbi->s_inode_readahead_blks);
+       if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
+                      (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
+               SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
+       /* Quota options are appended last by their own helper. */
+       ext4_show_quota_options(seq, sb);
+       return 0;
+ }
+ /*
+  * Print the options of the superblock behind @root into @seq, with
+  * nodefs == 0.  Returns the result of _ext4_show_options().
+  */
+ static int ext4_show_options(struct seq_file *seq, struct dentry *root)
+ {
+       struct super_block *sb = root->d_sb;
+       return _ext4_show_options(seq, sb, 0);
+ }
+ /*
+  * seq_file show callback: the superblock is taken from seq->private.
+  * Writes "ro" or "rw" depending on MS_RDONLY, then the option list with
+  * nodefs == 1, then a closing newline.  Returns the result of
+  * _ext4_show_options().
+  */
+ static int options_seq_show(struct seq_file *seq, void *offset)
+ {
+       struct super_block *sb = seq->private;
+       const char *mode = "rw";
+       int ret;
+       if (sb->s_flags & MS_RDONLY)
+               mode = "ro";
+       seq_puts(seq, mode);
+       ret = _ext4_show_options(seq, sb, 1);
+       seq_puts(seq, "\n");
+       return ret;
+ }
+ /*
+  * open handler: hands the data pointer stored in the proc entry of @inode
+  * to single_open(), with options_seq_show as the show callback.
+  */
+ static int options_open_fs(struct inode *inode, struct file *file)
+ {
+       struct proc_dir_entry *entry = PDE(inode);
+       return single_open(file, options_seq_show, entry->data);
+ }
+ /*
+  * File operations for the seq_file-backed options file.  open goes
+  * through options_open_fs(); read, llseek and release use the seq_file
+  * helpers seq_read, seq_lseek and single_release.
+  */
+ static const struct file_operations ext4_seq_options_fops = {
+       .owner = THIS_MODULE,
+       .open = options_open_fs,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+ };
  static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
                            int read_only)
  {
@@@ -2945,7 -2808,7 +2808,7 @@@ static int ext4_run_lazyinit_thread(voi
                ext4_clear_request_list();
                kfree(ext4_li_info);
                ext4_li_info = NULL;
-               printk(KERN_CRIT "EXT4: error %d creating inode table "
+               printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
                                 "initialization thread\n",
                                 err);
                return err;
@@@ -3183,11 -3046,8 +3046,8 @@@ static int ext4_fill_super(struct super
        set_opt(sb, INIT_INODE_TABLE);
        if (def_mount_opts & EXT4_DEFM_DEBUG)
                set_opt(sb, DEBUG);
-       if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
-               ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
-                       "2.6.38");
+       if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
                set_opt(sb, GRPID);
-       }
        if (def_mount_opts & EXT4_DEFM_UID16)
                set_opt(sb, NO_UID32);
        /* xattr user namespace & acls are now defaulted on */
        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
  
        if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
-                          &journal_devnum, &journal_ioprio, NULL, 0)) {
+                          &journal_devnum, &journal_ioprio, 0)) {
                ext4_msg(sb, KERN_WARNING,
                         "failed to parse options in superblock: %s",
                         sbi->s_es->s_mount_opts);
        }
+       sbi->s_def_mount_opt = sbi->s_mount_opt;
        if (!parse_options((char *) data, sb, &journal_devnum,
-                          &journal_ioprio, NULL, 0))
+                          &journal_ioprio, 0))
                goto failed_mount;
  
        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
  #else
                es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
  #endif
-               sb->s_dirt = 1;
        }
  
        /* Handle clustersize */
        if (ext4_proc_root)
                sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
  
+       if (sbi->s_proc)
+               proc_create_data("options", S_IRUGO, sbi->s_proc,
+                                &ext4_seq_options_fops, sb);
        bgl_lock_init(sbi->s_blockgroup_lock);
  
        for (i = 0; i < db_count; i++) {
        }
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
  
+       sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
        /*
         * The journal may have updated the bg summary counts, so we
         * need to update the global counters.
@@@ -3735,8 -3601,9 +3601,8 @@@ no_journal
                iput(root);
                goto failed_mount4;
        }
 -      sb->s_root = d_alloc_root(root);
 +      sb->s_root = d_make_root(root);
        if (!sb->s_root) {
 -              iput(root);
                ext4_msg(sb, KERN_ERR, "get root dentry failed");
                ret = -ENOMEM;
                goto failed_mount4;
@@@ -3861,6 -3728,7 +3727,7 @@@ failed_mount2
        ext4_kvfree(sbi->s_group_desc);
  failed_mount:
        if (sbi->s_proc) {
+               remove_proc_entry("options", sbi->s_proc);
                remove_proc_entry(sb->s_id, ext4_proc_root);
        }
  #ifdef CONFIG_QUOTA
@@@ -4090,15 -3958,6 +3957,6 @@@ static int ext4_load_journal(struct sup
        if (!(journal->j_flags & JBD2_BARRIER))
                ext4_msg(sb, KERN_INFO, "barriers disabled");
  
-       if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
-               err = jbd2_journal_update_format(journal);
-               if (err)  {
-                       ext4_msg(sb, KERN_ERR, "error updating journal");
-                       jbd2_journal_destroy(journal);
-                       return err;
-               }
-       }
        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
                err = jbd2_journal_wipe(journal, !really_read_only);
        if (!err) {
@@@ -4385,7 -4244,6 +4243,6 @@@ static int ext4_remount(struct super_bl
  {
        struct ext4_super_block *es;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       ext4_fsblk_t n_blocks_count = 0;
        unsigned long old_sb_flags;
        struct ext4_mount_options old_opts;
        int enable_quota = 0;
        /*
         * Allow the "check" option to be passed as a remount option.
         */
-       if (!parse_options(data, sb, NULL, &journal_ioprio,
-                          &n_blocks_count, 1)) {
+       if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
                err = -EINVAL;
                goto restore_opts;
        }
                set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
        }
  
-       if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
-               n_blocks_count > ext4_blocks_count(es)) {
+       if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
                if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
                        err = -EROFS;
                        goto restore_opts;
                        if (sbi->s_journal)
                                ext4_clear_journal_err(sb, es);
                        sbi->s_mount_state = le16_to_cpu(es->s_state);
-                       if ((err = ext4_group_extend(sb, es, n_blocks_count)))
-                               goto restore_opts;
                        if (!ext4_setup_super(sb, es, 0))
                                sb->s_flags &= ~MS_RDONLY;
                        if (EXT4_HAS_INCOMPAT_FEATURE(sb,
@@@ -5055,9 -4909,6 +4908,9 @@@ static int __init ext4_init_fs(void
  {
        int i, err;
  
 +      ext4_li_info = NULL;
 +      mutex_init(&ext4_li_mtx);
 +
        ext4_check_flag_values();
  
        for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
        if (err)
                goto out;
  
 -      ext4_li_info = NULL;
 -      mutex_init(&ext4_li_mtx);
        return 0;
  out:
        unregister_as_ext2();
diff --combined fs/jbd2/commit.c
index c067a8cae63bf322067763ff02687ae24c216008,1dfcb207ea69c615757659b3daebc33604fd5aec..17f557f01cf0dd570dcd112e5d686fa072cd7498
@@@ -286,10 -286,10 +286,10 @@@ static __u32 jbd2_checksum_data(__u32 c
        char *addr;
        __u32 checksum;
  
 -      addr = kmap_atomic(page, KM_USER0);
 +      addr = kmap_atomic(page);
        checksum = crc32_be(crc32_sum,
                (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
 -      kunmap_atomic(addr, KM_USER0);
 +      kunmap_atomic(addr);
  
        return checksum;
  }
@@@ -331,6 -331,10 +331,10 @@@ void jbd2_journal_commit_transaction(jo
        struct buffer_head *cbh = NULL; /* For transactional checksums */
        __u32 crc32_sum = ~0;
        struct blk_plug plug;
+       /* Tail of the journal */
+       unsigned long first_block;
+       tid_t first_tid;
+       int update_tail;
  
        /*
         * First job: lock down the current transaction and wait for
        /* Do we need to erase the effects of a prior jbd2_journal_flush? */
        if (journal->j_flags & JBD2_FLUSHED) {
                jbd_debug(3, "super block updated\n");
-               jbd2_journal_update_superblock(journal, 1);
+               mutex_lock(&journal->j_checkpoint_mutex);
+               /*
+                * We hold j_checkpoint_mutex so tail cannot change under us.
+                * We don't need any special data guarantees for writing sb
+                * since journal is empty and it is ok for write to be
+                * flushed only with transaction commit.
+                */
+               jbd2_journal_update_sb_log_tail(journal,
+                                               journal->j_tail_sequence,
+                                               journal->j_tail,
+                                               WRITE_SYNC);
+               mutex_unlock(&journal->j_checkpoint_mutex);
        } else {
                jbd_debug(3, "superblock not updated\n");
        }
@@@ -677,10 -692,30 +692,30 @@@ start_journal_io
                err = 0;
        }
  
+       /*
+        * Get current oldest transaction in the log before we issue flush
+        * to the filesystem device. After the flush we can be sure that
+        * blocks of all older transactions are checkpointed to persistent
+        * storage and we will be safe to update journal start in the
+        * superblock with the numbers we get here.
+        */
+       update_tail =
+               jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
        write_lock(&journal->j_state_lock);
+       if (update_tail) {
+               long freed = first_block - journal->j_tail;
+               if (first_block < journal->j_tail)
+                       freed += journal->j_last - journal->j_first;
+               /* Update tail only if we free significant amount of space */
+               if (freed < journal->j_maxlen / 4)
+                       update_tail = 0;
+       }
        J_ASSERT(commit_transaction->t_state == T_COMMIT);
        commit_transaction->t_state = T_COMMIT_DFLUSH;
        write_unlock(&journal->j_state_lock);
        /* 
         * If the journal is not located on the file system device,
         * then we must flush the file system device before we issue
@@@ -831,6 -866,14 +866,14 @@@ wait_for_iobuf
        if (err)
                jbd2_journal_abort(journal, err);
  
+       /*
+        * Now disk caches for filesystem device are flushed so we are safe to
+        * erase checkpointed transactions from the log by updating journal
+        * superblock.
+        */
+       if (update_tail)
+               jbd2_update_log_tail(journal, first_tid, first_block);
        /* End of a transaction!  Finally, we can do checkpoint
             processing: any buffers committed as a result of this
             transaction can be removed from any checkpoint list it was on
@@@ -1048,7 -1091,7 +1091,7 @@@ restart_loop
        jbd_debug(1, "JBD2: commit %d complete, head %d\n",
                  journal->j_commit_sequence, journal->j_tail_sequence);
        if (to_free)
-               kfree(commit_transaction);
+               jbd2_journal_free_transaction(commit_transaction);
  
        wake_up(&journal->j_wait_done_commit);
  }
diff --combined fs/jbd2/journal.c
index 839377e3d6244ac540e0a573bf9c4adc679225a8,bda564f638642ec9e0735935292012bba12e3d96..98ed6dbfe381370710f89109dd0b5a2209a44494
@@@ -71,7 -71,6 +71,6 @@@ EXPORT_SYMBOL(jbd2_journal_revoke)
  
  EXPORT_SYMBOL(jbd2_journal_init_dev);
  EXPORT_SYMBOL(jbd2_journal_init_inode);
- EXPORT_SYMBOL(jbd2_journal_update_format);
  EXPORT_SYMBOL(jbd2_journal_check_used_features);
  EXPORT_SYMBOL(jbd2_journal_check_available_features);
  EXPORT_SYMBOL(jbd2_journal_set_features);
@@@ -96,7 -95,6 +95,6 @@@ EXPORT_SYMBOL(jbd2_journal_release_jbd_
  EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
  EXPORT_SYMBOL(jbd2_inode_cache);
  
- static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
  static void __journal_abort_soft (journal_t *journal, int errno);
  static int jbd2_journal_create_slab(size_t slab_size);
  
@@@ -139,8 -137,6 +137,8 @@@ static int kjournald2(void *arg
        setup_timer(&journal->j_commit_timer, commit_timeout,
                        (unsigned long)current);
  
 +      set_freezable();
 +
        /* Record that the journal thread is running */
        journal->j_task = current;
        wake_up(&journal->j_wait_done_commit);
@@@ -347,7 -343,7 +345,7 @@@ repeat
                new_offset = offset_in_page(jh2bh(jh_in)->b_data);
        }
  
 -      mapped_data = kmap_atomic(new_page, KM_USER0);
 +      mapped_data = kmap_atomic(new_page);
        /*
         * Fire data frozen trigger if data already wasn't frozen.  Do this
         * before checking for escaping, as the trigger may modify the magic
                need_copy_out = 1;
                do_escape = 1;
        }
 -      kunmap_atomic(mapped_data, KM_USER0);
 +      kunmap_atomic(mapped_data);
  
        /*
         * Do we need to do a data copy?
                }
  
                jh_in->b_frozen_data = tmp;
 -              mapped_data = kmap_atomic(new_page, KM_USER0);
 +              mapped_data = kmap_atomic(new_page);
                memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
 -              kunmap_atomic(mapped_data, KM_USER0);
 +              kunmap_atomic(mapped_data);
  
                new_page = virt_to_page(tmp);
                new_offset = offset_in_page(tmp);
         * copying, we can finally do so.
         */
        if (do_escape) {
 -              mapped_data = kmap_atomic(new_page, KM_USER0);
 +              mapped_data = kmap_atomic(new_page);
                *((unsigned int *)(mapped_data + new_offset)) = 0;
 -              kunmap_atomic(mapped_data, KM_USER0);
 +              kunmap_atomic(mapped_data);
        }
  
        set_bh_page(new_bh, new_page, new_offset);
@@@ -746,6 -742,98 +744,98 @@@ struct journal_head *jbd2_journal_get_d
        return jbd2_journal_add_journal_head(bh);
  }
  
+ /*
+  * Return tid of the oldest transaction in the journal and block in the journal
+  * where the transaction starts.
+  *
+  * If the journal is now empty, return the ID of the next transaction we will
+  * write and the block where that transaction will start.
+  *
+  * The return value is 0 if journal tail cannot be pushed any further, 1 if
+  * it can.
+  */
+ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
+                             unsigned long *block)
+ {
+       transaction_t *transaction;
+       int ret;
+       read_lock(&journal->j_state_lock);
+       spin_lock(&journal->j_list_lock);
+       transaction = journal->j_checkpoint_transactions;
+       if (transaction) {
+               *tid = transaction->t_tid;
+               *block = transaction->t_log_start;
+       } else if ((transaction = journal->j_committing_transaction) != NULL) {
+               *tid = transaction->t_tid;
+               *block = transaction->t_log_start;
+       } else if ((transaction = journal->j_running_transaction) != NULL) {
+               *tid = transaction->t_tid;
+               *block = journal->j_head;
+       } else {
+               *tid = journal->j_transaction_sequence;
+               *block = journal->j_head;
+       }
+       ret = tid_gt(*tid, journal->j_tail_sequence);
+       spin_unlock(&journal->j_list_lock);
+       read_unlock(&journal->j_state_lock);
+       return ret;
+ }
+ /*
+  * Update information in journal structure and in on disk journal superblock
+  * about log tail. This function does not check whether information passed in
+  * really pushes log tail further. It's responsibility of the caller to make
+  * sure provided log tail information is valid (e.g. by holding
+  * j_checkpoint_mutex all the time between computing log tail and calling this
+  * function as is the case with jbd2_cleanup_journal_tail()).
+  *
+  * Requires j_checkpoint_mutex
+  */
+ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+ {
+       unsigned long freed;
+       BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
+       /*
+        * We cannot afford for write to remain in drive's caches since as
+        * soon as we update j_tail, next transaction can start reusing journal
+        * space and if we lose sb update during power failure we'd replay
+        * old transaction with possibly newly overwritten data.
+        */
+       jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+       write_lock(&journal->j_state_lock);
+       freed = block - journal->j_tail;
+       if (block < journal->j_tail)
+               freed += journal->j_last - journal->j_first;
+       trace_jbd2_update_log_tail(journal, tid, block, freed);
+       jbd_debug(1,
+                 "Cleaning journal tail from %d to %d (offset %lu), "
+                 "freeing %lu\n",
+                 journal->j_tail_sequence, tid, block, freed);
+       journal->j_free += freed;
+       journal->j_tail_sequence = tid;
+       journal->j_tail = block;
+       write_unlock(&journal->j_state_lock);
+ }
+ /*
+  * This is a variation of __jbd2_update_log_tail which checks for validity of
+  * provided log tail and locks j_checkpoint_mutex. So it is safe against races
+  * with other threads updating log tail.
+  */
+ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+ {
+       mutex_lock(&journal->j_checkpoint_mutex);
+       if (tid_gt(tid, journal->j_tail_sequence))
+               __jbd2_update_log_tail(journal, tid, block);
+       mutex_unlock(&journal->j_checkpoint_mutex);
+ }
  struct jbd2_stats_proc_session {
        journal_t *journal;
        struct transaction_stats_s *stats;
@@@ -1114,40 -1202,45 +1204,45 @@@ static int journal_reset(journal_t *jou
  
        journal->j_max_transaction_buffers = journal->j_maxlen / 4;
  
-       /* Add the dynamic fields and write it to disk. */
-       jbd2_journal_update_superblock(journal, 1);
-       return jbd2_journal_start_thread(journal);
- }
- /**
-  * void jbd2_journal_update_superblock() - Update journal sb on disk.
-  * @journal: The journal to update.
-  * @wait: Set to '0' if you don't want to wait for IO completion.
-  *
-  * Update a journal's dynamic superblock fields and write it to disk,
-  * optionally waiting for the IO to complete.
-  */
- void jbd2_journal_update_superblock(journal_t *journal, int wait)
- {
-       journal_superblock_t *sb = journal->j_superblock;
-       struct buffer_head *bh = journal->j_sb_buffer;
        /*
         * As a special case, if the on-disk copy is already marked as needing
-        * no recovery (s_start == 0) and there are no outstanding transactions
-        * in the filesystem, then we can safely defer the superblock update
-        * until the next commit by setting JBD2_FLUSHED.  This avoids
+        * no recovery (s_start == 0), then we can safely defer the superblock
+        * update until the next commit by setting JBD2_FLUSHED.  This avoids
         * attempting a write to a potential-readonly device.
         */
-       if (sb->s_start == 0 && journal->j_tail_sequence ==
-                               journal->j_transaction_sequence) {
+       if (sb->s_start == 0) {
                jbd_debug(1, "JBD2: Skipping superblock update on recovered sb "
                        "(start %ld, seq %d, errno %d)\n",
                        journal->j_tail, journal->j_tail_sequence,
                        journal->j_errno);
-               goto out;
+               journal->j_flags |= JBD2_FLUSHED;
+       } else {
+               /* Lock here to make assertions happy... */
+               mutex_lock(&journal->j_checkpoint_mutex);
+               /*
+                * Update log tail information. We use WRITE_FUA since new
+                * transaction will start reusing journal space and so we
+                * must make sure information about current log tail is on
+                * disk before that.
+                */
+               jbd2_journal_update_sb_log_tail(journal,
+                                               journal->j_tail_sequence,
+                                               journal->j_tail,
+                                               WRITE_FUA);
+               mutex_unlock(&journal->j_checkpoint_mutex);
        }
+       return jbd2_journal_start_thread(journal);
+ }
  
+ static void jbd2_write_superblock(journal_t *journal, int write_op)
+ {
+       struct buffer_head *bh = journal->j_sb_buffer;
+       int ret;
+       trace_jbd2_write_superblock(journal, write_op);
+       if (!(journal->j_flags & JBD2_BARRIER))
+               write_op &= ~(REQ_FUA | REQ_FLUSH);
+       lock_buffer(bh);
        if (buffer_write_io_error(bh)) {
                /*
                 * Oh, dear.  A previous attempt to write the journal
                clear_buffer_write_io_error(bh);
                set_buffer_uptodate(bh);
        }
+       get_bh(bh);
+       bh->b_end_io = end_buffer_write_sync;
+       ret = submit_bh(write_op, bh);
+       wait_on_buffer(bh);
+       if (buffer_write_io_error(bh)) {
+               clear_buffer_write_io_error(bh);
+               set_buffer_uptodate(bh);
+               ret = -EIO;
+       }
+       if (ret) {
+               printk(KERN_ERR "JBD2: Error %d detected when updating "
+                      "journal superblock for %s.\n", ret,
+                      journal->j_devname);
+       }
+ }
+ /**
+  * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
+  * @journal: The journal to update.
+  * @tail_tid: TID of the new transaction at the tail of the log
+  * @tail_block: The first block of the transaction at the tail of the log
+  * @write_op: With which operation should we write the journal sb
+  *
+  * Update a journal's superblock information about log tail and write it to
+  * disk, waiting for the IO to complete.
+  */
+ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+                                    unsigned long tail_block, int write_op)
+ {
+       journal_superblock_t *sb = journal->j_superblock;
+       BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
+       jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
+                 tail_block, tail_tid);
+       sb->s_sequence = cpu_to_be32(tail_tid);
+       sb->s_start    = cpu_to_be32(tail_block);
+       jbd2_write_superblock(journal, write_op);
+       /* Log is no longer empty */
+       write_lock(&journal->j_state_lock);
+       WARN_ON(!sb->s_sequence);
+       journal->j_flags &= ~JBD2_FLUSHED;
+       write_unlock(&journal->j_state_lock);
+ }
+ /**
+  * jbd2_mark_journal_empty() - Mark on disk journal as empty.
+  * @journal: The journal to update.
+  *
+  * Update a journal's dynamic superblock fields to show that journal is empty.
+  * Write updated superblock to disk waiting for IO to complete.
+  */
+ static void jbd2_mark_journal_empty(journal_t *journal)
+ {
+       journal_superblock_t *sb = journal->j_superblock;
  
+       BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
        read_lock(&journal->j_state_lock);
-       jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d, errno %d)\n",
-                 journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+       jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
+                 journal->j_tail_sequence);
  
        sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
-       sb->s_start    = cpu_to_be32(journal->j_tail);
-       sb->s_errno    = cpu_to_be32(journal->j_errno);
+       sb->s_start    = cpu_to_be32(0);
        read_unlock(&journal->j_state_lock);
  
-       BUFFER_TRACE(bh, "marking dirty");
-       mark_buffer_dirty(bh);
-       if (wait) {
-               sync_dirty_buffer(bh);
-               if (buffer_write_io_error(bh)) {
-                       printk(KERN_ERR "JBD2: I/O error detected "
-                              "when updating journal superblock for %s.\n",
-                              journal->j_devname);
-                       clear_buffer_write_io_error(bh);
-                       set_buffer_uptodate(bh);
-               }
-       } else
-               write_dirty_buffer(bh, WRITE);
- out:
-       /* If we have just flushed the log (by marking s_start==0), then
-        * any future commit will have to be careful to update the
-        * superblock again to re-record the true start of the log. */
+       jbd2_write_superblock(journal, WRITE_FUA);
  
+       /* Log is now empty */
        write_lock(&journal->j_state_lock);
-       if (sb->s_start)
-               journal->j_flags &= ~JBD2_FLUSHED;
-       else
-               journal->j_flags |= JBD2_FLUSHED;
+       journal->j_flags |= JBD2_FLUSHED;
        write_unlock(&journal->j_state_lock);
  }
  
+ /**
+  * jbd2_journal_update_sb_errno() - Update error in the journal.
+  * @journal: The journal to update.
+  *
+  * Update a journal's errno.  Write updated superblock to disk waiting for IO
+  * to complete.
+  */
+ static void jbd2_journal_update_sb_errno(journal_t *journal)
+ {
+       journal_superblock_t *sb = journal->j_superblock;
+       read_lock(&journal->j_state_lock);
+       jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
+                 journal->j_errno);
+       sb->s_errno    = cpu_to_be32(journal->j_errno);
+       read_unlock(&journal->j_state_lock);
+       jbd2_write_superblock(journal, WRITE_SYNC);
+ }
  /*
   * Read the superblock for a given journal, performing initial
   * validation of the format.
   */
  static int journal_get_superblock(journal_t *journal)
  {
        struct buffer_head *bh;
@@@ -1398,14 -1549,11 +1551,11 @@@ int jbd2_journal_destroy(journal_t *jou
  
        if (journal->j_sb_buffer) {
                if (!is_journal_aborted(journal)) {
-                       /* We can now mark the journal as empty. */
-                       journal->j_tail = 0;
-                       journal->j_tail_sequence =
-                               ++journal->j_transaction_sequence;
-                       jbd2_journal_update_superblock(journal, 1);
-               } else {
+                       mutex_lock(&journal->j_checkpoint_mutex);
+                       jbd2_mark_journal_empty(journal);
+                       mutex_unlock(&journal->j_checkpoint_mutex);
+               } else
                        err = -EIO;
-               }
                brelse(journal->j_sb_buffer);
        }
  
@@@ -1551,61 -1699,6 +1701,6 @@@ void jbd2_journal_clear_features(journa
  }
  EXPORT_SYMBOL(jbd2_journal_clear_features);
  
- /**
-  * int jbd2_journal_update_format () - Update on-disk journal structure.
-  * @journal: Journal to act on.
-  *
-  * Given an initialised but unloaded journal struct, poke about in the
-  * on-disk structure to update it to the most recent supported version.
-  */
- int jbd2_journal_update_format (journal_t *journal)
- {
-       journal_superblock_t *sb;
-       int err;
-       err = journal_get_superblock(journal);
-       if (err)
-               return err;
-       sb = journal->j_superblock;
-       switch (be32_to_cpu(sb->s_header.h_blocktype)) {
-       case JBD2_SUPERBLOCK_V2:
-               return 0;
-       case JBD2_SUPERBLOCK_V1:
-               return journal_convert_superblock_v1(journal, sb);
-       default:
-               break;
-       }
-       return -EINVAL;
- }
- static int journal_convert_superblock_v1(journal_t *journal,
-                                        journal_superblock_t *sb)
- {
-       int offset, blocksize;
-       struct buffer_head *bh;
-       printk(KERN_WARNING
-               "JBD2: Converting superblock from version 1 to 2.\n");
-       /* Pre-initialise new fields to zero */
-       offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb);
-       blocksize = be32_to_cpu(sb->s_blocksize);
-       memset(&sb->s_feature_compat, 0, blocksize-offset);
-       sb->s_nr_users = cpu_to_be32(1);
-       sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
-       journal->j_format_version = 2;
-       bh = journal->j_sb_buffer;
-       BUFFER_TRACE(bh, "marking dirty");
-       mark_buffer_dirty(bh);
-       sync_dirty_buffer(bh);
-       return 0;
- }
  /**
   * int jbd2_journal_flush () - Flush journal
   * @journal: Journal to act on.
@@@ -1619,7 -1712,6 +1714,6 @@@ int jbd2_journal_flush(journal_t *journ
  {
        int err = 0;
        transaction_t *transaction = NULL;
-       unsigned long old_tail;
  
        write_lock(&journal->j_state_lock);
  
        if (is_journal_aborted(journal))
                return -EIO;
  
+       mutex_lock(&journal->j_checkpoint_mutex);
        jbd2_cleanup_journal_tail(journal);
  
        /* Finally, mark the journal as really needing no recovery.
         * the magic code for a fully-recovered superblock.  Any future
         * commits of data to the journal will restore the current
         * s_start value. */
+       jbd2_mark_journal_empty(journal);
+       mutex_unlock(&journal->j_checkpoint_mutex);
        write_lock(&journal->j_state_lock);
-       old_tail = journal->j_tail;
-       journal->j_tail = 0;
-       write_unlock(&journal->j_state_lock);
-       jbd2_journal_update_superblock(journal, 1);
-       write_lock(&journal->j_state_lock);
-       journal->j_tail = old_tail;
        J_ASSERT(!journal->j_running_transaction);
        J_ASSERT(!journal->j_committing_transaction);
        J_ASSERT(!journal->j_checkpoint_transactions);
@@@ -1708,8 -1796,12 +1798,12 @@@ int jbd2_journal_wipe(journal_t *journa
                write ? "Clearing" : "Ignoring");
  
        err = jbd2_journal_skip_recovery(journal);
-       if (write)
-               jbd2_journal_update_superblock(journal, 1);
+       if (write) {
+               /* Lock to make assertions happy... */
+               mutex_lock(&journal->j_checkpoint_mutex);
+               jbd2_mark_journal_empty(journal);
+               mutex_unlock(&journal->j_checkpoint_mutex);
+       }
  
   no_recovery:
        return err;
@@@ -1759,7 -1851,7 +1853,7 @@@ static void __journal_abort_soft (journ
        __jbd2_journal_abort_hard(journal);
  
        if (errno)
-               jbd2_journal_update_superblock(journal, 1);
+               jbd2_journal_update_sb_errno(journal);
  }
  
  /**
@@@ -2017,7 -2109,7 +2111,7 @@@ static struct kmem_cache *jbd2_journal_
  static atomic_t nr_journal_heads = ATOMIC_INIT(0);
  #endif
  
- static int journal_init_jbd2_journal_head_cache(void)
+ static int jbd2_journal_init_journal_head_cache(void)
  {
        int retval;
  
        return retval;
  }
  
- static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
+ static void jbd2_journal_destroy_journal_head_cache(void)
  {
        if (jbd2_journal_head_cache) {
                kmem_cache_destroy(jbd2_journal_head_cache);
@@@ -2323,7 -2415,7 +2417,7 @@@ static void __exit jbd2_remove_jbd_stat
  
  struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;
  
- static int __init journal_init_handle_cache(void)
+ static int __init jbd2_journal_init_handle_cache(void)
  {
        jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
        if (jbd2_handle_cache == NULL) {
@@@ -2358,17 -2450,20 +2452,20 @@@ static int __init journal_init_caches(v
  
        ret = jbd2_journal_init_revoke_caches();
        if (ret == 0)
-               ret = journal_init_jbd2_journal_head_cache();
+               ret = jbd2_journal_init_journal_head_cache();
+       if (ret == 0)
+               ret = jbd2_journal_init_handle_cache();
        if (ret == 0)
-               ret = journal_init_handle_cache();
+               ret = jbd2_journal_init_transaction_cache();
        return ret;
  }
  
  static void jbd2_journal_destroy_caches(void)
  {
        jbd2_journal_destroy_revoke_caches();
-       jbd2_journal_destroy_jbd2_journal_head_cache();
+       jbd2_journal_destroy_journal_head_cache();
        jbd2_journal_destroy_handle_cache();
+       jbd2_journal_destroy_transaction_cache();
        jbd2_journal_destroy_slabs();
  }
  
diff --combined fs/jbd2/transaction.c
index e5aba56e1fd51cd01e001d090f572b3b5f1b41ed,fd052a88e9ecca635bce0cd723e0fab68841039d..ddcd3549c6c26cbc9cb9dd46831b189ed3c0441e
  static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
  static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
  
+ static struct kmem_cache *transaction_cache;
+ int __init jbd2_journal_init_transaction_cache(void)
+ {
+       J_ASSERT(!transaction_cache);
+       transaction_cache = kmem_cache_create("jbd2_transaction_s",
+                                       sizeof(transaction_t),
+                                       0,
+                                       SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
+                                       NULL);
+       if (transaction_cache)
+               return 0;
+       return -ENOMEM;
+ }
+ void jbd2_journal_destroy_transaction_cache(void)
+ {
+       if (transaction_cache) {
+               kmem_cache_destroy(transaction_cache);
+               transaction_cache = NULL;
+       }
+ }
+ void jbd2_journal_free_transaction(transaction_t *transaction)
+ {
+       if (unlikely(ZERO_OR_NULL_PTR(transaction)))
+               return;
+       kmem_cache_free(transaction_cache, transaction);
+ }
  /*
   * jbd2_get_transaction: obtain a new transaction_t object.
   *
@@@ -133,7 -162,8 +162,8 @@@ static int start_this_handle(journal_t 
  
  alloc_transaction:
        if (!journal->j_running_transaction) {
-               new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
+               new_transaction = kmem_cache_alloc(transaction_cache,
+                                                  gfp_mask | __GFP_ZERO);
                if (!new_transaction) {
                        /*
                         * If __GFP_FS is not present, then we may be
@@@ -162,7 -192,7 +192,7 @@@ repeat
        if (is_journal_aborted(journal) ||
            (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
                read_unlock(&journal->j_state_lock);
-               kfree(new_transaction);
+               jbd2_journal_free_transaction(new_transaction);
                return -EROFS;
        }
  
        read_unlock(&journal->j_state_lock);
  
        lock_map_acquire(&handle->h_lockdep_map);
-       kfree(new_transaction);
+       jbd2_journal_free_transaction(new_transaction);
        return 0;
  }
  
@@@ -783,12 -813,12 +813,12 @@@ done
                            "Possible IO failure.\n");
                page = jh2bh(jh)->b_page;
                offset = offset_in_page(jh2bh(jh)->b_data);
 -              source = kmap_atomic(page, KM_USER0);
 +              source = kmap_atomic(page);
                /* Fire data frozen trigger just before we copy the data */
                jbd2_buffer_frozen_trigger(jh, source + offset,
                                           jh->b_triggers);
                memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
 -              kunmap_atomic(source, KM_USER0);
 +              kunmap_atomic(source);
  
                /*
                 * Now that the frozen data is saved off, we need to store
@@@ -1549,9 -1579,9 +1579,9 @@@ __blist_del_buffer(struct journal_head 
   * of these pointers, it could go bad.  Generally the caller needs to re-read
   * the pointer from the transaction_t.
   *
-  * Called under j_list_lock.  The journal may not be locked.
+  * Called under j_list_lock.
   */
- void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
+ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
  {
        struct journal_head **list = NULL;
        transaction_t *transaction;
@@@ -1646,10 -1676,8 +1676,8 @@@ __journal_try_to_free_buffer(journal_t 
        spin_lock(&journal->j_list_lock);
        if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
                /* written-back checkpointed metadata buffer */
-               if (jh->b_jlist == BJ_None) {
-                       JBUFFER_TRACE(jh, "remove from checkpoint list");
-                       __jbd2_journal_remove_checkpoint(jh);
-               }
+               JBUFFER_TRACE(jh, "remove from checkpoint list");
+               __jbd2_journal_remove_checkpoint(jh);
        }
        spin_unlock(&journal->j_list_lock);
  out:
@@@ -1949,6 -1977,8 +1977,8 @@@ zap_buffer_unlocked
        clear_buffer_mapped(bh);
        clear_buffer_req(bh);
        clear_buffer_new(bh);
+       clear_buffer_delay(bh);
+       clear_buffer_unwritten(bh);
        bh->b_bdev = NULL;
        return may_free;
  }
diff --combined include/linux/fs.h
index fa63f1b46103eceba8a380851c30500f30978c65,526072c073f7540b562278b9408f80113142feb6..c437f914d537746d5c856d506ef5ee6b4e520f0b
@@@ -389,7 -389,6 +389,7 @@@ struct inodes_stat_t 
  #include <linux/prio_tree.h>
  #include <linux/init.h>
  #include <linux/pid.h>
 +#include <linux/bug.h>
  #include <linux/mutex.h>
  #include <linux/capability.h>
  #include <linux/semaphore.h>
@@@ -1460,7 -1459,6 +1460,7 @@@ struct super_block 
        u8 s_uuid[16];                          /* UUID */
  
        void                    *s_fs_info;     /* Filesystem private info */
 +      unsigned int            s_max_links;
        fmode_t                 s_mode;
  
        /* Granularity of c/m/atime in ns.
@@@ -1813,11 -1811,11 +1813,11 @@@ static inline void inode_inc_iversion(s
         spin_unlock(&inode->i_lock);
  }
  
 -extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry);
 +extern void touch_atime(struct path *);
  static inline void file_accessed(struct file *file)
  {
        if (!(file->f_flags & O_NOATIME))
 -              touch_atime(file->f_path.mnt, file->f_path.dentry);
 +              touch_atime(&file->f_path);
  }
  
  int sync_inode(struct inode *inode, struct writeback_control *wbc);
@@@ -1872,19 -1870,6 +1872,6 @@@ extern struct dentry *mount_pseudo(stru
        const struct dentry_operations *dops,
        unsigned long);
  
- static inline void sb_mark_dirty(struct super_block *sb)
- {
-       sb->s_dirt = 1;
- }
- static inline void sb_mark_clean(struct super_block *sb)
- {
-       sb->s_dirt = 0;
- }
- static inline int sb_is_dirty(struct super_block *sb)
- {
-       return sb->s_dirt;
- }
  /* Alas, no aliases. Too much hassle with bringing module.h everywhere */
  #define fops_get(fops) \
        (((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
@@@ -2306,10 -2291,7 +2293,10 @@@ extern struct inode * igrab(struct inod
  extern ino_t iunique(struct super_block *, ino_t);
  extern int inode_needs_sync(struct inode *inode);
  extern int generic_delete_inode(struct inode *inode);
 -extern int generic_drop_inode(struct inode *inode);
 +static inline int generic_drop_inode(struct inode *inode)
 +{
 +      return !inode->i_nlink || inode_unhashed(inode);
 +}
  
  extern struct inode *ilookup5_nowait(struct super_block *sb,
                unsigned long hashval, int (*test)(struct inode *, void *),
@@@ -2501,7 -2483,6 +2488,7 @@@ extern void get_filesystem(struct file_
  extern void put_filesystem(struct file_system_type *fs);
  extern struct file_system_type *get_fs_type(const char *name);
  extern struct super_block *get_super(struct block_device *);
 +extern struct super_block *get_super_thawed(struct block_device *);
  extern struct super_block *get_active_super(struct block_device *bdev);
  extern void drop_super(struct super_block *sb);
  extern void iterate_supers(void (*)(struct super_block *, void *), void *);
diff --combined mm/page-writeback.c
index 3fc261705b1e068c28c72a329f31ee431ae3fdc9,5e39858880f30a892a7a3a637768965c4759081c..26adea8ca2e7dd9ebdbc9c0938498a1a8a56324d
@@@ -95,6 -95,8 +95,8 @@@ unsigned long vm_dirty_bytes
   */
  unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
  
+ EXPORT_SYMBOL_GPL(dirty_writeback_interval);
  /*
   * The longest time for which data is allowed to remain dirty
   */
@@@ -1472,7 -1474,6 +1474,7 @@@ void throttle_vm_writeout(gfp_t gfp_mas
  
          for ( ; ; ) {
                global_dirty_limits(&background_thresh, &dirty_thresh);
 +              dirty_thresh = hard_dirty_limit(dirty_thresh);
  
                  /*
                   * Boost the allowable dirty threshold a bit for page