Merge branch 'akpm' (Andrew's patch-bomb)
author    Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 22 Mar 2012 16:04:48 +0000 (09:04 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Thu, 22 Mar 2012 16:04:48 +0000 (09:04 -0700)
Merge first batch of patches from Andrew Morton:
 "A few misc things and all the MM queue"

* emailed from Andrew Morton <akpm@linux-foundation.org>: (92 commits)
  memcg: avoid THP split in task migration
  thp: add HPAGE_PMD_* definitions for !CONFIG_TRANSPARENT_HUGEPAGE
  memcg: clean up existing move charge code
  mm/memcontrol.c: remove unnecessary 'break' in mem_cgroup_read()
  mm/memcontrol.c: remove redundant BUG_ON() in mem_cgroup_usage_unregister_event()
  mm/memcontrol.c: s/stealed/stolen/
  memcg: fix performance of mem_cgroup_begin_update_page_stat()
  memcg: remove PCG_FILE_MAPPED
  memcg: use new logic for page stat accounting
  memcg: remove PCG_MOVE_LOCK flag from page_cgroup
  memcg: simplify move_account() check
  memcg: remove EXPORT_SYMBOL(mem_cgroup_update_page_stat)
  memcg: kill dead prev_priority stubs
  memcg: remove PCG_CACHE page_cgroup flag
  memcg: let css_get_next() rely upon rcu_read_lock()
  cgroup: revert ss_id_lock to spinlock
  idr: make idr_get_next() good for rcu_read_lock()
  memcg: remove unnecessary thp check in page stat accounting
  memcg: remove redundant returns
  memcg: enum lru_list lru
  ...

12 files changed:
fs/exec.c
fs/hugetlbfs/inode.c
fs/namei.c
include/linux/mm.h
kernel/cgroup.c
kernel/exit.c
kernel/fork.c
mm/memory.c
mm/mmap.c
mm/mprotect.c
mm/shmem.c
mm/swapfile.c

diff --combined fs/exec.c
index 0b931471d4f439d444987cb2ef81c203c8ff4f8f,6ed164d20d7da8c0f2b87034b9f9307f9a7e4e0a..23559c227d9cb4f9480855590df611999e852dd4
+++ b/fs/exec.c
@@@ -81,13 -81,15 +81,13 @@@ static atomic_t call_count = ATOMIC_INI
  static LIST_HEAD(formats);
  static DEFINE_RWLOCK(binfmt_lock);
  
 -int __register_binfmt(struct linux_binfmt * fmt, int insert)
 +void __register_binfmt(struct linux_binfmt * fmt, int insert)
  {
 -      if (!fmt)
 -              return -EINVAL;
 +      BUG_ON(!fmt);
        write_lock(&binfmt_lock);
        insert ? list_add(&fmt->lh, &formats) :
                 list_add_tail(&fmt->lh, &formats);
        write_unlock(&binfmt_lock);
 -      return 0;       
  }
  
  EXPORT_SYMBOL(__register_binfmt);
@@@ -822,7 -824,7 +822,7 @@@ static int exec_mmap(struct mm_struct *
        /* Notify parent that we're no longer interested in the old VM */
        tsk = current;
        old_mm = current->mm;
-       sync_mm_rss(tsk, old_mm);
+       sync_mm_rss(old_mm);
        mm_release(tsk, old_mm);
  
        if (old_mm) {
@@@ -1113,7 -1115,7 +1113,7 @@@ int flush_old_exec(struct linux_binprm 
        bprm->mm = NULL;                /* We're using it now */
  
        set_fs(USER_DS);
 -      current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
 +      current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
        flush_thread();
        current->personality &= ~bprm->per_clear;
  
diff --combined fs/hugetlbfs/inode.c
index 81932fa1861ae1a70e51dc105af411093afbe72e,269163324b73c1811ba6f5e94f7aeed7f82a5755..ea251749d9d5982e6fc35ecf6d21ab61547962e8
@@@ -41,6 -41,25 +41,25 @@@ const struct file_operations hugetlbfs_
  static const struct inode_operations hugetlbfs_dir_inode_operations;
  static const struct inode_operations hugetlbfs_inode_operations;
  
+ struct hugetlbfs_config {
+       uid_t   uid;
+       gid_t   gid;
+       umode_t mode;
+       long    nr_blocks;
+       long    nr_inodes;
+       struct hstate *hstate;
+ };
+ struct hugetlbfs_inode_info {
+       struct shared_policy policy;
+       struct inode vfs_inode;
+ };
+ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
+ {
+       return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
+ }
  static struct backing_dev_info hugetlbfs_backing_dev_info = {
        .name           = "hugetlbfs",
        .ra_pages       = 0,    /* No readahead */
@@@ -154,10 -173,12 +173,12 @@@ hugetlb_get_unmapped_area(struct file *
                        return addr;
        }
  
-       start_addr = mm->free_area_cache;
-       if (len <= mm->cached_hole_size)
+       if (len > mm->cached_hole_size)
+               start_addr = mm->free_area_cache;
+       else {
                start_addr = TASK_UNMAPPED_BASE;
+               mm->cached_hole_size = 0;
+       }
  
  full_search:
        addr = ALIGN(start_addr, huge_page_size(h));
                         */
                        if (start_addr != TASK_UNMAPPED_BASE) {
                                start_addr = TASK_UNMAPPED_BASE;
+                               mm->cached_hole_size = 0;
                                goto full_search;
                        }
                        return -ENOMEM;
                }
  
-               if (!vma || addr + len <= vma->vm_start)
+               if (!vma || addr + len <= vma->vm_start) {
+                       mm->free_area_cache = addr + len;
                        return addr;
+               }
+               if (addr + mm->cached_hole_size < vma->vm_start)
+                       mm->cached_hole_size = vma->vm_start - addr;
                addr = ALIGN(vma->vm_end, huge_page_size(h));
        }
  }
@@@ -238,17 -264,10 +264,10 @@@ static ssize_t hugetlbfs_read(struct fi
        loff_t isize;
        ssize_t retval = 0;
  
-       mutex_lock(&inode->i_mutex);
        /* validate length */
        if (len == 0)
                goto out;
  
-       isize = i_size_read(inode);
-       if (!isize)
-               goto out;
-       end_index = (isize - 1) >> huge_page_shift(h);
        for (;;) {
                struct page *page;
                unsigned long nr, ret;
  
                /* nr is the maximum number of bytes to copy from this page */
                nr = huge_page_size(h);
+               isize = i_size_read(inode);
+               if (!isize)
+                       goto out;
+               end_index = (isize - 1) >> huge_page_shift(h);
                if (index >= end_index) {
                        if (index > end_index)
                                goto out;
                        nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
-                       if (nr <= offset) {
+                       if (nr <= offset)
                                goto out;
-                       }
                }
                nr = nr - offset;
  
                /* Find the page */
-               page = find_get_page(mapping, index);
+               page = find_lock_page(mapping, index);
                if (unlikely(page == NULL)) {
                        /*
                         * We have a HOLE, zero out the user-buffer for the
                        else
                                ra = 0;
                } else {
+                       unlock_page(page);
                        /*
                         * We have the page, copy it to user space buffer.
                         */
                        ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
                        ret = ra;
+                       page_cache_release(page);
                }
                if (ra < 0) {
                        if (retval == 0)
                                retval = ra;
-                       if (page)
-                               page_cache_release(page);
                        goto out;
                }
  
                index += offset >> huge_page_shift(h);
                offset &= ~huge_page_mask(h);
  
-               if (page)
-                       page_cache_release(page);
                /* short read or no more work */
                if ((ret != nr) || (len == 0))
                        break;
        }
  out:
        *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
-       mutex_unlock(&inode->i_mutex);
        return retval;
  }
  
@@@ -607,9 -626,15 +626,15 @@@ static int hugetlbfs_statfs(struct dent
                spin_lock(&sbinfo->stat_lock);
                /* If no limits set, just report 0 for max/free/used
                 * blocks, like simple_statfs() */
-               if (sbinfo->max_blocks >= 0) {
-                       buf->f_blocks = sbinfo->max_blocks;
-                       buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+               if (sbinfo->spool) {
+                       long free_pages;
+                       spin_lock(&sbinfo->spool->lock);
+                       buf->f_blocks = sbinfo->spool->max_hpages;
+                       free_pages = sbinfo->spool->max_hpages
+                               - sbinfo->spool->used_hpages;
+                       buf->f_bavail = buf->f_bfree = free_pages;
+                       spin_unlock(&sbinfo->spool->lock);
                        buf->f_files = sbinfo->max_inodes;
                        buf->f_ffree = sbinfo->free_inodes;
                }
@@@ -625,6 -650,10 +650,10 @@@ static void hugetlbfs_put_super(struct 
  
        if (sbi) {
                sb->s_fs_info = NULL;
+               if (sbi->spool)
+                       hugepage_put_subpool(sbi->spool);
                kfree(sbi);
        }
  }
@@@ -831,6 -860,8 +860,6 @@@ bad_val
  static int
  hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
  {
 -      struct inode * inode;
 -      struct dentry * root;
        int ret;
        struct hugetlbfs_config config;
        struct hugetlbfs_sb_info *sbinfo;
        sb->s_fs_info = sbinfo;
        sbinfo->hstate = config.hstate;
        spin_lock_init(&sbinfo->stat_lock);
-       sbinfo->max_blocks = config.nr_blocks;
-       sbinfo->free_blocks = config.nr_blocks;
        sbinfo->max_inodes = config.nr_inodes;
        sbinfo->free_inodes = config.nr_inodes;
+       sbinfo->spool = NULL;
+       if (config.nr_blocks != -1) {
+               sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
+               if (!sbinfo->spool)
+                       goto out_free;
+       }
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        sb->s_blocksize = huge_page_size(config.hstate);
        sb->s_blocksize_bits = huge_page_shift(config.hstate);
        sb->s_magic = HUGETLBFS_MAGIC;
        sb->s_op = &hugetlbfs_ops;
        sb->s_time_gran = 1;
 -      inode = hugetlbfs_get_root(sb, &config);
 -      if (!inode)
 +      sb->s_root = d_make_root(hugetlbfs_get_root(sb, &config));
 +      if (!sb->s_root)
                goto out_free;
 -
 -      root = d_alloc_root(inode);
 -      if (!root) {
 -              iput(inode);
 -              goto out_free;
 -      }
 -      sb->s_root = root;
        return 0;
  out_free:
+       if (sbinfo->spool)
+               kfree(sbinfo->spool);
        kfree(sbinfo);
        return -ENOMEM;
  }
  
- int hugetlb_get_quota(struct address_space *mapping, long delta)
- {
-       int ret = 0;
-       struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-       if (sbinfo->free_blocks > -1) {
-               spin_lock(&sbinfo->stat_lock);
-               if (sbinfo->free_blocks - delta >= 0)
-                       sbinfo->free_blocks -= delta;
-               else
-                       ret = -ENOMEM;
-               spin_unlock(&sbinfo->stat_lock);
-       }
-       return ret;
- }
- void hugetlb_put_quota(struct address_space *mapping, long delta)
- {
-       struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
-       if (sbinfo->free_blocks > -1) {
-               spin_lock(&sbinfo->stat_lock);
-               sbinfo->free_blocks += delta;
-               spin_unlock(&sbinfo->stat_lock);
-       }
- }
  static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
  {
@@@ -919,8 -935,8 +926,8 @@@ static int can_do_hugetlb_shm(void
        return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
  }
  
- struct file *hugetlb_file_setup(const char *name, size_t size,
-                               vm_flags_t acctflag,
+ struct file *hugetlb_file_setup(const char *name, unsigned long addr,
+                               size_t size, vm_flags_t acctflag,
                                struct user_struct **user, int creat_flags)
  {
        int error = -ENOMEM;
        struct path path;
        struct dentry *root;
        struct qstr quick_string;
+       struct hstate *hstate;
+       unsigned long num_pages;
  
        *user = NULL;
        if (!hugetlbfs_vfsmount)
        if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
                *user = current_user();
                if (user_shm_lock(size, *user)) {
-                       printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n");
+                       task_lock(current);
+                       printk_once(KERN_WARNING
+                               "%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
+                               current->comm, current->pid);
+                       task_unlock(current);
                } else {
                        *user = NULL;
                        return ERR_PTR(-EPERM);
        if (!inode)
                goto out_dentry;
  
+       hstate = hstate_inode(inode);
+       size += addr & ~huge_page_mask(hstate);
+       num_pages = ALIGN(size, huge_page_size(hstate)) >>
+                       huge_page_shift(hstate);
        error = -ENOMEM;
-       if (hugetlb_reserve_pages(inode, 0,
-                       size >> huge_page_shift(hstate_inode(inode)), NULL,
-                       acctflag))
+       if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag))
                goto out_inode;
  
        d_instantiate(path.dentry, inode);
@@@ -997,6 -1021,7 +1012,7 @@@ static int __init init_hugetlbfs_fs(voi
        if (error)
                return error;
  
+       error = -ENOMEM;
        hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
                                        sizeof(struct hugetlbfs_inode_info),
                                        0, 0, init_once);
        }
  
        error = PTR_ERR(vfsmount);
+       unregister_filesystem(&hugetlbfs_fs_type);
  
   out:
-       if (error)
-               kmem_cache_destroy(hugetlbfs_inode_cachep);
+       kmem_cache_destroy(hugetlbfs_inode_cachep);
   out2:
        bdi_destroy(&hugetlbfs_backing_dev_info);
        return error;
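
For reference, a minimal sketch of the subpool accounting that replaces the hugetlb_get_quota()/hugetlb_put_quota() pair removed above. The field names (lock, max_hpages, used_hpages) follow what the statfs hunk dereferences; the real hugepage_new_subpool()/hugepage_put_subpool() live in mm/hugetlb.c and are not part of this diff, and subpool_get_pages() is a hypothetical helper name used only for illustration.

/*
 * Userspace stand-in: a mutex replaces the kernel spinlock and error
 * handling is reduced to a -1 return.
 */
#include <stdlib.h>
#include <pthread.h>

struct hugepage_subpool {
	pthread_mutex_t lock;
	long max_hpages;	/* configured page limit */
	long used_hpages;	/* pages currently accounted */
};

static struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
{
	struct hugepage_subpool *spool = calloc(1, sizeof(*spool));

	if (!spool)
		return NULL;
	pthread_mutex_init(&spool->lock, NULL);
	spool->max_hpages = nr_blocks;
	return spool;
}

/* Returns 0 if @delta more pages fit under the limit, -1 otherwise. */
static int subpool_get_pages(struct hugepage_subpool *spool, long delta)
{
	int ret = 0;

	pthread_mutex_lock(&spool->lock);
	if (spool->used_hpages + delta <= spool->max_hpages)
		spool->used_hpages += delta;
	else
		ret = -1;
	pthread_mutex_unlock(&spool->lock);
	return ret;
}

The per-superblock free_blocks counter and its stat_lock accounting go away; only filesystems mounted with an explicit block limit get a subpool at all, matching the nr_blocks != -1 check in hugetlbfs_fill_super() above.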
diff --combined fs/namei.c
index 13e6a1f191a900aecb6fdaef499a955e17c92a9d,561db47ae0414318a8c5f88b48dfeda757a29c30..a94a7f9a03eaf9d543fb32620c9a172fdb0c60ec
@@@ -642,7 -642,7 +642,7 @@@ follow_link(struct path *link, struct n
        cond_resched();
        current->total_link_count++;
  
 -      touch_atime(link->mnt, dentry);
 +      touch_atime(link);
        nd_set_link(nd, NULL);
  
        error = security_inode_follow_link(link->dentry, nd);
@@@ -1455,9 -1455,15 +1455,15 @@@ done
  }
  EXPORT_SYMBOL(full_name_hash);
  
+ #ifdef CONFIG_64BIT
  #define ONEBYTES      0x0101010101010101ul
  #define SLASHBYTES    0x2f2f2f2f2f2f2f2ful
  #define HIGHBITS      0x8080808080808080ul
+ #else
+ #define ONEBYTES      0x01010101ul
+ #define SLASHBYTES    0x2f2f2f2ful
+ #define HIGHBITS      0x80808080ul
+ #endif
  
  /* Return the high bit set in the first byte that is a zero */
  static inline unsigned long has_zero(unsigned long a)
@@@ -2691,7 -2697,6 +2697,7 @@@ SYSCALL_DEFINE3(mknod, const char __use
  int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
        int error = may_create(dir, dentry);
 +      unsigned max_links = dir->i_sb->s_max_links;
  
        if (error)
                return error;
        if (error)
                return error;
  
 +      if (max_links && dir->i_nlink >= max_links)
 +              return -EMLINK;
 +
        error = dir->i_op->mkdir(dir, dentry, mode);
        if (!error)
                fsnotify_mkdir(dir, dentry);
@@@ -3037,7 -3039,6 +3043,7 @@@ SYSCALL_DEFINE2(symlink, const char __u
  int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
  {
        struct inode *inode = old_dentry->d_inode;
 +      unsigned max_links = dir->i_sb->s_max_links;
        int error;
  
        if (!inode)
        /* Make sure we don't allow creating hardlink to an unlinked file */
        if (inode->i_nlink == 0)
                error =  -ENOENT;
 +      else if (max_links && inode->i_nlink >= max_links)
 +              error = -EMLINK;
        else
                error = dir->i_op->link(old_dentry, dir, new_dentry);
        mutex_unlock(&inode->i_mutex);
@@@ -3179,7 -3178,6 +3185,7 @@@ static int vfs_rename_dir(struct inode 
  {
        int error = 0;
        struct inode *target = new_dentry->d_inode;
 +      unsigned max_links = new_dir->i_sb->s_max_links;
  
        /*
         * If we are going to change the parent - check write permissions,
        if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
                goto out;
  
 +      error = -EMLINK;
 +      if (max_links && !target && new_dir != old_dir &&
 +          new_dir->i_nlink >= max_links)
 +              goto out;
 +
        if (target)
                shrink_dcache_parent(new_dentry);
        error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
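
The ONEBYTES/SLASHBYTES/HIGHBITS constants above now come in 32-bit and 64-bit flavours so the word-at-a-time path-component hashing also works when CONFIG_64BIT is not set. The has_zero() body is not shown in this hunk, so the sketch below uses the standard (a - ONEBYTES) & ~a & HIGHBITS formulation with 32-bit types purely as an illustration of the trick these constants support.

#include <stdint.h>
#include <stdio.h>

#define ONEBYTES 0x01010101u
#define HIGHBITS 0x80808080u

/*
 * Non-zero iff @a contains a zero byte; the lowest such byte always has
 * its high bit set in the result (higher bytes may be flagged spuriously,
 * which is harmless when only the first zero matters).
 */
static uint32_t has_zero(uint32_t a)
{
	return (a - ONEBYTES) & ~a & HIGHBITS;
}

int main(void)
{
	printf("%#x\n", (unsigned)has_zero(0x41420043));	/* 0x8000: one zero byte   */
	printf("%#x\n", (unsigned)has_zero(0x41424344));	/* 0: no zero byte present */
	return 0;
}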
diff --combined include/linux/mm.h
index b5bb54d6d667dbdd333aab7f92f1c819fe81fca8,b1c8318e32b8d4495c4a71f72a9348468c92d48c..ee67e326b6f8bc277ea5b9a6972077491f2b688c
@@@ -893,9 -893,9 +893,9 @@@ struct page *vm_normal_page(struct vm_a
  
  int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
                unsigned long size);
 -unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 +void zap_page_range(struct vm_area_struct *vma, unsigned long address,
                unsigned long size, struct zap_details *);
 -unsigned long unmap_vmas(struct mmu_gather *tlb,
 +void unmap_vmas(struct mmu_gather *tlb,
                struct vm_area_struct *start_vma, unsigned long start_addr,
                unsigned long end_addr, unsigned long *nr_accounted,
                struct zap_details *);
@@@ -1040,6 -1040,9 +1040,9 @@@ static inline int stack_guard_page_end(
                !vma_growsup(vma->vm_next, addr);
  }
  
+ extern pid_t
+ vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);
  extern unsigned long move_page_tables(struct vm_area_struct *vma,
                unsigned long old_addr, struct vm_area_struct *new_vma,
                unsigned long new_addr, unsigned long len);
@@@ -1058,19 -1061,20 +1061,20 @@@ int __get_user_pages_fast(unsigned lon
  /*
   * per-process(per-mm_struct) statistics.
   */
- static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
- {
-       atomic_long_set(&mm->rss_stat.count[member], value);
- }
- #if defined(SPLIT_RSS_COUNTING)
- unsigned long get_mm_counter(struct mm_struct *mm, int member);
- #else
  static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
  {
-       return atomic_long_read(&mm->rss_stat.count[member]);
- }
+       long val = atomic_long_read(&mm->rss_stat.count[member]);
+ #ifdef SPLIT_RSS_COUNTING
+       /*
+        * counter is updated in asynchronous manner and may go to minus.
+        * But it's never be expected number for users.
+        */
+       if (val < 0)
+               val = 0;
  #endif
+       return (unsigned long)val;
+ }
  
  static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
  {
@@@ -1127,9 -1131,9 +1131,9 @@@ static inline void setmax_mm_hiwater_rs
  }
  
  #if defined(SPLIT_RSS_COUNTING)
- void sync_mm_rss(struct task_struct *task, struct mm_struct *mm);
+ void sync_mm_rss(struct mm_struct *mm);
  #else
- static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+ static inline void sync_mm_rss(struct mm_struct *mm)
  {
  }
  #endif
@@@ -1291,8 -1295,6 +1295,6 @@@ extern void get_pfn_range_for_nid(unsig
  extern unsigned long find_min_pfn_with_active_regions(void);
  extern void free_bootmem_with_active_regions(int nid,
                                                unsigned long max_low_pfn);
- int add_from_early_node_map(struct range *range, int az,
-                                  int nr_range, int nid);
  extern void sparse_memory_present_with_active_regions(int nid);
  
  #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
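
Several hunks in this merge (fs/exec.c, kernel/exit.c, mm/memory.c and the get_mm_counter() change above) revolve around SPLIT_RSS_COUNTING: each thread batches RSS deltas privately and folds them into the shared mm counters only every TASK_RSS_EVENTS_THRESH events, which is why a reader can transiently observe a negative total and get_mm_counter() clamps it to zero. Below is a simplified userspace model of that scheme; the struct names and the use of C11 atomics are stand-ins, not the kernel's types.

#include <stdatomic.h>

#define NR_MM_COUNTERS		3
#define TASK_RSS_EVENTS_THRESH	64

struct mm_counters {
	atomic_long counter[NR_MM_COUNTERS];	/* shared, like mm->rss_stat */
};

struct task_rss {
	long count[NR_MM_COUNTERS];		/* per-thread pending deltas */
	int events;
};

/* Fold this thread's pending deltas into the shared counters. */
static void sync_mm_rss(struct mm_counters *mm, struct task_rss *t)
{
	for (int i = 0; i < NR_MM_COUNTERS; i++) {
		if (t->count[i]) {
			atomic_fetch_add(&mm->counter[i], t->count[i]);
			t->count[i] = 0;
		}
	}
	t->events = 0;
}

/* Record a delta locally, syncing once enough events have piled up. */
static void add_mm_counter_fast(struct mm_counters *mm, struct task_rss *t,
				int member, long val)
{
	t->count[member] += val;
	if (++t->events > TASK_RSS_EVENTS_THRESH)
		sync_mm_rss(mm, t);
}

/* Readers clamp: asynchronous folding means the sum can briefly go negative. */
static unsigned long get_mm_counter(struct mm_counters *mm, int member)
{
	long val = atomic_load(&mm->counter[member]);

	return val < 0 ? 0 : (unsigned long)val;
}

In the kernel the per-thread side lives in task_struct.rss_stat and the fold happens in sync_mm_rss(), which this series changes to take only the mm, as the mm/memory.c hunk below shows.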
diff --combined kernel/cgroup.c
index 1ece8e20fdb50b4109bf1d99a1e465808863568a,391d5e991e5f539c99a081799957f63f0b39124f..f4ea4b6f3cf1eae8725ab6107f20bab58324f103
@@@ -1472,6 -1472,7 +1472,6 @@@ static int cgroup_get_rootdir(struct su
  
        struct inode *inode =
                cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
 -      struct dentry *dentry;
  
        if (!inode)
                return -ENOMEM;
        inode->i_op = &cgroup_dir_inode_operations;
        /* directories start off with i_nlink == 2 (for "." entry) */
        inc_nlink(inode);
 -      dentry = d_alloc_root(inode);
 -      if (!dentry) {
 -              iput(inode);
 +      sb->s_root = d_make_root(inode);
 +      if (!sb->s_root)
                return -ENOMEM;
 -      }
 -      sb->s_root = dentry;
        /* for everything else we want ->d_op set */
        sb->s_d_op = &cgroup_dops;
        return 0;
@@@ -4881,9 -4885,9 +4881,9 @@@ void free_css_id(struct cgroup_subsys *
  
        rcu_assign_pointer(id->css, NULL);
        rcu_assign_pointer(css->id, NULL);
-       write_lock(&ss->id_lock);
+       spin_lock(&ss->id_lock);
        idr_remove(&ss->idr, id->id);
-       write_unlock(&ss->id_lock);
+       spin_unlock(&ss->id_lock);
        kfree_rcu(id, rcu_head);
  }
  EXPORT_SYMBOL_GPL(free_css_id);
@@@ -4909,10 -4913,10 +4909,10 @@@ static struct css_id *get_new_cssid(str
                error = -ENOMEM;
                goto err_out;
        }
-       write_lock(&ss->id_lock);
+       spin_lock(&ss->id_lock);
        /* Don't use 0. allocates an ID of 1-65535 */
        error = idr_get_new_above(&ss->idr, newid, 1, &myid);
-       write_unlock(&ss->id_lock);
+       spin_unlock(&ss->id_lock);
  
        /* Returns error when there are no free spaces for new ID.*/
        if (error) {
        return newid;
  remove_idr:
        error = -ENOSPC;
-       write_lock(&ss->id_lock);
+       spin_lock(&ss->id_lock);
        idr_remove(&ss->idr, myid);
-       write_unlock(&ss->id_lock);
+       spin_unlock(&ss->id_lock);
  err_out:
        kfree(newid);
        return ERR_PTR(error);
@@@ -4941,7 -4945,7 +4941,7 @@@ static int __init_or_module cgroup_init
  {
        struct css_id *newid;
  
-       rwlock_init(&ss->id_lock);
+       spin_lock_init(&ss->id_lock);
        idr_init(&ss->idr);
  
        newid = get_new_cssid(ss, 0);
@@@ -5029,6 -5033,8 +5029,8 @@@ css_get_next(struct cgroup_subsys *ss, 
                return NULL;
  
        BUG_ON(!ss->use_id);
+       WARN_ON_ONCE(!rcu_read_lock_held());
        /* fill start point for scan */
        tmpid = id;
        while (1) {
                 * scan next entry from bitmap(tree), tmpid is updated after
                 * idr_get_next().
                 */
-               read_lock(&ss->id_lock);
                tmp = idr_get_next(&ss->idr, &tmpid);
-               read_unlock(&ss->id_lock);
                if (!tmp)
                        break;
                if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
diff --combined kernel/exit.c
index 7ad335c3045a15e4e3d28757d27a8103168a2cdc,d26acd3c1e2ef9ac91b4a52bc906b29b80921eb5..16b07bfac224d3f13960b09502fbecfba8897db0
@@@ -52,7 -52,6 +52,7 @@@
  #include <linux/hw_breakpoint.h>
  #include <linux/oom.h>
  #include <linux/writeback.h>
 +#include <linux/shm.h>
  
  #include <asm/uaccess.h>
  #include <asm/unistd.h>
@@@ -935,7 -934,7 +935,7 @@@ void do_exit(long code
        acct_update_integrals(tsk);
        /* sync mm's RSS info before statistics gathering */
        if (tsk->mm)
-               sync_mm_rss(tsk, tsk->mm);
+               sync_mm_rss(tsk->mm);
        group_dead = atomic_dec_and_test(&tsk->signal->live);
        if (group_dead) {
                hrtimer_cancel(&tsk->signal->real_timer);
diff --combined kernel/fork.c
index 26a7138bb849340e76a5cd8f73ed9d9bc84e01e1,9cc227d54102256c61f4b5c85bbc1e9d3ba24e04..37674ec55cde19d12e3deda5845ecb1b8b4d376c
@@@ -193,7 -193,6 +193,7 @@@ void __put_task_struct(struct task_stru
        WARN_ON(atomic_read(&tsk->usage));
        WARN_ON(tsk == current);
  
 +      security_task_free(tsk);
        exit_creds(tsk);
        delayacct_tsk_free(tsk);
        put_signal_struct(tsk->signal);
@@@ -356,7 -355,7 +356,7 @@@ static int dup_mmap(struct mm_struct *m
                charge = 0;
                if (mpnt->vm_flags & VM_ACCOUNT) {
                        unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 -                      if (security_vm_enough_memory(len))
 +                      if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
                                goto fail_nomem;
                        charge = len;
                }
@@@ -512,6 -511,23 +512,23 @@@ static struct mm_struct *mm_init(struc
        return NULL;
  }
  
+ static void check_mm(struct mm_struct *mm)
+ {
+       int i;
+       for (i = 0; i < NR_MM_COUNTERS; i++) {
+               long x = atomic_long_read(&mm->rss_stat.count[i]);
+               if (unlikely(x))
+                       printk(KERN_ALERT "BUG: Bad rss-counter state "
+                                         "mm:%p idx:%d val:%ld\n", mm, i, x);
+       }
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       VM_BUG_ON(mm->pmd_huge_pte);
+ #endif
+ }
  /*
   * Allocate and initialize an mm_struct.
   */
@@@ -539,9 -555,7 +556,7 @@@ void __mmdrop(struct mm_struct *mm
        mm_free_pgd(mm);
        destroy_context(mm);
        mmu_notifier_mm_destroy(mm);
- #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       VM_BUG_ON(mm->pmd_huge_pte);
- #endif
+       check_mm(mm);
        free_mm(mm);
  }
  EXPORT_SYMBOL_GPL(__mmdrop);
@@@ -1223,6 -1237,7 +1238,7 @@@ static struct task_struct *copy_process
  #ifdef CONFIG_CPUSETS
        p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
        p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
+       seqcount_init(&p->mems_allowed_seq);
  #endif
  #ifdef CONFIG_TRACE_IRQFLAGS
        p->irq_events = 0;
diff --combined mm/memory.c
index 8438c157e4d99bed4b34ba4f48a05f02bcd55515,1e0561e1f190feddb3df8246b9716ce4fc0c46b5..3416b6e018d6a7667fce3168924da3dccdec8e22
@@@ -125,17 -125,17 +125,17 @@@ core_initcall(init_zero_pfn)
  
  #if defined(SPLIT_RSS_COUNTING)
  
- static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+ void sync_mm_rss(struct mm_struct *mm)
  {
        int i;
  
        for (i = 0; i < NR_MM_COUNTERS; i++) {
-               if (task->rss_stat.count[i]) {
-                       add_mm_counter(mm, i, task->rss_stat.count[i]);
-                       task->rss_stat.count[i] = 0;
+               if (current->rss_stat.count[i]) {
+                       add_mm_counter(mm, i, current->rss_stat.count[i]);
+                       current->rss_stat.count[i] = 0;
                }
        }
-       task->rss_stat.events = 0;
+       current->rss_stat.events = 0;
  }
  
  static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
@@@ -157,30 -157,7 +157,7 @@@ static void check_sync_rss_stat(struct 
        if (unlikely(task != current))
                return;
        if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
-               __sync_task_rss_stat(task, task->mm);
- }
- unsigned long get_mm_counter(struct mm_struct *mm, int member)
- {
-       long val = 0;
-       /*
-        * Don't use task->mm here...for avoiding to use task_get_mm()..
-        * The caller must guarantee task->mm is not invalid.
-        */
-       val = atomic_long_read(&mm->rss_stat.count[member]);
-       /*
-        * counter is updated in asynchronous manner and may go to minus.
-        * But it's never be expected number for users.
-        */
-       if (val < 0)
-               return 0;
-       return (unsigned long)val;
- }
- void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
- {
-       __sync_task_rss_stat(task, mm);
+               sync_mm_rss(task->mm);
  }
  #else /* SPLIT_RSS_COUNTING */
  
@@@ -661,7 -638,7 +638,7 @@@ static inline void add_mm_rss_vec(struc
        int i;
  
        if (current->mm == mm)
-               sync_mm_rss(current, mm);
+               sync_mm_rss(mm);
        for (i = 0; i < NR_MM_COUNTERS; i++)
                if (rss[i])
                        add_mm_counter(mm, i, rss[i]);
@@@ -1247,16 -1224,24 +1224,24 @@@ static inline unsigned long zap_pmd_ran
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_trans_huge(*pmd)) {
-                       if (next-addr != HPAGE_PMD_SIZE) {
+                       if (next - addr != HPAGE_PMD_SIZE) {
                                VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
                                split_huge_page_pmd(vma->vm_mm, pmd);
                        } else if (zap_huge_pmd(tlb, vma, pmd, addr))
-                               continue;
+                               goto next;
                        /* fall through */
                }
-               if (pmd_none_or_clear_bad(pmd))
-                       continue;
+               /*
+                * Here there can be other concurrent MADV_DONTNEED or
+                * trans huge page faults running, and if the pmd is
+                * none or trans huge it can change under us. This is
+                * because MADV_DONTNEED holds the mmap_sem in read
+                * mode.
+                */
+               if (pmd_none_or_trans_huge_or_clear_bad(pmd))
+                       goto next;
                next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+ next:
                cond_resched();
        } while (pmd++, addr = next, addr != end);
  
@@@ -1282,10 -1267,10 +1267,10 @@@ static inline unsigned long zap_pud_ran
        return addr;
  }
  
 -static unsigned long unmap_page_range(struct mmu_gather *tlb,
 -                              struct vm_area_struct *vma,
 -                              unsigned long addr, unsigned long end,
 -                              struct zap_details *details)
 +static void unmap_page_range(struct mmu_gather *tlb,
 +                           struct vm_area_struct *vma,
 +                           unsigned long addr, unsigned long end,
 +                           struct zap_details *details)
  {
        pgd_t *pgd;
        unsigned long next;
        } while (pgd++, addr = next, addr != end);
        tlb_end_vma(tlb, vma);
        mem_cgroup_uncharge_end();
 +}
  
 -      return addr;
 +
 +static void unmap_single_vma(struct mmu_gather *tlb,
 +              struct vm_area_struct *vma, unsigned long start_addr,
 +              unsigned long end_addr, unsigned long *nr_accounted,
 +              struct zap_details *details)
 +{
 +      unsigned long start = max(vma->vm_start, start_addr);
 +      unsigned long end;
 +
 +      if (start >= vma->vm_end)
 +              return;
 +      end = min(vma->vm_end, end_addr);
 +      if (end <= vma->vm_start)
 +              return;
 +
 +      if (vma->vm_flags & VM_ACCOUNT)
 +              *nr_accounted += (end - start) >> PAGE_SHIFT;
 +
 +      if (unlikely(is_pfn_mapping(vma)))
 +              untrack_pfn_vma(vma, 0, 0);
 +
 +      if (start != end) {
 +              if (unlikely(is_vm_hugetlb_page(vma))) {
 +                      /*
 +                       * It is undesirable to test vma->vm_file as it
 +                       * should be non-null for valid hugetlb area.
 +                       * However, vm_file will be NULL in the error
 +                       * cleanup path of do_mmap_pgoff. When
 +                       * hugetlbfs ->mmap method fails,
 +                       * do_mmap_pgoff() nullifies vma->vm_file
 +                       * before calling this function to clean up.
 +                       * Since no pte has actually been setup, it is
 +                       * safe to do nothing in this case.
 +                       */
 +                      if (vma->vm_file)
 +                              unmap_hugepage_range(vma, start, end, NULL);
 +              } else
 +                      unmap_page_range(tlb, vma, start, end, details);
 +      }
  }
  
  /**
   * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
   * @details: details of nonlinear truncation or shared cache invalidation
   *
 - * Returns the end address of the unmapping (restart addr if interrupted).
 - *
   * Unmap all pages in the vma list.
   *
   * Only addresses between `start' and `end' will be unmapped.
   * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
   * drops the lock and schedules.
   */
 -unsigned long unmap_vmas(struct mmu_gather *tlb,
 +void unmap_vmas(struct mmu_gather *tlb,
                struct vm_area_struct *vma, unsigned long start_addr,
                unsigned long end_addr, unsigned long *nr_accounted,
                struct zap_details *details)
  {
 -      unsigned long start = start_addr;
        struct mm_struct *mm = vma->vm_mm;
  
        mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
 -      for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
 -              unsigned long end;
 -
 -              start = max(vma->vm_start, start_addr);
 -              if (start >= vma->vm_end)
 -                      continue;
 -              end = min(vma->vm_end, end_addr);
 -              if (end <= vma->vm_start)
 -                      continue;
 -
 -              if (vma->vm_flags & VM_ACCOUNT)
 -                      *nr_accounted += (end - start) >> PAGE_SHIFT;
 -
 -              if (unlikely(is_pfn_mapping(vma)))
 -                      untrack_pfn_vma(vma, 0, 0);
 -
 -              while (start != end) {
 -                      if (unlikely(is_vm_hugetlb_page(vma))) {
 -                              /*
 -                               * It is undesirable to test vma->vm_file as it
 -                               * should be non-null for valid hugetlb area.
 -                               * However, vm_file will be NULL in the error
 -                               * cleanup path of do_mmap_pgoff. When
 -                               * hugetlbfs ->mmap method fails,
 -                               * do_mmap_pgoff() nullifies vma->vm_file
 -                               * before calling this function to clean up.
 -                               * Since no pte has actually been setup, it is
 -                               * safe to do nothing in this case.
 -                               */
 -                              if (vma->vm_file)
 -                                      unmap_hugepage_range(vma, start, end, NULL);
 -
 -                              start = end;
 -                      } else
 -                              start = unmap_page_range(tlb, vma, start, end, details);
 -              }
 -      }
 -
 +      for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
 +              unmap_single_vma(tlb, vma, start_addr, end_addr, nr_accounted,
 +                               details);
        mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 -      return start;   /* which is now the end (or restart) address */
  }
  
  /**
   * @address: starting address of pages to zap
   * @size: number of bytes to zap
   * @details: details of nonlinear truncation or shared cache invalidation
 + *
 + * Caller must protect the VMA list
 + */
 +void zap_page_range(struct vm_area_struct *vma, unsigned long address,
 +              unsigned long size, struct zap_details *details)
 +{
 +      struct mm_struct *mm = vma->vm_mm;
 +      struct mmu_gather tlb;
 +      unsigned long end = address + size;
 +      unsigned long nr_accounted = 0;
 +
 +      lru_add_drain();
 +      tlb_gather_mmu(&tlb, mm, 0);
 +      update_hiwater_rss(mm);
 +      unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
 +      tlb_finish_mmu(&tlb, address, end);
 +}
 +
 +/**
 + * zap_page_range_single - remove user pages in a given range
 + * @vma: vm_area_struct holding the applicable pages
 + * @address: starting address of pages to zap
 + * @size: number of bytes to zap
 + * @details: details of nonlinear truncation or shared cache invalidation
 + *
 + * The range must fit into one VMA.
   */
 -unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 +static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
                unsigned long size, struct zap_details *details)
  {
        struct mm_struct *mm = vma->vm_mm;
        lru_add_drain();
        tlb_gather_mmu(&tlb, mm, 0);
        update_hiwater_rss(mm);
 -      end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
 +      mmu_notifier_invalidate_range_start(mm, address, end);
 +      unmap_single_vma(&tlb, vma, address, end, &nr_accounted, details);
 +      mmu_notifier_invalidate_range_end(mm, address, end);
        tlb_finish_mmu(&tlb, address, end);
 -      return end;
  }
  
  /**
@@@ -1450,7 -1408,7 +1435,7 @@@ int zap_vma_ptes(struct vm_area_struct 
        if (address < vma->vm_start || address + size > vma->vm_end ||
                        !(vma->vm_flags & VM_PFNMAP))
                return -1;
 -      zap_page_range(vma, address, size, NULL);
 +      zap_page_range_single(vma, address, size, NULL);
        return 0;
  }
  EXPORT_SYMBOL_GPL(zap_vma_ptes);
@@@ -2797,7 -2755,7 +2782,7 @@@ static void unmap_mapping_range_vma(str
                unsigned long start_addr, unsigned long end_addr,
                struct zap_details *details)
  {
 -      zap_page_range(vma, start_addr, end_addr - start_addr, details);
 +      zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
  }
  
  static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
diff --combined mm/mmap.c
index 6f3766b57803c20d6115eb12f295f81e20cad4d3,230f0bac06b6bb0ccd5744f8bc7d1972244aaf18..a7bf6a31c9f62be11cb8e5819322565b7cf2c266
+++ b/mm/mmap.c
@@@ -451,9 -451,8 +451,8 @@@ static void vma_link(struct mm_struct *
  }
  
  /*
-  * Helper for vma_adjust in the split_vma insert case:
-  * insert vm structure into list and rbtree and anon_vma,
-  * but it has already been inserted into prio_tree earlier.
+  * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
+  * mm's list and rbtree.  It has already been inserted into the prio_tree.
   */
  static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
  {
@@@ -935,19 -934,6 +934,19 @@@ void vm_stat_account(struct mm_struct *
  }
  #endif /* CONFIG_PROC_FS */
  
 +/*
 + * If a hint addr is less than mmap_min_addr change hint to be as
 + * low as possible but still greater than mmap_min_addr
 + */
 +static inline unsigned long round_hint_to_min(unsigned long hint)
 +{
 +      hint &= PAGE_MASK;
 +      if (((void *)hint != NULL) &&
 +          (hint < mmap_min_addr))
 +              return PAGE_ALIGN(mmap_min_addr);
 +      return hint;
 +}
 +
  /*
   * The caller must hold down_write(&current->mm->mmap_sem).
   */
@@@ -1112,9 -1098,9 +1111,9 @@@ SYSCALL_DEFINE6(mmap_pgoff, unsigned lo
                 * A dummy user value is used because we are not locking
                 * memory so no accounting is necessary
                 */
-               len = ALIGN(len, huge_page_size(&default_hstate));
-               file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
-                                               &user, HUGETLB_ANONHUGE_INODE);
+               file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len,
+                                               VM_NORESERVE, &user,
+                                               HUGETLB_ANONHUGE_INODE);
                if (IS_ERR(file))
                        return PTR_ERR(file);
        }
@@@ -1248,7 -1234,7 +1247,7 @@@ munmap_back
         */
        if (accountable_mapping(file, vm_flags)) {
                charged = len >> PAGE_SHIFT;
 -              if (security_vm_enough_memory(charged))
 +              if (security_vm_enough_memory_mm(mm, charged))
                        return -ENOMEM;
                vm_flags |= VM_ACCOUNT;
        }
@@@ -1439,10 -1425,8 +1438,8 @@@ void arch_unmap_area(struct mm_struct *
        /*
         * Is this a new hole at the lowest possible address?
         */
-       if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
+       if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
                mm->free_area_cache = addr;
-               mm->cached_hole_size = ~0UL;
-       }
  }
  
  /*
@@@ -1457,7 -1441,7 +1454,7 @@@ arch_get_unmapped_area_topdown(struct f
  {
        struct vm_area_struct *vma;
        struct mm_struct *mm = current->mm;
-       unsigned long addr = addr0;
+       unsigned long addr = addr0, start_addr;
  
        /* requested length too big for entire address space */
        if (len > TASK_SIZE)
                mm->free_area_cache = mm->mmap_base;
        }
  
+ try_again:
        /* either no address requested or can't fit in requested address hole */
-       addr = mm->free_area_cache;
-       /* make sure it can fit in the remaining address space */
-       if (addr > len) {
-               vma = find_vma(mm, addr-len);
-               if (!vma || addr <= vma->vm_start)
-                       /* remember the address as a hint for next time */
-                       return (mm->free_area_cache = addr-len);
-       }
-       if (mm->mmap_base < len)
-               goto bottomup;
+       start_addr = addr = mm->free_area_cache;
  
-       addr = mm->mmap_base-len;
+       if (addr < len)
+               goto fail;
  
+       addr -= len;
        do {
                /*
                 * Lookup failure means no vma is above this address,
                addr = vma->vm_start-len;
        } while (len < vma->vm_start);
  
- bottomup:
+ fail:
+       /*
+        * if hint left us with no space for the requested
+        * mapping then try again:
+        *
+        * Note: this is different with the case of bottomup
+        * which does the fully line-search, but we use find_vma
+        * here that causes some holes skipped.
+        */
+       if (start_addr != mm->mmap_base) {
+               mm->free_area_cache = mm->mmap_base;
+               mm->cached_hole_size = 0;
+               goto try_again;
+       }
        /*
         * A failed mmap() very likely causes application failure,
         * so fall back to the bottom-up function here. This scenario
@@@ -2193,7 -2183,7 +2196,7 @@@ unsigned long do_brk(unsigned long addr
        if (mm->map_count > sysctl_max_map_count)
                return -ENOMEM;
  
 -      if (security_vm_enough_memory(len >> PAGE_SHIFT))
 +      if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
                return -ENOMEM;
  
        /* Can we just expand an old private anonymous mapping? */
@@@ -2237,6 -2227,7 +2240,6 @@@ void exit_mmap(struct mm_struct *mm
        struct mmu_gather tlb;
        struct vm_area_struct *vma;
        unsigned long nr_accounted = 0;
 -      unsigned long end;
  
        /* mm's last user has gone, and its about to be pulled down */
        mmu_notifier_release(mm);
        tlb_gather_mmu(&tlb, mm, 1);
        /* update_hiwater_rss(mm) here? but nobody should be looking */
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
 -      end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
 +      unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
  
        free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
 -      tlb_finish_mmu(&tlb, 0, end);
 +      tlb_finish_mmu(&tlb, 0, -1);
  
        /*
         * Walk the list again, actually closing and freeing it,
diff --combined mm/mprotect.c
index 142ef4a1f480d193822b0228aaba8f4715167169,c621e999cbf70e254827b182ac926c71fe5e3697..a40992610ab6f6c0cbe96f9a9bd43a3fa973fd96
@@@ -60,7 -60,7 +60,7 @@@ static void change_pte_range(struct mm_
                                ptent = pte_mkwrite(ptent);
  
                        ptep_modify_prot_commit(mm, addr, pte, ptent);
-               } else if (PAGE_MIGRATION && !pte_file(oldpte)) {
+               } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
                        swp_entry_t entry = pte_to_swp_entry(oldpte);
  
                        if (is_write_migration_entry(entry)) {
@@@ -168,7 -168,7 +168,7 @@@ mprotect_fixup(struct vm_area_struct *v
                if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
                                                VM_SHARED|VM_NORESERVE))) {
                        charged = nrpages;
 -                      if (security_vm_enough_memory(charged))
 +                      if (security_vm_enough_memory_mm(mm, charged))
                                return -ENOMEM;
                        newflags |= VM_ACCOUNT;
                }
diff --combined mm/shmem.c
index 7a45ad004cfd0f908da54b47e298702bc7080a2a,7cc80833b74aee81d61a2dac355de17dbbecb56f..f99ff3e50bd6af061b30722528d002bab68e6e40
@@@ -127,7 -127,7 +127,7 @@@ static inline struct shmem_sb_info *SHM
  static inline int shmem_acct_size(unsigned long flags, loff_t size)
  {
        return (flags & VM_NORESERVE) ?
 -              0 : security_vm_enough_memory_kern(VM_ACCT(size));
 +              0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size));
  }
  
  static inline void shmem_unacct_size(unsigned long flags, loff_t size)
  static inline int shmem_acct_block(unsigned long flags)
  {
        return (flags & VM_NORESERVE) ?
 -              security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)) : 0;
 +              security_vm_enough_memory_mm(current->mm, VM_ACCT(PAGE_CACHE_SIZE)) : 0;
  }
  
  static inline void shmem_unacct_blocks(unsigned long flags, long pages)
@@@ -1178,6 -1178,12 +1178,12 @@@ static struct inode *shmem_get_inode(st
  static const struct inode_operations shmem_symlink_inode_operations;
  static const struct inode_operations shmem_short_symlink_operations;
  
+ #ifdef CONFIG_TMPFS_XATTR
+ static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
+ #else
+ #define shmem_initxattrs NULL
+ #endif
  static int
  shmem_write_begin(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned flags,
@@@ -1490,7 -1496,7 +1496,7 @@@ shmem_mknod(struct inode *dir, struct d
        if (inode) {
                error = security_inode_init_security(inode, dir,
                                                     &dentry->d_name,
-                                                    NULL, NULL);
+                                                    shmem_initxattrs, NULL);
                if (error) {
                        if (error != -EOPNOTSUPP) {
                                iput(inode);
@@@ -1630,7 -1636,7 +1636,7 @@@ static int shmem_symlink(struct inode *
                return -ENOSPC;
  
        error = security_inode_init_security(inode, dir, &dentry->d_name,
-                                            NULL, NULL);
+                                            shmem_initxattrs, NULL);
        if (error) {
                if (error != -EOPNOTSUPP) {
                        iput(inode);
@@@ -1704,6 -1710,66 +1710,66 @@@ static void shmem_put_link(struct dentr
   * filesystem level, though.
   */
  
+ /*
+  * Allocate new xattr and copy in the value; but leave the name to callers.
+  */
+ static struct shmem_xattr *shmem_xattr_alloc(const void *value, size_t size)
+ {
+       struct shmem_xattr *new_xattr;
+       size_t len;
+       /* wrap around? */
+       len = sizeof(*new_xattr) + size;
+       if (len <= sizeof(*new_xattr))
+               return NULL;
+       new_xattr = kmalloc(len, GFP_KERNEL);
+       if (!new_xattr)
+               return NULL;
+       new_xattr->size = size;
+       memcpy(new_xattr->value, value, size);
+       return new_xattr;
+ }
+ /*
+  * Callback for security_inode_init_security() for acquiring xattrs.
+  */
+ static int shmem_initxattrs(struct inode *inode,
+                           const struct xattr *xattr_array,
+                           void *fs_info)
+ {
+       struct shmem_inode_info *info = SHMEM_I(inode);
+       const struct xattr *xattr;
+       struct shmem_xattr *new_xattr;
+       size_t len;
+       for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+               new_xattr = shmem_xattr_alloc(xattr->value, xattr->value_len);
+               if (!new_xattr)
+                       return -ENOMEM;
+               len = strlen(xattr->name) + 1;
+               new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
+                                         GFP_KERNEL);
+               if (!new_xattr->name) {
+                       kfree(new_xattr);
+                       return -ENOMEM;
+               }
+               memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
+                      XATTR_SECURITY_PREFIX_LEN);
+               memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
+                      xattr->name, len);
+               spin_lock(&info->lock);
+               list_add(&new_xattr->list, &info->xattr_list);
+               spin_unlock(&info->lock);
+       }
+       return 0;
+ }
  static int shmem_xattr_get(struct dentry *dentry, const char *name,
                           void *buffer, size_t size)
  {
        return ret;
  }
  
- static int shmem_xattr_set(struct dentry *dentry, const char *name,
+ static int shmem_xattr_set(struct inode *inode, const char *name,
                           const void *value, size_t size, int flags)
  {
-       struct inode *inode = dentry->d_inode;
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct shmem_xattr *xattr;
        struct shmem_xattr *new_xattr = NULL;
-       size_t len;
        int err = 0;
  
        /* value == NULL means remove */
        if (value) {
-               /* wrap around? */
-               len = sizeof(*new_xattr) + size;
-               if (len <= sizeof(*new_xattr))
-                       return -ENOMEM;
-               new_xattr = kmalloc(len, GFP_KERNEL);
+               new_xattr = shmem_xattr_alloc(value, size);
                if (!new_xattr)
                        return -ENOMEM;
  
                        kfree(new_xattr);
                        return -ENOMEM;
                }
-               new_xattr->size = size;
-               memcpy(new_xattr->value, value, size);
        }
  
        spin_lock(&info->lock);
@@@ -1858,7 -1914,7 +1914,7 @@@ static int shmem_setxattr(struct dentr
        if (size == 0)
                value = "";  /* empty EA, do not remove */
  
-       return shmem_xattr_set(dentry, name, value, size, flags);
+       return shmem_xattr_set(dentry->d_inode, name, value, size, flags);
  
  }
  
@@@ -1878,7 -1934,7 +1934,7 @@@ static int shmem_removexattr(struct den
        if (err)
                return err;
  
-       return shmem_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE);
+       return shmem_xattr_set(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
  }
  
  static bool xattr_is_trusted(const char *name)
@@@ -2175,6 -2231,7 +2231,6 @@@ static void shmem_put_super(struct supe
  int shmem_fill_super(struct super_block *sb, void *data, int silent)
  {
        struct inode *inode;
 -      struct dentry *root;
        struct shmem_sb_info *sbinfo;
        int err = -ENOMEM;
  
                goto failed;
        inode->i_uid = sbinfo->uid;
        inode->i_gid = sbinfo->gid;
 -      root = d_alloc_root(inode);
 -      if (!root)
 -              goto failed_iput;
 -      sb->s_root = root;
 +      sb->s_root = d_make_root(inode);
 +      if (!sb->s_root)
 +              goto failed;
        return 0;
  
 -failed_iput:
 -      iput(inode);
  failed:
        shmem_put_super(sb);
        return err;
diff --combined mm/swapfile.c
index 6bf67ab6e469ab2329a3fc1a679e79228c6cee9f,21b56945c5d22e81b756381481e18937e2ce3269..dae42f380d6ebcde88d3aaef6aa3cb1d64e0f5a3
@@@ -932,9 -932,7 +932,7 @@@ static inline int unuse_pmd_range(struc
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
-               if (unlikely(pmd_trans_huge(*pmd)))
-                       continue;
-               if (pmd_none_or_clear_bad(pmd))
+               if (pmd_none_or_trans_huge_or_clear_bad(pmd))
                        continue;
                ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
                if (ret)
@@@ -1563,8 -1561,6 +1561,8 @@@ SYSCALL_DEFINE1(swapoff, const char __u
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
  
 +      BUG_ON(!current->mm);
 +
        pathname = getname(specialfile);
        err = PTR_ERR(pathname);
        if (IS_ERR(pathname))
                spin_unlock(&swap_lock);
                goto out_dput;
        }
 -      if (!security_vm_enough_memory(p->pages))
 +      if (!security_vm_enough_memory_mm(current->mm, p->pages))
                vm_unacct_memory(p->pages);
        else {
                err = -ENOMEM;
@@@ -2107,7 -2103,7 +2105,7 @@@ SYSCALL_DEFINE2(swapon, const char __us
                        p->flags |= SWP_SOLIDSTATE;
                        p->cluster_next = 1 + (random32() % p->highest_bit);
                }
-               if (discard_swap(p) == 0 && (swap_flags & SWAP_FLAG_DISCARD))
+               if ((swap_flags & SWAP_FLAG_DISCARD) && discard_swap(p) == 0)
                        p->flags |= SWP_DISCARDABLE;
        }
  
@@@ -2291,58 -2287,6 +2289,6 @@@ int swapcache_prepare(swp_entry_t entry
        return __swap_duplicate(entry, SWAP_HAS_CACHE);
  }
  
- /*
-  * swap_lock prevents swap_map being freed. Don't grab an extra
-  * reference on the swaphandle, it doesn't matter if it becomes unused.
-  */
- int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
- {
-       struct swap_info_struct *si;
-       int our_page_cluster = page_cluster;
-       pgoff_t target, toff;
-       pgoff_t base, end;
-       int nr_pages = 0;
-       if (!our_page_cluster)  /* no readahead */
-               return 0;
-       si = swap_info[swp_type(entry)];
-       target = swp_offset(entry);
-       base = (target >> our_page_cluster) << our_page_cluster;
-       end = base + (1 << our_page_cluster);
-       if (!base)              /* first page is swap header */
-               base++;
-       spin_lock(&swap_lock);
-       if (end > si->max)      /* don't go beyond end of map */
-               end = si->max;
-       /* Count contiguous allocated slots above our target */
-       for (toff = target; ++toff < end; nr_pages++) {
-               /* Don't read in free or bad pages */
-               if (!si->swap_map[toff])
-                       break;
-               if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
-                       break;
-       }
-       /* Count contiguous allocated slots below our target */
-       for (toff = target; --toff >= base; nr_pages++) {
-               /* Don't read in free or bad pages */
-               if (!si->swap_map[toff])
-                       break;
-               if (swap_count(si->swap_map[toff]) == SWAP_MAP_BAD)
-                       break;
-       }
-       spin_unlock(&swap_lock);
-       /*
-        * Indicate starting offset, and return number of pages to get:
-        * if only 1, say 0, since there's then no readahead to be done.
-        */
-       *offset = ++toff;
-       return nr_pages? ++nr_pages: 0;
- }
  /*
   * add_swap_count_continuation - called when a swap count is duplicated
   * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's