Merge branch 'akpm' (Andrew's patch-bomb)
[~shefty/rdma-dev.git] / mm / memory.c
index 8438c157e4d99bed4b34ba4f48a05f02bcd55515..3416b6e018d6a7667fce3168924da3dccdec8e22 100644 (file)
@@ -125,17 +125,17 @@ core_initcall(init_zero_pfn);
 
 #if defined(SPLIT_RSS_COUNTING)
 
-static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+void sync_mm_rss(struct mm_struct *mm)
 {
        int i;
 
        for (i = 0; i < NR_MM_COUNTERS; i++) {
-               if (task->rss_stat.count[i]) {
-                       add_mm_counter(mm, i, task->rss_stat.count[i]);
-                       task->rss_stat.count[i] = 0;
+               if (current->rss_stat.count[i]) {
+                       add_mm_counter(mm, i, current->rss_stat.count[i]);
+                       current->rss_stat.count[i] = 0;
                }
        }
-       task->rss_stat.events = 0;
+       current->rss_stat.events = 0;
 }
 
 static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
@@ -157,30 +157,7 @@ static void check_sync_rss_stat(struct task_struct *task)
        if (unlikely(task != current))
                return;
        if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
-               __sync_task_rss_stat(task, task->mm);
-}
-
-unsigned long get_mm_counter(struct mm_struct *mm, int member)
-{
-       long val = 0;
-
-       /*
-        * Don't use task->mm here...for avoiding to use task_get_mm()..
-        * The caller must guarantee task->mm is not invalid.
-        */
-       val = atomic_long_read(&mm->rss_stat.count[member]);
-       /*
-        * counter is updated in asynchronous manner and may go to minus.
-        * But it's never be expected number for users.
-        */
-       if (val < 0)
-               return 0;
-       return (unsigned long)val;
-}
-
-void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
-{
-       __sync_task_rss_stat(task, mm);
+               sync_mm_rss(task->mm);
 }
 #else /* SPLIT_RSS_COUNTING */
 
@@ -661,7 +638,7 @@ static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
        int i;
 
        if (current->mm == mm)
-               sync_mm_rss(current, mm);
+               sync_mm_rss(mm);
        for (i = 0; i < NR_MM_COUNTERS; i++)
                if (rss[i])
                        add_mm_counter(mm, i, rss[i]);
@@ -1247,16 +1224,24 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_trans_huge(*pmd)) {
-                       if (next-addr != HPAGE_PMD_SIZE) {
+                       if (next - addr != HPAGE_PMD_SIZE) {
                                VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
                                split_huge_page_pmd(vma->vm_mm, pmd);
                        } else if (zap_huge_pmd(tlb, vma, pmd, addr))
-                               continue;
+                               goto next;
                        /* fall through */
                }
-               if (pmd_none_or_clear_bad(pmd))
-                       continue;
+               /*
+                * Other MADV_DONTNEED calls or trans huge page faults
+                * may be running concurrently here, so a pmd that is
+                * none or trans huge can change under us. This is
+                * possible because MADV_DONTNEED holds the mmap_sem
+                * only in read mode.
+                */
+               if (pmd_none_or_trans_huge_or_clear_bad(pmd))
+                       goto next;
                next = zap_pte_range(tlb, vma, pmd, addr, next, details);
+next:
                cond_resched();
        } while (pmd++, addr = next, addr != end);