mm: augment vma rbtree with rb_subtree_gap
[~shefty/rdma-dev.git] / mm / mmap.c
index ebf19031c5e4b44eca898e1443ba9c541aa94f31..bdcea6310fffcb0c21979a7d5b274e4790622817 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -31,6 +31,7 @@
 #include <linux/audit.h>
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
+#include <linux/rbtree_augmented.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -297,6 +298,27 @@ out:
        return retval;
 }
 
+/* Max gap in @vma's subtree; unsigned long to match rb_subtree_gap's type. */
+static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
+{
+       unsigned long max = vma->vm_start, subtree_gap;
+       if (vma->vm_prev)       /* own gap = vm_start - vm_prev->vm_end */
+               max -= vma->vm_prev->vm_end;
+       if (vma->vm_rb.rb_left) {       /* fold in left child's cached max */
+               subtree_gap = rb_entry(vma->vm_rb.rb_left,
+                               struct vm_area_struct, vm_rb)->rb_subtree_gap;
+               if (subtree_gap > max)
+                       max = subtree_gap;
+       }
+       if (vma->vm_rb.rb_right) {      /* fold in right child's cached max */
+               subtree_gap = rb_entry(vma->vm_rb.rb_right,
+                               struct vm_area_struct, vm_rb)->rb_subtree_gap;
+               if (subtree_gap > max)
+                       max = subtree_gap;
+       }
+       return max;
+}
+
 #ifdef CONFIG_DEBUG_VM_RB
 static int browse_rb(struct rb_root *root)
 {
@@ -327,6 +349,18 @@ static int browse_rb(struct rb_root *root)
        return i;
 }
 
+/* Trip BUG_ON() if any vma except @ignore caches a stale rb_subtree_gap. */
+static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
+{
+       struct rb_node *node = rb_first(root);
+       for (; node; node = rb_next(node)) {
+               struct vm_area_struct *cur =
+                       rb_entry(node, struct vm_area_struct, vm_rb);
+               BUG_ON(cur != ignore &&
+                      cur->rb_subtree_gap != vma_compute_subtree_gap(cur));
+       }
+}
+
 void validate_mm(struct mm_struct *mm)
 {
        int bug = 0;
@@ -349,9 +383,52 @@ void validate_mm(struct mm_struct *mm)
        BUG_ON(bug);
 }
 #else
+#define validate_mm_rb(root, ignore) do { } while (0)
 #define validate_mm(mm) do { } while (0)
 #endif
 
+RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
+                    unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
+
+/*
+ * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
+ * vma->vm_prev->vm_end values changed, without modifying the vma's position
+ * in the rbtree.
+ */
+static void vma_gap_update(struct vm_area_struct *vma)
+{
+       /*
+        * As it turns out, RB_DECLARE_CALLBACKS() already created a callback
+        * function that does exactly what we want.
+        */
+       vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
+}
+
+static inline void vma_rb_insert(struct vm_area_struct *vma,
+                                struct rb_root *root)
+{
+       /* All rb_subtree_gap values must be consistent prior to insertion */
+       validate_mm_rb(root, NULL);     /* no-op unless CONFIG_DEBUG_VM_RB */
+
+       rb_insert_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
+}
+
+static void vma_rb_erase(struct vm_area_struct *vma, struct rb_root *root)
+{
+       /*
+        * All rb_subtree_gap values must be consistent prior to erase,
+        * with the possible exception of the vma being erased.
+        */
+       validate_mm_rb(root, vma);      /* no-op unless CONFIG_DEBUG_VM_RB */
+
+       /*
+        * Note rb_erase_augmented is a fairly large inline function,
+        * so make sure we instantiate it only once with our desired
+        * augmented rbtree callbacks.
+        */
+       rb_erase_augmented(&vma->vm_rb, root, &vma_gap_callbacks);
+}
+
 /*
  * vma has some anon_vma assigned, and is already inserted on that
  * anon_vma's interval trees.
@@ -421,8 +498,25 @@ static int find_vma_links(struct mm_struct *mm, unsigned long addr,
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
                struct rb_node **rb_link, struct rb_node *rb_parent)
 {
+       /* Update tracking information for the gap following the new vma. */
+       if (vma->vm_next)
+               vma_gap_update(vma->vm_next);
+       else
+               mm->highest_vm_end = vma->vm_end;       /* vma has no vm_next */
+
+       /*
+        * vma->vm_prev wasn't known when we followed the rbtree to find the
+        * correct insertion point for that vma. As a result, we could not
+        * update the vma vm_rb parents rb_subtree_gap values on the way down.
+        * So, we first insert the vma with a zero rb_subtree_gap value
+        * (to be consistent with what we did on the way down), and then
+        * immediately update the gap to the correct value. Finally we
+        * rebalance the rbtree after all augmented values have been set.
+        */
        rb_link_node(&vma->vm_rb, rb_parent, rb_link);
-       rb_insert_color(&vma->vm_rb, &mm->mm_rb);
+       vma->rb_subtree_gap = 0;        /* placeholder until vma_gap_update() */
+       vma_gap_update(vma);
+       vma_rb_insert(vma, &mm->mm_rb); /* rebalance, as described above */
 }
 
 static void __vma_link_file(struct vm_area_struct *vma)
@@ -498,12 +592,12 @@ static inline void
 __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev)
 {
-       struct vm_area_struct *next = vma->vm_next;
+       struct vm_area_struct *next;
 
-       prev->vm_next = next;
+       vma_rb_erase(vma, &mm->mm_rb);  /* erase while list links are intact */
+       prev->vm_next = next = vma->vm_next;
        if (next)
                next->vm_prev = prev;
-       rb_erase(&vma->vm_rb, &mm->mm_rb);
        if (mm->mmap_cache == vma)
                mm->mmap_cache = prev;
 }
@@ -525,6 +619,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
        struct rb_root *root = NULL;
        struct anon_vma *anon_vma = NULL;
        struct file *file = vma->vm_file;
+       bool start_changed = false, end_changed = false;
        long adjust_next = 0;
        int remove_next = 0;
 
@@ -615,8 +710,14 @@ again:                     remove_next = 1 + (end > next->vm_end);
                        vma_interval_tree_remove(next, root);
        }
 
-       vma->vm_start = start;
-       vma->vm_end = end;
+       if (start != vma->vm_start) {
+               vma->vm_start = start;
+               start_changed = true;
+       }
+       if (end != vma->vm_end) {
+               vma->vm_end = end;
+               end_changed = true;
+       }
        vma->vm_pgoff = pgoff;
        if (adjust_next) {
                next->vm_start += adjust_next << PAGE_SHIFT;
@@ -645,6 +746,15 @@ again:                     remove_next = 1 + (end > next->vm_end);
                 * (it may either follow vma or precede it).
                 */
                __insert_vm_struct(mm, insert);
+       } else {
+               if (start_changed)
+                       vma_gap_update(vma);
+               if (end_changed) {
+                       if (!next)
+                               mm->highest_vm_end = end;
+                       else if (!adjust_next)
+                               vma_gap_update(next);
+               }
        }
 
        if (anon_vma) {
@@ -678,10 +788,13 @@ again:                    remove_next = 1 + (end > next->vm_end);
                 * we must remove another next too. It would clutter
                 * up the code too much to do both in one go.
                 */
-               if (remove_next == 2) {
-                       next = vma->vm_next;
+               next = vma->vm_next;
+               if (remove_next == 2)
                        goto again;
-               }
+               else if (next)
+                       vma_gap_update(next);
+               else
+                       mm->highest_vm_end = end;
        }
        if (insert && file)
                uprobe_mmap(insert);
@@ -1784,6 +1897,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_end = address;
                                anon_vma_interval_tree_post_update_vma(vma);
+                               if (vma->vm_next)
+                                       vma_gap_update(vma->vm_next);
+                               else
+                                       vma->vm_mm->highest_vm_end = address;
                                perf_event_mmap(vma);
                        }
                }
@@ -1838,6 +1955,7 @@ int expand_downwards(struct vm_area_struct *vma,
                                vma->vm_start = address;
                                vma->vm_pgoff -= grow;
                                anon_vma_interval_tree_post_update_vma(vma);
+                               vma_gap_update(vma);
                                perf_event_mmap(vma);
                        }
                }
@@ -1960,14 +2078,17 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
        insertion_point = (prev ? &prev->vm_next : &mm->mmap);
        vma->vm_prev = NULL;
        do {
-               rb_erase(&vma->vm_rb, &mm->mm_rb);
+               vma_rb_erase(vma, &mm->mm_rb);
                mm->map_count--;
                tail_vma = vma;
                vma = vma->vm_next;
        } while (vma && vma->vm_start < end);
        *insertion_point = vma;
-       if (vma)
+       if (vma) {
                vma->vm_prev = prev;
+               vma_gap_update(vma);
+       } else
+               mm->highest_vm_end = prev ? prev->vm_end : 0;
        tail_vma->vm_next = NULL;
        if (mm->unmap_area == arch_unmap_area)
                addr = prev ? prev->vm_end : mm->mmap_base;