git.openfabrics.org - ~shefty/rdma-dev.git/blobdiff - mm/memcontrol.c
mm: memcg: consolidate hierarchy iteration primitives
[~shefty/rdma-dev.git] / mm / memcontrol.c
index 0b2d4036f1cde795457b8e47794dac24174bad6c..6edef95fecf4bf2c9f1debeaaabf63d7bb82c94f 100644 (file)
@@ -853,83 +853,76 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
        return memcg;
 }
 
-/* The caller has to guarantee "mem" exists before calling this */
-static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *memcg)
+static struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+                                         struct mem_cgroup *prev,
+                                         bool reclaim)
 {
-       struct cgroup_subsys_state *css;
-       int found;
+       struct mem_cgroup *memcg = NULL;
+       int id = 0;
 
-       if (!memcg) /* ROOT cgroup has the smallest ID */
-               return root_mem_cgroup; /*css_put/get against root is ignored*/
-       if (!memcg->use_hierarchy) {
-               if (css_tryget(&memcg->css))
-                       return memcg;
-               return NULL;
-       }
-       rcu_read_lock();
-       /*
-        * searching a memory cgroup which has the smallest ID under given
-        * ROOT cgroup. (ID >= 1)
-        */
-       css = css_get_next(&mem_cgroup_subsys, 1, &memcg->css, &found);
-       if (css && css_tryget(css))
-               memcg = container_of(css, struct mem_cgroup, css);
-       else
-               memcg = NULL;
-       rcu_read_unlock();
-       return memcg;
-}
+       if (!root)
+               root = root_mem_cgroup;
 
-static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
-                                       struct mem_cgroup *root,
-                                       bool cond)
-{
-       int nextid = css_id(&iter->css) + 1;
-       int found;
-       int hierarchy_used;
-       struct cgroup_subsys_state *css;
+       if (prev && !reclaim)
+               id = css_id(&prev->css);
 
-       hierarchy_used = iter->use_hierarchy;
+       if (prev && prev != root)
+               css_put(&prev->css);
 
-       css_put(&iter->css);
-       /* If no ROOT, walk all, ignore hierarchy */
-       if (!cond || (root && !hierarchy_used))
-               return NULL;
+       if (!root->use_hierarchy && root != root_mem_cgroup) {
+               if (prev)
+                       return NULL;
+               return root;
+       }
 
-       if (!root)
-               root = root_mem_cgroup;
+       while (!memcg) {
+               struct cgroup_subsys_state *css;
 
-       do {
-               iter = NULL;
-               rcu_read_lock();
+               if (reclaim)
+                       id = root->last_scanned_child;
 
-               css = css_get_next(&mem_cgroup_subsys, nextid,
-                               &root->css, &found);
-               if (css && css_tryget(css))
-                       iter = container_of(css, struct mem_cgroup, css);
+               rcu_read_lock();
+               css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id);
+               if (css) {
+                       if (css == &root->css || css_tryget(css))
+                               memcg = container_of(css,
+                                                    struct mem_cgroup, css);
+               } else
+                       id = 0;
                rcu_read_unlock();
-               /* If css is NULL, no more cgroups will be found */
-               nextid = found + 1;
-       } while (css && !iter);
 
-       return iter;
+               if (reclaim)
+                       root->last_scanned_child = id;
+
+               if (prev && !css)
+                       return NULL;
+       }
+       return memcg;
 }
-/*
- * for_eacn_mem_cgroup_tree() for visiting all cgroup under tree. Please
- * be careful that "break" loop is not allowed. We have reference count.
- * Instead of that modify "cond" to be false and "continue" to exit the loop.
- */
-#define for_each_mem_cgroup_tree_cond(iter, root, cond)        \
-       for (iter = mem_cgroup_start_loop(root);\
-            iter != NULL;\
-            iter = mem_cgroup_get_next(iter, root, cond))
 
-#define for_each_mem_cgroup_tree(iter, root) \
-       for_each_mem_cgroup_tree_cond(iter, root, true)
+static void mem_cgroup_iter_break(struct mem_cgroup *root,
+                                 struct mem_cgroup *prev)
+{
+       if (!root)
+               root = root_mem_cgroup;
+       if (prev && prev != root)
+               css_put(&prev->css);
+}
 
-#define for_each_mem_cgroup_all(iter) \
-       for_each_mem_cgroup_tree_cond(iter, NULL, true)
+/*
+ * Iteration constructs for visiting all cgroups (under a tree).  If
+ * loops are exited prematurely (break), mem_cgroup_iter_break() must
+ * be used for reference counting.
+ */
+#define for_each_mem_cgroup_tree(iter, root)           \
+       for (iter = mem_cgroup_iter(root, NULL, false); \
+            iter != NULL;                              \
+            iter = mem_cgroup_iter(root, iter, false))
 
+#define for_each_mem_cgroup(iter)                      \
+       for (iter = mem_cgroup_iter(NULL, NULL, false); \
+            iter != NULL;                              \
+            iter = mem_cgroup_iter(NULL, iter, false))
 
 static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 {
@@ -1536,43 +1529,6 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
        return min(limit, memsw);
 }
 
-/*
- * Visit the first child (need not be the first child as per the ordering
- * of the cgroup list, since we track last_scanned_child) of @mem and use
- * that to reclaim free pages from.
- */
-static struct mem_cgroup *
-mem_cgroup_select_victim(struct mem_cgroup *root_memcg)
-{
-       struct mem_cgroup *ret = NULL;
-       struct cgroup_subsys_state *css;
-       int nextid, found;
-
-       if (!root_memcg->use_hierarchy) {
-               css_get(&root_memcg->css);
-               ret = root_memcg;
-       }
-
-       while (!ret) {
-               rcu_read_lock();
-               nextid = root_memcg->last_scanned_child + 1;
-               css = css_get_next(&mem_cgroup_subsys, nextid, &root_memcg->css,
-                                  &found);
-               if (css && css_tryget(css))
-                       ret = container_of(css, struct mem_cgroup, css);
-
-               rcu_read_unlock();
-               /* Updates scanning parameter */
-               if (!css) {
-                       /* this means start scan from ID:1 */
-                       root_memcg->last_scanned_child = 0;
-               } else
-                       root_memcg->last_scanned_child = found;
-       }
-
-       return ret;
-}
-
 /**
  * test_mem_cgroup_node_reclaimable
  * @mem: the target memcg
@@ -1728,7 +1684,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
                                                unsigned long reclaim_options,
                                                unsigned long *total_scanned)
 {
-       struct mem_cgroup *victim;
+       struct mem_cgroup *victim = NULL;
        int ret, total = 0;
        int loop = 0;
        bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
@@ -1744,8 +1700,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
                noswap = true;
 
        while (1) {
-               victim = mem_cgroup_select_victim(root_memcg);
-               if (victim == root_memcg) {
+               victim = mem_cgroup_iter(root_memcg, victim, true);
+               if (!victim) {
                        loop++;
                        /*
                         * We are not draining per cpu cached charges during
@@ -1761,10 +1717,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
                                 * anything, it might because there are
                                 * no reclaimable pages under this hierarchy
                                 */
-                               if (!check_soft || !total) {
-                                       css_put(&victim->css);
+                               if (!check_soft || !total)
                                        break;
-                               }
                                /*
                                 * We want to do more targeted reclaim.
                                 * excess >> 2 is not to excessive so as to
@@ -1772,15 +1726,13 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
                                 * coming back to reclaim from this cgroup
                                 */
                                if (total >= (excess >> 2) ||
-                                       (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) {
-                                       css_put(&victim->css);
+                                       (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
                                        break;
-                               }
                        }
+                       continue;
                }
                if (!mem_cgroup_reclaimable(victim, noswap)) {
                        /* this cgroup's local usage == 0 */
-                       css_put(&victim->css);
                        continue;
                }
                /* we use swappiness of local cgroup */
@@ -1791,21 +1743,21 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
                } else
                        ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
                                                noswap);
-               css_put(&victim->css);
+               total += ret;
                /*
                 * At shrinking usage, we can't check we should stop here or
                 * reclaim more. It's depends on callers. last_scanned_child
                 * will work enough for keeping fairness under tree.
                 */
                if (shrink)
-                       return ret;
-               total += ret;
+                       break;
                if (check_soft) {
                        if (!res_counter_soft_limit_excess(&root_memcg->res))
-                               return total;
+                               break;
                } else if (mem_cgroup_margin(root_memcg))
-                       return total;
+                       break;
        }
+       mem_cgroup_iter_break(root_memcg, victim);
        return total;
 }
 
@@ -1817,16 +1769,16 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
 static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *iter, *failed = NULL;
-       bool cond = true;
 
-       for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
+       for_each_mem_cgroup_tree(iter, memcg) {
                if (iter->oom_lock) {
                        /*
                         * this subtree of our hierarchy is already locked
                         * so we cannot give a lock.
                         */
                        failed = iter;
-                       cond = false;
+                       mem_cgroup_iter_break(memcg, iter);
+                       break;
                } else
                        iter->oom_lock = true;
        }
@@ -1838,11 +1790,10 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
         * OK, we failed to lock the whole subtree so we have to clean up
         * what we set up to the failing subtree
         */
-       cond = true;
-       for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
+       for_each_mem_cgroup_tree(iter, memcg) {
                if (iter == failed) {
-                       cond = false;
-                       continue;
+                       mem_cgroup_iter_break(memcg, iter);
+                       break;
                }
                iter->oom_lock = false;
        }
@@ -2238,7 +2189,7 @@ static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
        struct mem_cgroup *iter;
 
        if ((action == CPU_ONLINE)) {
-               for_each_mem_cgroup_all(iter)
+               for_each_mem_cgroup(iter)
                        synchronize_mem_cgroup_on_move(iter, cpu);
                return NOTIFY_OK;
        }
@@ -2246,7 +2197,7 @@ static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
        if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN)
                return NOTIFY_OK;
 
-       for_each_mem_cgroup_all(iter)
+       for_each_mem_cgroup(iter)
                mem_cgroup_drain_pcp_counter(iter, cpu);
 
        stock = &per_cpu(memcg_stock, cpu);