#define do_swap_account (0)
#endif
+static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */
/*
* Statistics for memory cgroup.
/*
* While reclaiming in a hiearchy, we cache the last child we
- * reclaimed from. Protected by cgroup_lock()
+ * reclaimed from. Protected by hierarchy_mutex
*/
struct mem_cgroup *last_scanned_child;
/*
*/
bool use_hierarchy;
unsigned long last_oom_jiffies;
- int obsolete;
atomic_t refcnt;
unsigned int swappiness;
struct mem_cgroup, css);
}
+static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
+{
+ struct mem_cgroup *mem = NULL;
+ /*
+ * Because we have no locks, mm->owner's may be being moved to other
+ * cgroup. We use css_tryget() here even if this looks
+ * pessimistic (rather than adding locks here).
+ */
+ rcu_read_lock();
+ do {
+ mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ if (unlikely(!mem))
+ break;
+ } while (!css_tryget(&mem->css));
+ rcu_read_unlock();
+ return mem;
+}
+
+static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem)
+{
+ if (!mem)
+ return true;
+ return css_is_removed(&mem->css);
+}
+
/*
* Following LRU functions are allowed to be used without PCG_LOCK.
* Operations are called by routine of global LRU independently from memcg.
return;
pc = lookup_page_cgroup(page);
/* can happen while we handle swapcache. */
- if (list_empty(&pc->lru))
+ if (list_empty(&pc->lru) || !pc->mem_cgroup)
return;
+ /*
+ * We don't check PCG_USED bit. It's cleared when the "page" is finally
+ * removed from global LRU.
+ */
mz = page_cgroup_zoneinfo(pc);
mem = pc->mem_cgroup;
MEM_CGROUP_ZSTAT(mz, lru) -= 1;
return;
pc = lookup_page_cgroup(page);
+ /*
+ * Used bit is set without atomic ops but after smp_wmb().
+ * For making pc->mem_cgroup visible, insert smp_rmb() here.
+ */
smp_rmb();
/* unused page is not rotated. */
if (!PageCgroupUsed(pc))
if (mem_cgroup_disabled())
return;
pc = lookup_page_cgroup(page);
- /* barrier to sync with "charge" */
+ /*
+ * Used bit is set without atomic ops but after smp_wmb().
+ * For making pc->mem_cgroup visible, insert smp_rmb() here.
+ */
smp_rmb();
if (!PageCgroupUsed(pc))
return;
MEM_CGROUP_ZSTAT(mz, lru) += 1;
list_add(&pc->lru, &mz->lists[lru]);
}
+
/*
- * To add swapcache into LRU. Be careful to all this function.
- * zone->lru_lock shouldn't be held and irq must not be disabled.
+ * At handling SwapCache, pc->mem_cgroup may be changed while it's linked to
+ * lru because the page may.be reused after it's fully uncharged (because of
+ * SwapCache behavior).To handle that, unlink page_cgroup from LRU when charge
+ * it again. This function is only used to charge SwapCache. It's done under
+ * lock_page and expected that zone->lru_lock is never held.
*/
-static void mem_cgroup_lru_fixup(struct page *page)
+static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page)
{
- if (!isolate_lru_page(page))
- putback_lru_page(page);
+ unsigned long flags;
+ struct zone *zone = page_zone(page);
+ struct page_cgroup *pc = lookup_page_cgroup(page);
+
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ /*
+ * Forget old LRU when this page_cgroup is *not* used. This Used bit
+ * is guarded by lock_page() because the page is SwapCache.
+ */
+ if (!PageCgroupUsed(pc))
+ mem_cgroup_del_lru_list(page, page_lru(page));
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
+{
+ unsigned long flags;
+ struct zone *zone = page_zone(page);
+ struct page_cgroup *pc = lookup_page_cgroup(page);
+
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ /* link when the page is linked to LRU but page_cgroup isn't */
+ if (PageLRU(page) && list_empty(&pc->lru))
+ mem_cgroup_add_lru_list(page, page_lru(page));
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+
+
void mem_cgroup_move_lists(struct page *page,
enum lru_list from, enum lru_list to)
{
return NULL;
pc = lookup_page_cgroup(page);
+ /*
+ * Used bit is set without atomic ops but after smp_wmb().
+ * For making pc->mem_cgroup visible, insert smp_rmb() here.
+ */
+ smp_rmb();
+ if (!PageCgroupUsed(pc))
+ return NULL;
+
mz = page_cgroup_zoneinfo(pc);
if (!mz)
return NULL;
/*
* This routine finds the DFS walk successor. This routine should be
- * called with cgroup_mutex held
+ * called with hierarchy_mutex held
*/
static struct mem_cgroup *
-mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
+__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
{
struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
/*
* Walk down to children
*/
- mem_cgroup_put(curr);
cgroup = list_entry(curr_cgroup->children.next,
struct cgroup, sibling);
curr = mem_cgroup_from_cont(cgroup);
- mem_cgroup_get(curr);
goto done;
}
visit_parent:
if (curr_cgroup == root_cgroup) {
- mem_cgroup_put(curr);
- curr = root_mem;
- mem_cgroup_get(curr);
+ /* caller handles NULL case */
+ curr = NULL;
goto done;
}
* Goto next sibling
*/
if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
- mem_cgroup_put(curr);
cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
sibling);
curr = mem_cgroup_from_cont(cgroup);
- mem_cgroup_get(curr);
goto done;
}
goto visit_parent;
done:
- root_mem->last_scanned_child = curr;
return curr;
}
* that to reclaim free pages from.
*/
static struct mem_cgroup *
-mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
+mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
{
struct cgroup *cgroup;
- struct mem_cgroup *ret;
- bool obsolete = (root_mem->last_scanned_child &&
- root_mem->last_scanned_child->obsolete);
+ struct mem_cgroup *orig, *next;
+ bool obsolete;
/*
* Scan all children under the mem_cgroup mem
*/
- cgroup_lock();
+ mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
+
+ orig = root_mem->last_scanned_child;
+ obsolete = mem_cgroup_is_obsolete(orig);
+
if (list_empty(&root_mem->css.cgroup->children)) {
- ret = root_mem;
+ /*
+ * root_mem might have children before and last_scanned_child
+ * may point to one of them. We put it later.
+ */
+ if (orig)
+ VM_BUG_ON(!obsolete);
+ next = NULL;
goto done;
}
- if (!root_mem->last_scanned_child || obsolete) {
-
- if (obsolete)
- mem_cgroup_put(root_mem->last_scanned_child);
-
+ if (!orig || obsolete) {
cgroup = list_first_entry(&root_mem->css.cgroup->children,
struct cgroup, sibling);
- ret = mem_cgroup_from_cont(cgroup);
- mem_cgroup_get(ret);
+ next = mem_cgroup_from_cont(cgroup);
} else
- ret = mem_cgroup_get_next_node(root_mem->last_scanned_child,
- root_mem);
+ next = __mem_cgroup_get_next_node(orig, root_mem);
done:
- root_mem->last_scanned_child = ret;
- cgroup_unlock();
- return ret;
+ if (next)
+ mem_cgroup_get(next);
+ root_mem->last_scanned_child = next;
+ if (orig)
+ mem_cgroup_put(orig);
+ mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
+ return (next) ? next : root_mem;
}
static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
* but there might be left over accounting, even after children
* have left.
*/
- ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
+ ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
get_swappiness(root_mem));
if (mem_cgroup_check_under_limit(root_mem))
- return 0;
+ return 1; /* indicate reclaim has succeeded */
if (!root_mem->use_hierarchy)
return ret;
- next_mem = mem_cgroup_get_first_node(root_mem);
+ next_mem = mem_cgroup_get_next_node(root_mem);
while (next_mem != root_mem) {
- if (next_mem->obsolete) {
- mem_cgroup_put(next_mem);
- cgroup_lock();
- next_mem = mem_cgroup_get_first_node(root_mem);
- cgroup_unlock();
+ if (mem_cgroup_is_obsolete(next_mem)) {
+ next_mem = mem_cgroup_get_next_node(root_mem);
continue;
}
- ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
+ ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
get_swappiness(next_mem));
if (mem_cgroup_check_under_limit(root_mem))
- return 0;
- cgroup_lock();
- next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
- cgroup_unlock();
+ return 1; /* indicate reclaim has succeeded */
+ next_mem = mem_cgroup_get_next_node(root_mem);
}
return ret;
}
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
- if (likely(!*memcg)) {
- rcu_read_lock();
- mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
- if (unlikely(!mem)) {
- rcu_read_unlock();
- return 0;
- }
- /*
- * For every charge from the cgroup, increment reference count
- */
- css_get(&mem->css);
+ mem = *memcg;
+ if (likely(!mem)) {
+ mem = try_get_mem_cgroup_from_mm(mm);
*memcg = mem;
- rcu_read_unlock();
} else {
- mem = *memcg;
css_get(&mem->css);
}
+ if (unlikely(!mem))
+ return 0;
+
+ VM_BUG_ON(mem_cgroup_is_obsolete(mem));
while (1) {
int ret;
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
noswap);
+ if (ret)
+ continue;
/*
* try_to_free_mem_cgroup_pages() might not give us a full
if (!nr_retries--) {
if (oom) {
+ mutex_lock(&memcg_tasklist);
mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
+ mutex_unlock(&memcg_tasklist);
mem_over_limit->last_oom_jiffies = jiffies;
}
goto nomem;
return -ENOMEM;
}
-/**
- * mem_cgroup_try_charge - get charge of PAGE_SIZE.
- * @mm: an mm_struct which is charged against. (when *memcg is NULL)
- * @gfp_mask: gfp_mask for reclaim.
- * @memcg: a pointer to memory cgroup which is charged against.
- *
- * charge against memory cgroup pointed by *memcg. if *memcg == NULL, estimated
- * memory cgroup from @mm is got and stored in *memcg.
- *
- * Returns 0 if success. -ENOMEM at failure.
- * This call can invoke OOM-Killer.
- */
-
-int mem_cgroup_try_charge(struct mm_struct *mm,
- gfp_t mask, struct mem_cgroup **memcg)
+static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{
- return __mem_cgroup_try_charge(mm, mask, memcg, true);
+ struct mem_cgroup *mem;
+ swp_entry_t ent;
+
+ if (!PageSwapCache(page))
+ return NULL;
+
+ ent.val = page_private(page);
+ mem = lookup_swap_cgroup(ent);
+ if (!mem)
+ return NULL;
+ if (!css_tryget(&mem->css))
+ return NULL;
+ return mem;
}
/*
- * commit a charge got by mem_cgroup_try_charge() and makes page_cgroup to be
+ * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
* USED state. If already USED, uncharge and return.
*/
if (pc->mem_cgroup != from)
goto out;
- css_put(&from->css);
res_counter_uncharge(&from->res, PAGE_SIZE);
mem_cgroup_charge_statistics(from, pc, false);
if (do_swap_account)
res_counter_uncharge(&from->memsw, PAGE_SIZE);
+ css_put(&from->css);
+
+ css_get(&to->css);
pc->mem_cgroup = to;
mem_cgroup_charge_statistics(to, pc, true);
- css_get(&to->css);
ret = 0;
out:
unlock_page_cgroup(pc);
if (ret || !parent)
return ret;
- if (!get_page_unless_zero(page))
- return -EBUSY;
+ if (!get_page_unless_zero(page)) {
+ ret = -EBUSY;
+ goto uncharge;
+ }
ret = isolate_lru_page(page);
ret = mem_cgroup_move_account(pc, child, parent);
- /* drop extra refcnt by try_charge() (move_account increment one) */
- css_put(&parent->css);
putback_lru_page(page);
if (!ret) {
put_page(page);
+ /* drop extra refcnt by try_charge() */
+ css_put(&parent->css);
return 0;
}
- /* uncharge if move fails */
+
cancel:
+ put_page(page);
+uncharge:
+ /* drop extra refcnt by try_charge() */
+ css_put(&parent->css);
+ /* uncharge if move fails */
res_counter_uncharge(&parent->res, PAGE_SIZE);
if (do_swap_account)
res_counter_uncharge(&parent->memsw, PAGE_SIZE);
- put_page(page);
return ret;
}
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
+ struct mem_cgroup *mem = NULL;
+ int ret;
+
if (mem_cgroup_disabled())
return 0;
if (PageCompound(page))
* For GFP_NOWAIT case, the page may be pre-charged before calling
* add_to_page_cache(). (See shmem.c) check it here and avoid to call
* charge twice. (It works but has to pay a bit larger cost.)
+ * And when the page is SwapCache, it should take swap information
+ * into account. This is under lock_page() now.
*/
if (!(gfp_mask & __GFP_WAIT)) {
struct page_cgroup *pc;
unlock_page_cgroup(pc);
}
- if (unlikely(!mm))
+ if (do_swap_account && PageSwapCache(page)) {
+ mem = try_get_mem_cgroup_from_swapcache(page);
+ if (mem)
+ mm = NULL;
+ else
+ mem = NULL;
+ /* SwapCache may be still linked to LRU now. */
+ mem_cgroup_lru_del_before_commit_swapcache(page);
+ }
+
+ if (unlikely(!mm && !mem))
mm = &init_mm;
if (page_is_file_cache(page))
return mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
- else
- return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
+
+ ret = mem_cgroup_charge_common(page, mm, gfp_mask,
+ MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
+ if (mem)
+ css_put(&mem->css);
+ if (PageSwapCache(page))
+ mem_cgroup_lru_add_after_commit_swapcache(page);
+
+ if (do_swap_account && !ret && PageSwapCache(page)) {
+ swp_entry_t ent = {.val = page_private(page)};
+ /* avoid double counting */
+ mem = swap_cgroup_record(ent, NULL);
+ if (mem) {
+ res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+ mem_cgroup_put(mem);
+ }
+ }
+ return ret;
}
+/*
+ * While swap-in, try_charge -> commit or cancel, the page is locked.
+ * And when try_charge() successfully returns, one refcnt to memcg without
+ * struct page_cgroup is aquired. This refcnt will be cumsumed by
+ * "commit()" or removed by "cancel()"
+ */
int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
struct page *page,
gfp_t mask, struct mem_cgroup **ptr)
{
struct mem_cgroup *mem;
- swp_entry_t ent;
+ int ret;
if (mem_cgroup_disabled())
return 0;
if (!do_swap_account)
goto charge_cur_mm;
-
/*
* A racing thread's fault, or swapoff, may have already updated
* the pte, and even removed page from swap cache: return success
*/
if (!PageSwapCache(page))
return 0;
-
- ent.val = page_private(page);
-
- mem = lookup_swap_cgroup(ent);
- if (!mem || mem->obsolete)
+ mem = try_get_mem_cgroup_from_swapcache(page);
+ if (!mem)
goto charge_cur_mm;
*ptr = mem;
- return __mem_cgroup_try_charge(NULL, mask, ptr, true);
+ ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
+ /* drop extra refcnt from tryget */
+ css_put(&mem->css);
+ return ret;
charge_cur_mm:
if (unlikely(!mm))
mm = &init_mm;
return __mem_cgroup_try_charge(mm, mask, ptr, true);
}
-#ifdef CONFIG_SWAP
-
-int mem_cgroup_cache_charge_swapin(struct page *page,
- struct mm_struct *mm, gfp_t mask, bool locked)
-{
- int ret = 0;
-
- if (mem_cgroup_disabled())
- return 0;
- if (unlikely(!mm))
- mm = &init_mm;
- if (!locked)
- lock_page(page);
- /*
- * If not locked, the page can be dropped from SwapCache until
- * we reach here.
- */
- if (PageSwapCache(page)) {
- struct mem_cgroup *mem = NULL;
- swp_entry_t ent;
-
- ent.val = page_private(page);
- if (do_swap_account) {
- mem = lookup_swap_cgroup(ent);
- if (mem && mem->obsolete)
- mem = NULL;
- if (mem)
- mm = NULL;
- }
- ret = mem_cgroup_charge_common(page, mm, mask,
- MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
-
- if (!ret && do_swap_account) {
- /* avoid double counting */
- mem = swap_cgroup_record(ent, NULL);
- if (mem) {
- res_counter_uncharge(&mem->memsw, PAGE_SIZE);
- mem_cgroup_put(mem);
- }
- }
- }
- if (!locked)
- unlock_page(page);
- /* add this page(page_cgroup) to the LRU we want. */
- mem_cgroup_lru_fixup(page);
-
- return ret;
-}
-#endif
-
void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
{
struct page_cgroup *pc;
if (!ptr)
return;
pc = lookup_page_cgroup(page);
+ mem_cgroup_lru_del_before_commit_swapcache(page);
__mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+ mem_cgroup_lru_add_after_commit_swapcache(page);
/*
* Now swap is on-memory. This means this page may be
* counted both as mem and swap....double count.
- * Fix it by uncharging from memsw. This SwapCache is stable
- * because we're still under lock_page().
+ * Fix it by uncharging from memsw. Basically, this SwapCache is stable
+ * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
+ * may call delete_from_swap_cache() before reach here.
*/
- if (do_swap_account) {
+ if (do_swap_account && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
struct mem_cgroup *memcg;
memcg = swap_cgroup_record(ent, NULL);
if (memcg) {
- /* If memcg is obsolete, memcg can be != ptr */
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg);
}
}
/* add this page(page_cgroup) to the LRU we want. */
- mem_cgroup_lru_fixup(page);
+
}
void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
mem_cgroup_charge_statistics(mem, pc, false);
ClearPageCgroupUsed(pc);
+ /*
+ * pc->mem_cgroup is not cleared here. It will be accessed when it's
+ * freed from LRU. This is safe because uncharged page is expected not
+ * to be reused (freed soon). Exception is SwapCache, it's handled by
+ * special functions.
+ */
mz = page_cgroup_zoneinfo(pc);
unlock_page_cgroup(pc);
unlock_page_cgroup(pc);
if (mem) {
- ret = mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem);
+ ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
css_put(&mem->css);
}
*ptr = mem;
* This is typically used for page reclaiming for shmem for reducing side
* effect of page allocation from shmem, which is used by some mem_cgroup.
*/
-int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
+int mem_cgroup_shrink_usage(struct page *page,
+ struct mm_struct *mm,
+ gfp_t gfp_mask)
{
- struct mem_cgroup *mem;
+ struct mem_cgroup *mem = NULL;
int progress = 0;
int retry = MEM_CGROUP_RECLAIM_RETRIES;
if (mem_cgroup_disabled())
return 0;
- if (!mm)
- return 0;
-
- rcu_read_lock();
- mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
- if (unlikely(!mem)) {
- rcu_read_unlock();
+ if (page)
+ mem = try_get_mem_cgroup_from_swapcache(page);
+ if (!mem && mm)
+ mem = try_get_mem_cgroup_from_mm(mm);
+ if (unlikely(!mem))
return 0;
- }
- css_get(&mem->css);
- rcu_read_unlock();
do {
- progress = try_to_free_mem_cgroup_pages(mem, gfp_mask, true,
- get_swappiness(mem));
+ progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true);
progress += mem_cgroup_check_under_limit(mem);
} while (!progress && --retry);
if (!ret)
break;
- progress = try_to_free_mem_cgroup_pages(memcg,
- GFP_KERNEL,
- false,
- get_swappiness(memcg));
+ progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
+ false);
if (!progress) retry_count--;
}
break;
oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
- try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, true,
- get_swappiness(memcg));
+ mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true);
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
if (curusage >= oldusage)
retry_count--;
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct mem_cgroup *parent;
+
if (val > 100)
return -EINVAL;
return -EINVAL;
parent = mem_cgroup_from_cont(cgrp->parent);
+
+ cgroup_lock();
+
/* If under hierarchy, only empty-root can set this value */
if ((parent->use_hierarchy) ||
- (memcg->use_hierarchy && !list_empty(&cgrp->children)))
+ (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
+ cgroup_unlock();
return -EINVAL;
+ }
spin_lock(&memcg->reclaim_param_lock);
memcg->swappiness = val;
spin_unlock(&memcg->reclaim_param_lock);
+ cgroup_unlock();
+
return 0;
}
* the number of reference from swap_cgroup and free mem_cgroup when
* it goes down to 0.
*
- * When mem_cgroup is destroyed, mem->obsolete will be set to 0 and
- * entry which points to this memcg will be ignore at swapin.
- *
* Removal of cgroup itself succeeds regardless of refs from swap.
*/
-static void mem_cgroup_free(struct mem_cgroup *mem)
+static void __mem_cgroup_free(struct mem_cgroup *mem)
{
int node;
- if (atomic_read(&mem->refcnt) > 0)
- return;
-
-
for_each_node_state(node, N_POSSIBLE)
free_mem_cgroup_per_zone_info(mem, node);
static void mem_cgroup_put(struct mem_cgroup *mem)
{
- if (atomic_dec_and_test(&mem->refcnt)) {
- if (!mem->obsolete)
- return;
- mem_cgroup_free(mem);
- }
+ if (atomic_dec_and_test(&mem->refcnt))
+ __mem_cgroup_free(mem);
}
}
#endif
-static struct cgroup_subsys_state *
+static struct cgroup_subsys_state * __ref
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
struct mem_cgroup *mem, *parent;
if (parent)
mem->swappiness = get_swappiness(parent);
-
+ atomic_set(&mem->refcnt, 1);
return &mem->css;
free_out:
- for_each_node_state(node, N_POSSIBLE)
- free_mem_cgroup_per_zone_info(mem, node);
- mem_cgroup_free(mem);
+ __mem_cgroup_free(mem);
return ERR_PTR(-ENOMEM);
}
struct cgroup *cont)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
- mem->obsolete = 1;
mem_cgroup_force_empty(mem, false);
}
static void mem_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
- mem_cgroup_free(mem_cgroup_from_cont(cont));
+ struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *last_scanned_child = mem->last_scanned_child;
+
+ if (last_scanned_child) {
+ VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child));
+ mem_cgroup_put(last_scanned_child);
+ }
+ mem_cgroup_put(mem);
}
static int mem_cgroup_populate(struct cgroup_subsys *ss,
struct cgroup *old_cont,
struct task_struct *p)
{
+ mutex_lock(&memcg_tasklist);
/*
* FIXME: It's better to move charges of this process from old
* memcg to new memcg. But it's just on TODO-List now.
*/
+ mutex_unlock(&memcg_tasklist);
}
struct cgroup_subsys mem_cgroup_subsys = {