ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
if (likely(!ret)) {
if (!do_swap_account)
break;
ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
&fail_res);
if (likely(!ret))
break;
/* mem+swap counter fails */
res_counter_uncharge(&mem->res, PAGE_SIZE);
noswap = true;
mem_over_limit = mem_cgroup_from_res_counter(fail_res,
memsw);
} else
/* mem counter fails */
mem_over_limit = mem_cgroup_from_res_counter(fail_res,
res);
if (!(gfp_mask & __GFP_WAIT))
goto nomem;
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
noswap, false);
/*
* try_to_free_mem_cgroup_pages() might not give us a full
* picture of reclaim. Some pages are reclaimed and might be
* moved to swap cache or just unmapped from the cgroup.
* Check the limit again to see if the reclaim reduced the
* current usage of the cgroup before giving up
*/
if (mem_cgroup_check_under_limit(mem_over_limit))
continue;
mutex_lock(&memcg_tasklist);
mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
mutex_unlock(&memcg_tasklist);
goto nomem;
return 0;
nomem:
css_put(&mem->css);
return -ENOMEM;
}
/*
* A helper function to get a mem_cgroup from an ID. Must be called under
* rcu_read_lock(). The caller must check css_is_removed() or similar if
* that is a concern. (Dropping a refcnt from swap can be done against an
* already-removed memcg.)
*/
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
struct cgroup_subsys_state *css;
/* ID 0 is unused ID */
if (!id)
return NULL;
css = css_lookup(&mem_cgroup_subsys, id);
if (!css)
return NULL;
return container_of(css, struct mem_cgroup, css);
}
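/*
* Illustrative sketch (not part of the original source): the lookup pattern
* the comment above describes, and the same pattern
* try_get_mem_cgroup_from_swapcache() uses below for the swap-cache case.
* The helper name is hypothetical; the css reference must be validated with
* css_tryget() while still inside the RCU read-side critical section.
*/
static struct mem_cgroup *example_get_memcg_by_id(unsigned short id)
{
struct mem_cgroup *mem;
rcu_read_lock();
mem = mem_cgroup_lookup(id);
/* the memcg may be half-removed; treat a failed tryget as "not found" */
if (mem && !css_tryget(&mem->css))
mem = NULL;
rcu_read_unlock();
/* caller must css_put(&mem->css) when done with a non-NULL result */
return mem;
}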
static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{
struct mem_cgroup *mem;
struct page_cgroup *pc;
unsigned short id;
swp_entry_t ent;
VM_BUG_ON(!PageLocked(page));
if (!PageSwapCache(page))
return NULL;
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
mem = pc->mem_cgroup;
if (mem && !css_tryget(&mem->css))
mem = NULL;
} else {
ent.val = page_private(page);
id = lookup_swap_cgroup(ent);
rcu_read_lock();
mem = mem_cgroup_lookup(id);
if (mem && !css_tryget(&mem->css))
mem = NULL;
rcu_read_unlock();
}
unlock_page_cgroup(pc);
return mem;
}
/*
* commit a charge obtained by __mem_cgroup_try_charge() and make the
* page_cgroup USED. If it is already USED, uncharge and return.
*/
static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
struct page_cgroup *pc,
enum charge_type ctype)
{
/* try_charge() can return NULL to *memcg, taking care of it. */
if (!mem)
return;
lock_page_cgroup(pc);
if (unlikely(PageCgroupUsed(pc))) {
unlock_page_cgroup(pc);
res_counter_uncharge(&mem->res, PAGE_SIZE);
if (do_swap_account)
res_counter_uncharge(&mem->memsw, PAGE_SIZE);
css_put(&mem->css);
return;
}
pc->mem_cgroup = mem;
smp_wmb();	/* make pc->mem_cgroup visible before the USED bit is set */
pc->flags = pcg_default_flags[ctype];
mem_cgroup_charge_statistics(mem, pc, true);
unlock_page_cgroup(pc);
}
/**
* mem_cgroup_move_account - move account of the page
* @pc: page_cgroup of the page.
* @from: mem_cgroup which the page is moved from.
* @to: mem_cgroup which the page is moved to. @from != @to.
*
* The caller must confirm following.
* - page is not on LRU (isolate_page() is useful.)
*
* returns 0 at success,
* returns -EBUSY when lock is busy or "pc" is unstable.
*
* This function does "uncharge" from old cgroup but doesn't do "charge" to
* new cgroup. It should be done by a caller.
*/
static int mem_cgroup_move_account(struct page_cgroup *pc,
struct mem_cgroup *from, struct mem_cgroup *to)
{
struct mem_cgroup_per_zone *from_mz, *to_mz;
int nid, zid;
int ret = -EBUSY;
struct page *page;
int cpu;
struct mem_cgroup_stat *stat;
struct mem_cgroup_stat_cpu *cpustat;
VM_BUG_ON(from == to);
nid = page_cgroup_nid(pc);
zid = page_cgroup_zid(pc);
from_mz = mem_cgroup_zoneinfo(from, nid, zid);
to_mz = mem_cgroup_zoneinfo(to, nid, zid);
if (!trylock_page_cgroup(pc))
return ret;
if (!PageCgroupUsed(pc))
goto out;
if (pc->mem_cgroup != from)
goto out;
res_counter_uncharge(&from->res, PAGE_SIZE);
mem_cgroup_charge_statistics(from, pc, false);
page = pc->page;
if (page_is_file_cache(page) && page_mapped(page)) {
cpu = smp_processor_id();
/* Update mapped_file data for mem_cgroup "from" */
stat = &from->stat;
cpustat = &stat->cpustat[cpu];
__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
-1);
/* Update mapped_file data for mem_cgroup "to" */
stat = &to->stat;
cpustat = &stat->cpustat[cpu];
__mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE,
1);
}
if (do_swap_account)
res_counter_uncharge(&from->memsw, PAGE_SIZE);
css_put(&from->css);
css_get(&to->css);
pc->mem_cgroup = to;
mem_cgroup_charge_statistics(to, pc, true);
ret = 0;
out:
unlock_page_cgroup(pc);
return ret;
}
/*
* move charges to its parent.
*/
static int mem_cgroup_move_parent(struct page_cgroup *pc,
struct mem_cgroup *child,
gfp_t gfp_mask)
{
struct page *page = pc->page;
struct cgroup *cg = child->css.cgroup;
struct cgroup *pcg = cg->parent;
struct mem_cgroup *parent;
int ret;
/* Is ROOT ? */
if (!pcg)
return -EINVAL;
parent = mem_cgroup_from_cont(pcg);
ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
if (ret || !parent)
return ret;
if (!get_page_unless_zero(page)) {
ret = -EBUSY;
goto uncharge;
}
ret = isolate_lru_page(page);
if (ret)
goto cancel;
ret = mem_cgroup_move_account(pc, child, parent);
putback_lru_page(page);
if (!ret) {
put_page(page);
/* drop extra refcnt by try_charge() */
css_put(&parent->css);
return 0;
}
cancel:
put_page(page);
uncharge:
/* drop extra refcnt by try_charge() */
css_put(&parent->css);
/* uncharge if move fails */
res_counter_uncharge(&parent->res, PAGE_SIZE);
if (do_swap_account)
res_counter_uncharge(&parent->memsw, PAGE_SIZE);
return ret;
}
/*
* Charge the memory controller for page usage.
* Return
* 0 if the charge was successful
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask, enum charge_type ctype,
struct mem_cgroup *memcg)
{
struct mem_cgroup *mem;
struct page_cgroup *pc;
int ret;
pc = lookup_page_cgroup(page);
/* can happen at boot */
if (unlikely(!pc))
return 0;
prefetchw(pc);
mem = memcg;
ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
if (ret || !mem)
return ret;
__mem_cgroup_commit_charge(mem, pc, ctype);
return 0;
}
int mem_cgroup_newpage_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
if (PageCompound(page))
return 0;
/*
* If already mapped, we don't have to account.
* If page cache, page->mapping has address_space.
* But page->mapping may have out-of-use anon_vma pointer,
* detect it by PageAnon() check. newly-mapped-anon's page->mapping
* is NULL.
*/
if (page_mapped(page) || (page->mapping && !PageAnon(page)))
return 0;
if (unlikely(!mm))
mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
}
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
enum charge_type ctype);
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
struct mem_cgroup *mem = NULL;
int ret;
if (PageCompound(page))
return 0;
/*
* Corner case handling. This is usually called from add_to_page_cache().
* But some FS (shmem) precharges this page before calling it and
* calls add_to_page_cache() with GFP_NOWAIT.
*
* For the GFP_NOWAIT case, the page may be pre-charged before calling
* add_to_page_cache(). (See shmem.c.) Check that here and avoid charging
* twice. (It works but has to pay a bit larger cost.)
* And when the page is SwapCache, it should take swap information
* into account. This is under lock_page() now.
*/
if (!(gfp_mask & __GFP_WAIT)) {
struct page_cgroup *pc;
pc = lookup_page_cgroup(page);
if (!pc)
return 0;
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
unlock_page_cgroup(pc);
return 0;
}
unlock_page_cgroup(pc);
}
if (unlikely(!mm))
mm = &init_mm;
if (page_is_file_cache(page))
return mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
/* shmem */
if (PageSwapCache(page)) {
ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
if (!ret)
__mem_cgroup_commit_charge_swapin(page, mem,
MEM_CGROUP_CHARGE_TYPE_SHMEM);
} else
ret = mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
return ret;
}
/*
* While swap-in, try_charge -> commit or cancel, the page is locked.
* And when try_charge() successfully returns, one refcnt to memcg without
* struct page_cgroup is acquired. This refcnt will be consumed by
* "commit()" or removed by "cancel()".
*/
int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
struct page *page,
gfp_t mask, struct mem_cgroup **ptr)
{
struct mem_cgroup *mem;
int ret;
if (mem_cgroup_disabled())
return 0;
if (!do_swap_account)
goto charge_cur_mm;
/*
* A racing thread's fault, or swapoff, may have already updated
* the pte, and even removed page from swap cache: return success
* to go on to do_swap_page()'s pte_same() test, which should fail.
*/
if (!PageSwapCache(page))
return 0;
mem = try_get_mem_cgroup_from_swapcache(page);
if (!mem)
goto charge_cur_mm;
*ptr = mem;
ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
/* drop extra refcnt from tryget */
css_put(&mem->css);
return ret;
charge_cur_mm:
if (unlikely(!mm))
mm = &init_mm;
return __mem_cgroup_try_charge(mm, mask, ptr, true);
}
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
enum charge_type ctype)
{
struct page_cgroup *pc;
if (mem_cgroup_disabled())
return;
if (!ptr)
return;
pc = lookup_page_cgroup(page);
mem_cgroup_lru_del_before_commit_swapcache(page);
__mem_cgroup_commit_charge(ptr, pc, ctype);
mem_cgroup_lru_add_after_commit_swapcache(page);
/*
* Now swap is on-memory. This means this page may be
* counted both as mem and swap....double count.
* Fix it by uncharging from memsw. Basically, this SwapCache is stable
* under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
* may call delete_from_swap_cache() before we reach here.
*/
if (do_swap_account && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
unsigned short id;
struct mem_cgroup *memcg;
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
if (memcg) {
/*
* This recorded memcg can be an obsolete one. So, avoid
* calling css_tryget().
*/
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg);
}
rcu_read_unlock();
}
/* add this page(page_cgroup) to the LRU we want. */
}
void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
{
__mem_cgroup_commit_charge_swapin(page, ptr,
MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
{
if (mem_cgroup_disabled())
return;
if (!mem)
return;
res_counter_uncharge(&mem->res, PAGE_SIZE);
if (do_swap_account)
res_counter_uncharge(&mem->memsw, PAGE_SIZE);
css_put(&mem->css);
}
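/*
* Illustrative sketch (not part of the original source): how a swap-in fault
* path such as do_swap_page() is expected to drive the try_charge/commit/
* cancel protocol documented above. example_install_pte() is a hypothetical
* stand-in for the point where the fault handler can still fail after the
* charge has been taken.
*/
static int example_swapin_charge(struct mm_struct *mm, struct page *page,
int (*example_install_pte)(struct page *))
{
struct mem_cgroup *ptr = NULL;
int ret;
/* page is locked here; the charge may block and reclaim from the memcg */
ret = mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr);
if (ret)
return ret; /* typically mapped to VM_FAULT_OOM */
if (example_install_pte(page)) {
/* failure after charging: drop the charge and the css refcnt */
mem_cgroup_cancel_charge_swapin(ptr);
return -EAGAIN;
}
/* success: mark the page_cgroup USED and fix up any memsw double count */
mem_cgroup_commit_charge_swapin(page, ptr);
return 0;
}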
/*
* uncharge if !page_mapped(page)
*/
static struct mem_cgroup *
__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
{
struct page_cgroup *pc;
struct mem_cgroup *mem = NULL;
struct mem_cgroup_per_zone *mz;
/*
* Check if our page_cgroup is valid
*/
pc = lookup_page_cgroup(page);
if (unlikely(!pc || !PageCgroupUsed(pc)))
return NULL;
lock_page_cgroup(pc);
mem = pc->mem_cgroup;
if (!PageCgroupUsed(pc))
goto unlock_out;
switch (ctype) {
case MEM_CGROUP_CHARGE_TYPE_MAPPED:
if (page_mapped(page))
goto unlock_out;
break;
case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
if (!PageAnon(page)) { /* Shared memory */
if (page->mapping && !page_is_file_cache(page))
goto unlock_out;
} else if (page_mapped(page)) /* Anon */
goto unlock_out;
break;
default:
break;
}
res_counter_uncharge(&mem->res, PAGE_SIZE);
if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
res_counter_uncharge(&mem->memsw, PAGE_SIZE);
mem_cgroup_charge_statistics(mem, pc, false);
/*
* pc->mem_cgroup is not cleared here. It will be accessed when it's
* freed from LRU. This is safe because uncharged page is expected not
* to be reused (freed soon). Exception is SwapCache, it's handled by
* special functions.
*/
mz = page_cgroup_zoneinfo(pc);
/* at swapout, this memcg will be accessed to record to swap */
if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
css_put(&mem->css);
return mem;
unlock_out:
unlock_page_cgroup(pc);
return NULL;
}
void mem_cgroup_uncharge_page(struct page *page)
{
/* early check. */
if (page_mapped(page))
return;
if (page->mapping && !PageAnon(page))
return;
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
void mem_cgroup_uncharge_cache_page(struct page *page)
{
VM_BUG_ON(page_mapped(page));
VM_BUG_ON(page->mapping);
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
}
#ifdef CONFIG_SWAP
/*
* called after __delete_from_swap_cache() and drop "page" account.
* memcg information is recorded to swap_cgroup of "ent"
*/
void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
{
struct mem_cgroup *memcg;
int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
if (!swapout) /* this was a swap cache but the swap is unused ! */
ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
memcg = __mem_cgroup_uncharge_common(page, ctype);
/* record memcg information */
if (do_swap_account && swapout && memcg) {
swap_cgroup_record(ent, css_id(&memcg->css));
mem_cgroup_get(memcg);
}
if (swapout && memcg)
css_put(&memcg->css);
}
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/*
* called from swap_entry_free(). remove record in swap_cgroup and
* uncharge "memsw" account.
*/
void mem_cgroup_uncharge_swap(swp_entry_t ent)
{
struct mem_cgroup *memcg;
unsigned short id;
if (!do_swap_account)
return;
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
if (memcg) {
/*
* We uncharge this because swap is freed.
* This memcg can be an obsolete one. We avoid calling css_tryget().
*/
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg);
}
rcu_read_unlock();
}
#endif

/*
* Before starting migration, account PAGE_SIZE to mem_cgroup that the old
* page belongs to.

*/
int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)

{
struct page_cgroup *pc;
struct mem_cgroup *mem = NULL;
int ret = 0;
if (mem_cgroup_disabled())
return 0;
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
mem = pc->mem_cgroup;
css_get(&mem->css);
}
unlock_page_cgroup(pc);
if (mem) {
ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
css_put(&mem->css);
}
*ptr = mem;
return ret;
}
/* remove redundant charge if migration failed */
void mem_cgroup_end_migration(struct mem_cgroup *mem,
struct page *oldpage, struct page *newpage)

{
struct page *target, *unused;
struct page_cgroup *pc;
enum charge_type ctype;
if (!mem)
return;
/* at migration success, oldpage->mapping is NULL. */
if (oldpage->mapping) {
target = oldpage;
unused = NULL;
} else {
target = newpage;
unused = oldpage;
}
if (PageAnon(target))
ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
else if (page_is_file_cache(target))
ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
else
ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
/* unused page is not on radix-tree now. */
if (unused)
__mem_cgroup_uncharge_common(unused, ctype);
pc = lookup_page_cgroup(target);
/*
* __mem_cgroup_commit_charge() checks the PCG_USED bit of page_cgroup.
* So, double-counting is effectively avoided.
*/
__mem_cgroup_commit_charge(mem, pc, ctype);
/*
* Both of oldpage and newpage are still under lock_page().
* Then, we don't have to care about race in radix-tree.
* But we have to be careful that this page is unmapped or not.
*
* There is a case for !page_mapped(). At the start of
* migration, oldpage was mapped. But now, it's zapped.
* But we know *target* page is not freed/reused under us.
* mem_cgroup_uncharge_page() does all necessary checks.
*/
if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
mem_cgroup_uncharge_page(target);

}
/*
* A call to try to shrink memory usage on charge failure at shmem's swapin.
* Calling hierarchical_reclaim is not enough because we should update
* last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
* Moreover, considering hierarchy, we should reclaim from the mem_over_limit,
* not from the memcg which this page would be charged to.
* try_charge_swapin does all of this work properly.
*/
int mem_cgroup_shmem_charge_fallback(struct page *page,
struct mm_struct *mm,
gfp_t gfp_mask)
{
struct mem_cgroup *mem = NULL;
int ret;
ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
if (!ret)
mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
return ret;
}
static DEFINE_MUTEX(set_limit_mutex);
static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
unsigned long long val)
{
int retry_count;
int progress;
u64 memswlimit;
int ret = 0;
int children = mem_cgroup_count_children(memcg);
u64 curusage, oldusage;
/*
* For keeping hierarchical_reclaim simple, how long we should retry
* depends on the caller. We set our retry-count to be a function
* of the number of children we should visit in this loop.
*/
retry_count = MEM_CGROUP_RECLAIM_RETRIES * children;
oldusage = res_counter_read_u64(&memcg->res, RES_USAGE);
while (retry_count) {
if (signal_pending(current)) {
ret = -EINTR;
break;
}
/*
* Rather than hiding all this in some function, do it in an
* open-coded manner so you can see what it really does.
* We have to guarantee mem->res.limit < mem->memsw.limit.
*/
mutex_lock(&set_limit_mutex);
memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
if (memswlimit < val) {
ret = -EINVAL;
mutex_unlock(&set_limit_mutex);
break;
}
ret = res_counter_set_limit(&memcg->res, val);
if (!ret) {
if (memswlimit == val)
memcg->memsw_is_minimum = true;
else
memcg->memsw_is_minimum = false;
}
mutex_unlock(&set_limit_mutex);
if (!ret)
break;
progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
false, true);
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
retry_count--;
else
oldusage = curusage;
}
return ret;
}
static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
unsigned long long val)
{
int retry_count;
u64 memlimit, oldusage, curusage;
int children = mem_cgroup_count_children(memcg);
int ret = -EBUSY;
/* see mem_cgroup_resize_res_limit */
retry_count = children * MEM_CGROUP_RECLAIM_RETRIES;
oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
while (retry_count) {
if (signal_pending(current)) {
ret = -EINTR;
break;
}
/*
* Rather than hiding all this in some function, do it in an
* open-coded manner so you can see what it really does.
* We have to guarantee mem->res.limit < mem->memsw.limit.
*/
mutex_lock(&set_limit_mutex);
memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
if (memlimit > val) {
ret = -EINVAL;
mutex_unlock(&set_limit_mutex);
break;
}
ret = res_counter_set_limit(&memcg->memsw, val);
if (!ret) {
if (memlimit == val)
memcg->memsw_is_minimum = true;
else
memcg->memsw_is_minimum = false;
}
mutex_unlock(&set_limit_mutex);
if (!ret)
break;
mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true);
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/* Usage is reduced ? */
if (curusage >= oldusage)
retry_count--;
else
oldusage = curusage;
}
return ret;
}

/*
* This routine traverses page_cgroups in the given list and drops them all.
* *And* this routine doesn't reclaim page itself, just removes page_cgroup.
*/
static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
int node, int zid, enum lru_list lru)
{
struct zone *zone;
struct mem_cgroup_per_zone *mz;
struct page_cgroup *pc, *busy;

unsigned long flags, loop;
struct list_head *list;
int ret = 0;

zone = &NODE_DATA(node)->node_zones[zid];
mz = mem_cgroup_zoneinfo(mem, node, zid);
list = &mz->lists[lru];

loop = MEM_CGROUP_ZSTAT(mz, lru);
/* give some margin against EBUSY etc...*/
loop += 256;
busy = NULL;
while (loop--) {
ret = 0;
spin_lock_irqsave(&zone->lru_lock, flags);
if (list_empty(list)) {
spin_unlock_irqrestore(&zone->lru_lock, flags);
break;
}
pc = list_entry(list->prev, struct page_cgroup, lru);
if (busy == pc) {
list_move(&pc->lru, list);
busy = 0;
spin_unlock_irqrestore(&zone->lru_lock, flags);
continue;
}
spin_unlock_irqrestore(&zone->lru_lock, flags);
ret = mem_cgroup_move_parent(pc, mem, GFP_KERNEL);
if (ret == -ENOMEM)
break;
if (ret == -EBUSY || ret == -EINVAL) {
/* found lock contention or "pc" is obsolete. */
busy = pc;
cond_resched();
} else
busy = NULL;

}
if (!ret && !list_empty(list))
return -EBUSY;
return ret;

}
/*
* make the mem_cgroup's charge 0 if there is no task.
* This enables deleting this mem_cgroup.
*/
static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)

{
int ret;
int node, zid, shrink;
int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct cgroup *cgrp = mem->css.cgroup;

css_get(&mem->css);
shrink = 0;
/* should free all ? */
if (free_all)
goto try_to_free;
move_account:

while (mem->res.usage > 0) {
ret = -EBUSY;
if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
goto out;
ret = -EINTR;
if (signal_pending(current))

goto out;
/* This is for making all *used* pages to be on LRU. */
lru_add_drain_all();
ret = 0;
for_each_node_state(node, N_HIGH_MEMORY) {
for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
enum lru_list l;
for_each_lru(l) {
ret = mem_cgroup_force_empty_list(mem,
node, zid, l);
if (ret)
break;
}

}
if (ret)
break;
}
/* it seems parent cgroup doesn't have enough mem */
if (ret == -ENOMEM)
goto try_to_free;

}
ret = 0;
out:
css_put(&mem->css);
return ret;
try_to_free:
/* returns EBUSY if there is a task or if we come here twice. */
if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) {
ret = -EBUSY;
goto out;
}
/* we call try-to-free pages to make this cgroup empty */
lru_add_drain_all();
/* try to free all pages in this cgroup */
shrink = 1;
while (nr_retries && mem->res.usage > 0) {
int progress;
if (signal_pending(current)) {
ret = -EINTR;
goto out;
}
progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
false, get_swappiness(mem));
if (!progress) {
nr_retries--;
/* maybe some writeback is necessary */
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
}
lru_add_drain();
/* try move_account...there may be some *locked* pages. */
if (mem->res.usage)
goto move_account;
ret = 0;
goto out;

}
int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
{
return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
}
static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
{
return mem_cgroup_from_cont(cont)->use_hierarchy;
}
static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
u64 val)