Newer
Older
stock->nr_pages += nr_pages;
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
put_cpu_var(memcg_stock);
}
/*
* Tries to drain stocked charges in other cpus. This function is asynchronous
* and just put a work per cpu for draining localy on each cpu. Caller can
* expects some charges will be back to res_counter later but cannot wait for
* it.
*/
static void drain_all_stock_async(void)
{
int cpu;
/* This function is for scheduling "drain" in asynchronous way.
* The result of "drain" is not directly handled by callers. Then,
* if someone is calling drain, we don't have to call drain more.
* Anyway, WORK_STRUCT_PENDING check in queue_work_on() will catch if
* there is a race. We just do loose check here.
*/
if (atomic_read(&memcg_drain_count))
return;
/* Notify other cpus that system-wide "drain" is running */
atomic_inc(&memcg_drain_count);
get_online_cpus();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
schedule_work_on(cpu, &stock->work);
}
put_online_cpus();
atomic_dec(&memcg_drain_count);
/* We don't wait for flush_work */
}
/* This is a synchronous drain interface. */
static void drain_all_stock_sync(void)
{
/* called when force_empty is called */
atomic_inc(&memcg_drain_count);
schedule_on_each_cpu(drain_local_stock);
atomic_dec(&memcg_drain_count);
}
/*
* This function drains percpu counter value from DEAD cpu and
* move it to local cpu. Note that this function can be preempted.
*/
static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu)
{
int i;
spin_lock(&mem->pcp_counter_lock);
for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
long x = per_cpu(mem->stat->count[i], cpu);
per_cpu(mem->stat->count[i], cpu) = 0;
mem->nocpu_base.count[i] += x;
}
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
unsigned long x = per_cpu(mem->stat->events[i], cpu);
per_cpu(mem->stat->events[i], cpu) = 0;
mem->nocpu_base.events[i] += x;
}
/* need to clear ON_MOVE value, works as a kind of lock. */
per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
spin_unlock(&mem->pcp_counter_lock);
}
static void synchronize_mem_cgroup_on_move(struct mem_cgroup *mem, int cpu)
{
int idx = MEM_CGROUP_ON_MOVE;
spin_lock(&mem->pcp_counter_lock);
per_cpu(mem->stat->count[idx], cpu) = mem->nocpu_base.count[idx];
spin_unlock(&mem->pcp_counter_lock);
}
static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
unsigned long action,
void *hcpu)
{
int cpu = (unsigned long)hcpu;
struct memcg_stock_pcp *stock;
struct mem_cgroup *iter;
if ((action == CPU_ONLINE)) {
for_each_mem_cgroup_all(iter)
synchronize_mem_cgroup_on_move(iter, cpu);
return NOTIFY_OK;
}
if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN)
for_each_mem_cgroup_all(iter)
mem_cgroup_drain_pcp_counter(iter, cpu);
stock = &per_cpu(memcg_stock, cpu);
drain_stock(stock);
return NOTIFY_OK;
}
/* See __mem_cgroup_try_charge() for details */
enum {
CHARGE_OK, /* success */
CHARGE_RETRY, /* need to retry but retry is not bad */
CHARGE_NOMEM, /* we can't do more. return -ENOMEM */
CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */
CHARGE_OOM_DIE, /* the current is killed because of OOM */
};
static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
unsigned int nr_pages, bool oom_check)
unsigned long csize = nr_pages * PAGE_SIZE;
struct mem_cgroup *mem_over_limit;
struct res_counter *fail_res;
unsigned long flags = 0;
int ret;
ret = res_counter_charge(&mem->res, csize, &fail_res);
if (likely(!ret)) {
if (!do_swap_account)
return CHARGE_OK;
ret = res_counter_charge(&mem->memsw, csize, &fail_res);
if (likely(!ret))
return CHARGE_OK;
res_counter_uncharge(&mem->res, csize);
mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
flags |= MEM_CGROUP_RECLAIM_NOSWAP;
} else
mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
* nr_pages can be either a huge page (HPAGE_PMD_NR), a batch
* of regular pages (CHARGE_BATCH), or a single regular page (1).
*
* Never reclaim on behalf of optional batching, retry with a
* single page instead.
*/
if (nr_pages == CHARGE_BATCH)
return CHARGE_RETRY;
if (!(gfp_mask & __GFP_WAIT))
return CHARGE_WOULDBLOCK;
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
gfp_mask, flags, NULL);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
return CHARGE_RETRY;
* Even though the limit is exceeded at this point, reclaim
* may have been able to free some pages. Retry the charge
* before killing the task.
*
* Only for regular pages, though: huge pages are rather
* unlikely to succeed so close to the limit, and we fall back
* to regular pages anyway in case of failure.
if (nr_pages == 1 && ret)
return CHARGE_RETRY;
/*
* At task move, charge accounts can be doubly counted. So, it's
* better to wait until the end of task_move if something is going on.
*/
if (mem_cgroup_wait_acct_move(mem_over_limit))
return CHARGE_RETRY;
/* If we don't need to call oom-killer at el, return immediately */
if (!oom_check)
return CHARGE_NOMEM;
/* check OOM */
if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask))
return CHARGE_OOM_DIE;
return CHARGE_RETRY;
}
/*
* Unlike exported interface, "oom" parameter is added. if oom==true,
* oom-killer can be invoked.
static int __mem_cgroup_try_charge(struct mm_struct *mm,
unsigned int nr_pages,
struct mem_cgroup **memcg,
bool oom)
unsigned int batch = max(CHARGE_BATCH, nr_pages);
int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *mem = NULL;
int ret;
/*
* Unlike gloval-vm's OOM-kill, we're not in memory shortage
* in system level. So, allow to go ahead dying process in addition to
* MEMDIE process.
*/
if (unlikely(test_thread_flag(TIF_MEMDIE)
|| fatal_signal_pending(current)))
goto bypass;
* We always charge the cgroup the mm_struct belongs to.
* The mm_struct's mem_cgroup changes on task migration if the
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
if (!*memcg && !mm)
goto bypass;
again:
if (*memcg) { /* css should be a valid one */
VM_BUG_ON(css_is_removed(&mem->css));
if (mem_cgroup_is_root(mem))
goto done;
if (nr_pages == 1 && consume_stock(mem))
css_get(&mem->css);
} else {
rcu_read_lock();
p = rcu_dereference(mm->owner);
/*
* Because we don't have task_lock(), "p" can exit.
* In that case, "mem" can point to root or p can be NULL with
* race with swapoff. Then, we have small risk of mis-accouning.
* But such kind of mis-account by race always happens because
* we don't have cgroup_mutex(). It's overkill and we allo that
* small race, here.
* (*) swapoff at el will charge against mm-struct not against
* task-struct. So, mm->owner can be NULL.
if (!mem || mem_cgroup_is_root(mem)) {
if (nr_pages == 1 && consume_stock(mem)) {
/*
* It seems dagerous to access memcg without css_get().
* But considering how consume_stok works, it's not
* necessary. If consume_stock success, some charges
* from this memcg are cached on this cpu. So, we
* don't need to call css_get()/css_tryget() before
* calling consume_stock().
*/
rcu_read_unlock();
goto done;
}
/* after here, we may be blocked. we need to get refcnt */
if (!css_tryget(&mem->css)) {
rcu_read_unlock();
goto again;
}
rcu_read_unlock();
}
/* If killed, bypass charge */
if (fatal_signal_pending(current)) {
css_put(&mem->css);
oom_check = false;
if (oom && !nr_oom_retries) {
oom_check = true;
nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
ret = mem_cgroup_do_charge(mem, gfp_mask, batch, oom_check);
switch (ret) {
case CHARGE_OK:
break;
case CHARGE_RETRY: /* not in OOM situation but retry */
batch = nr_pages;
css_put(&mem->css);
mem = NULL;
goto again;
case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
goto nomem;
case CHARGE_NOMEM: /* OOM routine works */
/* If oom, we never return -ENOMEM */
nr_oom_retries--;
break;
case CHARGE_OOM_DIE: /* Killed by OOM Killer */
} while (ret != CHARGE_OK);
if (batch > nr_pages)
refill_stock(mem, batch - nr_pages);
return 0;
nomem:
return -ENOMEM;
bypass:
*memcg = NULL;
return 0;
/*
* Somemtimes we have to undo a charge we got by try_charge().
* This function is for that and do uncharge, put css's refcnt.
* gotten by try_charge().
*/
static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem,
unsigned int nr_pages)
{
if (!mem_cgroup_is_root(mem)) {
unsigned long bytes = nr_pages * PAGE_SIZE;
res_counter_uncharge(&mem->res, bytes);
res_counter_uncharge(&mem->memsw, bytes);
/*
* A helper function to get mem_cgroup from ID. must be called under
* rcu_read_lock(). The caller must check css_is_removed() or some if
* it's concern. (dropping refcnt from swap can be called against removed
* memcg.)
*/
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
struct cgroup_subsys_state *css;
/* ID 0 is unused ID */
if (!id)
return NULL;
css = css_lookup(&mem_cgroup_subsys, id);
if (!css)
return NULL;
return container_of(css, struct mem_cgroup, css);
}
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
struct mem_cgroup *mem = NULL;
unsigned short id;
VM_BUG_ON(!PageLocked(page));
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
if (mem && !css_tryget(&mem->css))
mem = NULL;
} else if (PageSwapCache(page)) {
ent.val = page_private(page);
id = lookup_swap_cgroup(ent);
rcu_read_lock();
mem = mem_cgroup_lookup(id);
if (mem && !css_tryget(&mem->css))
mem = NULL;
rcu_read_unlock();
unlock_page_cgroup(pc);
static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
unsigned int nr_pages,
struct page_cgroup *pc,
enum charge_type ctype)
lock_page_cgroup(pc);
if (unlikely(PageCgroupUsed(pc))) {
unlock_page_cgroup(pc);
__mem_cgroup_cancel_charge(mem, nr_pages);
return;
}
/*
* we don't need page_cgroup_lock about tail pages, becase they are not
* accessed by any other context at this point.
*/
/*
* We access a page_cgroup asynchronously without lock_page_cgroup().
* Especially when a page_cgroup is taken from a page, pc->mem_cgroup
* is accessed after testing USED bit. To make pc->mem_cgroup visible
* before USED bit, we need memory barrier here.
* See mem_cgroup_add_lru_list(), etc.
*/
switch (ctype) {
case MEM_CGROUP_CHARGE_TYPE_CACHE:
case MEM_CGROUP_CHARGE_TYPE_SHMEM:
SetPageCgroupCache(pc);
SetPageCgroupUsed(pc);
break;
case MEM_CGROUP_CHARGE_TYPE_MAPPED:
ClearPageCgroupCache(pc);
SetPageCgroupUsed(pc);
break;
default:
break;
}
mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
/*
* "charge_statistics" updated event counter. Then, check it.
* Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
* if they exceeds softlimit.
*/
memcg_check_events(mem, page);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
/*
* Because tail pages are not marked as "used", set it. We're under
* zone->lru_lock, 'splitting on pmd' and compund_lock.
*/
void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
{
struct page_cgroup *head_pc = lookup_page_cgroup(head);
struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
unsigned long flags;
if (mem_cgroup_disabled())
return;
* We have no races with charge/uncharge but will have races with
* page state accounting.
*/
move_lock_page_cgroup(head_pc, &flags);
tail_pc->mem_cgroup = head_pc->mem_cgroup;
smp_wmb(); /* see __commit_charge() */
if (PageCgroupAcctLRU(head_pc)) {
enum lru_list lru;
struct mem_cgroup_per_zone *mz;
/*
* LRU flags cannot be copied because we need to add tail
*.page to LRU by generic call and our hook will be called.
* We hold lru_lock, then, reduce counter directly.
*/
lru = page_lru(head);
mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
MEM_CGROUP_ZSTAT(mz, lru) -= 1;
}
tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
move_unlock_page_cgroup(head_pc, &flags);
}
#endif
* mem_cgroup_move_account - move account of the page
* @nr_pages: number of regular pages (>1 for huge pages)
* @pc: page_cgroup of the page.
* @from: mem_cgroup which the page is moved from.
* @to: mem_cgroup which the page is moved to. @from != @to.
* @uncharge: whether we should call uncharge and css_put against @from.
*
* The caller must confirm following.
* - page is not on LRU (isolate_page() is useful.)
* - compound_lock is held when nr_pages > 1
* This function doesn't do "charge" nor css_get to new cgroup. It should be
* done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is
* true, this function does "uncharge" from old cgroup, but it doesn't if
* @uncharge is false, so a caller should do "uncharge".
static int mem_cgroup_move_account(struct page *page,
unsigned int nr_pages,
struct page_cgroup *pc,
struct mem_cgroup *from,
struct mem_cgroup *to,
bool uncharge)
unsigned long flags;
int ret;
VM_BUG_ON(from == to);
VM_BUG_ON(PageLRU(page));
/*
* The page is isolated from LRU. So, collapse function
* will not handle this page. But page splitting can happen.
* Do this check under compound_page_lock(). The caller should
* hold it.
*/
ret = -EBUSY;
if (nr_pages > 1 && !PageTransHuge(page))
goto out;
lock_page_cgroup(pc);
ret = -EINVAL;
if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
goto unlock;
move_lock_page_cgroup(pc, &flags);
if (PageCgroupFileMapped(pc)) {
/* Update mapped_file data for mem_cgroup */
preempt_disable();
__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
preempt_enable();
mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
if (uncharge)
/* This is not "cancel", but cancel_charge does all we need. */
__mem_cgroup_cancel_charge(from, nr_pages);
/* caller should have done css_get */
mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
/*
* We charges against "to" which may not have any tasks. Then, "to"
* can be under rmdir(). But in current implementation, caller of
* this function is just force_empty() and move charge, so it's
* guaranteed that "to" is never removed. So, we don't check rmdir
move_unlock_page_cgroup(pc, &flags);
ret = 0;
unlock:
/*
* check events
*/
memcg_check_events(to, page);
memcg_check_events(from, page);
return ret;
}
/*
* move charges to its parent.
*/
static int mem_cgroup_move_parent(struct page *page,
struct page_cgroup *pc,
struct mem_cgroup *child,
gfp_t gfp_mask)
{
struct cgroup *cg = child->css.cgroup;
struct cgroup *pcg = cg->parent;
struct mem_cgroup *parent;
unsigned int nr_pages;
unsigned long uninitialized_var(flags);
int ret;
/* Is ROOT ? */
if (!pcg)
return -EINVAL;
ret = -EBUSY;
if (!get_page_unless_zero(page))
goto out;
if (isolate_lru_page(page))
goto put;
nr_pages = hpage_nr_pages(page);
parent = mem_cgroup_from_cont(pcg);
ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false);
if (ret || !parent)
if (nr_pages > 1)
flags = compound_lock_irqsave(page);
ret = mem_cgroup_move_account(page, nr_pages, pc, child, parent, true);
__mem_cgroup_cancel_charge(parent, nr_pages);
if (nr_pages > 1)
compound_unlock_irqrestore(page, flags);
put_back:
return ret;
}
/*
* Charge the memory controller for page usage.
* Return
* 0 if the charge was successful
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask, enum charge_type ctype)
struct mem_cgroup *mem = NULL;
unsigned int nr_pages = 1;
struct page_cgroup *pc;
nr_pages <<= compound_order(page);
/*
* Never OOM-kill a process for a huge page. The
* fault handler will fall back to regular pages.
*/
oom = false;
pc = lookup_page_cgroup(page);
BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */
ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &mem, oom);
return ret;
__mem_cgroup_commit_charge(mem, page, nr_pages, pc, ctype);
int mem_cgroup_newpage_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
/*
* If already mapped, we don't have to account.
* If page cache, page->mapping has address_space.
* But page->mapping may have out-of-use anon_vma pointer,
* detecit it by PageAnon() check. newly-mapped-anon's page->mapping
* is NULL.
*/
if (page_mapped(page) || (page->mapping && !PageAnon(page)))
return 0;
if (unlikely(!mm))
mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
enum charge_type ctype);
static void
__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
enum charge_type ctype)
{
struct page_cgroup *pc = lookup_page_cgroup(page);
/*
* In some case, SwapCache, FUSE(splice_buf->radixtree), the page
* is already on LRU. It means the page may on some other page_cgroup's
* LRU. Take care of it.
*/
mem_cgroup_lru_del_before_commit(page);
__mem_cgroup_commit_charge(mem, page, 1, pc, ctype);
mem_cgroup_lru_add_after_commit(page);
return;
}
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
struct mem_cgroup *mem = NULL;
if (PageCompound(page))
return 0;
/*
* Corner case handling. This is called from add_to_page_cache()
* in usual. But some FS (shmem) precharges this page before calling it
* and call add_to_page_cache() with GFP_NOWAIT.
*
* For GFP_NOWAIT case, the page may be pre-charged before calling
* add_to_page_cache(). (See shmem.c) check it here and avoid to call
* charge twice. (It works but has to pay a bit larger cost.)
* And when the page is SwapCache, it should take swap information
* into account. This is under lock_page() now.
*/
if (!(gfp_mask & __GFP_WAIT)) {
struct page_cgroup *pc;
pc = lookup_page_cgroup(page);
if (!pc)
return 0;
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
unlock_page_cgroup(pc);
mm = &init_mm;
if (page_is_file_cache(page)) {
ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &mem, true);
if (ret || !mem)
return ret;
/*
* FUSE reuses pages without going through the final
* put that would remove them from the LRU list, make
* sure that they get relinked properly.
*/
__mem_cgroup_commit_charge_lrucare(page, mem,
MEM_CGROUP_CHARGE_TYPE_CACHE);
return ret;
}
/* shmem */
if (PageSwapCache(page)) {
ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
if (!ret)
__mem_cgroup_commit_charge_swapin(page, mem,
MEM_CGROUP_CHARGE_TYPE_SHMEM);
} else
ret = mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_SHMEM);
/*
* While swap-in, try_charge -> commit or cancel, the page is locked.
* And when try_charge() successfully returns, one refcnt to memcg without
* struct page_cgroup is acquired. This refcnt will be consumed by
* "commit()" or removed by "cancel()"
*/
int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
struct page *page,
gfp_t mask, struct mem_cgroup **ptr)
{
struct mem_cgroup *mem;
*ptr = NULL;
return 0;
if (!do_swap_account)
goto charge_cur_mm;
/*
* A racing thread's fault, or swapoff, may have already updated
* the pte, and even removed page from swap cache: in those cases
* do_swap_page()'s pte_same() test will fail; but there's also a
* KSM case which does need to charge the page.
mem = try_get_mem_cgroup_from_page(page);
ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true);
css_put(&mem->css);
return ret;
charge_cur_mm:
if (unlikely(!mm))
mm = &init_mm;
return __mem_cgroup_try_charge(mm, mask, 1, ptr, true);
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
enum charge_type ctype)
return;
if (!ptr)
return;
cgroup_exclude_rmdir(&ptr->css);
__mem_cgroup_commit_charge_lrucare(page, ptr, ctype);
/*
* Now swap is on-memory. This means this page may be
* counted both as mem and swap....double count.
* Fix it by uncharging from memsw. Basically, this SwapCache is stable
* under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
* may call delete_from_swap_cache() before reach here.
if (do_swap_account && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
unsigned short id;
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
/*
* This recorded memcg can be obsolete one. So, avoid
* calling css_tryget
*/
if (!mem_cgroup_is_root(memcg))

KAMEZAWA Hiroyuki
committed
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_swap_statistics(memcg, false);
rcu_read_unlock();
/*
* At swapin, we may charge account against cgroup which has no tasks.
* So, rmdir()->pre_destroy() can be called while we do this charge.
* In that case, we need to call pre_destroy() again. check it here.
*/
cgroup_release_and_wakeup_rmdir(&ptr->css);
void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
{
__mem_cgroup_commit_charge_swapin(page, ptr,
MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
{
return;
if (!mem)
return;
__mem_cgroup_cancel_charge(mem, 1);
static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
unsigned int nr_pages,
const enum charge_type ctype)
{
struct memcg_batch_info *batch = NULL;
bool uncharge_memsw = true;
/* If swapout, usage of swap doesn't decrease */
if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
uncharge_memsw = false;
batch = ¤t->memcg_batch;
/*
* In usual, we do css_get() when we remember memcg pointer.
* But in this case, we keep res->usage until end of a series of
* uncharges. Then, it's ok to ignore memcg's refcnt.
*/
if (!batch->memcg)
batch->memcg = mem;
/*
* do_batch > 0 when unmapping pages or inode invalidate/truncate.
* In those cases, all pages freed continuously can be expected to be in
* the same cgroup and we have chance to coalesce uncharges.
* But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
* because we want to do uncharge as soon as possible.
*/
if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
goto direct_uncharge;
if (nr_pages > 1)
/*
* In typical case, batch->memcg == mem. This means we can
* merge a series of uncharges to an uncharge of res_counter.
* If not, we uncharge res_counter ony by one.
*/
if (batch->memcg != mem)
goto direct_uncharge;
/* remember freed charge and uncharge it later */
batch->nr_pages++;
batch->memsw_nr_pages++;
return;
direct_uncharge:
res_counter_uncharge(&mem->res, nr_pages * PAGE_SIZE);
res_counter_uncharge(&mem->memsw, nr_pages * PAGE_SIZE);
if (unlikely(batch->memcg != mem))
memcg_oom_recover(mem);
* uncharge if !page_mapped(page)
__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
unsigned int nr_pages = 1;
struct page_cgroup *pc;
nr_pages <<= compound_order(page);
* Check if our page_cgroup is valid
pc = lookup_page_cgroup(page);
if (unlikely(!pc || !PageCgroupUsed(pc)))
if (!PageCgroupUsed(pc))
goto unlock_out;
switch (ctype) {
case MEM_CGROUP_CHARGE_TYPE_MAPPED:
/* See mem_cgroup_prepare_migration() */
if (page_mapped(page) || PageCgroupMigration(pc))
goto unlock_out;
break;
case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
if (!PageAnon(page)) { /* Shared memory */
if (page->mapping && !page_is_file_cache(page))
goto unlock_out;
} else if (page_mapped(page)) /* Anon */
goto unlock_out;
break;
default:
break;
mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -nr_pages);
/*
* pc->mem_cgroup is not cleared here. It will be accessed when it's
* freed from LRU. This is safe because uncharged page is expected not
* to be reused (freed soon). Exception is SwapCache, it's handled by
* special functions.
*/
/*
* even after unlock, we have mem->res.usage here and this memcg
* will never be freed.
*/
memcg_check_events(mem, page);
if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
mem_cgroup_swap_statistics(mem, true);
mem_cgroup_get(mem);
}
if (!mem_cgroup_is_root(mem))
mem_cgroup_do_uncharge(mem, nr_pages, ctype);

KAMEZAWA Hiroyuki
committed
unlock_out:
unlock_page_cgroup(pc);
void mem_cgroup_uncharge_page(struct page *page)
{
/* early check. */
if (page_mapped(page))
return;
if (page->mapping && !PageAnon(page))
return;
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
void mem_cgroup_uncharge_cache_page(struct page *page)
{
VM_BUG_ON(page_mapped(page));
VM_BUG_ON(page->mapping);
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
}