Newer
Older

KAMEZAWA Hiroyuki
committed
rcu_read_lock();
memcg = pc->mem_cgroup;
if (unlikely(!memcg || !PageCgroupUsed(pc)))

KAMEZAWA Hiroyuki
committed
goto out;
/* pc->mem_cgroup is unstable ? */
if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) {

KAMEZAWA Hiroyuki
committed
/* take a lock against to access pc->mem_cgroup */
move_lock_page_cgroup(pc, &flags);

KAMEZAWA Hiroyuki
committed
need_unlock = true;
memcg = pc->mem_cgroup;
if (!memcg || !PageCgroupUsed(pc))

KAMEZAWA Hiroyuki
committed
goto out;
}
case MEMCG_NR_FILE_MAPPED:
if (val > 0)
SetPageCgroupFileMapped(pc);
else if (!page_mapped(page))
ClearPageCgroupFileMapped(pc);
idx = MEM_CGROUP_STAT_FILE_MAPPED;
break;
default:
BUG();
this_cpu_add(memcg->stat->count[idx], val);

KAMEZAWA Hiroyuki
committed
out:
if (unlikely(need_unlock))
move_unlock_page_cgroup(pc, &flags);

KAMEZAWA Hiroyuki
committed
rcu_read_unlock();
return;
EXPORT_SYMBOL(mem_cgroup_update_page_stat);
/*
* size of first charge trial. "32" comes from vmscan.c's magic value.
* TODO: maybe necessary to use big numbers in big irons.
*/
#define CHARGE_BATCH 32U
struct memcg_stock_pcp {
struct mem_cgroup *cached; /* this never be root cgroup */
unsigned int nr_pages;
struct work_struct work;
unsigned long flags;
#define FLUSHING_CACHED_CHARGE (0)
};
static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
static DEFINE_MUTEX(percpu_charge_mutex);
* Try to consume stocked charge on this cpu. If success, one page is consumed
* from local stock and true is returned. If the stock is 0 or charges from a
* cgroup which is not current target, returns false. This stock will be
* refilled.
*/
static bool consume_stock(struct mem_cgroup *memcg)
{
struct memcg_stock_pcp *stock;
bool ret = true;
stock = &get_cpu_var(memcg_stock);
if (memcg == stock->cached && stock->nr_pages)
stock->nr_pages--;
else /* need to call res_counter_charge */
ret = false;
put_cpu_var(memcg_stock);
return ret;
}
/*
* Returns stocks cached in percpu to res_counter and reset cached information.
*/
static void drain_stock(struct memcg_stock_pcp *stock)
{
struct mem_cgroup *old = stock->cached;
if (stock->nr_pages) {
unsigned long bytes = stock->nr_pages * PAGE_SIZE;
res_counter_uncharge(&old->res, bytes);
res_counter_uncharge(&old->memsw, bytes);
stock->nr_pages = 0;
}
stock->cached = NULL;
}
/*
* This must be called under preempt disabled or must be called by
* a thread which is pinned to local cpu.
*/
static void drain_local_stock(struct work_struct *dummy)
{
struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
}
/*
* Cache charges(val) which is from res_counter, to local per_cpu area.
* This will be consumed by consume_stock() function, later.
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
if (stock->cached != memcg) { /* reset if necessary */
stock->nr_pages += nr_pages;
put_cpu_var(memcg_stock);
}
/*
* Drains all per-CPU charge caches for given root_memcg resp. subtree
* of the hierarchy under it. sync flag says whether we should block
* until the work is done.
static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
int cpu, curcpu;
/* Notify other cpus that system-wide "drain" is running */
get_online_cpus();
curcpu = get_cpu();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
memcg = stock->cached;
if (!memcg || !stock->nr_pages)
if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
if (cpu == curcpu)
drain_local_stock(&stock->work);
else
schedule_work_on(cpu, &stock->work);
}
if (!sync)
goto out;
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
if (test_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
flush_work(&stock->work);
}
out:
}
/*
* Tries to drain stocked charges in other cpus. This function is asynchronous
* and just put a work per cpu for draining localy on each cpu. Caller can
* expects some charges will be back to res_counter later but cannot wait for
* it.
*/
static void drain_all_stock_async(struct mem_cgroup *root_memcg)
/*
* If someone calls draining, avoid adding more kworker runs.
*/
if (!mutex_trylock(&percpu_charge_mutex))
return;
drain_all_stock(root_memcg, false);
mutex_unlock(&percpu_charge_mutex);
}
/* This is a synchronous drain interface. */
static void drain_all_stock_sync(struct mem_cgroup *root_memcg)
{
/* called when force_empty is called */
mutex_lock(&percpu_charge_mutex);
drain_all_stock(root_memcg, true);
mutex_unlock(&percpu_charge_mutex);
/*
* This function drains percpu counter value from DEAD cpu and
* move it to local cpu. Note that this function can be preempted.
*/
static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
spin_lock(&memcg->pcp_counter_lock);
for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
long x = per_cpu(memcg->stat->count[i], cpu);
per_cpu(memcg->stat->count[i], cpu) = 0;
memcg->nocpu_base.count[i] += x;
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
unsigned long x = per_cpu(memcg->stat->events[i], cpu);
per_cpu(memcg->stat->events[i], cpu) = 0;
memcg->nocpu_base.events[i] += x;
/* need to clear ON_MOVE value, works as a kind of lock. */
per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
spin_unlock(&memcg->pcp_counter_lock);
static void synchronize_mem_cgroup_on_move(struct mem_cgroup *memcg, int cpu)
{
int idx = MEM_CGROUP_ON_MOVE;
spin_lock(&memcg->pcp_counter_lock);
per_cpu(memcg->stat->count[idx], cpu) = memcg->nocpu_base.count[idx];
spin_unlock(&memcg->pcp_counter_lock);
}
static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
unsigned long action,
void *hcpu)
{
int cpu = (unsigned long)hcpu;
struct memcg_stock_pcp *stock;
struct mem_cgroup *iter;
if ((action == CPU_ONLINE)) {
for_each_mem_cgroup_all(iter)
synchronize_mem_cgroup_on_move(iter, cpu);
return NOTIFY_OK;
}
if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN)
for_each_mem_cgroup_all(iter)
mem_cgroup_drain_pcp_counter(iter, cpu);
stock = &per_cpu(memcg_stock, cpu);
drain_stock(stock);
return NOTIFY_OK;
}
/* See __mem_cgroup_try_charge() for details */
enum {
CHARGE_OK, /* success */
CHARGE_RETRY, /* need to retry but retry is not bad */
CHARGE_NOMEM, /* we can't do more. return -ENOMEM */
CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */
CHARGE_OOM_DIE, /* the current is killed because of OOM */
};
static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
unsigned int nr_pages, bool oom_check)
unsigned long csize = nr_pages * PAGE_SIZE;
struct mem_cgroup *mem_over_limit;
struct res_counter *fail_res;
unsigned long flags = 0;
int ret;
ret = res_counter_charge(&memcg->res, csize, &fail_res);
if (likely(!ret)) {
if (!do_swap_account)
return CHARGE_OK;
ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
if (likely(!ret))
return CHARGE_OK;
res_counter_uncharge(&memcg->res, csize);
mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
flags |= MEM_CGROUP_RECLAIM_NOSWAP;
} else
mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
* nr_pages can be either a huge page (HPAGE_PMD_NR), a batch
* of regular pages (CHARGE_BATCH), or a single regular page (1).
*
* Never reclaim on behalf of optional batching, retry with a
* single page instead.
*/
if (nr_pages == CHARGE_BATCH)
return CHARGE_RETRY;
if (!(gfp_mask & __GFP_WAIT))
return CHARGE_WOULDBLOCK;
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
gfp_mask, flags, NULL);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
return CHARGE_RETRY;
* Even though the limit is exceeded at this point, reclaim
* may have been able to free some pages. Retry the charge
* before killing the task.
*
* Only for regular pages, though: huge pages are rather
* unlikely to succeed so close to the limit, and we fall back
* to regular pages anyway in case of failure.
if (nr_pages == 1 && ret)
return CHARGE_RETRY;
/*
* At task move, charge accounts can be doubly counted. So, it's
* better to wait until the end of task_move if something is going on.
*/
if (mem_cgroup_wait_acct_move(mem_over_limit))
return CHARGE_RETRY;
/* If we don't need to call oom-killer at el, return immediately */
if (!oom_check)
return CHARGE_NOMEM;
/* check OOM */
if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask))
return CHARGE_OOM_DIE;
return CHARGE_RETRY;
}
/*
* Unlike exported interface, "oom" parameter is added. if oom==true,
* oom-killer can be invoked.
static int __mem_cgroup_try_charge(struct mm_struct *mm,
unsigned int nr_pages,
unsigned int batch = max(CHARGE_BATCH, nr_pages);
int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
/*
* Unlike gloval-vm's OOM-kill, we're not in memory shortage
* in system level. So, allow to go ahead dying process in addition to
* MEMDIE process.
*/
if (unlikely(test_thread_flag(TIF_MEMDIE)
|| fatal_signal_pending(current)))
goto bypass;
* We always charge the cgroup the mm_struct belongs to.
* The mm_struct's mem_cgroup changes on task migration if the
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
if (*ptr) { /* css should be a valid one */
memcg = *ptr;
VM_BUG_ON(css_is_removed(&memcg->css));
if (mem_cgroup_is_root(memcg))
if (nr_pages == 1 && consume_stock(memcg))
rcu_read_lock();
p = rcu_dereference(mm->owner);
/*
* Because we don't have task_lock(), "p" can exit.
* In that case, "memcg" can point to root or p can be NULL with
* race with swapoff. Then, we have small risk of mis-accouning.
* But such kind of mis-account by race always happens because
* we don't have cgroup_mutex(). It's overkill and we allo that
* small race, here.
* (*) swapoff at el will charge against mm-struct not against
* task-struct. So, mm->owner can be NULL.
memcg = mem_cgroup_from_task(p);
if (!memcg || mem_cgroup_is_root(memcg)) {
if (nr_pages == 1 && consume_stock(memcg)) {
/*
* It seems dagerous to access memcg without css_get().
* But considering how consume_stok works, it's not
* necessary. If consume_stock success, some charges
* from this memcg are cached on this cpu. So, we
* don't need to call css_get()/css_tryget() before
* calling consume_stock().
*/
rcu_read_unlock();
goto done;
}
/* after here, we may be blocked. we need to get refcnt */
rcu_read_unlock();
goto again;
}
rcu_read_unlock();
}
/* If killed, bypass charge */
oom_check = false;
if (oom && !nr_oom_retries) {
oom_check = true;
nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, oom_check);
switch (ret) {
case CHARGE_OK:
break;
case CHARGE_RETRY: /* not in OOM situation but retry */
batch = nr_pages;
css_put(&memcg->css);
memcg = NULL;
case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
goto nomem;
case CHARGE_NOMEM: /* OOM routine works */
/* If oom, we never return -ENOMEM */
nr_oom_retries--;
break;
case CHARGE_OOM_DIE: /* Killed by OOM Killer */
} while (ret != CHARGE_OK);
if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages);
css_put(&memcg->css);
return 0;
nomem:
return -ENOMEM;
/*
* Somemtimes we have to undo a charge we got by try_charge().
* This function is for that and do uncharge, put css's refcnt.
* gotten by try_charge().
*/
static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
unsigned int nr_pages)
if (!mem_cgroup_is_root(memcg)) {
unsigned long bytes = nr_pages * PAGE_SIZE;
res_counter_uncharge(&memcg->res, bytes);
res_counter_uncharge(&memcg->memsw, bytes);
/*
* A helper function to get mem_cgroup from ID. must be called under
* rcu_read_lock(). The caller must check css_is_removed() or some if
* it's concern. (dropping refcnt from swap can be called against removed
* memcg.)
*/
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
struct cgroup_subsys_state *css;
/* ID 0 is unused ID */
if (!id)
return NULL;
css = css_lookup(&mem_cgroup_subsys, id);
if (!css)
return NULL;
return container_of(css, struct mem_cgroup, css);
}
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
unsigned short id;
VM_BUG_ON(!PageLocked(page));
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc);
if (PageCgroupUsed(pc)) {
memcg = pc->mem_cgroup;
if (memcg && !css_tryget(&memcg->css))
memcg = NULL;
} else if (PageSwapCache(page)) {
ent.val = page_private(page);
id = lookup_swap_cgroup(ent);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
if (memcg && !css_tryget(&memcg->css))
memcg = NULL;
rcu_read_unlock();
unlock_page_cgroup(pc);
static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
unsigned int nr_pages,
struct page_cgroup *pc,
enum charge_type ctype)
lock_page_cgroup(pc);
if (unlikely(PageCgroupUsed(pc))) {
unlock_page_cgroup(pc);
__mem_cgroup_cancel_charge(memcg, nr_pages);
return;
}
/*
* we don't need page_cgroup_lock about tail pages, becase they are not
* accessed by any other context at this point.
*/
/*
* We access a page_cgroup asynchronously without lock_page_cgroup().
* Especially when a page_cgroup is taken from a page, pc->mem_cgroup
* is accessed after testing USED bit. To make pc->mem_cgroup visible
* before USED bit, we need memory barrier here.
* See mem_cgroup_add_lru_list(), etc.
*/
switch (ctype) {
case MEM_CGROUP_CHARGE_TYPE_CACHE:
case MEM_CGROUP_CHARGE_TYPE_SHMEM:
SetPageCgroupCache(pc);
SetPageCgroupUsed(pc);
break;
case MEM_CGROUP_CHARGE_TYPE_MAPPED:
ClearPageCgroupCache(pc);
SetPageCgroupUsed(pc);
break;
default:
break;
}
mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
/*
* "charge_statistics" updated event counter. Then, check it.
* Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
* if they exceeds softlimit.
*/
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
/*
* Because tail pages are not marked as "used", set it. We're under
* zone->lru_lock, 'splitting on pmd' and compund_lock.
*/
void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
{
struct page_cgroup *head_pc = lookup_page_cgroup(head);
struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
unsigned long flags;
if (mem_cgroup_disabled())
return;
* We have no races with charge/uncharge but will have races with
* page state accounting.
*/
move_lock_page_cgroup(head_pc, &flags);
tail_pc->mem_cgroup = head_pc->mem_cgroup;
smp_wmb(); /* see __commit_charge() */
if (PageCgroupAcctLRU(head_pc)) {
enum lru_list lru;
struct mem_cgroup_per_zone *mz;
/*
* LRU flags cannot be copied because we need to add tail
*.page to LRU by generic call and our hook will be called.
* We hold lru_lock, then, reduce counter directly.
*/
lru = page_lru(head);
mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
MEM_CGROUP_ZSTAT(mz, lru) -= 1;
}
tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
move_unlock_page_cgroup(head_pc, &flags);
}
#endif
* mem_cgroup_move_account - move account of the page
* @nr_pages: number of regular pages (>1 for huge pages)
* @pc: page_cgroup of the page.
* @from: mem_cgroup which the page is moved from.
* @to: mem_cgroup which the page is moved to. @from != @to.
* @uncharge: whether we should call uncharge and css_put against @from.
*
* The caller must confirm following.
* - page is not on LRU (isolate_page() is useful.)
* - compound_lock is held when nr_pages > 1
* This function doesn't do "charge" nor css_get to new cgroup. It should be
* done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is
* true, this function does "uncharge" from old cgroup, but it doesn't if
* @uncharge is false, so a caller should do "uncharge".
static int mem_cgroup_move_account(struct page *page,
unsigned int nr_pages,
struct page_cgroup *pc,
struct mem_cgroup *from,
struct mem_cgroup *to,
bool uncharge)
unsigned long flags;
int ret;
VM_BUG_ON(from == to);
VM_BUG_ON(PageLRU(page));
/*
* The page is isolated from LRU. So, collapse function
* will not handle this page. But page splitting can happen.
* Do this check under compound_page_lock(). The caller should
* hold it.
*/
ret = -EBUSY;
if (nr_pages > 1 && !PageTransHuge(page))
goto out;
lock_page_cgroup(pc);
ret = -EINVAL;
if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
goto unlock;
move_lock_page_cgroup(pc, &flags);
if (PageCgroupFileMapped(pc)) {
/* Update mapped_file data for mem_cgroup */
preempt_disable();
__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
preempt_enable();
mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
if (uncharge)
/* This is not "cancel", but cancel_charge does all we need. */
__mem_cgroup_cancel_charge(from, nr_pages);
/* caller should have done css_get */
mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
/*
* We charges against "to" which may not have any tasks. Then, "to"
* can be under rmdir(). But in current implementation, caller of
* this function is just force_empty() and move charge, so it's
* guaranteed that "to" is never removed. So, we don't check rmdir
move_unlock_page_cgroup(pc, &flags);
ret = 0;
unlock:
/*
* check events
*/
memcg_check_events(to, page);
memcg_check_events(from, page);
return ret;
}
/*
* move charges to its parent.
*/
static int mem_cgroup_move_parent(struct page *page,
struct page_cgroup *pc,
struct mem_cgroup *child,
gfp_t gfp_mask)
{
struct cgroup *cg = child->css.cgroup;
struct cgroup *pcg = cg->parent;
struct mem_cgroup *parent;
unsigned int nr_pages;
unsigned long uninitialized_var(flags);
int ret;
/* Is ROOT ? */
if (!pcg)
return -EINVAL;
ret = -EBUSY;
if (!get_page_unless_zero(page))
goto out;
if (isolate_lru_page(page))
goto put;
nr_pages = hpage_nr_pages(page);
parent = mem_cgroup_from_cont(pcg);
ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false);
if (ret || !parent)
if (nr_pages > 1)
flags = compound_lock_irqsave(page);
ret = mem_cgroup_move_account(page, nr_pages, pc, child, parent, true);
__mem_cgroup_cancel_charge(parent, nr_pages);
if (nr_pages > 1)
compound_unlock_irqrestore(page, flags);
put_back:
return ret;
}
/*
* Charge the memory controller for page usage.
* Return
* 0 if the charge was successful
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask, enum charge_type ctype)
unsigned int nr_pages = 1;
struct page_cgroup *pc;
nr_pages <<= compound_order(page);
/*
* Never OOM-kill a process for a huge page. The
* fault handler will fall back to regular pages.
*/
oom = false;
pc = lookup_page_cgroup(page);
BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */
ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
if (ret || !memcg)
return ret;
__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
int mem_cgroup_newpage_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
/*
* If already mapped, we don't have to account.
* If page cache, page->mapping has address_space.
* But page->mapping may have out-of-use anon_vma pointer,
* detecit it by PageAnon() check. newly-mapped-anon's page->mapping
* is NULL.
*/
if (page_mapped(page) || (page->mapping && !PageAnon(page)))
return 0;
if (unlikely(!mm))
mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
enum charge_type ctype);
__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
enum charge_type ctype)
{
struct page_cgroup *pc = lookup_page_cgroup(page);
/*
* In some case, SwapCache, FUSE(splice_buf->radixtree), the page
* is already on LRU. It means the page may on some other page_cgroup's
* LRU. Take care of it.
*/
mem_cgroup_lru_del_before_commit(page);
__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
mem_cgroup_lru_add_after_commit(page);
return;
}
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
if (PageCompound(page))
return 0;
mm = &init_mm;
if (page_is_file_cache(page)) {
ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
if (ret || !memcg)
/*
* FUSE reuses pages without going through the final
* put that would remove them from the LRU list, make
* sure that they get relinked properly.
*/
__mem_cgroup_commit_charge_lrucare(page, memcg,
MEM_CGROUP_CHARGE_TYPE_CACHE);
return ret;
}
/* shmem */
if (PageSwapCache(page)) {
ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
__mem_cgroup_commit_charge_swapin(page, memcg,
MEM_CGROUP_CHARGE_TYPE_SHMEM);
} else
ret = mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_SHMEM);
/*
* While swap-in, try_charge -> commit or cancel, the page is locked.
* And when try_charge() successfully returns, one refcnt to memcg without
* struct page_cgroup is acquired. This refcnt will be consumed by
* "commit()" or removed by "cancel()"
*/
int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
struct page *page,
gfp_t mask, struct mem_cgroup **ptr)
{
*ptr = NULL;
return 0;
if (!do_swap_account)
goto charge_cur_mm;
/*
* A racing thread's fault, or swapoff, may have already updated
* the pte, and even removed page from swap cache: in those cases
* do_swap_page()'s pte_same() test will fail; but there's also a
* KSM case which does need to charge the page.
memcg = try_get_mem_cgroup_from_page(page);
if (!memcg)
ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true);
charge_cur_mm:
if (unlikely(!mm))
mm = &init_mm;
return __mem_cgroup_try_charge(mm, mask, 1, ptr, true);
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
enum charge_type ctype)
return;
if (!ptr)
return;
cgroup_exclude_rmdir(&ptr->css);
__mem_cgroup_commit_charge_lrucare(page, ptr, ctype);
/*
* Now swap is on-memory. This means this page may be
* counted both as mem and swap....double count.
* Fix it by uncharging from memsw. Basically, this SwapCache is stable
* under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
* may call delete_from_swap_cache() before reach here.
if (do_swap_account && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)};
unsigned short id;
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
/*
* This recorded memcg can be obsolete one. So, avoid
* calling css_tryget
*/
if (!mem_cgroup_is_root(memcg))

KAMEZAWA Hiroyuki
committed
res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_swap_statistics(memcg, false);
rcu_read_unlock();
/*
* At swapin, we may charge account against cgroup which has no tasks.
* So, rmdir()->pre_destroy() can be called while we do this charge.
* In that case, we need to call pre_destroy() again. check it here.
*/
cgroup_release_and_wakeup_rmdir(&ptr->css);
void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
{
__mem_cgroup_commit_charge_swapin(page, ptr,
MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
__mem_cgroup_cancel_charge(memcg, 1);
static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
unsigned int nr_pages,
const enum charge_type ctype)
{
struct memcg_batch_info *batch = NULL;
bool uncharge_memsw = true;
/* If swapout, usage of swap doesn't decrease */
if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
uncharge_memsw = false;
batch = ¤t->memcg_batch;
/*
* In usual, we do css_get() when we remember memcg pointer.
* But in this case, we keep res->usage until end of a series of
* uncharges. Then, it's ok to ignore memcg's refcnt.
*/
if (!batch->memcg)
/*
* do_batch > 0 when unmapping pages or inode invalidate/truncate.
* In those cases, all pages freed continuously can be expected to be in
* the same cgroup and we have chance to coalesce uncharges.
* But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
* because we want to do uncharge as soon as possible.
*/
if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
goto direct_uncharge;
if (nr_pages > 1)