        return 0;
nomem:
        return -ENOMEM;
bypass:
        *memcg = NULL;
        return 0;
}
/*
 * Sometimes we have to undo a charge we got by try_charge().
 * This function is for that and does the uncharge, putting the css refcnt
 * taken by try_charge().
 */
static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem,
unsigned long count)
{
        if (!mem_cgroup_is_root(mem)) {
                res_counter_uncharge(&mem->res, PAGE_SIZE * count);
                if (do_swap_account)
                        res_counter_uncharge(&mem->memsw, PAGE_SIZE * count);
        }
}
static void mem_cgroup_cancel_charge(struct mem_cgroup *mem,
                                     int page_size)
{
        __mem_cgroup_cancel_charge(mem, page_size >> PAGE_SHIFT);
}
/*
 * A helper function to get a mem_cgroup from an ID. Must be called under
 * rcu_read_lock(). The caller must check css_is_removed() or similar if
 * that is a concern. (Dropping a refcnt from swap can be called against a
 * removed memcg.)
 */
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
struct cgroup_subsys_state *css;
/* ID 0 is unused ID */
if (!id)
return NULL;
css = css_lookup(&mem_cgroup_subsys, id);
if (!css)
return NULL;
return container_of(css, struct mem_cgroup, css);
}
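
/*
 * A minimal usage sketch (illustrative, not part of this file): resolving
 * a recorded swap_cgroup id back to a memcg under the locking rule above.
 * "ent" is a hypothetical swp_entry_t held by the caller.
 *
 *	id = lookup_swap_cgroup(ent);
 *	rcu_read_lock();
 *	mem = mem_cgroup_lookup(id);
 *	if (mem && !css_tryget(&mem->css))
 *		mem = NULL;	// css may be going away; treat as no memcg
 *	rcu_read_unlock();
 */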
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
        struct mem_cgroup *mem = NULL;
        struct page_cgroup *pc;
        unsigned short id;
        swp_entry_t ent;

        VM_BUG_ON(!PageLocked(page));

        pc = lookup_page_cgroup(page);
        lock_page_cgroup(pc);
        if (PageCgroupUsed(pc)) {
                mem = pc->mem_cgroup;
                if (mem && !css_tryget(&mem->css))
                        mem = NULL;
        } else if (PageSwapCache(page)) {
                ent.val = page_private(page);
                id = lookup_swap_cgroup(ent);
                rcu_read_lock();
                mem = mem_cgroup_lookup(id);
                if (mem && !css_tryget(&mem->css))
                        mem = NULL;
                rcu_read_unlock();
        }
        unlock_page_cgroup(pc);
        return mem;
}
static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
                                       struct page_cgroup *pc,
                                       enum charge_type ctype,
                                       int page_size)
{
        int nr_pages = page_size >> PAGE_SHIFT;
/* try_charge() can return NULL to *memcg, taking care of it. */
if (!mem)
return;
lock_page_cgroup(pc);
if (unlikely(PageCgroupUsed(pc))) {
unlock_page_cgroup(pc);
mem_cgroup_cancel_charge(mem, page_size);
return;
}
        /*
         * we don't need page_cgroup_lock about tail pages, because they are
         * not accessed by any other context at this point.
         */
        pc->mem_cgroup = mem;
        /*
         * We access a page_cgroup asynchronously without lock_page_cgroup().
         * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
         * is accessed after testing the USED bit. To make pc->mem_cgroup
         * visible before the USED bit, we need a memory barrier here.
         * See mem_cgroup_add_lru_list(), etc.
         */
        smp_wmb();
switch (ctype) {
case MEM_CGROUP_CHARGE_TYPE_CACHE:
case MEM_CGROUP_CHARGE_TYPE_SHMEM:
SetPageCgroupCache(pc);
SetPageCgroupUsed(pc);
break;
case MEM_CGROUP_CHARGE_TYPE_MAPPED:
ClearPageCgroupCache(pc);
SetPageCgroupUsed(pc);
break;
default:
break;
}
mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
        unlock_page_cgroup(pc);
        /*
         * "charge_statistics" updated the event counter. Then, check it.
         * Insert the ancestor (and the ancestor's ancestors) into the
         * softlimit RB-tree if they exceed the softlimit.
         */
        memcg_check_events(mem, pc->page);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
/*
* Because tail pages are not marked as "used", set it. We're under
 * zone->lru_lock, 'splitting on pmd' and compound_lock.
*/
void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
{
struct page_cgroup *head_pc = lookup_page_cgroup(head);
struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
unsigned long flags;
if (mem_cgroup_disabled())
return;
        /*
         * We have no races with charge/uncharge but will have races with
         * page state accounting.
         */
move_lock_page_cgroup(head_pc, &flags);
tail_pc->mem_cgroup = head_pc->mem_cgroup;
smp_wmb(); /* see __commit_charge() */
if (PageCgroupAcctLRU(head_pc)) {
enum lru_list lru;
struct mem_cgroup_per_zone *mz;
/*
                 * LRU flags cannot be copied because we need to add the tail
                 * page to the LRU by a generic call, and our hook will be
                 * called. We hold lru_lock, then, reduce the counter directly.
*/
lru = page_lru(head);
mz = page_cgroup_zoneinfo(head_pc);
MEM_CGROUP_ZSTAT(mz, lru) -= 1;
}
tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
move_unlock_page_cgroup(head_pc, &flags);
}
#endif
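
/*
 * A worked example of the mask above (illustrative flag values only): a
 * tail page inherits the head's flags except LOCK, MOVE_LOCK, ACCT_LRU
 * and MIGRATION, so e.g.
 *
 *	head_pc->flags == (1 << PCG_USED) | (1 << PCG_ACCT_LRU);
 *	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
 *	// tail_pc->flags == (1 << PCG_USED)
 */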
/**
 * __mem_cgroup_move_account - move account of the page
* @pc: page_cgroup of the page.
* @from: mem_cgroup which the page is moved from.
* @to: mem_cgroup which the page is moved to. @from != @to.
* @uncharge: whether we should call uncharge and css_put against @from.
*
* The caller must confirm following.
* - page is not on LRU (isolate_page() is useful.)
* - the pc is locked, used, and ->mem_cgroup points to @from.
 * This function doesn't do "charge" nor css_get to the new cgroup. It should
 * be done by the caller (__mem_cgroup_try_charge would be useful). If
 * @uncharge is true, this function does "uncharge" from the old cgroup, but
 * it doesn't if @uncharge is false, so the caller should do "uncharge".
 */
static void __mem_cgroup_move_account(struct page_cgroup *pc,
        struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge,
        int charge_size)
{
        int nr_pages = charge_size >> PAGE_SHIFT;
VM_BUG_ON(from == to);
VM_BUG_ON(!page_is_cgroup_locked(pc));
VM_BUG_ON(!PageCgroupUsed(pc));
VM_BUG_ON(pc->mem_cgroup != from);
if (PageCgroupFileMapped(pc)) {
/* Update mapped_file data for mem_cgroup */
preempt_disable();
__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
                preempt_enable();
        }
mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
if (uncharge)
/* This is not "cancel", but cancel_charge does all we need. */
mem_cgroup_cancel_charge(from, charge_size);
        /* caller should have done css_get */
        pc->mem_cgroup = to;
        mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
        /*
         * We charge against "to", which may not have any tasks. Then, "to"
         * can be under rmdir(). But in the current implementation, the
         * caller of this function is just force_empty() and move charge, so
         * it's guaranteed that "to" is never removed. So, we don't check
         * rmdir status here.
         */
}
/*
* check whether the @pc is valid for moving account and call
* __mem_cgroup_move_account()
*/
static int mem_cgroup_move_account(struct page_cgroup *pc,
                struct mem_cgroup *from, struct mem_cgroup *to,
                bool uncharge, int charge_size)
{
        int ret = -EINVAL;
        unsigned long flags;
if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
return -EBUSY;
lock_page_cgroup(pc);
if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
move_lock_page_cgroup(pc, &flags);
__mem_cgroup_move_account(pc, from, to, uncharge, charge_size);
move_unlock_page_cgroup(pc, &flags);
ret = 0;
}
unlock_page_cgroup(pc);
/*
* check events
*/
memcg_check_events(to, pc->page);
memcg_check_events(from, pc->page);
return ret;
}
/*
* move charges to its parent.
*/
static int mem_cgroup_move_parent(struct page_cgroup *pc,
struct mem_cgroup *child,
gfp_t gfp_mask)
{
        struct page *page = pc->page;
        struct cgroup *cg = child->css.cgroup;
struct cgroup *pcg = cg->parent;
struct mem_cgroup *parent;
int charge = PAGE_SIZE;
unsigned long flags;
int ret;
/* Is ROOT ? */
if (!pcg)
return -EINVAL;
ret = -EBUSY;
if (!get_page_unless_zero(page))
goto out;
if (isolate_lru_page(page))
goto put;
/* The page is isolated from LRU and we have no race with splitting */
charge = PAGE_SIZE << compound_order(page);
parent = mem_cgroup_from_cont(pcg);
ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, charge);
        if (ret || !parent)
                goto put_back;

        if (charge > PAGE_SIZE)
                flags = compound_lock_irqsave(page);

        ret = mem_cgroup_move_account(pc, child, parent, true, charge);
        if (ret)
                mem_cgroup_cancel_charge(parent, charge);

        if (charge > PAGE_SIZE)
                compound_unlock_irqrestore(page, flags);
put_back:
        putback_lru_page(page);
put:
        put_page(page);
out:
        return ret;
}
}
/*
* Charge the memory controller for page usage.
* Return
* 0 if the charge was successful
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask, enum charge_type ctype)
{
        struct mem_cgroup *mem = NULL;
        struct page_cgroup *pc;
        int page_size = PAGE_SIZE;
        int ret;

        if (PageTransHuge(page))
                page_size <<= compound_order(page);

        pc = lookup_page_cgroup(page);
        /* can happen at boot */
        if (unlikely(!pc))
                return 0;
        prefetchw(pc);

        ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
        if (ret || !mem)
                return ret;

        __mem_cgroup_commit_charge(mem, pc, ctype, page_size);
        return 0;
}
int mem_cgroup_newpage_charge(struct page *page,
struct mm_struct *mm, gfp_t gfp_mask)
{
/*
* If already mapped, we don't have to account.
* If page cache, page->mapping has address_space.
         * But page->mapping may have an out-of-use anon_vma pointer;
         * detect it by the PageAnon() check. A newly-mapped-anon's
         * page->mapping is NULL.
*/
if (page_mapped(page) || (page->mapping && !PageAnon(page)))
return 0;
if (unlikely(!mm))
mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
enum charge_type ctype);
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask)
{
        int ret;

        if (PageCompound(page))
                return 0;
/*
* Corner case handling. This is called from add_to_page_cache()
* in usual. But some FS (shmem) precharges this page before calling it
* and call add_to_page_cache() with GFP_NOWAIT.
*
* For GFP_NOWAIT case, the page may be pre-charged before calling
* add_to_page_cache(). (See shmem.c) check it here and avoid to call
* charge twice. (It works but has to pay a bit larger cost.)
* And when the page is SwapCache, it should take swap information
* into account. This is under lock_page() now.
*/
if (!(gfp_mask & __GFP_WAIT)) {
struct page_cgroup *pc;
pc = lookup_page_cgroup(page);
if (!pc)
return 0;
lock_page_cgroup(pc);
                if (PageCgroupUsed(pc)) {
                        unlock_page_cgroup(pc);
                        return 0;
                }
                unlock_page_cgroup(pc);
        }

        if (unlikely(!mm))
                mm = &init_mm;

        if (page_is_file_cache(page))
                return mem_cgroup_charge_common(page, mm, gfp_mask,
                                MEM_CGROUP_CHARGE_TYPE_CACHE);

        /* shmem */
        if (PageSwapCache(page)) {
                struct mem_cgroup *mem = NULL;

                ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
                if (!ret)
                        __mem_cgroup_commit_charge_swapin(page, mem,
                                        MEM_CGROUP_CHARGE_TYPE_SHMEM);
        } else
                ret = mem_cgroup_charge_common(page, mm, gfp_mask,
                                        MEM_CGROUP_CHARGE_TYPE_SHMEM);

        return ret;
}
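
/*
 * A minimal sketch of the GFP_NOWAIT corner case above (hypothetical,
 * modelled on the shmem pattern the comment describes): the page gets
 * precharged first, so the !__GFP_WAIT path must not charge it twice.
 *
 *	mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); // precharge
 *	...
 *	add_to_page_cache_lru(page, mapping, index, GFP_NOWAIT);
 */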
/*
 * During swap-in (try_charge -> commit or cancel), the page is locked.
 * And when try_charge() successfully returns, one refcnt to the memcg
 * (without a struct page_cgroup) is acquired. This refcnt will be consumed
 * by "commit()" or removed by "cancel()".
 */
int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
struct page *page,
gfp_t mask, struct mem_cgroup **ptr)
{
        struct mem_cgroup *mem;
        int ret;

        if (mem_cgroup_disabled())
                return 0;

        if (!do_swap_account)
                goto charge_cur_mm;
        /*
         * A racing thread's fault, or swapoff, may have already updated
         * the pte, and even removed the page from swap cache: in those cases
         * do_swap_page()'s pte_same() test will fail; but there's also a
         * KSM case which does need to charge the page.
         */
        mem = try_get_mem_cgroup_from_page(page);
        if (!mem)
                goto charge_cur_mm;
        *ptr = mem;
        ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, PAGE_SIZE);
        css_put(&mem->css);
        return ret;
charge_cur_mm:
        if (unlikely(!mm))
                mm = &init_mm;
        return __mem_cgroup_try_charge(mm, mask, ptr, true, PAGE_SIZE);
}
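
/*
 * A minimal caller sketch of the protocol above (hypothetical, modelled
 * on a do_swap_page()-style fault path): try, then commit on success or
 * cancel on failure, while the page stays locked.
 *
 *	struct mem_cgroup *ptr = NULL;
 *
 *	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr))
 *		goto out_fail;
 *	// ... map the page ...
 *	if (mapped_ok)
 *		mem_cgroup_commit_charge_swapin(page, ptr);
 *	else
 *		mem_cgroup_cancel_charge_swapin(ptr);
 */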
static void
__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
enum charge_type ctype)
{
        struct page_cgroup *pc;

        if (mem_cgroup_disabled())
                return;
        if (!ptr)
                return;
cgroup_exclude_rmdir(&ptr->css);
pc = lookup_page_cgroup(page);
mem_cgroup_lru_del_before_commit_swapcache(page);
__mem_cgroup_commit_charge(ptr, pc, ctype, PAGE_SIZE);
mem_cgroup_lru_add_after_commit_swapcache(page);
        /*
         * Now swap is on-memory. This means this page may be
         * counted both as mem and swap.... double count.
         * Fix it by uncharging from memsw. Basically, this SwapCache is
         * stable under lock_page(). But in do_swap_page()::memory.c,
         * reuse_swap_page() may call delete_from_swap_cache() before we
         * reach here.
         */
        if (do_swap_account && PageSwapCache(page)) {
                swp_entry_t ent = {.val = page_private(page)};
                unsigned short id;
                struct mem_cgroup *memcg;

                id = swap_cgroup_record(ent, 0);
                rcu_read_lock();
                memcg = mem_cgroup_lookup(id);
                if (memcg) {
                        /*
                         * This recorded memcg can be an obsolete one. So,
                         * avoid calling css_tryget.
                         */
                        if (!mem_cgroup_is_root(memcg))
                                res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
                        mem_cgroup_swap_statistics(memcg, false);
                        mem_cgroup_put(memcg);
                }
                rcu_read_unlock();
        }
/*
* At swapin, we may charge account against cgroup which has no tasks.
* So, rmdir()->pre_destroy() can be called while we do this charge.
* In that case, we need to call pre_destroy() again. check it here.
*/
        cgroup_release_and_wakeup_rmdir(&ptr->css);
}
void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
{
__mem_cgroup_commit_charge_swapin(page, ptr,
MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
{
        if (mem_cgroup_disabled())
                return;
        if (!mem)
                return;
        mem_cgroup_cancel_charge(mem, PAGE_SIZE);
}
static void
__do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype,
              int page_size)
{
struct memcg_batch_info *batch = NULL;
bool uncharge_memsw = true;
/* If swapout, usage of swap doesn't decrease */
if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
uncharge_memsw = false;
        batch = &current->memcg_batch;
/*
         * Usually, we do css_get() when we remember the memcg pointer.
         * But in this case, we keep res->usage until the end of a series of
         * uncharges. Then, it's ok to ignore the memcg's refcnt.
*/
if (!batch->memcg)
batch->memcg = mem;
        /*
         * do_batch > 0 when unmapping pages or inode invalidate/truncate.
         * In those cases, all pages freed continuously can be expected to be
         * in the same cgroup and we have a chance to coalesce uncharges.
         * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
         * because we want to do uncharge as soon as possible.
         */
if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
goto direct_uncharge;
if (page_size != PAGE_SIZE)
goto direct_uncharge;
        /*
         * In the typical case, batch->memcg == mem. This means we can
         * merge a series of uncharges to an uncharge of res_counter.
         * If not, we uncharge res_counter one by one.
         */
if (batch->memcg != mem)
goto direct_uncharge;
/* remember freed charge and uncharge it later */
batch->bytes += PAGE_SIZE;
if (uncharge_memsw)
batch->memsw_bytes += PAGE_SIZE;
return;
direct_uncharge:
        res_counter_uncharge(&mem->res, page_size);
        if (uncharge_memsw)
                res_counter_uncharge(&mem->memsw, page_size);
        if (unlikely(batch->memcg != mem))
                memcg_oom_recover(mem);
        return;
}
/*
 * uncharge if !page_mapped(page)
 */
static struct mem_cgroup *
__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
{
        struct page_cgroup *pc;
        struct mem_cgroup *mem = NULL;
        int page_size = PAGE_SIZE;
        int count;

        if (PageTransHuge(page))
                page_size <<= compound_order(page);
        count = page_size >> PAGE_SHIFT;
        /*
         * Check if our page_cgroup is valid
         */
        pc = lookup_page_cgroup(page);
        if (unlikely(!pc || !PageCgroupUsed(pc)))
                return NULL;

        lock_page_cgroup(pc);
        mem = pc->mem_cgroup;

        if (!PageCgroupUsed(pc))
                goto unlock_out;
switch (ctype) {
case MEM_CGROUP_CHARGE_TYPE_MAPPED:
/* See mem_cgroup_prepare_migration() */
if (page_mapped(page) || PageCgroupMigration(pc))
goto unlock_out;
break;
case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
if (!PageAnon(page)) { /* Shared memory */
if (page->mapping && !page_is_file_cache(page))
goto unlock_out;
} else if (page_mapped(page)) /* Anon */
goto unlock_out;
break;
        default:
                break;
        }

        mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
        ClearPageCgroupUsed(pc);
        /*
         * pc->mem_cgroup is not cleared here. It will be accessed when it's
         * freed from the LRU. This is safe because an uncharged page is
         * expected not to be reused (freed soon). The exception is SwapCache,
         * which is handled by special functions.
         */

        unlock_page_cgroup(pc);
        /*
         * even after unlock, we have mem->res.usage here and this memcg
         * will never be freed.
         */
        memcg_check_events(mem, page);
        if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
                mem_cgroup_swap_statistics(mem, true);
                mem_cgroup_get(mem);
        }
        if (!mem_cgroup_is_root(mem))
                __do_uncharge(mem, ctype, page_size);

        return mem;

unlock_out:
        unlock_page_cgroup(pc);
        return NULL;
}
void mem_cgroup_uncharge_page(struct page *page)
{
/* early check. */
if (page_mapped(page))
return;
if (page->mapping && !PageAnon(page))
return;
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
}
void mem_cgroup_uncharge_cache_page(struct page *page)
{
VM_BUG_ON(page_mapped(page));
VM_BUG_ON(page->mapping);
__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
}
/*
 * Batch_start/batch_end is called in unmap_page_range/invalidate/truncate.
 * In those cases, pages are freed continuously and we can expect that they
 * are in the same memcg. All these calls themselves limit the number of
 * pages freed at once, so uncharge_start/end() is called properly.
 * This may be called multiple times in a context.
 */
void mem_cgroup_uncharge_start(void)
{
current->memcg_batch.do_batch++;
        /* We can nest. */
if (current->memcg_batch.do_batch == 1) {
current->memcg_batch.memcg = NULL;
current->memcg_batch.bytes = 0;
current->memcg_batch.memsw_bytes = 0;
}
}
void mem_cgroup_uncharge_end(void)
{
        struct memcg_batch_info *batch = &current->memcg_batch;
if (!batch->do_batch)
return;
batch->do_batch--;
if (batch->do_batch) /* If stacked, do nothing. */
return;
if (!batch->memcg)
return;
/*
         * This "batch->memcg" is valid without any css_get/put, etc.,
         * because we hide charges behind us.
*/
if (batch->bytes)
res_counter_uncharge(&batch->memcg->res, batch->bytes);
if (batch->memsw_bytes)
res_counter_uncharge(&batch->memcg->memsw, batch->memsw_bytes);
memcg_oom_recover(batch->memcg);
/* forget this pointer (for sanity check) */
batch->memcg = NULL;
}
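
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * bracketing a burst of per-page uncharges so __do_uncharge() can
 * coalesce them into a single res_counter_uncharge() per memcg.
 *
 *	mem_cgroup_uncharge_start();
 *	for (i = 0; i < nr; i++)
 *		mem_cgroup_uncharge_page(pages[i]);
 *	mem_cgroup_uncharge_end();
 */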
#ifdef CONFIG_SWAP
/*
 * called after __delete_from_swap_cache() and drops the "page" account.
 * memcg information is recorded to the swap_cgroup of "ent".
 */
void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
{
        struct mem_cgroup *memcg;
        int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
if (!swapout) /* this was a swap cache but the swap is unused ! */
ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
memcg = __mem_cgroup_uncharge_common(page, ctype);
/*
* record memcg information, if swapout && memcg != NULL,
* mem_cgroup_get() was called in uncharge().
*/
if (do_swap_account && swapout && memcg)
                swap_cgroup_record(ent, css_id(&memcg->css));
}
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
/*
* called from swap_entry_free(). remove record in swap_cgroup and
* uncharge "memsw" account.
*/
void mem_cgroup_uncharge_swap(swp_entry_t ent)
{
        struct mem_cgroup *memcg;
        unsigned short id;
if (!do_swap_account)
return;
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
        if (memcg) {
                /*
                 * We uncharge this because the swap is freed.
                 * This memcg can be an obsolete one. We avoid calling
                 * css_tryget.
                 */
                if (!mem_cgroup_is_root(memcg))
                        res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
                mem_cgroup_swap_statistics(memcg, false);
                mem_cgroup_put(memcg);
        }
        rcu_read_unlock();
}
/**
* mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
* @entry: swap entry to be moved
* @from: mem_cgroup which the entry is moved from
* @to: mem_cgroup which the entry is moved to
* @need_fixup: whether we should fixup res_counters and refcounts.
*
* It succeeds only when the swap_cgroup's record for this entry is the same
* as the mem_cgroup's id of @from.
*
* Returns 0 on success, -EINVAL on failure.
*
* The caller must have charged to @to, IOW, called res_counter_charge() about
* both res and memsw, and called css_get().
*/
static int mem_cgroup_move_swap_account(swp_entry_t entry,
struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup)
{
unsigned short old_id, new_id;
old_id = css_id(&from->css);
new_id = css_id(&to->css);
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
mem_cgroup_swap_statistics(from, false);
mem_cgroup_swap_statistics(to, true);
                /*
                 * This function is only called from the task migration
                 * context now. It postpones res_counter and refcount handling
                 * till the end of task migration (mem_cgroup_clear_mc()) for
                 * performance improvement. But we cannot postpone
                 * mem_cgroup_get(to) because if the process that has been
                 * moved to @to does swap-in, the refcount of @to might be
                 * decreased to 0.
                 */
                mem_cgroup_get(to);
                if (need_fixup) {
if (!mem_cgroup_is_root(from))
res_counter_uncharge(&from->memsw, PAGE_SIZE);
mem_cgroup_put(from);
/*
* we charged both to->res and to->memsw, so we should
* uncharge to->res.
*/
if (!mem_cgroup_is_root(to))
res_counter_uncharge(&to->res, PAGE_SIZE);
}
return 0;
}
return -EINVAL;
}
#else
static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup)
{
return -EINVAL;
}
#endif
/*
 * Before starting migration, account PAGE_SIZE to the mem_cgroup that the
 * old page belongs to.
 */
int mem_cgroup_prepare_migration(struct page *page,
        struct page *newpage, struct mem_cgroup **ptr)
{
        struct page_cgroup *pc;
        struct mem_cgroup *mem = NULL;
        enum charge_type ctype;
        int ret = 0;

        if (mem_cgroup_disabled())
                return 0;

        pc = lookup_page_cgroup(page);
        lock_page_cgroup(pc);
        if (PageCgroupUsed(pc)) {
                mem = pc->mem_cgroup;
                css_get(&mem->css);
                /*
                 * At migrating an anonymous page, its mapcount goes down
                 * to 0 and uncharge() will be called. But, even if it's fully
                 * unmapped, migration may fail and this page has to be
                 * charged again. We set the MIGRATION flag here and delay
                 * uncharge until end_migration() is called.
                 *
                 * Corner Case Thinking
                 * A)
                 * When the old page was mapped as Anon and it's
                 * unmapped-and-freed while migration was ongoing:
                 * If unmap finds the old page, uncharge() of it will be
                 * delayed until end_migration(). If unmap finds a new page,
                 * its uncharge happens when its mapcount goes from 1 to 0.
                 * If unmap code finds a swap_migration_entry, the new page
                 * will not be mapped and end_migration() will find it
                 * (mapcount == 0).
                 *
                 * B)
                 * When the old page was mapped but migration fails, the
                 * kernel remaps it. A charge for it is kept by the MIGRATION
                 * flag even if mapcount goes down to 0. We can do the remap
                 * successfully without charging it again.
                 *
                 * C)
                 * The "old" page is under lock_page() until the end of
                 * migration, so the old page itself will not be swapped out.
                 * If the new page is swapped out before end_migration, our
                 * hook to the usual swap-out path will catch the event.
                 */
                if (PageAnon(page))
                        SetPageCgroupMigration(pc);
        }
        unlock_page_cgroup(pc);
/*
* If the page is not charged at this point,
* we return here.
*/
if (!mem)
return 0;
ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false, PAGE_SIZE);
css_put(&mem->css);/* drop extra refcnt */
if (ret || *ptr == NULL) {
if (PageAnon(page)) {
lock_page_cgroup(pc);
ClearPageCgroupMigration(pc);
unlock_page_cgroup(pc);
/*
* The old page may be fully unmapped while we kept it.
*/
mem_cgroup_uncharge_page(page);
}
                return -ENOMEM;
        }
        /*
         * We charge the new page before it's used/mapped. So, even if
         * unlock_page() is called before end_migration, we can catch all
         * events on this new page. In case the new page is migrated but not
         * remapped, the new page's mapcount will finally be 0 and we call
         * uncharge in end_migration().
         */
pc = lookup_page_cgroup(newpage);
if (PageAnon(page))
ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
else if (page_is_file_cache(page))
ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
else
ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
        __mem_cgroup_commit_charge(mem, pc, ctype, PAGE_SIZE);
        return ret;
}
/* remove redundant charge if migration failed */
void mem_cgroup_end_migration(struct mem_cgroup *mem,
        struct page *oldpage, struct page *newpage, bool migration_ok)
{
struct page *used, *unused;
struct page_cgroup *pc;
if (!mem)
return;
/* blocks rmdir() */
cgroup_exclude_rmdir(&mem->css);
        if (!migration_ok) {
                used = oldpage;
                unused = newpage;
        } else {
                used = newpage;
                unused = oldpage;
        }
        /*
         * We disallowed uncharge of pages under migration because the
         * mapcount of the page goes down to zero, temporarily.
         * Clear the flag and check whether the page should be charged.
         */
        pc = lookup_page_cgroup(oldpage);
        lock_page_cgroup(pc);
        ClearPageCgroupMigration(pc);
        unlock_page_cgroup(pc);

        __mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE);
        /*
         * If a page is a file cache, radix-tree replacement is very atomic
         * and we can skip this check. When it was an Anon page, its mapcount
         * goes down to 0. But because we added the MIGRATION flag, it's not
         * uncharged yet. There are several cases, but the page->mapcount
         * check and the USED bit check in mem_cgroup_uncharge_page() will do
         * enough checking. (see prepare_charge() also)
         */
        if (PageAnon(used))
                mem_cgroup_uncharge_page(used);
        /*
         * At migration, we may charge account against a cgroup which has no
         * tasks.
         * So, rmdir()->pre_destroy() can be called while we do this charge.
         * In that case, we need to call pre_destroy() again. Check it here.
         */
        cgroup_release_and_wakeup_rmdir(&mem->css);
}
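
/*
 * A minimal caller sketch of the migration protocol above (hypothetical,
 * modelled on the migrate_pages() path; "do_the_migration" stands in for
 * the real unmap-and-move work):
 *
 *	struct mem_cgroup *mem = NULL;
 *
 *	if (mem_cgroup_prepare_migration(page, newpage, &mem))
 *		goto fail;			// charge failed: -ENOMEM
 *	ok = do_the_migration(page, newpage);	// hypothetical helper
 *	mem_cgroup_end_migration(mem, page, newpage, ok);
 */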
/*
 * A call to try to shrink memory usage on charge failure at shmem's swapin.
 * Calling hierarchical_reclaim is not enough because we should update
 * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global
 * OOM. Moreover, considering hierarchy, we should reclaim from the
 * mem_over_limit, not from the memcg which this page would be charged to.
 * try_charge_swapin does all of this work properly.
 */
int mem_cgroup_shmem_charge_fallback(struct page *page,
                            struct mm_struct *mm,
                            gfp_t gfp_mask)
{
        struct mem_cgroup *mem;
        int ret;

        ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
        if (!ret)
                mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */

        return ret;
}