Newer
Older
list_del(&ev->list);
kfree(ev);
}
}
mutex_unlock(&memcg_oom_mutex);
}
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
struct cftype *cft, struct cgroup_map_cb *cb)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
cb->fill(cb, "oom_kill_disable", mem->oom_kill_disable);
if (atomic_read(&mem->oom_lock))
cb->fill(cb, "under_oom", 1);
else
cb->fill(cb, "under_oom", 0);
return 0;
}
static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
struct cftype *cft, u64 val)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
struct mem_cgroup *parent;
/* cannot set to root cgroup and only 0 and 1 are allowed */
if (!cgrp->parent || !((val == 0) || (val == 1)))
return -EINVAL;
parent = mem_cgroup_from_cont(cgrp->parent);
cgroup_lock();
/* oom-kill-disable is a flag for subhierarchy. */
if ((parent->use_hierarchy) ||
(mem->use_hierarchy && !list_empty(&cgrp->children))) {
cgroup_unlock();
return -EINVAL;
}
mem->oom_kill_disable = val;
if (!val)
memcg_oom_recover(mem);
cgroup_unlock();
return 0;
}
static struct cftype mem_cgroup_files[] = {
{
.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
.read_u64 = mem_cgroup_read,
.register_event = mem_cgroup_usage_register_event,
.unregister_event = mem_cgroup_usage_unregister_event,
{
.name = "max_usage_in_bytes",
.private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
.read_u64 = mem_cgroup_read,
},
.private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),

Paul Menage
committed
.write_string = mem_cgroup_write,
.read_u64 = mem_cgroup_read,
{
.name = "soft_limit_in_bytes",
.private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
.write_string = mem_cgroup_write,
.read_u64 = mem_cgroup_read,
},
.private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
.read_u64 = mem_cgroup_read,
{
.name = "stat",
.read_map = mem_control_stat_show,
{
.name = "force_empty",
.trigger = mem_cgroup_force_empty_write,
},
{
.name = "use_hierarchy",
.write_u64 = mem_cgroup_hierarchy_write,
.read_u64 = mem_cgroup_hierarchy_read,
},
{
.name = "swappiness",
.read_u64 = mem_cgroup_swappiness_read,
.write_u64 = mem_cgroup_swappiness_write,
},
{
.name = "move_charge_at_immigrate",
.read_u64 = mem_cgroup_move_charge_read,
.write_u64 = mem_cgroup_move_charge_write,
},
.read_map = mem_cgroup_oom_control_read,
.write_u64 = mem_cgroup_oom_control_write,
.register_event = mem_cgroup_oom_register_event,
.unregister_event = mem_cgroup_oom_unregister_event,
.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
},
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
static struct cftype memsw_cgroup_files[] = {
{
.name = "memsw.usage_in_bytes",
.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
.read_u64 = mem_cgroup_read,
.register_event = mem_cgroup_usage_register_event,
.unregister_event = mem_cgroup_usage_unregister_event,
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
},
{
.name = "memsw.max_usage_in_bytes",
.private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
.trigger = mem_cgroup_reset,
.read_u64 = mem_cgroup_read,
},
{
.name = "memsw.limit_in_bytes",
.private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
.write_string = mem_cgroup_write,
.read_u64 = mem_cgroup_read,
},
{
.name = "memsw.failcnt",
.private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
.trigger = mem_cgroup_reset,
.read_u64 = mem_cgroup_read,
},
};
static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
{
if (!do_swap_account)
return 0;
return cgroup_add_files(cont, ss, memsw_cgroup_files,
ARRAY_SIZE(memsw_cgroup_files));
};
#else
static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
{
return 0;
}
#endif

KAMEZAWA Hiroyuki
committed
static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
{
struct mem_cgroup_per_node *pn;

KAMEZAWA Hiroyuki
committed
struct mem_cgroup_per_zone *mz;

KAMEZAWA Hiroyuki
committed
/*
* This routine is called against possible nodes.
* But it's BUG to call kmalloc() against offline node.
*
* TODO: this routine can waste much memory for nodes which will
* never be onlined. It's better to use memory hotplug callback
* function.
*/
if (!node_state(node, N_NORMAL_MEMORY))
tmp = -1;
pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, tmp);

KAMEZAWA Hiroyuki
committed
if (!pn)
return 1;

KAMEZAWA Hiroyuki
committed

KAMEZAWA Hiroyuki
committed
mem->info.nodeinfo[node] = pn;
memset(pn, 0, sizeof(*pn));

KAMEZAWA Hiroyuki
committed
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
mz = &pn->zoneinfo[zone];
for_each_lru(l)
INIT_LIST_HEAD(&mz->lists[l]);
mz->on_tree = false;
mz->mem = mem;

KAMEZAWA Hiroyuki
committed
}

KAMEZAWA Hiroyuki
committed
return 0;
}

KAMEZAWA Hiroyuki
committed
static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
{
kfree(mem->info.nodeinfo[node]);
}
static struct mem_cgroup *mem_cgroup_alloc(void)
{
struct mem_cgroup *mem;
int size = sizeof(struct mem_cgroup);
/* Can be very big if MAX_NUMNODES is very big */
if (size < PAGE_SIZE)
mem = kmalloc(size, GFP_KERNEL);
if (!mem)
return NULL;
memset(mem, 0, size);
mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
if (!mem->stat)
goto out_free;
spin_lock_init(&mem->pcp_counter_lock);
out_free:
if (size < PAGE_SIZE)
kfree(mem);
else
vfree(mem);
return NULL;
/*
* At destroying mem_cgroup, references from swap_cgroup can remain.
* (scanning all at force_empty is too costly...)
*
* Instead of clearing all references at force_empty, we remember
* the number of reference from swap_cgroup and free mem_cgroup when
* it goes down to 0.
*
* Removal of cgroup itself succeeds regardless of refs from swap.
*/
static void __mem_cgroup_free(struct mem_cgroup *mem)
mem_cgroup_remove_from_trees(mem);
for_each_node_state(node, N_POSSIBLE)
free_mem_cgroup_per_zone_info(mem, node);
free_percpu(mem->stat);
if (sizeof(struct mem_cgroup) < PAGE_SIZE)
kfree(mem);
else
vfree(mem);
}
static void mem_cgroup_get(struct mem_cgroup *mem)
{
atomic_inc(&mem->refcnt);
}
static void __mem_cgroup_put(struct mem_cgroup *mem, int count)
if (atomic_sub_and_test(count, &mem->refcnt)) {
struct mem_cgroup *parent = parent_mem_cgroup(mem);
__mem_cgroup_free(mem);
if (parent)
mem_cgroup_put(parent);
}
static void mem_cgroup_put(struct mem_cgroup *mem)
{
__mem_cgroup_put(mem, 1);
}
/*
* Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
*/
static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem)
{
if (!mem->res.parent)
return NULL;
return mem_cgroup_from_res_counter(mem->res.parent, res);
}
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
static void __init enable_swap_cgroup(void)
{
if (!mem_cgroup_disabled() && really_do_swap_account)
do_swap_account = 1;
}
#else
static void __init enable_swap_cgroup(void)
{
}
#endif
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
static int mem_cgroup_soft_limit_tree_init(void)
{
struct mem_cgroup_tree_per_node *rtpn;
struct mem_cgroup_tree_per_zone *rtpz;
int tmp, node, zone;
for_each_node_state(node, N_POSSIBLE) {
tmp = node;
if (!node_state(node, N_NORMAL_MEMORY))
tmp = -1;
rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
if (!rtpn)
return 1;
soft_limit_tree.rb_tree_per_node[node] = rtpn;
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
rtpz = &rtpn->rb_tree_per_zone[zone];
rtpz->rb_root = RB_ROOT;
spin_lock_init(&rtpz->lock);
}
}
return 0;
}
mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
{
struct mem_cgroup *mem, *parent;

KAMEZAWA Hiroyuki
committed
int node;
mem = mem_cgroup_alloc();
if (!mem)

KAMEZAWA Hiroyuki
committed
for_each_node_state(node, N_POSSIBLE)
if (alloc_mem_cgroup_per_zone_info(mem, node))
goto free_out;
if (cont->parent == NULL) {
root_mem_cgroup = mem;
if (mem_cgroup_soft_limit_tree_init())
goto free_out;
for_each_possible_cpu(cpu) {
struct memcg_stock_pcp *stock =
&per_cpu(memcg_stock, cpu);
INIT_WORK(&stock->work, drain_local_stock);
}
hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
parent = mem_cgroup_from_cont(cont->parent);
mem->use_hierarchy = parent->use_hierarchy;
mem->oom_kill_disable = parent->oom_kill_disable;
if (parent && parent->use_hierarchy) {
res_counter_init(&mem->res, &parent->res);
res_counter_init(&mem->memsw, &parent->memsw);
/*
* We increment refcnt of the parent to ensure that we can
* safely access it on res_counter_charge/uncharge.
* This refcnt will be decremented when freeing this
* mem_cgroup(see mem_cgroup_put).
*/
mem_cgroup_get(parent);
} else {
res_counter_init(&mem->res, NULL);
res_counter_init(&mem->memsw, NULL);
}
spin_lock_init(&mem->reclaim_param_lock);
if (parent)
mem->swappiness = get_swappiness(parent);
atomic_set(&mem->refcnt, 1);
mem->move_charge_at_immigrate = 0;
mutex_init(&mem->thresholds_lock);

KAMEZAWA Hiroyuki
committed
free_out:
__mem_cgroup_free(mem);
root_mem_cgroup = NULL;
static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
return mem_cgroup_force_empty(mem, false);
static void mem_cgroup_destroy(struct cgroup_subsys *ss,
struct cgroup *cont)
{
struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
mem_cgroup_put(mem);
}
static int mem_cgroup_populate(struct cgroup_subsys *ss,
struct cgroup *cont)
{
int ret;
ret = cgroup_add_files(cont, ss, mem_cgroup_files,
ARRAY_SIZE(mem_cgroup_files));
if (!ret)
ret = register_memsw_files(cont, ss);
return ret;
/* Handlers for move charge at task migration. */
#define PRECHARGE_COUNT_AT_ONCE 256
static int mem_cgroup_do_precharge(unsigned long count)
int ret = 0;
int batch_count = PRECHARGE_COUNT_AT_ONCE;
struct mem_cgroup *mem = mc.to;
4423
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456
4457
if (mem_cgroup_is_root(mem)) {
mc.precharge += count;
/* we don't need css_get for root */
return ret;
}
/* try to charge at once */
if (count > 1) {
struct res_counter *dummy;
/*
* "mem" cannot be under rmdir() because we've already checked
* by cgroup_lock_live_cgroup() that it is not removed and we
* are still under the same cgroup_mutex. So we can postpone
* css_get().
*/
if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy))
goto one_by_one;
if (do_swap_account && res_counter_charge(&mem->memsw,
PAGE_SIZE * count, &dummy)) {
res_counter_uncharge(&mem->res, PAGE_SIZE * count);
goto one_by_one;
}
mc.precharge += count;
return ret;
}
one_by_one:
/* fall back to one by one charge */
while (count--) {
if (signal_pending(current)) {
ret = -EINTR;
break;
}
if (!batch_count--) {
batch_count = PRECHARGE_COUNT_AT_ONCE;
cond_resched();
}
ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
if (ret || !mem)
/* mem_cgroup_clear_mc() will do uncharge later */
return -ENOMEM;
mc.precharge++;
}
return ret;
}
/**
* is_target_pte_for_mc - check a pte whether it is valid for move charge
* @vma: the vma the pte to be checked belongs
* @addr: the address corresponding to the pte to be checked
* @ptent: the pte to be checked
* @target: the pointer the target page or swap ent will be stored(can be NULL)
*
* Returns
* 0(MC_TARGET_NONE): if the pte is not a target for move charge.
* 1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
* move charge. if @target is not NULL, the page is stored in target->page
* with extra refcnt got(Callers should handle it).
* 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
* target for charge migration. if @target is not NULL, the entry is stored
* in target->ent.
*
* Called with pte lock held.
*/
union mc_target {
struct page *page;
};
enum mc_target_type {
MC_TARGET_NONE, /* not used */
MC_TARGET_PAGE,
static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent)
struct page *page = vm_normal_page(vma, addr, ptent);
if (!page || !page_mapped(page))
return NULL;
if (PageAnon(page)) {
/* we don't move shared anon */
if (!move_anon() || page_mapcount(page) > 2)
return NULL;
} else if (!move_file())
/* we ignore mapcount for file pages */
return NULL;
if (!get_page_unless_zero(page))
return NULL;
return page;
}
static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, swp_entry_t *entry)
{
int usage_count;
struct page *page = NULL;
swp_entry_t ent = pte_to_swp_entry(ptent);
if (!move_anon() || non_swap_entry(ent))
return NULL;
usage_count = mem_cgroup_count_swap_user(ent, &page);
if (usage_count > 1) { /* we don't move shared anon */
if (page)
put_page(page);
if (do_swap_account)
entry->val = ent.val;
return page;
}
4537
4538
4539
4540
4541
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, swp_entry_t *entry)
{
struct page *page = NULL;
struct inode *inode;
struct address_space *mapping;
pgoff_t pgoff;
if (!vma->vm_file) /* anonymous vma */
return NULL;
if (!move_file())
return NULL;
inode = vma->vm_file->f_path.dentry->d_inode;
mapping = vma->vm_file->f_mapping;
if (pte_none(ptent))
pgoff = linear_page_index(vma, addr);
else /* pte_file(ptent) is true */
pgoff = pte_to_pgoff(ptent);
/* page is moved even if it's not RSS of this task(page-faulted). */
if (!mapping_cap_swap_backed(mapping)) { /* normal file */
page = find_get_page(mapping, pgoff);
} else { /* shmem/tmpfs file. we should take account of swap too. */
swp_entry_t ent;
mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);
if (do_swap_account)
entry->val = ent.val;
}
return page;
}
static int is_target_pte_for_mc(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent, union mc_target *target)
{
struct page *page = NULL;
struct page_cgroup *pc;
int ret = 0;
swp_entry_t ent = { .val = 0 };
if (pte_present(ptent))
page = mc_handle_present_pte(vma, addr, ptent);
else if (is_swap_pte(ptent))
page = mc_handle_swap_pte(vma, addr, ptent, &ent);
else if (pte_none(ptent) || pte_file(ptent))
page = mc_handle_file_pte(vma, addr, ptent, &ent);
if (!page && !ent.val)
return 0;
if (page) {
pc = lookup_page_cgroup(page);
/*
* Do only loose check w/o page_cgroup lock.
* mem_cgroup_move_account() checks the pc is valid or not under
* the lock.
*/
if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
if (target)
target->page = page;
}
if (!ret || !target)
put_page(page);
}
/* There is a swap entry and a page doesn't exist or isn't charged */
if (ent.val && !ret &&
css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
ret = MC_TARGET_SWAP;
if (target)
target->ent = ent;
}
return ret;
}
static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct vm_area_struct *vma = walk->private;
pte_t *pte;
spinlock_t *ptl;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE)
if (is_target_pte_for_mc(vma, addr, *pte, NULL))
mc.precharge++; /* increment precharge temporarily */
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
return 0;
}
static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
{
unsigned long precharge;
struct vm_area_struct *vma;
/* We've already held the mmap_sem */
for (vma = mm->mmap; vma; vma = vma->vm_next) {
struct mm_walk mem_cgroup_count_precharge_walk = {
.pmd_entry = mem_cgroup_count_precharge_pte_range,
.mm = mm,
.private = vma,
};
if (is_vm_hugetlb_page(vma))
continue;
walk_page_range(vma->vm_start, vma->vm_end,
&mem_cgroup_count_precharge_walk);
}
precharge = mc.precharge;
mc.precharge = 0;
return precharge;
}
static int mem_cgroup_precharge_mc(struct mm_struct *mm)
{
return mem_cgroup_do_precharge(mem_cgroup_count_precharge(mm));
}
static void mem_cgroup_clear_mc(void)
{
struct mem_cgroup *from = mc.from;
struct mem_cgroup *to = mc.to;
/* we must uncharge all the leftover precharges from mc.to */
if (mc.precharge) {
__mem_cgroup_cancel_charge(mc.to, mc.precharge);
mc.precharge = 0;
}
/*
* we didn't uncharge from mc.from at mem_cgroup_move_account(), so
* we must uncharge here.
*/
if (mc.moved_charge) {
__mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
mc.moved_charge = 0;
/* we must fixup refcnts and charges */
if (mc.moved_swap) {
/* uncharge swap account from the old cgroup */
if (!mem_cgroup_is_root(mc.from))
res_counter_uncharge(&mc.from->memsw,
PAGE_SIZE * mc.moved_swap);
__mem_cgroup_put(mc.from, mc.moved_swap);
if (!mem_cgroup_is_root(mc.to)) {
/*
* we charged both to->res and to->memsw, so we should
* uncharge to->res.
*/
res_counter_uncharge(&mc.to->res,
PAGE_SIZE * mc.moved_swap);
}
/* we've already done mem_cgroup_get(mc.to) */
mc.moved_swap = 0;
}
if (mc.mm) {
up_read(&mc.mm->mmap_sem);
mmput(mc.mm);
}
mc.from = NULL;
mc.to = NULL;
mc.moving_task = NULL;
mc.mm = NULL;

KAMEZAWA Hiroyuki
committed
mem_cgroup_end_move(from);
memcg_oom_recover(from);
memcg_oom_recover(to);
static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
struct cgroup *cgroup,
struct task_struct *p,
bool threadgroup)
{
int ret = 0;
struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup);
if (mem->move_charge_at_immigrate) {
struct mm_struct *mm;
struct mem_cgroup *from = mem_cgroup_from_task(p);
VM_BUG_ON(from == mem);
mm = get_task_mm(p);
if (!mm)
return 0;
/* We move charges only when we move a owner of the mm */
/*
* We do all the move charge works under one mmap_sem to
* avoid deadlock with down_write(&mmap_sem)
* -> try_charge() -> if (mc.moving_task) -> sleep.
*/
down_read(&mm->mmap_sem);
VM_BUG_ON(mc.from);
VM_BUG_ON(mc.to);
VM_BUG_ON(mc.precharge);
VM_BUG_ON(mc.moved_charge);
VM_BUG_ON(mc.moved_swap);
VM_BUG_ON(mc.mm);

KAMEZAWA Hiroyuki
committed
mem_cgroup_start_move(from);
mc.from = from;
mc.to = mem;
mc.precharge = 0;
mc.moved_swap = 0;
mc.moving_task = current;
mc.mm = mm;
ret = mem_cgroup_precharge_mc(mm);
if (ret)
mem_cgroup_clear_mc();
/* We call up_read() and mmput() in clear_mc(). */
} else
mmput(mm);
}
return ret;
}
static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
struct cgroup *cgroup,
struct task_struct *p,
bool threadgroup)
{
static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
int ret = 0;
struct vm_area_struct *vma = walk->private;
pte_t *pte;
spinlock_t *ptl;
retry:
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; addr += PAGE_SIZE) {
pte_t ptent = *(pte++);
union mc_target target;
int type;
struct page *page;
struct page_cgroup *pc;
if (!mc.precharge)
break;
type = is_target_pte_for_mc(vma, addr, ptent, &target);
switch (type) {
case MC_TARGET_PAGE:
page = target.page;
if (isolate_lru_page(page))
goto put;
pc = lookup_page_cgroup(page);
if (!mem_cgroup_move_account(pc,
mc.from, mc.to, false)) {
/* we uncharge from mc.from later. */
mc.moved_charge++;
}
putback_lru_page(page);
put: /* is_target_pte_for_mc() gets the page */
put_page(page);
break;
case MC_TARGET_SWAP:
ent = target.ent;
if (!mem_cgroup_move_swap_account(ent,
mc.from, mc.to, false)) {
/* we fixup refcnts and charges later. */
mc.moved_swap++;
}
default:
break;
}
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
if (addr != end) {
/*
* We have consumed all precharges we got in can_attach().
* We try charge one by one, but don't do any additional
* charges to mc.to if we have failed in charge once in attach()
* phase.
*/
ret = mem_cgroup_do_precharge(1);
if (!ret)
goto retry;
}
return ret;
}
static void mem_cgroup_move_charge(struct mm_struct *mm)
{
struct vm_area_struct *vma;
lru_add_drain_all();
/* We've already held the mmap_sem */
for (vma = mm->mmap; vma; vma = vma->vm_next) {
int ret;
struct mm_walk mem_cgroup_move_charge_walk = {
.pmd_entry = mem_cgroup_move_charge_pte_range,
.mm = mm,
.private = vma,
};
if (is_vm_hugetlb_page(vma))
continue;
ret = walk_page_range(vma->vm_start, vma->vm_end,
&mem_cgroup_move_charge_walk);
if (ret)
/*
* means we have consumed all precharges and failed in
* doing additional charge. Just abandon here.
*/
break;
}
static void mem_cgroup_move_task(struct cgroup_subsys *ss,
struct cgroup *cont,
struct cgroup *old_cont,
struct task_struct *p,
bool threadgroup)
if (!mc.mm)
/* no need to move charge */
return;
mem_cgroup_move_charge(mc.mm);
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
struct cgroup *cgroup,
struct task_struct *p,
bool threadgroup)
{
return 0;
}
static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
struct cgroup *cgroup,
struct task_struct *p,
bool threadgroup)
{
}
static void mem_cgroup_move_task(struct cgroup_subsys *ss,
struct cgroup *cont,
struct cgroup *old_cont,
struct task_struct *p,
bool threadgroup)
{
}
#endif
struct cgroup_subsys mem_cgroup_subsys = {
.name = "memory",
.subsys_id = mem_cgroup_subsys_id,
.create = mem_cgroup_create,
.pre_destroy = mem_cgroup_pre_destroy,
.destroy = mem_cgroup_destroy,
.populate = mem_cgroup_populate,
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,

KAMEZAWA Hiroyuki
committed
.early_init = 0,