Newer
Older
/*
* We are now commited to this value whatever it is. Changes in this
* tunable will only affect upcoming migrations, not the current one.
* So we need to save it, and keep it going.
*/
move_charge_at_immigrate = memcg->move_charge_at_immigrate;
if (move_charge_at_immigrate) {
struct mm_struct *mm;
struct mem_cgroup *from = mem_cgroup_from_task(p);
mm = get_task_mm(p);
if (!mm)
return 0;
/* We move charges only when we move a owner of the mm */
if (mm->owner == p) {
VM_BUG_ON(mc.from);
VM_BUG_ON(mc.to);
VM_BUG_ON(mc.precharge);
VM_BUG_ON(mc.moved_charge);
VM_BUG_ON(mc.moved_swap);

KAMEZAWA Hiroyuki
committed
mem_cgroup_start_move(from);
mc.immigrate_flags = move_charge_at_immigrate;
/* We set mc.moving_task later */
ret = mem_cgroup_precharge_mc(mm);
if (ret)
mem_cgroup_clear_mc();
}
return ret;
}
static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
int ret = 0;
struct vm_area_struct *vma = walk->private;
pte_t *pte;
spinlock_t *ptl;
enum mc_target_type target_type;
union mc_target target;
struct page *page;
struct page_cgroup *pc;
/*
* We don't take compound_lock() here but no race with splitting thp
* happens because:
* - if pmd_trans_huge_lock() returns 1, the relevant thp is not
* under splitting, which means there's no concurrent thp split,
* - if another thread runs into split_huge_page() just after we
* entered this if-block, the thread must wait for page table lock
* to be unlocked in __split_huge_page_splitting(), where the main
* part of thp split is not executed yet.
*/
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
if (mc.precharge < HPAGE_PMD_NR) {
spin_unlock(ptl);
return 0;
}
target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
if (target_type == MC_TARGET_PAGE) {
page = target.page;
if (!isolate_lru_page(page)) {
pc = lookup_page_cgroup(page);
if (!mem_cgroup_move_account(page, HPAGE_PMD_NR,
pc, mc.from, mc.to)) {
mc.precharge -= HPAGE_PMD_NR;
mc.moved_charge += HPAGE_PMD_NR;
}
putback_lru_page(page);
}
put_page(page);
}
spin_unlock(ptl);

Andrea Arcangeli
committed
return 0;
if (pmd_trans_unstable(pmd))
return 0;
retry:
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; addr += PAGE_SIZE) {
pte_t ptent = *(pte++);
if (!mc.precharge)
break;
switch (get_mctgt_type(vma, addr, ptent, &target)) {
case MC_TARGET_PAGE:
page = target.page;
if (isolate_lru_page(page))
goto put;
pc = lookup_page_cgroup(page);
if (!mem_cgroup_move_account(page, 1, pc,
mc.from, mc.to)) {
/* we uncharge from mc.from later. */
mc.moved_charge++;
}
putback_lru_page(page);
put: /* get_mctgt_type() gets the page */
case MC_TARGET_SWAP:
ent = target.ent;
if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) {
/* we fixup refcnts and charges later. */
mc.moved_swap++;
}
default:
break;
}
}
pte_unmap_unlock(pte - 1, ptl);
cond_resched();
if (addr != end) {
/*
* We have consumed all precharges we got in can_attach().
* We try charge one by one, but don't do any additional
* charges to mc.to if we have failed in charge once in attach()
* phase.
*/
ret = mem_cgroup_do_precharge(1);
if (!ret)
goto retry;
}
return ret;
}
static void mem_cgroup_move_charge(struct mm_struct *mm)
{
struct vm_area_struct *vma;
lru_add_drain_all();
retry:
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
/*
* Someone who are holding the mmap_sem might be waiting in
* waitq. So we cancel all extra charges, wake up all waiters,
* and retry. Because we cancel precharges, we might not be able
* to move enough charges, but moving charge is a best-effort
* feature anyway, so it wouldn't be a big problem.
*/
__mem_cgroup_clear_mc();
cond_resched();
goto retry;
}
for (vma = mm->mmap; vma; vma = vma->vm_next) {
int ret;
struct mm_walk mem_cgroup_move_charge_walk = {
.pmd_entry = mem_cgroup_move_charge_pte_range,
.mm = mm,
.private = vma,
};
if (is_vm_hugetlb_page(vma))
continue;
ret = walk_page_range(vma->vm_start, vma->vm_end,
&mem_cgroup_move_charge_walk);
if (ret)
/*
* means we have consumed all precharges and failed in
* doing additional charge. Just abandon here.
*/
break;
}
up_read(&mm->mmap_sem);
static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)

Tejun Heo
committed
struct task_struct *p = cgroup_taskset_first(tset);
struct mm_struct *mm = get_task_mm(p);
if (mc.to)
mem_cgroup_move_charge(mm);
if (mc.to)
mem_cgroup_clear_mc();
static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
/*
* Cgroup retains root cgroups across [un]mount cycles making it necessary
* to verify sane_behavior flag on each mount attempt.
*/
static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
{
/*
* use_hierarchy is forced with sane_behavior. cgroup core
* guarantees that @root doesn't have any children, so turning it
* on for the root memcg is enough.
*/
if (cgroup_sane_behavior(root_css->cgroup))
mem_cgroup_from_css(root_css)->use_hierarchy = true;
struct cgroup_subsys mem_cgroup_subsys = {
.name = "memory",
.subsys_id = mem_cgroup_subsys_id,

Tejun Heo
committed
.css_alloc = mem_cgroup_css_alloc,
.css_online = mem_cgroup_css_online,

Tejun Heo
committed
.css_offline = mem_cgroup_css_offline,
.css_free = mem_cgroup_css_free,
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,
.base_cftypes = mem_cgroup_files,

KAMEZAWA Hiroyuki
committed
.early_init = 0,
static int __init enable_swap_account(char *s)
{
really_do_swap_account = 1;
really_do_swap_account = 0;
return 1;
}
__setup("swapaccount=", enable_swap_account);
static void __init memsw_file_init(void)
{
WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, memsw_cgroup_files));
}
static void __init enable_swap_cgroup(void)
{
if (!mem_cgroup_disabled() && really_do_swap_account) {
do_swap_account = 1;
memsw_file_init();
}
static void __init enable_swap_cgroup(void)
{
}
* subsys_initcall() for memory controller.
*
* Some parts like hotcpu_notifier() have to be initialized from this context
* because of lock dependencies (cgroup_lock -> cpu hotplug) but basically
* everything that doesn't depend on a specific mem_cgroup structure should
* be initialized from here.
*/
static int __init mem_cgroup_init(void)
{
hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
enable_swap_cgroup();
mem_cgroup_soft_limit_tree_init();
memcg_stock_init();
return 0;
}
subsys_initcall(mem_cgroup_init);