		if (signal_pending(current))
			return -EINTR;
		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
							false);
		if (!progress) {
			nr_retries--;
			/* maybe some writeback is necessary */
			congestion_wait(BLK_RW_ASYNC, HZ/10);
		}
	}
	mem_cgroup_reparent_charges(memcg);
	return 0;

}
static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css,
					unsigned int event)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	if (mem_cgroup_is_root(memcg))
		return -EINVAL;
	return mem_cgroup_force_empty(memcg);
}
static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css,
				     struct cftype *cft)
{
	return mem_cgroup_from_css(css)->use_hierarchy;
}
static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
				      struct cftype *cft, u64 val)
{
	int retval = 0;
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css));

	mutex_lock(&memcg_create_mutex);

	if (memcg->use_hierarchy == val)
		goto out;

	/*
	 * If parent's use_hierarchy is set, we can't make any modifications
	 * in the child subtrees. If it is unset, then the change can
	 * occur, provided the current cgroup has no children.
	 *
	 * For the root cgroup, parent_mem is NULL, we allow value to be
	 * set if there are no children.
	 */
	if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
				(val == 1 || val == 0)) {
		if (list_empty(&memcg->css.cgroup->children))
			memcg->use_hierarchy = val;
		else
			retval = -EBUSY;
	} else
		retval = -EINVAL;

out:
	mutex_unlock(&memcg_create_mutex);

	return retval;
}
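
/*
 * Sum one MEM_CGROUP_STAT_* counter over @memcg and all of its descendants.
 * The per-cpu counters can be transiently negative, hence the signed
 * accumulator and the clamp to zero below.
 */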
static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
					       enum mem_cgroup_stat_index idx)
{
	struct mem_cgroup *iter;
	long val = 0;

	/* Per-cpu values can be negative, use a signed accumulator */
	for_each_mem_cgroup_tree(iter, memcg)
		val += mem_cgroup_read_stat(iter, idx);

	if (val < 0) /* race ? */
		val = 0;
	return val;
}
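
/*
 * Current usage in bytes. For a non-root cgroup this is read straight from
 * the res/memsw counter; for the root cgroup it is derived from the
 * hierarchical cache + rss (+ swap) statistics.
 */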
static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
{
	u64 val;

	if (!mem_cgroup_is_root(memcg)) {
		if (!swap)
			return res_counter_read_u64(&memcg->res, RES_USAGE);
		return res_counter_read_u64(&memcg->memsw, RES_USAGE);
	}

	/*
	 * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
	 * as well as in MEM_CGROUP_STAT_RSS_HUGE.
	 */
	val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
	val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);

	if (swap)
		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);

	return val << PAGE_SHIFT;
}
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
			       struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	enum res_type type = MEMFILE_TYPE(cft->private);
	int name = MEMFILE_ATTR(cft->private);
	u64 val;

	switch (type) {
	case _MEM:
		if (name == RES_USAGE)
			val = mem_cgroup_usage(memcg, false);
		else
			val = res_counter_read_u64(&memcg->res, name);
		break;
	case _MEMSWAP:
		if (name == RES_USAGE)
			val = mem_cgroup_usage(memcg, true);
		else
			val = res_counter_read_u64(&memcg->memsw, name);
		break;
	case _KMEM:
		val = res_counter_read_u64(&memcg->kmem, name);
		break;
	default:
		BUG();
	}
	return val;
}
#ifdef CONFIG_MEMCG_KMEM
/* should be called with activate_kmem_mutex held */
static int __memcg_activate_kmem(struct mem_cgroup *memcg,
unsigned long long limit)
{
int err = 0;
int memcg_id;
if (memcg_kmem_is_active(memcg))
return 0;
/*
* We are going to allocate memory for data shared by all memory
* cgroups so let's stop accounting here.
*/
memcg_stop_kmem_account();
/*
* For simplicity, we won't allow this to be disabled. It also can't
* be changed if the cgroup has children already, or if tasks had
* already joined.
*
* If tasks join before we set the limit, a person looking at
* kmem.usage_in_bytes will have no way to determine when it took
* place, which makes the value quite meaningless.
*
* After it first became limited, changes in the value of the limit are
* of course permitted.
*/
mutex_lock(&memcg_create_mutex);
if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg))
err = -EBUSY;
mutex_unlock(&memcg_create_mutex);
if (err)
goto out;
memcg_id = ida_simple_get(&kmem_limited_groups,
0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
if (memcg_id < 0) {
err = memcg_id;
goto out;
}
/*
* Make sure we have enough space for this cgroup in each root cache's
* memcg_params.
*/
err = memcg_update_all_caches(memcg_id + 1);
if (err)
goto out_rmid;
memcg->kmemcg_id = memcg_id;
INIT_LIST_HEAD(&memcg->memcg_slab_caches);
mutex_init(&memcg->slab_caches_mutex);
/*
* We couldn't have accounted to this cgroup, because it hasn't got the
* active bit set yet, so this should succeed.
*/
err = res_counter_set_limit(&memcg->kmem, limit);
VM_BUG_ON(err);
static_key_slow_inc(&memcg_kmem_enabled_key);
/*
* Setting the active bit after enabling static branching will
* guarantee no one starts accounting before all call sites are
* patched.
*/
memcg_kmem_set_active(memcg);
out:
	memcg_resume_kmem_account();
	return err;
out_rmid:
ida_simple_remove(&kmem_limited_groups, memcg_id);
goto out;
}
static int memcg_activate_kmem(struct mem_cgroup *memcg,
unsigned long long limit)
{
int ret;
mutex_lock(&activate_kmem_mutex);
ret = __memcg_activate_kmem(memcg, limit);
mutex_unlock(&activate_kmem_mutex);
return ret;
}
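
/*
 * Writing the kmem limit file (memory.kmem.limit_in_bytes): the first
 * successful write activates kmem accounting for the cgroup (see
 * __memcg_activate_kmem above); subsequent writes simply resize the limit.
 */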
static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
unsigned long long val)
{
int ret;
if (!memcg_kmem_is_active(memcg))
ret = memcg_activate_kmem(memcg, val);
	else
		ret = res_counter_set_limit(&memcg->kmem, val);
	return ret;
}
static int memcg_propagate_kmem(struct mem_cgroup *memcg)
{
int ret = 0;
struct mem_cgroup *parent = parent_mem_cgroup(memcg);
if (!parent)
return 0;
mutex_lock(&activate_kmem_mutex);
/*
* If the parent cgroup is not kmem-active now, it cannot be activated
* after this point, because it has at least one child already.
*/
if (memcg_kmem_is_active(parent))
ret = __memcg_activate_kmem(memcg, RES_COUNTER_MAX);
mutex_unlock(&activate_kmem_mutex);
	return ret;
}
#else
static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
unsigned long long val)
{
return -EINVAL;
}
#endif /* CONFIG_MEMCG_KMEM */
/*
* The user of this function is...
* RES_LIMIT.
*/
static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
			    const char *buffer)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	enum res_type type;
	int name;
	unsigned long long val;
	int ret;

	type = MEMFILE_TYPE(cft->private);
	name = MEMFILE_ATTR(cft->private);

	switch (name) {
	case RES_LIMIT:
		if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
			ret = -EINVAL;
			break;
		}
		/* This function does all necessary parse...reuse it */
		ret = res_counter_memparse_write_strategy(buffer, &val);
		if (ret)
			break;
		if (type == _MEM)
			ret = mem_cgroup_resize_limit(memcg, val);
		else if (type == _MEMSWAP)
			ret = mem_cgroup_resize_memsw_limit(memcg, val);
		else if (type == _KMEM)
			ret = memcg_update_kmem_limit(memcg, val);
		else
			return -EINVAL;
		break;
	case RES_SOFT_LIMIT:
		ret = res_counter_memparse_write_strategy(buffer, &val);
		if (ret)
			break;
		/*
		 * For memsw, soft limits are hard to implement in terms
		 * of semantics, for now, we support soft limits for
		 * control without swap
		 */
		if (type == _MEM)
			ret = res_counter_set_soft_limit(&memcg->res, val);
		else
			ret = -EINVAL;
		break;
	default:
		ret = -EINVAL; /* should be BUG() ? */
		break;
	}
	return ret;
}
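
/*
 * Report the effective limits shown in memory.stat: walk up the hierarchy
 * while use_hierarchy is set and take the minimum of each ancestor's memory
 * and memsw limits.
 */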
static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
unsigned long long *mem_limit, unsigned long long *memsw_limit)
{
unsigned long long min_limit, min_memsw_limit, tmp;
min_limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
min_memsw_limit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
if (!memcg->use_hierarchy)
goto out;
while (css_parent(&memcg->css)) {
memcg = mem_cgroup_from_css(css_parent(&memcg->css));
if (!memcg->use_hierarchy)
break;
tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
min_limit = min(min_limit, tmp);
tmp = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
min_memsw_limit = min(min_memsw_limit, tmp);
}
out:
*mem_limit = min_limit;
*memsw_limit = min_memsw_limit;
}
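
/*
 * Writing to *.max_usage_in_bytes or *.failcnt resets, respectively, the
 * high-watermark or the failure counter of the res_counter selected by the
 * file's private value (_MEM, _MEMSWAP or _KMEM).
 */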
static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	int name;
	enum res_type type;

	type = MEMFILE_TYPE(event);
	name = MEMFILE_ATTR(event);

	switch (name) {
	case RES_MAX_USAGE:
		if (type == _MEM)
			res_counter_reset_max(&memcg->res);
		else if (type == _MEMSWAP)
			res_counter_reset_max(&memcg->memsw);
		else if (type == _KMEM)
			res_counter_reset_max(&memcg->kmem);
		else
			return -EINVAL;
		break;
	case RES_FAILCNT:
		if (type == _MEM)
			res_counter_reset_failcnt(&memcg->res);
		else if (type == _MEMSWAP)
			res_counter_reset_failcnt(&memcg->memsw);
		else if (type == _KMEM)
			res_counter_reset_failcnt(&memcg->kmem);
		else
			return -EINVAL;
		break;
	}

	return 0;
}
static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
	return mem_cgroup_from_css(css)->move_charge_at_immigrate;
}

#ifdef CONFIG_MMU
static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
if (val >= (1 << NR_MOVE_TYPE))
return -EINVAL;
	/*
	 * No kind of locking is needed in here, because ->can_attach() will
	 * check this value once in the beginning of the process, and then carry
	 * on with stale data. This means that changes to this value will only
	 * affect task migrations starting after the change.
	 */
memcg->move_charge_at_immigrate = val;
return 0;
}
#else
static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
struct cftype *cft, u64 val)
{
return -ENOSYS;
}
#endif
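
/*
 * memory.numa_stat: for each LRU class print "name=<pages> N0=<pages>
 * N1=<pages> ...", followed by the same per-node breakdown prefixed with
 * "hierarchical_" that also includes all descendant cgroups.
 */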
#ifdef CONFIG_NUMA
static int memcg_numa_stat_show(struct seq_file *m, void *v)
{
struct numa_stat {
const char *name;
unsigned int lru_mask;
};
static const struct numa_stat stats[] = {
{ "total", LRU_ALL },
{ "file", LRU_ALL_FILE },
{ "anon", LRU_ALL_ANON },
{ "unevictable", BIT(LRU_UNEVICTABLE) },
};
	const struct numa_stat *stat;
	int nid;
	unsigned long nr;
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask);
seq_printf(m, "%s=%lu", stat->name, nr);
for_each_node_state(nid, N_MEMORY) {
nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
stat->lru_mask);
seq_printf(m, " N%d=%lu", nid, nr);
}
		seq_putc(m, '\n');
	}

for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
struct mem_cgroup *iter;
nr = 0;
for_each_mem_cgroup_tree(iter, memcg)
nr += mem_cgroup_nr_lru_pages(iter, stat->lru_mask);
seq_printf(m, "hierarchical_%s=%lu", stat->name, nr);
for_each_node_state(nid, N_MEMORY) {
nr = 0;
for_each_mem_cgroup_tree(iter, memcg)
nr += mem_cgroup_node_nr_lru_pages(
iter, nid, stat->lru_mask);
seq_printf(m, " N%d=%lu", nid, nr);
}
seq_putc(m, '\n');
}
return 0;
}
#endif /* CONFIG_NUMA */
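
/*
 * Compile-time check that mem_cgroup_lru_names[] stays in sync with the
 * number of LRU lists.
 */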
static inline void mem_cgroup_lru_names_not_uptodate(void)
{
BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
}
static int memcg_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
struct mem_cgroup *mi;
unsigned int i;
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
			continue;
		seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
			   mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
	}

for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++)
seq_printf(m, "%s %lu\n", mem_cgroup_events_names[i],
mem_cgroup_read_events(memcg, i));
for (i = 0; i < NR_LRU_LISTS; i++)
seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE);
{
unsigned long long limit, memsw_limit;
memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit);
seq_printf(m, "hierarchical_memory_limit %llu\n", limit);
seq_printf(m, "hierarchical_memsw_limit %llu\n",
memsw_limit);
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
long long val = 0;
		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
			continue;
		for_each_mem_cgroup_tree(mi, memcg)
val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
}
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
unsigned long long val = 0;
for_each_mem_cgroup_tree(mi, memcg)
val += mem_cgroup_read_events(mi, i);
seq_printf(m, "total_%s %llu\n",
mem_cgroup_events_names[i], val);
}
for (i = 0; i < NR_LRU_LISTS; i++) {
unsigned long long val = 0;
for_each_mem_cgroup_tree(mi, memcg)
val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE;
seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], val);
#ifdef CONFIG_DEBUG_VM
{
int nid, zid;
struct mem_cgroup_per_zone *mz;
struct zone_reclaim_stat *rstat;
unsigned long recent_rotated[2] = {0, 0};
unsigned long recent_scanned[2] = {0, 0};
for_each_online_node(nid)
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
mz = mem_cgroup_zoneinfo(memcg, nid, zid);
rstat = &mz->lruvec.reclaim_stat;
recent_rotated[0] += rstat->recent_rotated[0];
recent_rotated[1] += rstat->recent_rotated[1];
recent_scanned[0] += rstat->recent_scanned[0];
				recent_scanned[1] += rstat->recent_scanned[1];
			}

seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]);
return 0;
}
static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css,
				      struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);

	return mem_cgroup_swappiness(memcg);
}
static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
				       struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));

	if (val > 100 || !parent)
		return -EINVAL;
	mutex_lock(&memcg_create_mutex);
	/* If under hierarchy, only empty-root can set this value */
	if ((parent->use_hierarchy) || memcg_has_children(memcg)) {
		mutex_unlock(&memcg_create_mutex);
		return -EINVAL;
	}
	memcg->swappiness = val;
	mutex_unlock(&memcg_create_mutex);
	return 0;
}
static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
{
struct mem_cgroup_threshold_ary *t;
u64 usage;
int i;
rcu_read_lock();
if (!swap)
		t = rcu_dereference(memcg->thresholds.primary);
	else
		t = rcu_dereference(memcg->memsw_thresholds.primary);
if (!t)
goto unlock;
usage = mem_cgroup_usage(memcg, swap);
/*
* current_threshold points to threshold just below or equal to usage.
* If it's not true, a threshold was crossed after last
* call of __mem_cgroup_threshold().
	 */
	i = t->current_threshold;

/*
* Iterate backward over array of thresholds starting from
* current_threshold and check if a threshold is crossed.
* If none of thresholds below usage is crossed, we read
* only one element of the array here.
*/
for (; i >= 0 && unlikely(t->entries[i].threshold > usage); i--)
eventfd_signal(t->entries[i].eventfd, 1);
/* i = current_threshold + 1 */
i++;
/*
* Iterate forward over array of thresholds starting from
* current_threshold+1 and check if a threshold is crossed.
* If none of thresholds above usage is crossed, we read
* only one element of the array here.
*/
for (; i < t->size && unlikely(t->entries[i].threshold <= usage); i++)
eventfd_signal(t->entries[i].eventfd, 1);
	/* Update current_threshold */
	t->current_threshold = i - 1;
unlock:
rcu_read_unlock();
}
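
/*
 * Illustrative example (not from the source): with registered thresholds
 * {4M, 8M, 16M}, current_threshold pointing at 8M and usage now at 5M, the
 * backward scan signals the 8M eventfd and stops at 4M; the forward scan
 * finds no further threshold at or below the new usage, so current_threshold
 * ends up on the 4M entry.
 */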
static void mem_cgroup_threshold(struct mem_cgroup *memcg)
{
while (memcg) {
__mem_cgroup_threshold(memcg, false);
if (do_swap_account)
__mem_cgroup_threshold(memcg, true);
memcg = parent_mem_cgroup(memcg);
}
}
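
/* sort() comparator: order threshold entries by ascending threshold value. */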
static int compare_thresholds(const void *a, const void *b)
{
const struct mem_cgroup_threshold *_a = a;
const struct mem_cgroup_threshold *_b = b;
if (_a->threshold > _b->threshold)
return 1;
if (_a->threshold < _b->threshold)
return -1;
	return 0;
}
static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
{
struct mem_cgroup_eventfd_list *ev;
list_for_each_entry(ev, &memcg->oom_notify, list)
eventfd_signal(ev->eventfd, 1);
return 0;
}
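
/* Notify OOM eventfd listeners on @memcg and every cgroup in its subtree. */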
static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
{
	struct mem_cgroup *iter;

	for_each_mem_cgroup_tree(iter, memcg)
		mem_cgroup_oom_notify_cb(iter);
}
static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
	struct eventfd_ctx *eventfd, const char *args, enum res_type type)
{
	struct mem_cgroup_thresholds *thresholds;
	struct mem_cgroup_threshold_ary *new;
	u64 threshold, usage;
	int i, size, ret;

	ret = res_counter_memparse_write_strategy(args, &threshold);
	if (ret)
		return ret;

	mutex_lock(&memcg->thresholds_lock);

	if (type == _MEM)
		thresholds = &memcg->thresholds;
	else if (type == _MEMSWAP)
		thresholds = &memcg->memsw_thresholds;
	else
		BUG();

	usage = mem_cgroup_usage(memcg, type == _MEMSWAP);

	/* Check if a threshold crossed before adding a new one */
	if (thresholds->primary)
		__mem_cgroup_threshold(memcg, type == _MEMSWAP);

	size = thresholds->primary ? thresholds->primary->size + 1 : 1;

	/* Allocate memory for new array of thresholds */
	new = kmalloc(sizeof(*new) + size * sizeof(struct mem_cgroup_threshold),
			GFP_KERNEL);
	if (!new) {
		ret = -ENOMEM;
		goto unlock;
	}
	new->size = size;

	/* Copy thresholds (if any) to new array */
	if (thresholds->primary) {
		memcpy(new->entries, thresholds->primary->entries, (size - 1) *
				sizeof(struct mem_cgroup_threshold));
	}

	/* Add new threshold */
	new->entries[size - 1].eventfd = eventfd;
	new->entries[size - 1].threshold = threshold;

	/* Sort thresholds. Registering of new threshold isn't time-critical */
	sort(new->entries, size, sizeof(struct mem_cgroup_threshold),
			compare_thresholds, NULL);

	/* Find current threshold */
	new->current_threshold = -1;
	for (i = 0; i < size; i++) {
		if (new->entries[i].threshold <= usage) {
			/*
			 * new->current_threshold will not be used until
			 * rcu_assign_pointer(), so it's safe to increment
			 * it here.
			 */
			++new->current_threshold;
		} else
			break;
	}

	/* Free old spare buffer and save old primary buffer as spare */
	kfree(thresholds->spare);
	thresholds->spare = thresholds->primary;

	rcu_assign_pointer(thresholds->primary, new);

	/* To be sure that nobody uses thresholds */
	synchronize_rcu();

unlock:
	mutex_unlock(&memcg->thresholds_lock);

	return ret;
}
static int mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd, const char *args)
{
	return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEM);
}
static int memsw_cgroup_usage_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd, const char *args)
{
	return __mem_cgroup_usage_register_event(memcg, eventfd, args, _MEMSWAP);
}
static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
	struct eventfd_ctx *eventfd, enum res_type type)
{
	struct mem_cgroup_thresholds *thresholds;
	struct mem_cgroup_threshold_ary *new;
	u64 usage;
	int i, j, size;

	mutex_lock(&memcg->thresholds_lock);
	if (type == _MEM)
		thresholds = &memcg->thresholds;
	else if (type == _MEMSWAP)
		thresholds = &memcg->memsw_thresholds;
	else
		BUG();

	if (!thresholds->primary)
		goto unlock;

	usage = mem_cgroup_usage(memcg, type == _MEMSWAP);

	/* Check if a threshold crossed before removing */
	__mem_cgroup_threshold(memcg, type == _MEMSWAP);

	/* Calculate new number of threshold */
	size = 0;
	for (i = 0; i < thresholds->primary->size; i++) {
		if (thresholds->primary->entries[i].eventfd != eventfd)
			size++;
	}

	new = thresholds->spare;

	/* Set thresholds array to NULL if we don't have thresholds */
	if (!size) {
		kfree(new);
		new = NULL;
		goto swap_buffers;
	}

	new->size = size;

	/* Copy thresholds and find current threshold */
	new->current_threshold = -1;
	for (i = 0, j = 0; i < thresholds->primary->size; i++) {
		if (thresholds->primary->entries[i].eventfd == eventfd)
			continue;

		new->entries[j] = thresholds->primary->entries[i];
		if (new->entries[j].threshold <= usage) {
			/*
			 * new->current_threshold will not be used
			 * until rcu_assign_pointer(), so it's safe to increment
			 * it here.
			 */
			++new->current_threshold;
		}
		j++;
	}

swap_buffers:
	/* Swap primary and spare array */
	thresholds->spare = thresholds->primary;
	/* If all events are unregistered, free the spare array */
	if (!new) {
		kfree(thresholds->spare);
		thresholds->spare = NULL;
	}
	rcu_assign_pointer(thresholds->primary, new);

	/* To be sure that nobody uses thresholds */
	synchronize_rcu();
unlock:
	mutex_unlock(&memcg->thresholds_lock);
}
static void mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
	struct eventfd_ctx *eventfd)
{
	return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEM);
}

static void memsw_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
	struct eventfd_ctx *eventfd)
{
	return __mem_cgroup_usage_unregister_event(memcg, eventfd, _MEMSWAP);
}
static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd, const char *args)
{
struct mem_cgroup_eventfd_list *event;
event = kmalloc(sizeof(*event), GFP_KERNEL);
if (!event)
return -ENOMEM;
event->eventfd = eventfd;
list_add(&event->list, &memcg->oom_notify);
	/* already in OOM ? */
	if (atomic_read(&memcg->under_oom))
		eventfd_signal(eventfd, 1);

	return 0;
}
static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg,
	struct eventfd_ctx *eventfd)
{
struct mem_cgroup_eventfd_list *ev, *tmp;
list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) {
if (ev->eventfd == eventfd) {
list_del(&ev->list);
kfree(ev);
}
	}
}
static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(sf));

	seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
	seq_printf(sf, "under_oom %d\n", (bool)atomic_read(&memcg->under_oom));
	return 0;
}
static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
	struct cftype *cft, u64 val)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));

	/* cannot set to root cgroup and only 0 and 1 are allowed */
	if (!parent || !((val == 0) || (val == 1)))
		return -EINVAL;

	mutex_lock(&memcg_create_mutex);
	/* oom-kill-disable is a flag for subhierarchy. */
	if ((parent->use_hierarchy) || memcg_has_children(memcg)) {
		mutex_unlock(&memcg_create_mutex);
		return -EINVAL;
	}
	memcg->oom_kill_disable = val;
	if (!val)
		memcg_oom_recover(memcg);
	mutex_unlock(&memcg_create_mutex);
	return 0;
}
#ifdef CONFIG_MEMCG_KMEM
static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	int ret;

	memcg->kmemcg_id = -1;
	ret = memcg_propagate_kmem(memcg);
	if (ret)
		return ret;

	return mem_cgroup_sockets_init(memcg, ss);
}

static void memcg_destroy_kmem(struct mem_cgroup *memcg)
{
	mem_cgroup_sockets_destroy(memcg);
}
static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
{
if (!memcg_kmem_is_active(memcg))
return;
/*
* kmem charges can outlive the cgroup. In the case of slab
 * pages, for instance, a page may contain objects from various
* processes. As we prevent from taking a reference for every
* such allocation we have to be careful when doing uncharge
* (see memcg_uncharge_kmem) and here during offlining.
*
 * The idea is that only the _last_ uncharge which sees
* the dead memcg will drop the last reference. An additional
* reference is taken here before the group is marked dead
* which is then paired with css_put during uncharge resp. here.
*
* Although this might sound strange as this path is called from
 * css_offline() when the reference might have dropped down to 0
* and shouldn't be incremented anymore (css_tryget would fail)
* we do not have other options because of the kmem allocations
* lifetime.
*/
css_get(&memcg->css);
memcg_kmem_mark_dead(memcg);
if (res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0)
return;
	if (memcg_kmem_test_and_clear_dead(memcg))
		css_put(&memcg->css);
}

#else
static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
return 0;
}