Newer
Older
/*
* Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
*
* Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* Interactivity improvements by Mike Galbraith
* (C) 2007 Mike Galbraith <efault@gmx.de>
*
* Various enhancements by Dmitry Adamushko.
* (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
*
* Group scheduling enhancements by Srivatsa Vaddagiri
* Copyright IBM Corporation, 2007
* Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
*
* Scaled math optimizations by Thomas Gleixner
* Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
*
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
#include <linux/sched.h>
* Targeted preemption latency for CPU-bound tasks:
* (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
* NOTE: this latency value is not the same as the concept of
* 'timeslice length' - timeslices in CFS are of variable length
* and have no persistent notion like in traditional, time-slice
* based scheduling concepts.
* (to see the precise effective timeslice length of your workload,
* run vmstat and monitor the context-switches (cs) field)
unsigned int sysctl_sched_latency = 6000000ULL;
unsigned int normalized_sysctl_sched_latency = 6000000ULL;
/*
* The initial- and re-scaling of tunables is configurable
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
*
* Options are:
* SCHED_TUNABLESCALING_NONE - unscaled, always *1
* SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
* SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
*/
enum sched_tunable_scaling sysctl_sched_tunable_scaling
= SCHED_TUNABLESCALING_LOG;
* Minimal preemption granularity for CPU-bound tasks:
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)

Ingo Molnar
committed
unsigned int sysctl_sched_min_granularity = 750000ULL;
unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;
* is kept at sysctl_sched_latency / sysctl_sched_min_granularity
*/

Ingo Molnar
committed
static unsigned int sched_nr_latency = 8;
* After fork, child runs first. If set to 0 (default) then
* parent will (try to) run first.
unsigned int sysctl_sched_child_runs_first __read_mostly;
/*
* SCHED_OTHER wake-up granularity.

Mike Galbraith
committed
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*
* This option delays the preemption effects of decoupled workloads
* and reduces their over-scheduling. Synchronous workloads will still
* have immediate wakeup/sleep latencies.
*/

Mike Galbraith
committed
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
/*
* The exponential sliding window over which load is averaged for shares
* distribution.
* (default: 10msec)
*/
unsigned int __read_mostly sysctl_sched_shares_window = 10000000UL;
static const struct sched_class fair_sched_class;
/**************************************************************
* CFS operations on generic schedulable entities:
*/
#ifdef CONFIG_FAIR_GROUP_SCHED
/* cpu runqueue to which this cfs_rq is attached */
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
{
/* An entity is a task if it doesn't "own" a runqueue */
#define entity_is_task(se) (!se->my_q)
static inline struct task_struct *task_of(struct sched_entity *se)
{
#ifdef CONFIG_SCHED_DEBUG
WARN_ON_ONCE(!entity_is_task(se));
#endif
return container_of(se, struct task_struct, se);
}
/* Walk up scheduling entities hierarchy */
#define for_each_sched_entity(se) \
for (; se; se = se->parent)
static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
{
return p->se.cfs_rq;
}
/* runqueue on which this entity is (to be) queued */
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
{
return se->cfs_rq;
}
/* runqueue "owned" by this group */
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{
return grp->my_q;
}
static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
if (!cfs_rq->on_list) {
/*
* Ensure we either appear before our parent (if already
* enqueued) or force our parent to appear after us when it is
* enqueued. The fact that we always enqueue bottom-up
* reduces this to two cases.
*/
if (cfs_rq->tg->parent &&
cfs_rq->tg->parent->cfs_rq[cpu_of(rq_of(cfs_rq))]->on_list) {
list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
&rq_of(cfs_rq)->leaf_cfs_rq_list);
} else {
list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
&rq_of(cfs_rq)->leaf_cfs_rq_list);
cfs_rq->on_list = 1;
}
}
static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
if (cfs_rq->on_list) {
list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
cfs_rq->on_list = 0;
}
}
/* Iterate thr' all leaf cfs_rq's on a runqueue */
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
/* Do the two (enqueued) entities belong to the same group ? */
static inline int
is_same_group(struct sched_entity *se, struct sched_entity *pse)
{
if (se->cfs_rq == pse->cfs_rq)
return 1;
return 0;
}
static inline struct sched_entity *parent_entity(struct sched_entity *se)
{
return se->parent;
}
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
/* return depth at which a sched entity is present in the hierarchy */
static inline int depth_se(struct sched_entity *se)
{
int depth = 0;
for_each_sched_entity(se)
depth++;
return depth;
}
static void
find_matching_se(struct sched_entity **se, struct sched_entity **pse)
{
int se_depth, pse_depth;
/*
* preemption test can be made between sibling entities who are in the
* same cfs_rq i.e who have a common parent. Walk up the hierarchy of
* both tasks until we find their ancestors who are siblings of common
* parent.
*/
/* First walk up until both entities are at same depth */
se_depth = depth_se(*se);
pse_depth = depth_se(*pse);
while (se_depth > pse_depth) {
se_depth--;
*se = parent_entity(*se);
}
while (pse_depth > se_depth) {
pse_depth--;
*pse = parent_entity(*pse);
}
while (!is_same_group(*se, *pse)) {
*se = parent_entity(*se);
*pse = parent_entity(*pse);
}
}
#else /* !CONFIG_FAIR_GROUP_SCHED */
static inline struct task_struct *task_of(struct sched_entity *se)
{
return container_of(se, struct task_struct, se);
}
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
{
return container_of(cfs_rq, struct rq, cfs);
}
#define entity_is_task(se) 1
#define for_each_sched_entity(se) \
for (; se; se = NULL)
static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
{
struct task_struct *p = task_of(se);
struct rq *rq = task_rq(p);
return &rq->cfs;
}
/* runqueue "owned" by this group */
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{
return NULL;
}
static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
}
static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
}
#define for_each_leaf_cfs_rq(rq, cfs_rq) \
for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
static inline int
is_same_group(struct sched_entity *se, struct sched_entity *pse)
{
return 1;
}
static inline struct sched_entity *parent_entity(struct sched_entity *se)
{
return NULL;
}
static inline void
find_matching_se(struct sched_entity **se, struct sched_entity **pse)
{
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
/**************************************************************
* Scheduling class tree data structure manipulation methods:
*/
static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
s64 delta = (s64)(vruntime - min_vruntime);
if (delta > 0)
min_vruntime = vruntime;
return min_vruntime;
}
static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
{
s64 delta = (s64)(vruntime - min_vruntime);
if (delta < 0)
min_vruntime = vruntime;
return min_vruntime;
}
static inline int entity_before(struct sched_entity *a,
struct sched_entity *b)
{
return (s64)(a->vruntime - b->vruntime) < 0;
}
static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)

Dmitry Adamushko
committed
return se->vruntime - cfs_rq->min_vruntime;
static void update_min_vruntime(struct cfs_rq *cfs_rq)
{
u64 vruntime = cfs_rq->min_vruntime;
if (cfs_rq->curr)
vruntime = cfs_rq->curr->vruntime;
if (cfs_rq->rb_leftmost) {
struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
struct sched_entity,
run_node);
vruntime = se->vruntime;
else
vruntime = min_vruntime(vruntime, se->vruntime);
}
cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
#ifndef CONFIG_64BIT
smp_wmb();
cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
#endif
/*
* Enqueue an entity into the rb-tree:
*/
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
struct rb_node *parent = NULL;
struct sched_entity *entry;
s64 key = entity_key(cfs_rq, se);
int leftmost = 1;
/*
* Find the right place in the rbtree:
*/
while (*link) {
parent = *link;
entry = rb_entry(parent, struct sched_entity, run_node);
/*
* We dont care about collisions. Nodes with
* the same key stay together.
*/
if (key < entity_key(cfs_rq, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
leftmost = 0;
}
}
/*
* Maintain a cache of leftmost tree entries (it is frequently
* used):
*/
rb_link_node(&se->run_node, parent, link);
rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
}
static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
if (cfs_rq->rb_leftmost == &se->run_node) {
struct rb_node *next_node;
next_node = rb_next(&se->run_node);
cfs_rq->rb_leftmost = next_node;
}
rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
}
static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
struct rb_node *left = cfs_rq->rb_leftmost;
if (!left)
return NULL;
return rb_entry(left, struct sched_entity, run_node);
static struct sched_entity *__pick_next_entity(struct sched_entity *se)
{
struct rb_node *next = rb_next(&se->run_node);
if (!next)
return NULL;
return rb_entry(next, struct sched_entity, run_node);
}
#ifdef CONFIG_SCHED_DEBUG
static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
if (!last)
return NULL;
return rb_entry(last, struct sched_entity, run_node);
/**************************************************************
* Scheduling class statistics methods:
*/
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
int factor = get_update_sysctl_factor();
if (ret || !write)
return ret;
sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
sysctl_sched_min_granularity);
#define WRT_SYSCTL(name) \
(normalized_sysctl_##name = sysctl_##name / (factor))
WRT_SYSCTL(sched_min_granularity);
WRT_SYSCTL(sched_latency);
WRT_SYSCTL(sched_wakeup_granularity);
#undef WRT_SYSCTL
return 0;
}
#endif
*/
static inline unsigned long
calc_delta_fair(unsigned long delta, struct sched_entity *se)
{
if (unlikely(se->load.weight != NICE_0_LOAD))
delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
return delta;
}
/*
* The idea is to set a period in which each task runs once.
*
* When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
* this period because otherwise the slices get too small.
*
* p = (nr <= nl) ? l : l*nr/nl
*/
static u64 __sched_period(unsigned long nr_running)
{
u64 period = sysctl_sched_latency;
unsigned long nr_latency = sched_nr_latency;
if (unlikely(nr_running > nr_latency)) {
period = sysctl_sched_min_granularity;
period *= nr_running;
}
return period;
}
/*
* We calculate the wall-time slice from the period by taking a part
* proportional to the weight.
*
static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);
struct load_weight lw;
cfs_rq = cfs_rq_of(se);
load = &cfs_rq->load;
lw = cfs_rq->load;
update_load_add(&lw, se->load.weight);
load = &lw;
}
slice = calc_delta_mine(slice, se->load.weight, load);
}
return slice;
* We calculate the vruntime slice of a to be inserted task
static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
return calc_delta_fair(sched_slice(cfs_rq, se), se);
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
static void update_cfs_shares(struct cfs_rq *cfs_rq);
/*
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
*/
static inline void
__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
unsigned long delta_exec)
unsigned long delta_exec_weighted;
schedstat_set(curr->statistics.exec_max,
max((u64)delta_exec, curr->statistics.exec_max));
curr->sum_exec_runtime += delta_exec;
schedstat_add(cfs_rq, exec_clock, delta_exec);
delta_exec_weighted = calc_delta_fair(delta_exec, curr);
update_min_vruntime(cfs_rq);
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
cfs_rq->load_unacc_exec_time += delta_exec;
#endif
static void update_curr(struct cfs_rq *cfs_rq)
struct sched_entity *curr = cfs_rq->curr;
u64 now = rq_of(cfs_rq)->clock_task;
unsigned long delta_exec;
if (unlikely(!curr))
return;
/*
* Get the amount of time the current task was running
* since the last time we changed load (this cannot
* overflow on 32 bits):
*/
delta_exec = (unsigned long)(now - curr->exec_start);
__update_curr(cfs_rq, curr, delta_exec);
curr->exec_start = now;
if (entity_is_task(curr)) {
struct task_struct *curtask = task_of(curr);
trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
cpuacct_charge(curtask, delta_exec);
account_group_exec_runtime(curtask, delta_exec);
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
schedstat_set(se->statistics.wait_start, rq_of(cfs_rq)->clock);
}
/*
* Task is being enqueued - update stats:
*/
static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/*
* Are we enqueueing a waiting task? (for current tasks
* a dequeue/enqueue event is a NOP)
*/
update_stats_wait_start(cfs_rq, se);
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
rq_of(cfs_rq)->clock - se->statistics.wait_start));
schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
rq_of(cfs_rq)->clock - se->statistics.wait_start);
#ifdef CONFIG_SCHEDSTATS
if (entity_is_task(se)) {
trace_sched_stat_wait(task_of(se),
rq_of(cfs_rq)->clock - se->statistics.wait_start);
schedstat_set(se->statistics.wait_start, 0);
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/*
* Mark the end of the wait period if dequeueing a
* waiting task:
*/
update_stats_wait_end(cfs_rq, se);
}
/*
* We are picking a new current task - update its stats:
*/
static inline void
update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/*
* We are starting a new run period:
*/
se->exec_start = rq_of(cfs_rq)->clock_task;
}
/**************************************************
* Scheduling class queueing methods:
*/
#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
static void
add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
{
cfs_rq->task_weight += weight;
}
#else
static inline void
add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
{
}
#endif

Dmitry Adamushko
committed
static void
account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
update_load_add(&cfs_rq->load, se->load.weight);
if (!parent_entity(se))
inc_cpu_load(rq_of(cfs_rq), se->load.weight);
if (entity_is_task(se)) {
add_cfs_task_weight(cfs_rq, se->load.weight);
list_add(&se->group_node, &cfs_rq->tasks);
}

Dmitry Adamushko
committed
cfs_rq->nr_running++;
}
static void
account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
update_load_sub(&cfs_rq->load, se->load.weight);
if (!parent_entity(se))
dec_cpu_load(rq_of(cfs_rq), se->load.weight);
if (entity_is_task(se)) {
add_cfs_task_weight(cfs_rq, -se->load.weight);
list_del_init(&se->group_node);
}

Dmitry Adamushko
committed
cfs_rq->nr_running--;
}

Yong Zhang
committed
#ifdef CONFIG_FAIR_GROUP_SCHED
# ifdef CONFIG_SMP
static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
int global_update)
{
struct task_group *tg = cfs_rq->tg;
long load_avg;
load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
load_avg -= cfs_rq->load_contribution;
if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) {
atomic_add(load_avg, &tg->load_weight);
cfs_rq->load_contribution += load_avg;
}
}
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
u64 period = sysctl_sched_shares_window;
unsigned long load = cfs_rq->load.weight;
if (cfs_rq->tg == &root_task_group)

Paul Turner
committed
now = rq_of(cfs_rq)->clock_task;
/* truncate load history at 4 idle periods */
if (cfs_rq->load_stamp > cfs_rq->load_last &&
now - cfs_rq->load_last > 4 * period) {
cfs_rq->load_period = 0;
cfs_rq->load_avg = 0;
cfs_rq->load_unacc_exec_time = 0;
if (load) {
cfs_rq->load_last = now;
cfs_rq->load_avg += delta * load;
}
/* consider updating load contribution on each fold or truncate */
if (global_update || cfs_rq->load_period > period
|| !cfs_rq->load_period)
update_cfs_rq_load_contribution(cfs_rq, global_update);
while (cfs_rq->load_period > period) {
/*
* Inline assembly required to prevent the compiler
* optimising this loop into a divmod call.
* See __iter_div_u64_rem() for another example of this.
*/
asm("" : "+rm" (cfs_rq->load_period));
cfs_rq->load_period /= 2;
cfs_rq->load_avg /= 2;
}
if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
list_del_leaf_cfs_rq(cfs_rq);
static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)

Yong Zhang
committed
{
long load_weight, load, shares;

Yong Zhang
committed
load_weight = atomic_read(&tg->load_weight);
load_weight += load;
load_weight -= cfs_rq->load_contribution;

Yong Zhang
committed
shares = (tg->shares * load);
if (load_weight)
shares /= load_weight;
if (shares < MIN_SHARES)
shares = MIN_SHARES;
if (shares > tg->shares)
shares = tg->shares;
return shares;
}
static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
{
if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
update_cfs_load(cfs_rq, 0);

Yong Zhang
committed
}
}
# else /* CONFIG_SMP */
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
{
}
static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)

Yong Zhang
committed
{
return tg->shares;
}
static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
{
}
# endif /* CONFIG_SMP */
static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
unsigned long weight)
{

Paul Turner
committed
if (se->on_rq) {
/* commit outstanding execution time */
if (cfs_rq->curr == se)
update_curr(cfs_rq);

Paul Turner
committed
}
update_load_set(&se->load, weight);
if (se->on_rq)
account_entity_enqueue(cfs_rq, se);
}
static void update_cfs_shares(struct cfs_rq *cfs_rq)
{
struct task_group *tg;
struct sched_entity *se;

Yong Zhang
committed
long shares;
tg = cfs_rq->tg;
se = tg->se[cpu_of(rq_of(cfs_rq))];
if (!se)
return;

Yong Zhang
committed
#ifndef CONFIG_SMP
if (likely(se->load.weight == tg->shares))
return;
#endif
shares = calc_cfs_shares(cfs_rq, tg);
reweight_entity(cfs_rq_of(se), se, shares);
}
#else /* CONFIG_FAIR_GROUP_SCHED */
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
{
}
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
struct task_struct *tsk = NULL;
if (entity_is_task(se))
tsk = task_of(se);
if (se->statistics.sleep_start) {
u64 delta = rq_of(cfs_rq)->clock - se->statistics.sleep_start;
if ((s64)delta < 0)
delta = 0;
if (unlikely(delta > se->statistics.sleep_max))
se->statistics.sleep_max = delta;
se->statistics.sleep_start = 0;
se->statistics.sum_sleep_runtime += delta;
account_scheduler_latency(tsk, delta >> 10, 1);
trace_sched_stat_sleep(tsk, delta);
}
if (se->statistics.block_start) {
u64 delta = rq_of(cfs_rq)->clock - se->statistics.block_start;
if ((s64)delta < 0)
delta = 0;
if (unlikely(delta > se->statistics.block_max))
se->statistics.block_max = delta;
se->statistics.block_start = 0;
se->statistics.sum_sleep_runtime += delta;
if (tsk) {
se->statistics.iowait_sum += delta;
se->statistics.iowait_count++;
trace_sched_stat_iowait(tsk, delta);
/*
* Blocking time is in units of nanosecs, so shift by
* 20 to get a milliseconds-range estimation of the
* amount of time that the task spent sleeping:
*/
if (unlikely(prof_on == SLEEP_PROFILING)) {
profile_hits(SLEEP_PROFILING,
(void *)get_wchan(tsk),
delta >> 20);
}
account_scheduler_latency(tsk, delta >> 10, 0);
static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_SCHED_DEBUG
s64 d = se->vruntime - cfs_rq->min_vruntime;
if (d < 0)
d = -d;
if (d > 3*sysctl_sched_latency)
schedstat_inc(cfs_rq, nr_spread_over);
#endif
}
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
u64 vruntime = cfs_rq->min_vruntime;
/*
* The 'current' period is already promised to the current tasks,
* however the extra weight of the new task will slow them down a
* little, place the new task so that it fits in the slot that
* stays open at the end.
*/
vruntime += sched_vslice(cfs_rq, se);
/* sleeps up to a single latency don't count. */
unsigned long thresh = sysctl_sched_latency;
/*
* Halve their sleep time's effect, to allow
* for a gentler effect of sleepers:
*/
if (sched_feat(GENTLE_FAIR_SLEEPERS))
thresh >>= 1;
vruntime -= thresh;
/* ensure we never gain time by being placed backwards. */
vruntime = max_vruntime(se->vruntime, vruntime);
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
/*
* Update the normalized vruntime before updating min_vruntime
* through callig update_curr().
*/
if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
se->vruntime += cfs_rq->min_vruntime;

Dmitry Adamushko
committed
* Update run-time statistics of the 'current'.
update_curr(cfs_rq);
update_cfs_load(cfs_rq, 0);
if (flags & ENQUEUE_WAKEUP) {
enqueue_sleeper(cfs_rq, se);
update_stats_enqueue(cfs_rq, se);
if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se);
if (cfs_rq->nr_running == 1)
list_add_leaf_cfs_rq(cfs_rq);
static void __clear_buddies_last(struct sched_entity *se)
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
if (cfs_rq->last == se)
cfs_rq->last = NULL;
else
break;
}