Overview
update_load_avg
/* Update task and its cfs_rq load average */
static inline void update_load_avg(struct sched_entity *se, int flags)
{
	struct cfs_rq *cfs_rq = cfs_rq_of(se);	/* the cfs_rq this se is queued on */
	u64 now = cfs_rq_clock_task(cfs_rq);
	struct rq *rq = rq_of(cfs_rq);
	int cpu = cpu_of(rq);
	int decayed;

	/*
	 * Track task load average for carrying it to new CPU after migrated, and
	 * track group sched_entity load average for task_h_load calc in migration
	 */
	if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
		__update_load_avg_se(now, cpu, cfs_rq, se);

	decayed  = update_cfs_rq_load_avg(now, cfs_rq);
	decayed |= propagate_entity_load_avg(se);

	if (decayed && (flags & UPDATE_TG))
		update_tg_load_avg(cfs_rq, 0);
}
__update_load_avg_se
Updates the entity's load average; the core work is done by calling ___update_load_avg().
static int
__update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	if (___update_load_avg(now, cpu, &se->avg,
			       se->on_rq * scale_load_down(se->load.weight),
			       cfs_rq->curr == se, NULL, 0, 0)) {
		trace_sched_load_se(se);
		cfs_se_util_change(&se->avg);

#ifdef UTIL_EST_DEBUG
		/*
		 * Trace utilization only for actual tasks.
		 *
		 * These trace events are mostly useful to get easier to
		 * read plots for the estimated utilization, where we can
		 * compare it with the actual grow/decrease of the original
		 * PELT signal.
		 * Let's keep them disabled by default in "production kernels".
		 */
		if (entity_is_task(se)) {
			struct task_struct *tsk = task_of(se);

			trace_sched_util_est_task(tsk, &se->avg);

			/* Trace utilization only for top level CFS RQ */
			cfs_rq = &(task_rq(tsk)->cfs);
			trace_sched_util_est_cpu(cpu, cfs_rq);
		}
#endif /* UTIL_EST_DEBUG */

		return 1;
	}

	return 0;
}
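Note the weight argument above: se->on_rq * scale_load_down(se->load.weight), so a dequeued entity contributes weight 0 and its signal only decays. Below is a minimal user-space sketch of how that argument is formed, assuming the 64-bit increased-load-resolution configuration where scale_load_down() drops SCHED_FIXEDPOINT_SHIFT bits; it is purely illustrative, not kernel code.

/*
 * Illustration (not kernel code) of the 'weight' argument passed to
 * ___update_load_avg(), assuming scale_load_down() shifts out
 * SCHED_FIXEDPOINT_SHIFT bits. With on_rq == 0 the weight collapses to 0.
 */
#include <stdio.h>

#define SCHED_FIXEDPOINT_SHIFT	10
#define scale_load_down(w)	((w) >> SCHED_FIXEDPOINT_SHIFT)

int main(void)
{
	unsigned long load_weight = 1024UL << SCHED_FIXEDPOINT_SHIFT; /* nice-0 weight, scaled up */
	int on_rq = 1;

	unsigned long weight = on_rq * scale_load_down(load_weight);
	printf("weight passed to ___update_load_avg(): %lu\n", weight); /* prints 1024 */
	return 0;
}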
update_cfs_rq_load_avg
In the end this also reaches ___update_load_avg().
/**
 * update_cfs_rq_load_avg - update the cfs_rq's load/util averages
 * @now: current time, as per cfs_rq_clock_task()
 * @cfs_rq: cfs_rq to update
 *
 * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
 * avg. The immediate corollary is that all (fair) tasks must be attached, see
 * post_init_entity_util_avg().
 *
 * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
 *
 * Returns true if the load decayed or we removed load.
 *
 * Since both these conditions indicate a changed cfs_rq->avg.load we should
 * call update_tg_load_avg() when this function returns true.
 */
/* compute the cfs_rq's load */
static inline int
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
{
	struct sched_avg *sa = &cfs_rq->avg;
	int decayed, removed_load = 0, removed_util = 0;
	bool update_freq = false;

	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
		sub_positive(&sa->load_avg, r);
		sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
		removed_load = 1;
		set_tg_cfs_propagate(cfs_rq);
	}

	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
		sub_positive(&sa->util_avg, r);
		sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
		removed_util = 1;
		set_tg_cfs_propagate(cfs_rq);
	}

	decayed = __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq);

#ifndef CONFIG_64BIT
	smp_wmb();
	cfs_rq->load_last_update_time_copy = sa->last_update_time;
#endif

#ifdef CONFIG_SCHED_WALT
	if (likely(!walt_disabled && sysctl_sched_use_walt_cpu_util))
		update_freq = true;
#endif

	if (update_freq || decayed || removed_util)
		cfs_rq_util_change(cfs_rq);

	return decayed || removed_load;
}
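The two atomic_long_xchg() blocks above drain the load/util that departing entities published for removal (for example on migration). Here is a minimal user-space sketch of that drain pattern, with the clamp-at-zero subtraction mirroring sub_positive(); the names and values are illustrative, not kernel code.

/*
 * Sketch of the "removed load" drain pattern: migrating entities add their
 * contribution to an atomic counter, and the periodic updater drains it
 * with an atomic exchange, clamping the running sum at zero.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_long removed_load_avg;	/* written on removal, drained by updater */
static unsigned long load_avg = 2048;	/* current cfs_rq-style running average */

/* Clamp-at-zero subtraction, mirroring the kernel's sub_positive(). */
static void sub_positive(unsigned long *ptr, unsigned long val)
{
	*ptr = (*ptr > val) ? *ptr - val : 0;
}

/* Called when an entity leaves: publish its contribution for later removal. */
static void remove_entity_load(long contrib)
{
	atomic_fetch_add(&removed_load_avg, contrib);
}

/* Periodic update: drain whatever accumulated since the last pass. */
static int drain_removed_load(void)
{
	long r = atomic_exchange(&removed_load_avg, 0);

	if (!r)
		return 0;
	sub_positive(&load_avg, (unsigned long)r);
	return 1;	/* load changed; the caller would then re-propagate */
}

int main(void)
{
	remove_entity_load(1500);
	remove_entity_load(900);	/* total exceeds load_avg: clamps at 0 */
	printf("removed=%d load_avg=%lu\n", drain_removed_load(), load_avg);
	return 0;
}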
This again reaches the same core function:
static int
__update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
{
	int ret;

	/* the common core shared with the per-entity path */
	ret = ___update_load_avg(now, cpu, &cfs_rq->avg,
				 scale_load_down(cfs_rq->load.weight),
				 cfs_rq->curr != NULL, cfs_rq, 0, 0);

	trace_sched_load_cfs_rq(cfs_rq);

	return ret;
}
___update_load_avg
Earlier kernel versions had no accumulate_sum() helper; that work was done directly inside this function. For a detailed walkthrough of accumulate_sum() see https://blog.csdn.net/feifei_csdn/article/details/103814876.
Its main job is to decay the elapsed time geometrically, as sketched below.
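For intuition, here is a floating-point sketch of that decay. The kernel performs it in 32.32 fixed point with lookup tables rather than pow(), so the converged value differs slightly from LOAD_AVG_MAX; variable names and the loop are purely illustrative.

/*
 * Floating-point sketch of the decay performed by accumulate_sum().
 * y is chosen so that y^32 = 0.5: load from ~32ms ago counts half.
 */
#include <math.h>
#include <stdio.h>

#define PELT_Y	pow(0.5, 1.0 / 32.0)

/* Decay the old sum over 'periods' full 1024us windows, then add the new contribution. */
static double accumulate(double sum, unsigned int periods, double new_contrib)
{
	return sum * pow(PELT_Y, periods) + new_contrib;
}

int main(void)
{
	double sum = 0.0;

	/* A task runnable for the whole of each 1024us window contributes 1024 per period. */
	for (int i = 0; i < 345; i++)
		sum = accumulate(sum, 1, 1024.0);

	/* Approaches 1024 / (1 - y), i.e. roughly LOAD_AVG_MAX (47742 in the kernel). */
	printf("sum after 345 periods: %.0f\n", sum);
	return 0;
}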
/*
 * We can represent the historical contribution to runnable average as the
 * coefficients of a geometric series. To do this we sub-divide our runnable
 * history into segments of approximately 1ms (1024us); label the segment that
 * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
 *
 *   [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
 *         p0            p1           p2
 *        (now)      (~1ms ago)   (~2ms ago)
 *
 * Let u_i denote the fraction of p_i that the entity was runnable.
 *
 * We then designate the fractions u_i as our co-efficients, yielding the
 * following representation of historical load:
 *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
 *
 * We choose y based on the width of a reasonable scheduling period, fixing:
 *   y^32 = 0.5
 *
 * This means that the contribution to load ~32ms ago (u_32) will be weighted
 * approximately half as much as the contribution to load within the last ms
 * (u_0).
 *
 * When a period "rolls over" and we have new u_0`, multiplying the previous
 * sum again by y is sufficient to update:
 *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
 *            = u_0 + u_1*y + u_2*y^2 + ...   [re-labeling u_i --> u_{i+1}]
 */
/* For the se path, the fourth argument is weight = se->on_rq * scale_load_down(se->load.weight). */
static __always_inline int
___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
		   unsigned long weight, int running, struct cfs_rq *cfs_rq,
		   int irq, int last_accum)
{
	u64 delta;
	u32 ret;

	delta = now - sa->last_update_time;
	/*
	 * This should only happen when time goes backwards, which it
	 * unfortunately does during sched clock init when we swap over to TSC.
	 */
	if ((s64)delta < 0) {
		sa->last_update_time = now;
		return 0;
	}

	/*
	 * Use 1024ns as the unit of measurement since it's a reasonable
	 * approximation of 1us and fast to compute.
	 */
	/* (1) shrink the time unit from ns to (approximately) us */
	delta >>= 10;
	if (!delta)
		return 0;

	sa->last_update_time += delta << 10;

	/*
	 * running is a subset of runnable (weight) so running can't be set if
	 * runnable is clear. But there are some corner cases where the current
	 * se has been already dequeued but cfs_rq->curr still points to it.
	 * This means that weight will be 0 but not running for a sched_entity
	 * but also for a cfs_rq if the latter becomes idle. As an example,
	 * this happens during idle_balance() which calls
	 * update_blocked_averages()
	 */
	if (!weight)
		running = 0;

	/*
	 * Now we know we crossed measurement unit boundaries. The *_avg
	 * accrues by two steps:
	 *
	 * Step 1: accumulate *_sum since last_update_time. If we haven't
	 * crossed period boundaries, finish.
	 */
	ret = accumulate_sum(delta, cpu, sa, weight, running, cfs_rq);
	if (!ret) {
		if (!irq || (irq && !last_accum))
			return 0;
	} else if (irq == 1)
		return 1;

	/*
	 * Step 2: update *_avg.
	 */
	sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
	if (cfs_rq) {
		cfs_rq->runnable_load_avg =
			div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
	}
	sa->util_avg = sa->util_sum / (LOAD_AVG_MAX - 1024 + sa->period_contrib);

	return 1;
}
In summary (decay_time denotes the decayed accumulation of runnable/running time, freq_scale the frequency scaling factor, scale_cpu the CPU-capacity scaling factor):

sa->load_sum = weight * decay_time * freq_scale
sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib)
sa->util_sum = decay_time * freq_scale * scale_cpu
sa->util_avg = sa->util_sum / (LOAD_AVG_MAX - 1024 + sa->period_contrib)
cfs_rq->runnable_load_sum = weight * decay_time * freq_scale
cfs_rq->runnable_load_avg = div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib)
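A worked example of these divisions, assuming a task that has been runnable and running continuously at full frequency and capacity, so both sums have saturated (the concrete numbers are illustrative, not taken from the kernel):

/*
 * With saturated sums, load_avg converges to the entity's weight and
 * util_avg to SCHED_CAPACITY_SCALE (full CPU capacity).
 */
#include <stdio.h>

#define LOAD_AVG_MAX	47742	/* maximum decayed sum of full 1024us segments */

int main(void)
{
	unsigned long weight = 1024;		/* nice-0 weight after scale_load_down() */
	unsigned int period_contrib = 512;	/* partial current period, always < 1024 */
	unsigned long divider = LOAD_AVG_MAX - 1024 + period_contrib;

	/* Saturated sums: every past segment contributed its full length. */
	unsigned long long load_sum = (unsigned long long)weight * divider;
	unsigned long long util_sum = 1024ULL * divider;

	printf("load_avg = %llu\n", load_sum / divider);	/* 1024 == weight */
	printf("util_avg = %llu\n", util_sum / divider);	/* 1024 == SCHED_CAPACITY_SCALE */
	return 0;
}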