Overview
load
target_load
target_load is the load of the CPU a task would be migrated to; in other words, the load of the destination CPU.
static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
{
	return cfs_rq->runnable_load_avg;
}

/* Used instead of source_load when we know the type == 0 */
static unsigned long weighted_cpuload(struct rq *rq)
{
	return cfs_rq_runnable_load_avg(&rq->cfs);
}
/*
 * Return a high guess at the load of a migration-target cpu weighted
 * according to the scheduling class and "nice" value.
 */
// This function computes the load of the destination CPU; the kernel uses
// this value to decide whether to migrate a task onto that CPU.
static unsigned long target_load(int cpu, int type)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long total = weighted_cpuload(rq);

	if (type == 0 || !sched_feat(LB_BIAS))
		return total;

	// Answer with the higher value, telling the kernel "I already have
	// plenty of tasks, avoid pushing more onto me".
	return max(rq->cpu_load[type-1], total);
}
source_load
/*
* Return a low guess at the load of a migration-source cpu weighted
* according to the scheduling class and "nice" value.
*
* We want to under-estimate the load of migration sources, to
* balance conservatively.
*/
// This function computes the load of the source CPU; the kernel uses this
// value to decide whether to migrate tasks off that CPU.
static unsigned long source_load(int cpu, int type)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long total = weighted_cpuload(rq);

	if (type == 0 || !sched_feat(LB_BIAS))
		return total;

	// Answer with the lower value, hinting the kernel not to migrate my tasks away.
	return min(rq->cpu_load[type-1], total);
}
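The two opposite biases are easiest to see side by side. Below is a minimal standalone sketch, not kernel code: the two load values are made-up stand-ins for rq->cpu_load[type-1] and weighted_cpuload(rq), and demo_target_load()/demo_source_load() are hypothetical helpers that only mimic the max/min selection above.

#include <stdio.h>

/* Made-up stand-ins for rq->cpu_load[type-1] and weighted_cpuload(rq). */
static unsigned long historic_load = 800; /* decayed load history */
static unsigned long instant_load = 600;  /* current runnable load */

/* High guess: make the migration target look as busy as possible. */
static unsigned long demo_target_load(void)
{
	return historic_load > instant_load ? historic_load : instant_load;
}

/* Low guess: make the migration source look as idle as possible. */
static unsigned long demo_source_load(void)
{
	return historic_load < instant_load ? historic_load : instant_load;
}

int main(void)
{
	printf("as target: %lu\n", demo_target_load()); /* 800 */
	printf("as source: %lu\n", demo_source_load()); /* 600 */
	return 0;
}

The same CPU reports 800 when judged as a destination but only 600 when judged as a source, so the computed imbalance shrinks and balancing stays conservative.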
util
The util here is computed purely from PELT (Per-Entity Load Tracking).
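As a rough illustration of the PELT idea, here is a toy sketch, not the kernel algorithm: it uses floating point and a made-up running history, whereas the kernel uses fixed-point arithmetic and a precomputed decay table. Each roughly 1024us period contributes with geometric decay y, chosen so that y^32 = 0.5:

#include <stdio.h>
#include <math.h>

int main(void)
{
	double y = pow(0.5, 1.0 / 32.0); /* y^32 = 0.5 */
	/* 1 = CPU was running in that ~1024us period, 0 = idle;
	 * running[0] is the most recent period (made-up history). */
	int running[8] = { 1, 1, 0, 1, 0, 0, 1, 1 };
	double sum = 0.0, max_sum = 0.0;
	int i;

	for (i = 0; i < 8; i++) {
		sum += running[i] * pow(y, i);
		max_sum += pow(y, i); /* an always-running CPU as reference */
	}

	/* A util_avg-like value scaled to 1024 (SCHED_CAPACITY_SCALE). */
	printf("util ~= %.0f / 1024\n", 1024.0 * sum / max_sum);
	return 0;
}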
/**
* Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
* @cpu: the CPU to get the utilization of
*
* The unit of the return value must be the one of capacity so we can compare
* the utilization with the capacity of the CPU that is available for CFS task
* (ie cpu_capacity).
*
* cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
* recent utilization of currently non-runnable tasks on a CPU. It represents
* the amount of utilization of a CPU in the range [0..capacity_orig] where
* capacity_orig is the cpu_capacity available at the highest frequency,
* i.e. arch_scale_cpu_capacity().
* The utilization of a CPU converges towards a sum equal to or less than the
* current capacity (capacity_curr <= capacity_orig) of the CPU because it is
* the running time on this CPU scaled by capacity_curr.
*
* The estimated utilization of a CPU is defined to be the maximum between its
* cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
* currently RUNNABLE on that CPU.
* This allows to properly represent the expected utilization of a CPU which
* has just got a big task running since a long sleep period. At the same time
* however it preserves the benefits of the "blocked utilization" in
* describing the potential for other tasks waking up on the same CPU.
*
* Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
* higher than capacity_orig because of unfortunate rounding in
* cfs.avg.util_avg or just after migrating tasks and new task wakeups until
* the average stabilizes with the new running time. We need to check that the
* utilization stays within the range of [0..capacity_orig] and cap it if
* necessary. Without utilization capping, a group could be seen as overloaded
* (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
* available capacity. We allow utilization to overshoot capacity_curr (but not
* capacity_orig) as it is useful for predicting the capacity required after
* task migrations (scheduler-driven DVFS).
*
* Return: the (estimated) utilization for the specified CPU
*/
static inline unsigned long cpu_util(int cpu)
{
	struct cfs_rq *cfs_rq;
	unsigned int util;

#ifdef CONFIG_SCHED_WALT
	if (likely(!walt_disabled && sysctl_sched_use_walt_cpu_util)) {
		u64 walt_cpu_util = cpu_rq(cpu)->cumulative_runnable_avg;

		walt_cpu_util <<= SCHED_CAPACITY_SHIFT;
		do_div(walt_cpu_util, walt_ravg_window);

		return min_t(unsigned long, walt_cpu_util,
			     capacity_orig_of(cpu));
	}
#endif

	cfs_rq = &cpu_rq(cpu)->cfs;
	util = READ_ONCE(cfs_rq->avg.util_avg);

	if (sched_feat(UTIL_EST))
		/* After a new task wakeup, cfs.avg.util_est may exceed
		   capacity_curr or even capacity_orig because of rounding */
		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));

	/* Clamp util to the range [0..capacity_orig] */
	return min_t(unsigned long, util, capacity_orig_of(cpu));
}
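Ignoring the WALT branch, the function boils down to two steps: take the larger of the PELT average and the enqueued estimate, then clamp to capacity_orig. A standalone sketch with made-up numbers (util_avg = 300, util_est.enqueued = 700, and capacity_orig = 1024 are assumptions for illustration):

#include <stdio.h>

#define CAPACITY_ORIG 1024UL /* assumed arch_scale_cpu_capacity() value */

int main(void)
{
	unsigned long util_avg = 300;          /* PELT running utilization (made up) */
	unsigned long util_est_enqueued = 700; /* estimate for runnable tasks (made up) */
	unsigned long util;

	/* Step 1: trust the larger of the average and the estimate, so a big
	 * task that just woke from a long sleep is represented immediately. */
	util = util_avg > util_est_enqueued ? util_avg : util_est_enqueued;

	/* Step 2: clamp to [0..capacity_orig] so transient overshoot after
	 * migrations or wakeups cannot make the CPU look more than 100% busy. */
	if (util > CAPACITY_ORIG)
		util = CAPACITY_ORIG;

	printf("cpu_util ~= %lu\n", util); /* prints 700 */
	return 0;
}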
update_sg_lb_stats
/**
* update_sg_lb_stats - Update sched_group's statistics for load balancing.
* @env: The load balancing environment.
* @group: sched_group whose statistics are to be updated.
* @load_idx: Load index of sched_domain of this_cpu for load calc.
* @local_group: Does group contain this_cpu.
* @sgs: variable to hold the statistics for this group.
* @overload: Indicate pullable load (e.g. >1 runnable task).
* @overutilized: Indicate overutilization for any CPU.
* @misfit_task: Indicate misfit_task for any CPU
*/
static inline void update_sg_lb_stats(struct lb_env *env,
			struct sched_group *group, int load_idx,
			int local_group, struct sg_lb_stats *sgs,
			bool *overload, bool *overutilized, bool *misfit_task)
{
	unsigned long load;
	int i, nr_running;

	memset(sgs, 0, sizeof(*sgs));

	/* (7.3.1.5.1) Iterate over every CPU in the sched_group */
	for_each_cpu_and(i, sched_group_span(group), env->cpus) {
		struct rq *rq = cpu_rq(i);

		/* Bias balancing toward cpus of our domain */
		if (local_group) /* the group containing the destination cpu */
			/* For the local group, bias the load high:
			   max(rq->cpu_load[load_idx-1], weighted_cpuload(rq)) */
			load = target_load(i, load_idx);
		else
			/* For a remote group, bias the load low:
			   min(rq->cpu_load[load_idx-1], weighted_cpuload(rq)) */
			load = source_load(i, load_idx);

		/* (7.3.1.5.4) Accumulate the per-cpu values into sgs:
		   sgs->group_load        // weighted runnable load (cfs.runnable_load_avg), biased via rq->cpu_load[]
		   sgs->group_util        // running utilization (cfs.avg.util_avg)
		   sgs->sum_nr_running    // total number of CFS tasks (h_nr_running) on the rq
		   sgs->sum_weighted_load // weighted runnable load (cfs.runnable_load_avg)
		   sgs->idle_cpus         // count of idle cpus
		 */
		sgs->group_load += load;
		sgs->group_util += cpu_util(i);
		sgs->sum_nr_running += rq->cfs.h_nr_running;

		nr_running = rq->nr_running;
		/* (7.3.1.5.5) If the rq holds more than one task, some tasks are
		   runnable but not running, so overload = true */
		if (nr_running > 1)
			*overload = true;

#ifdef CONFIG_NUMA_BALANCING
		sgs->nr_numa_running += rq->nr_numa_running;
		sgs->nr_preferred_running += rq->nr_preferred_running;
#endif
		sgs->sum_weighted_load += weighted_cpuload(rq);
		/*
		 * No need to call idle_cpu() if nr_running is not 0
		 */
		if (!nr_running && idle_cpu(i))
			sgs->idle_cpus++;

		if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
		    sgs->group_misfit_task_load < rq->misfit_task_load) {
			sgs->group_misfit_task_load = rq->misfit_task_load;
			*overload = 1;
		}

		/* (7.3.1.5.6) If the cpu's capacity is smaller than its running
		   load, overutilized = true */
		if (cpu_overutilized(i)) {
			*overutilized = true;

			if (rq->misfit_task_load)
				*misfit_task = true;
		}
	}

	/* (7.3.1.5.7) Update the aggregated statistics of sgs:
	   sgs->group_capacity    // sum of the capacity of all cpus in the group
	   sgs->avg_load          // group_load scaled in proportion to group_capacity; the smaller the capacity, the larger avg_load
	   sgs->load_per_task     // average weighted load per task in the group
	   sgs->group_weight      // number of online cpus in the group
	   sgs->group_no_capacity // the group's capacity is exhausted and cannot keep up with its util
	   sgs->group_type        // severity: group_overloaded > group_imbalanced > group_other
	                          // group_imbalanced: a lower-level load_balance could not finish because of cpu affinity
	 */
	/* Adjust by relative CPU capacity of the group */
	sgs->group_capacity = group->sgc->capacity;
	sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;

	if (sgs->sum_nr_running)
		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;

	sgs->group_weight = group->group_weight;

	sgs->group_no_capacity = group_is_overloaded(env, sgs);
	sgs->group_type = group_classify(group, sgs);
}
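To see why the division by group_capacity matters, here is a worked example with hypothetical numbers: the same absolute group_load produces twice the avg_load on a group with half the capacity, so the weaker group is treated as the busier one.

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024UL

int main(void)
{
	/* Two hypothetical groups carrying the same absolute load. */
	unsigned long group_load = 2048;
	unsigned long big_capacity = 2048;    /* e.g. two big cores (assumed) */
	unsigned long little_capacity = 1024; /* e.g. two little cores (assumed) */

	printf("big:    avg_load = %lu\n",
	       group_load * SCHED_CAPACITY_SCALE / big_capacity);    /* 1024 */
	printf("little: avg_load = %lu\n",
	       group_load * SCHED_CAPACITY_SCALE / little_capacity); /* 2048 */
	return 0;
}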
Statistic | How it is computed | Notes
sgs->group_load | += load | the local (target) group takes the larger value, remote (source) groups the smaller one
sgs->group_util | += min(max(READ_ONCE(cfs_rq->avg.util_avg), READ_ONCE(cfs_rq->avg.util_est.enqueued)), capacity_orig_of(cpu)) |
sgs->sum_nr_running | += rq->cfs.h_nr_running |
sgs->sum_weighted_load | += cfs_rq->runnable_load_avg |
sgs->group_misfit_task_load | = rq->misfit_task_load | a single maximum over the group's CPUs, not an accumulated sum
sgs->idle_cpus | ++ |
sgs->group_capacity | = group->sgc->capacity |
sgs->avg_load | = (sgs->group_load * SCHED_CAPACITY_SCALE) / sgs->group_capacity |
sgs->load_per_task | = sgs->sum_weighted_load / sgs->sum_nr_running |
sgs->group_weight | = group->group_weight |
sgs->group_no_capacity | = group_is_overloaded(env, sgs) |
sgs->group_type | = group_classify(group, sgs) | severity: group_overloaded > group_imbalanced > group_other
In practice sgs->group_no_capacity hinges on a single condition that can return true: the group's util, scaled by imbalance_pct, exceeds the group's own capacity. A worked example follows the code below.
/*
* group_is_overloaded returns true if the group has more tasks than it can
* handle.
* group_is_overloaded is not equal to !group_has_capacity because a group
* with exactly the right number of tasks has no more spare capacity but is
* not overloaded, so both group_has_capacity and group_is_overloaded return
* false.
*/
static inline bool
group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
{
	if (sgs->sum_nr_running <= sgs->group_weight)
		return false;

	if ((sgs->group_capacity * 100) <
			(sgs->group_util * env->sd->imbalance_pct))
		return true;

	return false;
}
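Here is a standalone sketch of that check with hypothetical numbers (imbalance_pct = 117 is a common default for an MC-level domain, assumed here). With 4 CPUs, 6 tasks, and group capacity 4096, the group counts as overloaded only once util exceeds 4096 * 100 / 117, roughly 3501:

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical standalone mirror of the group_is_overloaded() logic above. */
static bool demo_group_is_overloaded(unsigned long sum_nr_running,
				     unsigned long group_weight,
				     unsigned long capacity,
				     unsigned long util,
				     unsigned int imbalance_pct)
{
	if (sum_nr_running <= group_weight)
		return false; /* at most one task per CPU: never overloaded */

	/* util must exceed capacity by the imbalance_pct margin */
	return capacity * 100 < util * imbalance_pct;
}

int main(void)
{
	printf("util=3400 -> %d\n",
	       demo_group_is_overloaded(6, 4, 4096, 3400, 117)); /* 0 */
	printf("util=3600 -> %d\n",
	       demo_group_is_overloaded(6, 4, 4096, 3600, 117)); /* 1 */
	return 0;
}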