diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 570b8d056282..0951d1c58d2f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3242,6 +3242,32 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna
 	sa->util_avg = sa->util_sum / divider;
 }
 
+/*
+ * When a task is dequeued, its estimated utilization should not be updated if
+ * its util_avg has not been updated at least once.
+ * This flag is used to synchronize util_avg updates with util_est updates.
+ * We map this information into the LSB bit of the utilization saved at
+ * dequeue time (i.e. util_est.enqueued).
+ */
+#define UTIL_AVG_UNCHANGED 0x1
+
+static inline void cfs_se_util_change(struct sched_avg *avg)
+{
+	unsigned int enqueued;
+
+	if (!sched_feat(UTIL_EST))
+		return;
+
+	/* Avoid store if the flag has been already set */
+	enqueued = avg->util_est.enqueued;
+	if (!(enqueued & UTIL_AVG_UNCHANGED))
+		return;
+
+	/* Reset flag to report util_avg has been updated */
+	enqueued &= ~UTIL_AVG_UNCHANGED;
+	WRITE_ONCE(avg->util_est.enqueued, enqueued);
+}
+
 /*
  * sched_entity:
  *
@@ -3293,6 +3319,7 @@ __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entit
 			  cfs_rq->curr == se)) {
 
 		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
+		cfs_se_util_change(&se->avg);
 		return 1;
 	}
 
@@ -3900,7 +3927,7 @@ static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
 
 	/* Update root cfs_rq's estimated utilization */
 	enqueued  = cfs_rq->avg.util_est.enqueued;
-	enqueued += _task_util_est(p);
+	enqueued += (_task_util_est(p) | UTIL_AVG_UNCHANGED);
 	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
 }
 
@@ -3936,7 +3963,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
 	if (cfs_rq->nr_running) {
 		ue.enqueued  = cfs_rq->avg.util_est.enqueued;
 		ue.enqueued -= min_t(unsigned int, ue.enqueued,
-				     _task_util_est(p));
+				     (_task_util_est(p) | UTIL_AVG_UNCHANGED));
 	}
 	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
 
@@ -3947,12 +3974,19 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
 	if (!task_sleep)
 		return;
 
+	/*
+	 * If the PELT values haven't changed since enqueue time,
+	 * skip the util_est update.
+	 */
+	ue = p->se.avg.util_est;
+	if (ue.enqueued & UTIL_AVG_UNCHANGED)
+		return;
+
 	/*
 	 * Skip update of task's estimated utilization when its EWMA is
 	 * already ~1% close to its last activation value.
 	 */
-	ue = p->se.avg.util_est;
-	ue.enqueued = task_util(p);
+	ue.enqueued = (task_util(p) | UTIL_AVG_UNCHANGED);
 	last_ewma_diff = ue.enqueued - ue.ewma;
 	if (within_margin(last_ewma_diff, (SCHED_CAPACITY_SCALE / 100)))
 		return;
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index c459a4b61544..85ae8488039c 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -89,4 +89,4 @@ SCHED_FEAT(WA_BIAS, true)
 /*
  * UtilEstimation. Use estimated CPU utilization.
  */
-SCHED_FEAT(UTIL_EST, false)
+SCHED_FEAT(UTIL_EST, true)
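
A note on the flag's lifecycle, since it is spread across three hunks above: UTIL_AVG_UNCHANGED is stored in the LSB of the task's util_est.enqueued when the estimate is saved at dequeue time, cleared by cfs_se_util_change() once PELT actually updates util_avg while the task runs, and tested again at the next dequeue to decide whether the EWMA update is worth doing. Overloading the LSB costs at most 1 part in SCHED_CAPACITY_SCALE (1024) of precision in the stored estimate. The snippet below is a minimal userspace sketch of that lifecycle, not kernel code: the struct layout, helper names and printf() reporting are made up for illustration, and sched_feat(), WRITE_ONCE() and the EWMA math are deliberately left out.

/*
 * Standalone illustration of the UTIL_AVG_UNCHANGED LSB trick (not kernel
 * code): the flag lives in the LSB of util_est.enqueued, is cleared when
 * util_avg is actually updated, and is tested at dequeue time.
 */
#include <stdio.h>

#define UTIL_AVG_UNCHANGED 0x1

struct util_est { unsigned int enqueued; unsigned int ewma; };

/* Mirrors cfs_se_util_change(): a util_avg update clears the flag. */
static void pelt_updated(struct util_est *ue)
{
	if (ue->enqueued & UTIL_AVG_UNCHANGED)
		ue->enqueued &= ~UTIL_AVG_UNCHANGED;
}

/* Mirrors the tail of util_est_dequeue(): skip if util_avg never moved. */
static void task_dequeued(struct util_est *ue, unsigned int task_util)
{
	if (ue->enqueued & UTIL_AVG_UNCHANGED) {
		printf("flag still set: util_est left at %u\n",
		       ue->enqueued & ~UTIL_AVG_UNCHANGED);
		return;
	}
	/* Save the new activation value, with the flag set again. */
	ue->enqueued = task_util | UTIL_AVG_UNCHANGED;
	printf("util_est updated to %u\n", task_util);
}

int main(void)
{
	struct util_est ue = { .enqueued = 200 | UTIL_AVG_UNCHANGED, .ewma = 0 };

	/* Very short activation: no PELT update ran, the estimate is kept. */
	task_dequeued(&ue, 40);

	/* Longer activation: util_avg was updated at least once. */
	pelt_updated(&ue);
	task_dequeued(&ue, 300);

	return 0;
}

Compiling and running this should show the first (too short) activation keeping the previous estimate of 200 while the second one stores the new activation value of 300, which is exactly the behaviour the dequeue-time check in the patch is after.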