@@ -19,6 +19,9 @@
  *
  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ *  Burst-Oriented Response Enhancer (BORE) CPU Scheduler
+ *  Copyright (C) 2021-2023 Masahito Suzuki <firelzrd@gmail.com>
  */
 #include <linux/energy_model.h>
 #include <linux/mmap_lock.h>
@@ -66,17 +69,28 @@
  * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
  * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
  *
- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
+ * (BORE  default SCHED_TUNABLESCALING_NONE = *1 constant)
+ * (EEVDF default SCHED_TUNABLESCALING_LOG  = *(1+ilog(ncpus))
  */
+#ifdef CONFIG_SCHED_BORE
+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
+#else // CONFIG_SCHED_BORE
 unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
+#endif // CONFIG_SCHED_BORE
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  *
- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (BORE  default: 3 msec constant, units: nanoseconds)
+ * (EEVDF default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
+#ifdef CONFIG_SCHED_BORE
+unsigned int sysctl_sched_base_slice			= 3000000ULL;
+static unsigned int normalized_sysctl_sched_base_slice	= 3000000ULL;
+#else // CONFIG_SCHED_BORE
 unsigned int sysctl_sched_base_slice			= 750000ULL;
 static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
+#endif // CONFIG_SCHED_BORE
 
 /*
  * After fork, child runs first. If set to 0 (default) then
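For reference, the two defaults coincide on an 8-CPU machine: EEVDF's 0.75 msec * (1 + ilog(8)) = 0.75 * 4 = 3 msec, exactly BORE's constant. On larger machines EEVDF's slice keeps growing with ilog(ncpus) while BORE's stays flat, which is consistent with the switch to SCHED_TUNABLESCALING_NONE above.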
@@ -86,6 +100,68 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
 
 const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
 
+#ifdef CONFIG_SCHED_BORE
+bool __read_mostly sched_bore                   = 1;
+bool __read_mostly sched_burst_score_rounding   = 0;
+bool __read_mostly sched_burst_smoothness_long  = 1;
+bool __read_mostly sched_burst_smoothness_short = 0;
+u8   __read_mostly sched_burst_fork_atavistic   = 2;
+u8   __read_mostly sched_burst_penalty_offset   = 22;
+uint __read_mostly sched_burst_penalty_scale    = 1280;
+uint __read_mostly sched_burst_cache_lifetime   = 60000000;
+static u8   sixty_four     = 64;
+static uint maxval_12_bits = 4095;
+
+#define MAX_BURST_PENALTY (39U <<2)
+
+static inline u32 log2plus1_u64_u32f8(u64 v) {
+	u32 msb = fls64(v);
+	s32 excess_bits = msb - 9;
+	u8  fractional = (0 <= excess_bits)? v >> excess_bits: v << -excess_bits;
+	return msb << 8 | fractional;
+}
+
+static inline u32 calc_burst_penalty(u64 burst_time) {
+	u32 greed, tolerance, penalty, scaled_penalty;
+
+	greed = log2plus1_u64_u32f8(burst_time);
+	tolerance = sched_burst_penalty_offset << 8;
+	penalty = max(0, (s32)greed - (s32)tolerance);
+	scaled_penalty = penalty * sched_burst_penalty_scale >> 16;
+
+	return min(MAX_BURST_PENALTY, scaled_penalty);
+}
+
+static inline void update_burst_penalty(struct sched_entity *se) {
+	se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
+	se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
+}
+
+static inline void update_slice_score(struct sched_entity *se) {
+	u32 penalty = se->burst_penalty;
+	if (sched_burst_score_rounding) penalty += 0x2U;
+	se->slice_score = penalty >> 2;
+}
+
+static inline u64 scale_slice(u64 delta, struct sched_entity *se) {
+	return mul_u64_u32_shr(delta, sched_prio_to_wmult[se->slice_score], 22);
+}
+
+static inline u32 binary_smooth(u32 new, u32 old) {
+	int increment = new - old;
+	return (0 <= increment)?
+		old + ( increment >> (int)sched_burst_smoothness_long):
+		old - (-increment >> (int)sched_burst_smoothness_short);
+}
+
+static void restart_burst(struct sched_entity *se) {
+	se->burst_penalty = se->prev_burst_penalty =
+		binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
+	se->curr_burst_penalty = 0;
+	se->burst_time = 0;
+}
+#endif // CONFIG_SCHED_BORE
+
 int sched_thermal_decay_shift;
 static int __init setup_sched_thermal_decay_shift(char *str)
 {
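To see what the fixed-point math above actually produces, here is a small userspace sketch (not part of the patch; fls64_() stands in for the kernel's fls64()) that mirrors calc_burst_penalty() with the default offset of 22 and scale of 1280. Because the tolerance is 22 << 8, bursts shorter than roughly 2^21 ns (~2.1 msec) incur no penalty at all:

#include <stdio.h>
#include <stdint.h>

/* Userspace stand-ins for the kernel defaults above. */
#define PENALTY_OFFSET 22          /* sched_burst_penalty_offset */
#define PENALTY_SCALE  1280        /* sched_burst_penalty_scale  */
#define MAX_PENALTY    (39U << 2)  /* MAX_BURST_PENALTY          */

/* fls64(): position of the most significant set bit, 0 for v == 0. */
static uint32_t fls64_(uint64_t v) { return v ? 64 - __builtin_clzll(v) : 0; }

/* 8.8 fixed-point log2(v)+1: MSB position in the high bits, the
 * eight bits just below the MSB as the fractional part. */
static uint32_t log2plus1_u64_u32f8(uint64_t v)
{
	uint32_t msb = fls64_(v);
	int32_t excess_bits = msb - 9;
	uint8_t fractional = (excess_bits >= 0) ? v >> excess_bits
						: v << -excess_bits;
	return msb << 8 | fractional;
}

static uint32_t calc_burst_penalty(uint64_t burst_time)
{
	uint32_t greed = log2plus1_u64_u32f8(burst_time);
	uint32_t tolerance = PENALTY_OFFSET << 8;
	int32_t penalty = (int32_t)greed - (int32_t)tolerance;
	uint32_t scaled = (penalty < 0 ? 0 : (uint32_t)penalty)
			  * PENALTY_SCALE >> 16;
	return scaled < MAX_PENALTY ? scaled : MAX_PENALTY;
}

int main(void)
{
	/* Burst lengths in ns: 1 us, 100 us, ~4.2 ms, 1 s, 30 s. */
	uint64_t ns[] = {1000, 100000, 1ULL << 22, 1000000000ULL,
			 30000000000ULL};
	for (int i = 0; i < 5; i++)
		printf("burst %11llu ns -> penalty %3u, slice_score %2u\n",
		       (unsigned long long)ns[i], calc_burst_penalty(ns[i]),
		       calc_burst_penalty(ns[i]) >> 2);
	return 0;
}

The penalty grows with the logarithm of the burst length and saturates at MAX_BURST_PENALTY (156), i.e. a slice score of 39.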
@@ -145,6 +221,70 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table sched_fair_sysctls[] = {
+#ifdef CONFIG_SCHED_BORE
+	{
+		.procname	= "sched_bore",
+		.data		= &sched_bore,
+		.maxlen		= sizeof(bool),
+		.mode		= 0644,
+		.proc_handler	= &proc_dobool,
+	},
+	{
+		.procname	= "sched_burst_cache_lifetime",
+		.data		= &sched_burst_cache_lifetime,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec,
+	},
+	{
+		.procname	= "sched_burst_fork_atavistic",
+		.data		= &sched_burst_fork_atavistic,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= &proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_THREE,
+	},
+	{
+		.procname	= "sched_burst_penalty_offset",
+		.data		= &sched_burst_penalty_offset,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= &proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &sixty_four,
+	},
+	{
+		.procname	= "sched_burst_penalty_scale",
+		.data		= &sched_burst_penalty_scale,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler	= &proc_douintvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &maxval_12_bits,
+	},
+	{
+		.procname	= "sched_burst_score_rounding",
+		.data		= &sched_burst_score_rounding,
+		.maxlen		= sizeof(bool),
+		.mode		= 0644,
+		.proc_handler	= &proc_dobool,
+	},
+	{
+		.procname	= "sched_burst_smoothness_long",
+		.data		= &sched_burst_smoothness_long,
+		.maxlen		= sizeof(bool),
+		.mode		= 0644,
+		.proc_handler	= &proc_dobool,
+	},
+	{
+		.procname	= "sched_burst_smoothness_short",
+		.data		= &sched_burst_smoothness_short,
+		.maxlen		= sizeof(bool),
+		.mode		= 0644,
+		.proc_handler	= &proc_dobool,
+	},
+#endif // CONFIG_SCHED_BORE
 	{
 		.procname	= "sched_child_runs_first",
 		.data		= &sysctl_sched_child_runs_first,
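With CONFIG_SCHED_BORE enabled these knobs appear under /proc/sys/kernel/; for example, `echo 0 > /proc/sys/kernel/sched_bore` turns the burst-based scaling off at runtime. The extra1/extra2 bounds restrict sched_burst_fork_atavistic to 0-3, sched_burst_penalty_offset to 0-64, and sched_burst_penalty_scale to 0-4095.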
@@ -313,6 +453,9 @@ static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
 	if (unlikely(se->load.weight != NICE_0_LOAD))
 		delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
 
+#ifdef CONFIG_SCHED_BORE
+	if (likely(sched_bore)) delta = scale_slice(delta, se);
+#endif // CONFIG_SCHED_BORE
 	return delta;
 }
 
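scale_slice() reuses sched_prio_to_wmult[], so the slice score (0-39) acts on vruntime accrual much like a nice level acts on load weight. A userspace sketch of the effect, assuming the stock wmult values from kernel/sched/core.c for indices 0, 20 and 39 (the names below are local to the example):

#include <stdio.h>
#include <stdint.h>

/* Three representative entries of the kernel's sched_prio_to_wmult[]
 * table, which scale_slice() indexes by slice_score. */
static const struct { unsigned score; uint32_t wmult; } samples[] = {
	{  0,     48388 },   /* lowest burst score  */
	{ 20,   4194304 },   /* == 2^22, neutral    */
	{ 39, 286331153 },   /* highest burst score */
};

/* mul_u64_u32_shr(delta, wmult, 22) == delta * wmult / 2^22; the kernel
 * helper does a full-width multiply, but 64 bits suffice for this demo. */
static uint64_t scale_slice(uint64_t delta, uint32_t wmult)
{
	return delta * wmult >> 22;
}

int main(void)
{
	uint64_t delta = 1000000; /* 1 ms of runtime, in ns */
	for (int i = 0; i < 3; i++)
		printf("slice_score %2u: 1 ms runtime -> %8llu ns vruntime\n",
		       samples[i].score,
		       (unsigned long long)scale_slice(delta, samples[i].wmult));
	return 0;
}

A fully "greedy" task (score 39) thus burns virtual time roughly 68x faster than neutral, while a well-behaved one (score 0) accrues it at about 1/87th of the neutral rate.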
@@ -668,7 +811,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
  * Specifically: avg_runtime() + 0 must result in entity_eligible() := true
  * For this to be so, the result of this function must have a left bias.
  */
-u64 avg_vruntime(struct cfs_rq *cfs_rq)
+static u64 avg_key(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 avg = cfs_rq->avg_vruntime;
@@ -688,7 +831,11 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 		avg = div_s64(avg, load);
 	}
 
-	return cfs_rq->min_vruntime + avg;
+	return avg;
+}
+
+inline u64 avg_vruntime(struct cfs_rq *cfs_rq) {
+	return cfs_rq->min_vruntime + avg_key(cfs_rq);
 }
 
 /*
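The split is mechanical: avg_key() returns the load-weighted average key (vruntime relative to min_vruntime) and the old avg_vruntime() becomes a thin wrapper that re-adds min_vruntime, so callers can use the relative key directly. A toy model of the bookkeeping, with hypothetical toy_* names and the incremental \Sum w_i * (v_i - v0) sums the kernel maintains:

#include <stdio.h>
#include <stdint.h>

/* Keys are tracked relative to min_vruntime so the running sums
 * stay small; avg_key() just divides them down. */
struct toy_rq {
	int64_t  avg_vruntime;  /* \Sum w_i * (v_i - v0) */
	uint64_t avg_load;      /* \Sum w_i              */
	uint64_t min_vruntime;  /* v0                    */
};

static void toy_enqueue(struct toy_rq *rq, uint64_t vruntime, uint32_t weight)
{
	rq->avg_vruntime += (int64_t)(vruntime - rq->min_vruntime) * weight;
	rq->avg_load     += weight;
}

static int64_t toy_avg_key(const struct toy_rq *rq)
{
	return rq->avg_load ? rq->avg_vruntime / (int64_t)rq->avg_load : 0;
}

int main(void)
{
	struct toy_rq rq = { .min_vruntime = 1000 };
	toy_enqueue(&rq, 1100, 1024);  /* nice 0 entity */
	toy_enqueue(&rq, 1900, 335);   /* nice 5 entity */
	/* avg_vruntime() == min_vruntime + avg_key() */
	printf("avg_key = %lld, avg_vruntime = %llu\n",
	       (long long)toy_avg_key(&rq),
	       (unsigned long long)(rq.min_vruntime + toy_avg_key(&rq)));
	return 0;
}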
@@ -709,13 +856,8 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
  */
 static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	s64 lag, limit;
-
 	SCHED_WARN_ON(!se->on_rq);
-	lag = avg_vruntime(cfs_rq) - se->vruntime;
-
-	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
-	se->vlag = clamp(lag, -limit, limit);
+	se->vlag = avg_vruntime(cfs_rq) - se->vruntime;
 }
 
 /*
@@ -1031,6 +1173,9 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * EEVDF: vd_i = ve_i + r_i / w_i
 	 */
+#ifdef CONFIG_SCHED_BORE
+	update_slice_score(se);
+#endif // CONFIG_SCHED_BORE
 	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
 
 	/*
@@ -1173,7 +1318,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq->exec_clock, delta_exec);
 
-	curr->vruntime += calc_delta_fair(delta_exec, curr);
+#ifdef CONFIG_SCHED_BORE
+	curr->burst_time += delta_exec;
+	update_burst_penalty(curr);
+#endif // CONFIG_SCHED_BORE
+	curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr));
 	update_deadline(cfs_rq, curr);
 	update_min_vruntime(cfs_rq);
 
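Two things happen here: each accounted delta_exec now also feeds the entity's burst_time before the penalty is recomputed, and the vruntime increment gains a 1 ns floor. The floor matters because scale_slice() can round a small weighted delta down to zero; for example, at slice score 0 (wmult 48388) a delta of 86 ns scales to 48388 * 86 >> 22 = 0, and without the max() such a task's vruntime would not advance at all on short ticks.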
@@ -4970,6 +5119,16 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 
 #endif /* CONFIG_SMP */
 
+static inline s64 calc_placement_lag(struct sched_entity *se) {
+	u64 slice = se->slice;
+#ifdef CONFIG_SCHED_BORE
+	if (unlikely(!sched_bore))
+#endif // CONFIG_SCHED_BORE
+		slice *= 2;
+	s64 limit = calc_delta_fair(max_t(u64, slice, TICK_NSEC), se);
+	return clamp(se->vlag, -limit, limit);
+}
+
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
@@ -4977,6 +5136,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	s64 lag = 0;
 
 	se->slice = sysctl_sched_base_slice;
+#ifdef CONFIG_SCHED_BORE
+	update_slice_score(se);
+#endif // CONFIG_SCHED_BORE
 	vslice = calc_delta_fair(se->slice, se);
 
 	/*
@@ -4991,7 +5153,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		struct sched_entity *curr = cfs_rq->curr;
 		unsigned long load;
 
-		lag = se->vlag;
+		lag = calc_placement_lag(se);
 
 		/*
 		 * If we want to place a task and preserve lag, we have to
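Note the interplay with the update_entity_lag() change above: upstream clamps lag to +/- calc_delta_fair(2 * slice) at the moment it is saved, whereas this patch stores the raw lag and defers clamping to placement time via calc_placement_lag(). With sched_bore enabled the slice is not doubled, so BORE halves the window of lag an entity may carry across a dequeue/enqueue cycle.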
@@ -6611,6 +6773,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	util_est_dequeue(&rq->cfs, p);
 
 	for_each_sched_entity(se) {
+#ifdef CONFIG_SCHED_BORE
+		if (task_sleep) restart_burst(se);
+#endif // CONFIG_SCHED_BORE
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
 
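restart_burst() fires when a task voluntarily sleeps (and, in the yield_task_fair() hunks below, when it yields): the penalty of the just-finished burst is folded into prev_burst_penalty through binary_smooth() and the counters are reset. A userspace sketch of the smoothing with the default settings (smoothness_long = 1, smoothness_short = 0), showing that the retained penalty ramps up at half speed but collapses immediately:

#include <stdio.h>
#include <stdint.h>

/* Userspace mirror of binary_smooth() with the defaults above. */
static uint32_t binary_smooth(uint32_t new, uint32_t old)
{
	int32_t inc = (int32_t)new - (int32_t)old;
	return inc >= 0 ? old + (inc >> 1)      /* rises at half speed */
			: old - (uint32_t)-inc; /* falls immediately   */
}

int main(void)
{
	uint32_t prev = 0;
	/* Per-burst penalties observed across five run/sleep cycles. */
	uint32_t bursts[] = { 80, 80, 80, 0, 0 };
	for (int i = 0; i < 5; i++) {
		prev = binary_smooth(bursts[i], prev); /* restart_burst() */
		printf("cycle %d: smoothed penalty = %u\n", i, prev);
	}
	return 0;
}

This prints 40, 60, 70, 0, 0: a repeat offender's penalty converges toward its per-burst value, while one quiet cycle forgives it entirely.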
@@ -8341,8 +8506,12 @@ static void yield_task_fair(struct rq *rq)
 	/*
 	 * Are we the only task in the tree?
 	 */
-	if (unlikely(rq->nr_running == 1))
+	if (unlikely(rq->nr_running == 1)) {
+#ifdef CONFIG_SCHED_BORE
+		restart_burst(se);
+#endif // CONFIG_SCHED_BORE
 		return;
+	}
 
 	clear_buddies(cfs_rq, se);
 
@@ -8351,6 +8520,10 @@ static void yield_task_fair(struct rq *rq)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
+#ifdef CONFIG_SCHED_BORE
+	restart_burst(se);
+	update_slice_score(se);
+#endif // CONFIG_SCHED_BORE
 	/*
 	 * Tell update_rq_clock() that we've just updated,
 	 * so we don't do microscopic update in schedule()