Lines 19-24
Link Here
|
19 |
* |
19 |
* |
20 |
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra |
20 |
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra |
21 |
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra |
21 |
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra |
|
|
22 |
* |
23 |
* Burst-Oriented Response Enhancer (BORE) CPU Scheduler |
24 |
* Copyright (C) 2021-2023 Masahito Suzuki <firelzrd@gmail.com> |
22 |
*/ |
25 |
*/ |
23 |
#include <linux/energy_model.h> |
26 |
#include <linux/energy_model.h> |
24 |
#include <linux/mmap_lock.h> |
27 |
#include <linux/mmap_lock.h> |
Lines 66-82
Link Here
|
66 |
* SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus) |
69 |
* SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus) |
67 |
* SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus |
70 |
* SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus |
68 |
* |
71 |
* |
69 |
* (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) |
72 |
* (BORE default SCHED_TUNABLESCALING_NONE = *1 constant) |
|
|
73 |
* (EEVDF default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) |
70 |
*/ |
74 |
*/ |
|
|
75 |
#ifdef CONFIG_SCHED_BORE |
76 |
unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; |
77 |
#else // CONFIG_SCHED_BORE |
71 |
unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; |
78 |
unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; |
|
|
79 |
#endif // CONFIG_SCHED_BORE |
72 |
|
80 |
|
73 |
/* |
81 |
/* |
74 |
* Minimal preemption granularity for CPU-bound tasks: |
82 |
* Minimal preemption granularity for CPU-bound tasks: |
75 |
* |
83 |
* |
76 |
* (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) |
84 |
* (BORE default: 3 msec constant, units: nanoseconds) |
|
|
85 |
* (EEVDF default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) |
77 |
*/ |
86 |
*/ |
|
|
87 |
#ifdef CONFIG_SCHED_BORE |
88 |
unsigned int sysctl_sched_base_slice = 3000000ULL; |
89 |
static unsigned int normalized_sysctl_sched_base_slice = 3000000ULL; |
90 |
#else // CONFIG_SCHED_BORE |
78 |
unsigned int sysctl_sched_base_slice = 750000ULL; |
91 |
unsigned int sysctl_sched_base_slice = 750000ULL; |
79 |
static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; |
92 |
static unsigned int normalized_sysctl_sched_base_slice = 750000ULL; |
|
|
93 |
#endif // CONFIG_SCHED_BORE |
80 |
|
94 |
|
81 |
/* |
95 |
/* |
82 |
* After fork, child runs first. If set to 0 (default) then |
96 |
* After fork, child runs first. If set to 0 (default) then |
Lines 86-91
unsigned int sysctl_sched_child_runs_first __read_mostly;
Link Here
|
86 |
|
100 |
|
87 |
const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
101 |
const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
88 |
|
102 |
|
|
|
103 |
#ifdef CONFIG_SCHED_BORE
/*
 * BORE (Burst-Oriented Response Enhancer) tunables, exported through the
 * sched_fair sysctl table below.
 */
unsigned int __read_mostly sched_bore                  = 1;
unsigned int __read_mostly sched_burst_cache_lifetime  = 60000000;
unsigned int __read_mostly sched_burst_penalty_offset  = 22;
unsigned int __read_mostly sched_burst_penalty_scale   = 1280;
unsigned int __read_mostly sched_burst_smoothness_up   = 1;
unsigned int __read_mostly sched_burst_smoothness_down = 0;
unsigned int __read_mostly sched_burst_fork_atavistic  = 2;

/* Range limits consumed by proc_dointvec_minmax() in the sysctl table. */
static int three          = 3;
static int seven          = 7;
static int sixty_four     = 64;
static int maxval_12_bits = 4095;

/* Burst penalty saturates just below 40 in 24.8 fixed point. */
#define MAX_BURST_PENALTY ((40U << 8) - 1)
117 |
|
118 |
/*
 * Convert v into 8.8 fixed point holding 1 + log2(v): the high byte is
 * fls64(v) (i.e. 1 + floor(log2(v)), 0 for v == 0) and the low byte is
 * the next 8 bits of v below the most significant set bit, acting as a
 * fractional part.
 */
static inline u32 log2plus1_u64_u32f8(u64 v) {
	x32 result;
	int msb = fls64(v);
	int shift = msb - 9;

	if (shift >= 0)
		result.u8[0] = v >> shift;
	else
		result.u8[0] = v << -shift;
	result.u8[1] = msb;
	return result.u32;
}
126 |
|
127 |
/*
 * Map an accumulated burst time onto a penalty value in 24.8 fixed
 * point.  Time below the sched_burst_penalty_offset threshold incurs no
 * penalty; the excess is scaled by sched_burst_penalty_scale/1024 and
 * capped at MAX_BURST_PENALTY.
 */
static inline u32 calc_burst_penalty(u64 burst_time) {
	s32 over;
	u32 scaled;

	over = (s32)log2plus1_u64_u32f8(burst_time) -
	       (s32)(sched_burst_penalty_offset << 8);
	if (over < 0)
		over = 0;

	scaled = (u32)over * sched_burst_penalty_scale >> 10;
	return min(MAX_BURST_PENALTY, scaled);
}
137 |
|
138 |
static void update_burst_penalty(struct sched_entity *se) { |
139 |
se->curr_burst_penalty = calc_burst_penalty(se->burst_time); |
140 |
se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty); |
141 |
} |
142 |
|
143 |
/*
 * Scale a vruntime delta by the entity's burst penalty: the integer
 * part of the 24.8 fixed-point penalty (at most 39, see
 * MAX_BURST_PENALTY) indexes sched_prio_to_wmult[] and the delta is
 * multiplied by that weight, >> 22 as elsewhere in the weight math.
 *
 * Extract the integer part with a shift/mask instead of the previous
 * byte-punning through an (x16 *) cast, which read the correct byte
 * only on little-endian hosts; this form is endian-independent and
 * bit-identical on little-endian.
 */
static inline u64 penalty_scale(u64 delta, struct sched_entity *se) {
	u32 score = (se->burst_penalty >> 8) & 0xff;

	return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);
}
147 |
|
148 |
/*
 * Move old toward new by a power-of-two fraction of the difference:
 * shift by sched_burst_smoothness_up when rising, by
 * sched_burst_smoothness_down when falling (a shift of 0 jumps straight
 * to new).
 */
static inline u32 binary_smooth(u32 new, u32 old) {
	int delta = new - old;

	if (delta >= 0)
		return old + (delta >> sched_burst_smoothness_up);

	return old - (-delta >> sched_burst_smoothness_down);
}
154 |
|
155 |
static void restart_burst(struct sched_entity *se) { |
156 |
se->burst_penalty = se->prev_burst_penalty = |
157 |
binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty); |
158 |
se->curr_burst_penalty = 0; |
159 |
se->burst_time = 0; |
160 |
} |
161 |
#endif // CONFIG_SCHED_BORE |
162 |
|
89 |
int sched_thermal_decay_shift; |
163 |
int sched_thermal_decay_shift; |
90 |
static int __init setup_sched_thermal_decay_shift(char *str) |
164 |
static int __init setup_sched_thermal_decay_shift(char *str) |
91 |
{ |
165 |
{ |
Lines 145-150
static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
Link Here
|
145 |
|
219 |
|
146 |
#ifdef CONFIG_SYSCTL |
220 |
#ifdef CONFIG_SYSCTL |
147 |
static struct ctl_table sched_fair_sysctls[] = { |
221 |
static struct ctl_table sched_fair_sysctls[] = { |
|
|
222 |
#ifdef CONFIG_SCHED_BORE |
223 |
{ |
224 |
.procname = "sched_bore", |
225 |
.data = &sched_bore, |
226 |
.maxlen = sizeof(unsigned int), |
227 |
.mode = 0644, |
228 |
.proc_handler = &proc_dointvec_minmax, |
229 |
.extra1 = SYSCTL_ZERO, |
230 |
.extra2 = SYSCTL_ONE, |
231 |
}, |
232 |
{ |
233 |
.procname = "sched_burst_cache_lifetime", |
234 |
.data = &sched_burst_cache_lifetime, |
235 |
.maxlen = sizeof(unsigned int), |
236 |
.mode = 0644, |
237 |
.proc_handler = proc_dointvec, |
238 |
}, |
239 |
{ |
240 |
.procname = "sched_burst_fork_atavistic", |
241 |
.data = &sched_burst_fork_atavistic, |
242 |
.maxlen = sizeof(unsigned int), |
243 |
.mode = 0644, |
244 |
.proc_handler = &proc_dointvec_minmax, |
245 |
.extra1 = SYSCTL_ZERO, |
246 |
.extra2 = &seven, |
247 |
}, |
248 |
{ |
249 |
.procname = "sched_burst_penalty_offset", |
250 |
.data = &sched_burst_penalty_offset, |
251 |
.maxlen = sizeof(unsigned int), |
252 |
.mode = 0644, |
253 |
.proc_handler = &proc_dointvec_minmax, |
254 |
.extra1 = SYSCTL_ZERO, |
255 |
.extra2 = &sixty_four, |
256 |
}, |
257 |
{ |
258 |
.procname = "sched_burst_penalty_scale", |
259 |
.data = &sched_burst_penalty_scale, |
260 |
.maxlen = sizeof(unsigned int), |
261 |
.mode = 0644, |
262 |
.proc_handler = &proc_dointvec_minmax, |
263 |
.extra1 = SYSCTL_ZERO, |
264 |
.extra2 = &maxval_12_bits, |
265 |
}, |
266 |
{ |
267 |
.procname = "sched_burst_smoothness_down", |
268 |
.data = &sched_burst_smoothness_down, |
269 |
.maxlen = sizeof(unsigned int), |
270 |
.mode = 0644, |
271 |
.proc_handler = &proc_dointvec_minmax, |
272 |
.extra1 = SYSCTL_ZERO, |
273 |
.extra2 = &three, |
274 |
}, |
275 |
{ |
276 |
.procname = "sched_burst_smoothness_up", |
277 |
.data = &sched_burst_smoothness_up, |
278 |
.maxlen = sizeof(unsigned int), |
279 |
.mode = 0644, |
280 |
.proc_handler = &proc_dointvec_minmax, |
281 |
.extra1 = SYSCTL_ZERO, |
282 |
.extra2 = &three, |
283 |
}, |
284 |
#endif // CONFIG_SCHED_BORE |
148 |
{ |
285 |
{ |
149 |
.procname = "sched_child_runs_first", |
286 |
.procname = "sched_child_runs_first", |
150 |
.data = &sysctl_sched_child_runs_first, |
287 |
.data = &sysctl_sched_child_runs_first, |
Lines 313-318
static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
Link Here
|
313 |
if (unlikely(se->load.weight != NICE_0_LOAD)) |
450 |
if (unlikely(se->load.weight != NICE_0_LOAD)) |
314 |
delta = __calc_delta(delta, NICE_0_LOAD, &se->load); |
451 |
delta = __calc_delta(delta, NICE_0_LOAD, &se->load); |
315 |
|
452 |
|
|
|
453 |
#ifdef CONFIG_SCHED_BORE |
454 |
if (likely(sched_bore)) delta = penalty_scale(delta, se); |
455 |
#endif // CONFIG_SCHED_BORE |
316 |
return delta; |
456 |
return delta; |
317 |
} |
457 |
} |
318 |
|
458 |
|
Lines 668-674
void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
Link Here
|
668 |
* Specifically: avg_runtime() + 0 must result in entity_eligible() := true |
808 |
* Specifically: avg_runtime() + 0 must result in entity_eligible() := true |
669 |
* For this to be so, the result of this function must have a left bias. |
809 |
* For this to be so, the result of this function must have a left bias. |
670 |
*/ |
810 |
*/ |
671 |
u64 avg_vruntime(struct cfs_rq *cfs_rq) |
811 |
static u64 avg_key(struct cfs_rq *cfs_rq) |
672 |
{ |
812 |
{ |
673 |
struct sched_entity *curr = cfs_rq->curr; |
813 |
struct sched_entity *curr = cfs_rq->curr; |
674 |
s64 avg = cfs_rq->avg_vruntime; |
814 |
s64 avg = cfs_rq->avg_vruntime; |
Lines 688-694
u64 avg_vruntime(struct cfs_rq *cfs_rq)
Link Here
|
688 |
avg = div_s64(avg, load); |
828 |
avg = div_s64(avg, load); |
689 |
} |
829 |
} |
690 |
|
830 |
|
691 |
return cfs_rq->min_vruntime + avg; |
831 |
return avg; |
|
|
832 |
} |
833 |
|
834 |
inline u64 avg_vruntime(struct cfs_rq *cfs_rq) { |
835 |
return cfs_rq->min_vruntime + avg_key(cfs_rq); |
692 |
} |
836 |
} |
693 |
|
837 |
|
694 |
/* |
838 |
/* |
Lines 981-987
static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
Link Here
|
981 |
return se; |
1125 |
return se; |
982 |
} |
1126 |
} |
983 |
|
1127 |
|
984 |
#ifdef CONFIG_SCHED_DEBUG |
|
|
985 |
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) |
1128 |
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) |
986 |
{ |
1129 |
{ |
987 |
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root); |
1130 |
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root); |
Lines 995-1000
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
Link Here
|
995 |
/************************************************************** |
1138 |
/************************************************************** |
996 |
* Scheduling class statistics methods: |
1139 |
* Scheduling class statistics methods: |
997 |
*/ |
1140 |
*/ |
|
|
1141 |
#ifdef CONFIG_SCHED_DEBUG |
998 |
#ifdef CONFIG_SMP |
1142 |
#ifdef CONFIG_SMP |
999 |
int sched_update_scaling(void) |
1143 |
int sched_update_scaling(void) |
1000 |
{ |
1144 |
{ |
Lines 1173-1179
static void update_curr(struct cfs_rq *cfs_rq)
Link Here
|
1173 |
curr->sum_exec_runtime += delta_exec; |
1317 |
curr->sum_exec_runtime += delta_exec; |
1174 |
schedstat_add(cfs_rq->exec_clock, delta_exec); |
1318 |
schedstat_add(cfs_rq->exec_clock, delta_exec); |
1175 |
|
1319 |
|
1176 |
curr->vruntime += calc_delta_fair(delta_exec, curr); |
1320 |
#ifdef CONFIG_SCHED_BORE |
|
|
1321 |
curr->burst_time += delta_exec; |
1322 |
update_burst_penalty(curr); |
1323 |
#endif // CONFIG_SCHED_BORE |
1324 |
curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr)); |
1177 |
update_deadline(cfs_rq, curr); |
1325 |
update_deadline(cfs_rq, curr); |
1178 |
update_min_vruntime(cfs_rq); |
1326 |
update_min_vruntime(cfs_rq); |
1179 |
|
1327 |
|
Lines 5053-5058
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
Link Here
|
5053 |
if (WARN_ON_ONCE(!load)) |
5201 |
if (WARN_ON_ONCE(!load)) |
5054 |
load = 1; |
5202 |
load = 1; |
5055 |
lag = div_s64(lag, load); |
5203 |
lag = div_s64(lag, load); |
|
|
5204 |
|
5205 |
#ifdef CONFIG_SCHED_BORE |
5206 |
if (flags & ENQUEUE_MIGRATED && likely(sched_bore)) { |
5207 |
struct sched_entity *last, *first; |
5208 |
s64 left_vruntime = vruntime, right_vruntime = vruntime; |
5209 |
|
5210 |
if (first = __pick_first_entity(cfs_rq)) |
5211 |
left_vruntime = first->vruntime; |
5212 |
|
5213 |
if (last = __pick_last_entity(cfs_rq)) |
5214 |
right_vruntime = last->vruntime; |
5215 |
|
5216 |
lag = clamp(lag, |
5217 |
(s64)vruntime - right_vruntime, |
5218 |
(s64)vruntime - left_vruntime); |
5219 |
} |
5220 |
#endif // CONFIG_SCHED_BORE |
5056 |
} |
5221 |
} |
5057 |
|
5222 |
|
5058 |
se->vruntime = vruntime - lag; |
5223 |
se->vruntime = vruntime - lag; |
Lines 6611-6616
static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
Link Here
|
6611 |
util_est_dequeue(&rq->cfs, p); |
6776 |
util_est_dequeue(&rq->cfs, p); |
6612 |
|
6777 |
|
6613 |
for_each_sched_entity(se) { |
6778 |
for_each_sched_entity(se) { |
|
|
6779 |
#ifdef CONFIG_SCHED_BORE |
6780 |
if (task_sleep) restart_burst(se); |
6781 |
#endif // CONFIG_SCHED_BORE |
6614 |
cfs_rq = cfs_rq_of(se); |
6782 |
cfs_rq = cfs_rq_of(se); |
6615 |
dequeue_entity(cfs_rq, se, flags); |
6783 |
dequeue_entity(cfs_rq, se, flags); |
6616 |
|
6784 |
|
Lines 8341-8348
static void yield_task_fair(struct rq *rq)
Link Here
|
8341 |
/* |
8509 |
/* |
8342 |
* Are we the only task in the tree? |
8510 |
* Are we the only task in the tree? |
8343 |
*/ |
8511 |
*/ |
8344 |
if (unlikely(rq->nr_running == 1)) |
8512 |
if (unlikely(rq->nr_running == 1)) { |
|
|
8513 |
#ifdef CONFIG_SCHED_BORE |
8514 |
restart_burst(se); |
8515 |
#endif // CONFIG_SCHED_BORE |
8345 |
return; |
8516 |
return; |
|
|
8517 |
} |
8346 |
|
8518 |
|
8347 |
clear_buddies(cfs_rq, se); |
8519 |
clear_buddies(cfs_rq, se); |
8348 |
|
8520 |
|
Lines 8351-8356
static void yield_task_fair(struct rq *rq)
Link Here
|
8351 |
* Update run-time statistics of the 'current'. |
8523 |
* Update run-time statistics of the 'current'. |
8352 |
*/ |
8524 |
*/ |
8353 |
update_curr(cfs_rq); |
8525 |
update_curr(cfs_rq); |
|
|
8526 |
#ifdef CONFIG_SCHED_BORE |
8527 |
restart_burst(se); |
8528 |
#endif // CONFIG_SCHED_BORE |
8354 |
/* |
8529 |
/* |
8355 |
* Tell update_rq_clock() that we've just updated, |
8530 |
* Tell update_rq_clock() that we've just updated, |
8356 |
* so we don't do microscopic update in schedule() |
8531 |
* so we don't do microscopic update in schedule() |