@@ -19,6 +19,9 @@
  *
  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ *  Burst-Oriented Response Enhancer (BORE) CPU Scheduler
+ *  Copyright (C) 2021-2023 Masahito Suzuki <firelzrd@gmail.com>
  */
 #include <linux/energy_model.h>
 #include <linux/mmap_lock.h>
@@ -66,17 +69,28 @@
  * SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
  * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
  *
- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
+ * (BORE default SCHED_TUNABLESCALING_NONE = *1 constant)
+ * (EEVDF default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
  */
+#ifdef CONFIG_SCHED_BORE
+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
+#else // CONFIG_SCHED_BORE
 unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
+#endif // CONFIG_SCHED_BORE
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  *
- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (BORE default: 3 msec constant, units: nanoseconds)
+ * (EEVDF default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
+#ifdef CONFIG_SCHED_BORE
+unsigned int sysctl_sched_base_slice = 3000000ULL;
+static unsigned int normalized_sysctl_sched_base_slice = 3000000ULL;
+#else // CONFIG_SCHED_BORE
 unsigned int sysctl_sched_base_slice = 750000ULL;
 static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
+#endif // CONFIG_SCHED_BORE
 
 /*
  * After fork, child runs first. If set to 0 (default) then
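A note on the two defaults above: with SCHED_TUNABLESCALING_LOG, EEVDF scales the base slice as 0.75 msec * (1 + ilog(ncpus)), so an 8-CPU machine gets 0.75 * (1 + 3) = 3 msec, which is exactly the constant BORE pins. By switching to SCHED_TUNABLESCALING_NONE, BORE keeps the 3 msec slice on any topology instead of letting it grow with CPU count.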
@@ -86,6 +100,65 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
 
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
+#ifdef CONFIG_SCHED_BORE
+unsigned int __read_mostly sched_bore = 1;
+unsigned int __read_mostly sched_burst_cache_lifetime = 60000000;
+unsigned int __read_mostly sched_burst_penalty_offset = 22;
+unsigned int __read_mostly sched_burst_penalty_scale = 1280;
+unsigned int __read_mostly sched_burst_smoothness_up = 1;
+unsigned int __read_mostly sched_burst_smoothness_down = 0;
+unsigned int __read_mostly sched_burst_fork_atavistic = 2;
+static int three = 3;
+static int sixty_four = 64;
+static int maxval_12_bits = 4095;
+
+#define MAX_BURST_PENALTY ((40U << 8) - 1)
+
+static inline u32 log2plus1_u64_u32f8(u64 v) {
+	x32 result;
+	int msb = fls64(v);
+	int excess_bits = msb - 9;
+	result.u8[0] = (0 <= excess_bits)? v >> excess_bits: v << -excess_bits;
+	result.u8[1] = msb;
+	return result.u32;
+}
+
+static inline u32 calc_burst_penalty(u64 burst_time) {
+	u32 greed, tolerance, penalty, scaled_penalty;
+
+	greed = log2plus1_u64_u32f8(burst_time);
+	tolerance = sched_burst_penalty_offset << 8;
+	penalty = max(0, (s32)greed - (s32)tolerance);
+	scaled_penalty = penalty * sched_burst_penalty_scale >> 10;
+
+	return min(MAX_BURST_PENALTY, scaled_penalty);
+}
+
+static void update_burst_penalty(struct sched_entity *se) {
+	se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
+	se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
+}
+
+static inline u64 penalty_scale(u64 delta, struct sched_entity *se) {
+	u32 score = ((x16*)&se->burst_penalty)->u8[1];
+	return mul_u64_u32_shr(delta, sched_prio_to_wmult[score], 22);
+}
+
+static inline u32 binary_smooth(u32 new, u32 old) {
+	int increment = new - old;
+	return (0 <= increment)?
+		old + ( increment >> sched_burst_smoothness_up):
+		old - (-increment >> sched_burst_smoothness_down);
+}
+
+static void restart_burst(struct sched_entity *se) {
+	se->burst_penalty = se->prev_burst_penalty =
+		binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
+	se->curr_burst_penalty = 0;
+	se->burst_time = 0;
+}
+#endif // CONFIG_SCHED_BORE
+
 int sched_thermal_decay_shift;
 static int __init setup_sched_thermal_decay_shift(char *str)
 {
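To make the fixed-point bookkeeping above concrete, here is a minimal userspace sketch of the penalty path. It assumes a little-endian layout for the kernel's x32 union (so writing u8[0] and u8[1] builds an 8.8 fixed-point value) and stands in for fls64() with a GCC builtin; it illustrates the math, it is not the kernel code itself.

#include <stdio.h>
#include <stdint.h>

/* Stand-in for the kernel's fls64(): index of the most significant set bit. */
static int fls64(uint64_t v) { return v ? 64 - __builtin_clzll(v) : 0; }

/* 8.8 fixed-point log2(v)+1: integer part is fls64(v), fractional byte is
 * the next 8 bits below the MSB (mirrors log2plus1_u64_u32f8() above). */
static uint32_t log2plus1_u64_u32f8(uint64_t v)
{
	int msb = fls64(v);
	int excess_bits = msb - 9;
	uint8_t frac = (0 <= excess_bits) ? v >> excess_bits : v << -excess_bits;
	return ((uint32_t)msb << 8) | frac;
}

static uint32_t calc_burst_penalty(uint64_t burst_time,
				   uint32_t offset, uint32_t scale)
{
	uint32_t greed = log2plus1_u64_u32f8(burst_time);
	uint32_t tolerance = offset << 8;
	int32_t diff = (int32_t)greed - (int32_t)tolerance;
	uint32_t penalty = diff > 0 ? (uint32_t)diff : 0;
	uint32_t scaled = penalty * scale >> 10;
	uint32_t max_penalty = (40U << 8) - 1;	/* MAX_BURST_PENALTY */
	return scaled < max_penalty ? scaled : max_penalty;
}

int main(void)
{
	/* Defaults from above: offset 22, scale 1280 (i.e. x1.25). */
	uint64_t ns[] = { 1000000ULL, 4000000ULL, 100000000ULL, 10000000000ULL };
	for (int i = 0; i < 4; i++) {
		uint32_t p = calc_burst_penalty(ns[i], 22, 1280);
		printf("burst %11llu ns -> penalty %2u.%02u\n",
		       (unsigned long long)ns[i], p >> 8, (p & 0xff) * 100 / 256);
	}
	return 0;
}

With the default offset of 22, bursts shorter than 2^22 ns (about 4.2 ms) incur no penalty at all; above that, the penalty grows with the log2 of the burst length, scaled by 1280/1024 = 1.25 and capped just below 40.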
@@ -145,6 +218,69 @@ static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table sched_fair_sysctls[] = {
+#ifdef CONFIG_SCHED_BORE
+	{
+		.procname	= "sched_bore",
+		.data		= &sched_bore,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "sched_burst_cache_lifetime",
+		.data		= &sched_burst_cache_lifetime,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_burst_fork_atavistic",
+		.data		= &sched_burst_fork_atavistic,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &three,
+	},
+	{
+		.procname	= "sched_burst_penalty_offset",
+		.data		= &sched_burst_penalty_offset,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &sixty_four,
+	},
+	{
+		.procname	= "sched_burst_penalty_scale",
+		.data		= &sched_burst_penalty_scale,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &maxval_12_bits,
+	},
+	{
+		.procname	= "sched_burst_smoothness_down",
+		.data		= &sched_burst_smoothness_down,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &three,
+	},
+	{
+		.procname	= "sched_burst_smoothness_up",
+		.data		= &sched_burst_smoothness_up,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &three,
+	},
+#endif // CONFIG_SCHED_BORE
 	{
 		.procname	= "sched_child_runs_first",
 		.data		= &sysctl_sched_child_runs_first,
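All seven knobs are plain unsigned-int sysctls registered in the "kernel" namespace, with ranges enforced via extra1/extra2: sched_bore is 0..1, sched_burst_fork_atavistic and both smoothness knobs are 0..3, penalty_offset is 0..64, penalty_scale is 0..4095. From a shell that is e.g. `sysctl -w kernel.sched_bore=0`; the equivalent in C, a minimal sketch assuming the patch is applied so the procfs path exists:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_bore", "w");
	if (!f) {
		/* Likely a kernel built without CONFIG_SCHED_BORE. */
		perror("open sched_bore");
		return 1;
	}
	fputs("0\n", f);	/* 0 = plain EEVDF behaviour, 1 = BORE enabled */
	return fclose(f) ? 1 : 0;
}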
@@ -313,6 +449,9 @@ static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
 	if (unlikely(se->load.weight != NICE_0_LOAD))
 		delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
 
+#ifdef CONFIG_SCHED_BORE
+	if (likely(sched_bore)) delta = penalty_scale(delta, se);
+#endif // CONFIG_SCHED_BORE
 	return delta;
 }
 
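penalty_scale() reads the top byte of the 40.8 fixed-point burst_penalty as an integer score of 0..39 and maps it through sched_prio_to_wmult[], the same inverse-weight table used for nice levels. Since sched_prio_to_wmult[20] is 4194304 = 2^22, the >> 22 makes score 20 the neutral point: deltas pass through unchanged there, lower scores slow vruntime accrual and higher scores speed it up by the same ratios that separate nice levels. A small sketch with a few table entries (values copied from the mainline table, so treat them as an assumption of this illustration):

#include <stdio.h>
#include <stdint.h>

/* Selected entries of the kernel's sched_prio_to_wmult[]; the rest are
 * left zero since only these demo scores are exercised. */
static const uint32_t wmult[40] = {
	[0] = 48388, [10] = 449829, [20] = 4194304,
	[30] = 39045157, [39] = 286331153,
};

/* Mirrors penalty_scale(): mul_u64_u32_shr(delta, wmult[score], 22).
 * (The kernel helper uses a 128-bit intermediate; for a 1 ms delta the
 * product fits comfortably in 64 bits, so a plain multiply suffices.) */
static uint64_t penalty_scale(uint64_t delta, uint32_t score)
{
	return (delta * wmult[score]) >> 22;
}

int main(void)
{
	uint32_t scores[] = { 0, 10, 20, 30, 39 };
	for (int i = 0; i < 5; i++)
		printf("score %2u: 1 ms charged as %8llu ns\n", scores[i],
		       (unsigned long long)penalty_scale(1000000ULL, scores[i]));
	return 0;
}

So a fully bursty task (score 39) is charged roughly 68x the wall-clock delta, while a task that never bursts is charged about 1/87 of it; only the ratios between tasks matter, since every entity on the queue is scaled by the same rule.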
@@ -668,7 +807,7 @@ void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
  * Specifically: avg_runtime() + 0 must result in entity_eligible() := true
  * For this to be so, the result of this function must have a left bias.
  */
-u64 avg_vruntime(struct cfs_rq *cfs_rq)
+static u64 avg_key(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 avg = cfs_rq->avg_vruntime;
@@ -688,7 +827,11 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 		avg = div_s64(avg, load);
 	}
 
-	return cfs_rq->min_vruntime + avg;
+	return avg;
+}
+
+inline u64 avg_vruntime(struct cfs_rq *cfs_rq) {
+	return cfs_rq->min_vruntime + avg_key(cfs_rq);
 }
 
 /*
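The refactor in the two hunks above is behavior-preserving: avg_key() now returns the load-weighted average of the entities' keys (their vruntime offsets relative to min_vruntime), and avg_vruntime() becomes a thin wrapper that adds the base back. For example, with two queued entities of weight 1 and 3 at keys +8 and 0, avg_key() yields (1*8 + 3*0) / 4 = 2 and avg_vruntime() returns min_vruntime + 2. Presumably the split exists so BORE-side callers can work with the relative key directly, without the min_vruntime offset folded in.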
@@ -981,7 +1124,6 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 	return se;
 }
 
-#ifdef CONFIG_SCHED_DEBUG
 struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);
@@ -995,6 +1137,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 /**************************************************************
  * Scheduling class statistics methods:
  */
+#ifdef CONFIG_SCHED_DEBUG
 #ifdef CONFIG_SMP
 int sched_update_scaling(void)
 {
@@ -1173,7 +1316,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq->exec_clock, delta_exec);
 
-	curr->vruntime += calc_delta_fair(delta_exec, curr);
+#ifdef CONFIG_SCHED_BORE
+	curr->burst_time += delta_exec;
+	update_burst_penalty(curr);
+#endif // CONFIG_SCHED_BORE
+	curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr));
 	update_deadline(cfs_rq, curr);
 	update_min_vruntime(cfs_rq);
 
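Two things are worth spelling out in the update_curr() change: burst_time accumulates raw execution time for the current burst and refreshes the penalty on every accounting pass, and the new max(1ULL, ...) guarantees vruntime always advances, even when penalty scaling rounds a small weighted delta down to zero for a low-score task. The sketch below (same defaults as above: smoothness_up = 1, smoothness_down = 0) shows the other end of the lifecycle, where restart_burst() folds a finished burst into prev_burst_penalty through binary_smooth():

#include <stdio.h>
#include <stdint.h>

/* Mirrors binary_smooth() above, with the shift amounts as parameters. */
static uint32_t binary_smooth(uint32_t new, uint32_t old,
			      unsigned up, unsigned down)
{
	int32_t inc = (int32_t)new - (int32_t)old;
	return inc >= 0 ? old + (uint32_t)(inc >> up)
			: old - (uint32_t)((-inc) >> down);
}

int main(void)
{
	uint32_t prev = 0;
	/* curr_burst_penalty at the end of three successive bursts,
	 * in 8.8 fixed point (2560 == score 10.0, 256 == score 1.0). */
	uint32_t bursts[] = { 2560, 2560, 256 };
	for (int i = 0; i < 3; i++) {
		prev = binary_smooth(bursts[i], prev, 1, 0);
		printf("after burst %d: prev_burst_penalty = %u.%02u\n",
		       i + 1, prev >> 8, (prev & 0xff) * 100 / 256);
	}
	return 0;
}

The asymmetry is deliberate: a rising penalty is adopted only halfway per burst (increment >> 1), while a falling one is adopted in full (decrement >> 0), so a task that stops bursting sheds its penalty quickly.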
@@ -5053,6 +5200,23 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		if (WARN_ON_ONCE(!load))
 			load = 1;
 		lag = div_s64(lag, load);
+
+#ifdef CONFIG_SCHED_BORE
+		if (flags & ENQUEUE_MIGRATED && likely(sched_bore)) {
+			struct sched_entity *last, *first;
+			s64 left_vruntime = vruntime, right_vruntime = vruntime;
+
+			if ((first = __pick_first_entity(cfs_rq)))
+				left_vruntime = first->vruntime;
+
+			if ((last = __pick_last_entity(cfs_rq)))
+				right_vruntime = last->vruntime;
+
+			lag = clamp(lag,
+				(s64)vruntime - right_vruntime,
+				(s64)vruntime - left_vruntime);
+		}
+#endif // CONFIG_SCHED_BORE
 	}
 
 	se->vruntime = vruntime - lag;
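The clamp above keeps a migrated entity's remembered lag meaningful on its new queue: the lag was computed against the source CPU's vruntime scale, so it is bounded to [vruntime - right_vruntime, vruntime - left_vruntime]. Since the placement below is se->vruntime = vruntime - lag, that bound is exactly what pins the entity between the leftmost and rightmost entities already queued. For example, if the local tree spans vruntime 100..140 and the insertion point is 120, lag is clamped to [-20, +20] and the entity lands somewhere in 100..140 rather than far outside the pack.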
@@ -6611,6 +6775,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	util_est_dequeue(&rq->cfs, p);
 
 	for_each_sched_entity(se) {
+#ifdef CONFIG_SCHED_BORE
+		if (task_sleep) restart_burst(se);
+#endif // CONFIG_SCHED_BORE
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
 
@@ -8341,8 +8508,12 @@ static void yield_task_fair(struct rq *rq)
 	/*
 	 * Are we the only task in the tree?
 	 */
-	if (unlikely(rq->nr_running == 1))
+	if (unlikely(rq->nr_running == 1)) {
+#ifdef CONFIG_SCHED_BORE
+		restart_burst(se);
+#endif // CONFIG_SCHED_BORE
 		return;
+	}
 
 	clear_buddies(cfs_rq, se);
 
@@ -8351,6 +8522,9 @@ static void yield_task_fair(struct rq *rq)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
+#ifdef CONFIG_SCHED_BORE
+	restart_burst(se);
+#endif // CONFIG_SCHED_BORE
 	/*
 	 * Tell update_rq_clock() that we've just updated,
 	 * so we don't do microscopic update in schedule()