Patch from Gentoo bug 916954.
(-)a/include/linux/sched.h (+11 lines)
Lines 562-567 struct sched_entity {
 	u64				sum_exec_runtime;
 	u64				prev_sum_exec_runtime;
 	u64				vruntime;
+#ifdef CONFIG_SCHED_BORE
+	u64				burst_time;
+	u8				prev_burst_penalty;
+	u8				curr_burst_penalty;
+	u8				burst_penalty;
+	u8				slice_score;
+	u8				child_burst;
+	u16				child_burst_cnt;
+	u64				child_burst_last_cached;
+	u32				slice_load;
+#endif // CONFIG_SCHED_BORE
 	s64				vlag;
 	u64				slice;
 
(-)a/init/Kconfig (+19 lines)
Lines 1258-1263 config CHECKPOINT_RESTORE
 
 	  If unsure, say N here.
 
+config SCHED_BORE
+	bool "Burst-Oriented Response Enhancer"
+	default y
+	help
+	  In Desktop and Mobile computing, one might prefer interactive
+	  tasks to keep responsive no matter what they run in the background.
+
+	  Enabling this kernel feature modifies the scheduler to discriminate
+	  tasks by their burst time (runtime since it last went sleeping or
+	  yielding state) and prioritize those that run less bursty.
+	  Such tasks usually include window compositor, widgets backend,
+	  terminal emulator, video playback, games and so on.
+	  With a little impact to scheduling fairness, it may improve
+	  responsiveness especially under heavy background workload.
+
+	  You can turn it off by setting the sysctl kernel.sched_bore = 0.
+
+	  If unsure, say Y here.
+
 config SCHED_AUTOGROUP
 	bool "Automatic process group scheduling"
 	select CGROUPS
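The kernel.sched_bore sysctl named in the help text above can be flipped at runtime from userspace. A minimal sketch, assuming the usual /proc/sys/kernel/sched_bore path for a "kernel." sysctl and root privileges; illustration only, not part of the patch:

/* Sketch: toggle BORE at runtime via the sched_bore sysctl described above.
 * Assumes it is exposed as /proc/sys/kernel/sched_bore; needs root. */
#include <stdio.h>

static int set_sched_bore(int enable)
{
	FILE *f = fopen("/proc/sys/kernel/sched_bore", "w");
	if (!f) {
		perror("sched_bore");
		return -1;
	}
	fprintf(f, "%d\n", enable ? 1 : 0);
	return fclose(f);
}

int main(void)
{
	return set_sched_bore(0) ? 1 : 0;	/* 0 = turn BORE off, as the help text says */
}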
(-)a/kernel/sched/core.c (+140 lines)
Lines 4480-4485 int wake_up_state(struct task_struct *p, unsigned int state)
 	return try_to_wake_up(p, state, 0);
 }
 
+#ifdef CONFIG_SCHED_BORE
+extern bool sched_bore;
+extern u8   sched_burst_fork_atavistic;
+extern uint sched_burst_cache_lifetime;
+
+void __init sched_init_bore(void) {
+	init_task.se.burst_time = 0;
+	init_task.se.prev_burst_penalty = 0;
+	init_task.se.curr_burst_penalty = 0;
+	init_task.se.burst_penalty = 0;
+	init_task.se.slice_score = 0;
+	init_task.se.child_burst_last_cached = 0;
+	init_task.se.slice_load = 0;
+}
+
+void inline sched_fork_bore(struct task_struct *p) {
+	p->se.burst_time = 0;
+	p->se.curr_burst_penalty = 0;
+	p->se.slice_score = 0;
+	p->se.child_burst_last_cached = 0;
+	p->se.slice_load = 0;
+}
+
+static u32 count_child_tasks(struct task_struct *p) {
+	struct task_struct *child;
+	u32 cnt = 0;
+	list_for_each_entry(child, &p->children, sibling) {cnt++;}
+	return cnt;
+}
+
+static inline bool child_burst_cache_expired(struct task_struct *p, u64 now) {
+	return (p->se.child_burst_last_cached + sched_burst_cache_lifetime < now);
+}
+
+static void __update_child_burst_cache(
+	struct task_struct *p, u32 cnt, u32 sum, u64 now) {
+	u8 avg = 0;
+	if (cnt) avg = sum / cnt;
+	p->se.child_burst = max(avg, p->se.burst_penalty);
+	p->se.child_burst_cnt = cnt;
+	p->se.child_burst_last_cached = now;
+}
+
+static inline void update_child_burst_direct(struct task_struct *p, u64 now) {
+	struct task_struct *child;
+	u32 cnt = 0;
+	u32 sum = 0;
+
+	list_for_each_entry(child, &p->children, sibling) {
+		if (child->sched_class != &fair_sched_class) continue;
+		cnt++;
+		sum += child->se.burst_penalty;
+	}
+
+	__update_child_burst_cache(p, cnt, sum, now);
+}
+
+static inline u8 __inherit_burst_direct(struct task_struct *p, u64 now) {
+	struct task_struct *parent = p->real_parent;
+	if (child_burst_cache_expired(parent, now))
+		update_child_burst_direct(parent, now);
+
+	return parent->se.child_burst;
+}
+
+static inline void update_child_burst_topological(
+	struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) {
+	struct task_struct *child, *dec;
+	u32 cnt = 0, dcnt = 0;
+	u32 sum = 0;
+
+	list_for_each_entry(child, &p->children, sibling) {
+		dec = child;
+		while ((dcnt = count_child_tasks(dec)) == 1)
+			dec = list_first_entry(&dec->children, struct task_struct, sibling);
+
+		if (!dcnt || !depth) {
+			if (dec->sched_class != &fair_sched_class) continue;
+			cnt++;
+			sum += dec->se.burst_penalty;
+			continue;
+		}
+		if (!child_burst_cache_expired(dec, now)) {
+			cnt += dec->se.child_burst_cnt;
+			sum += (u32)dec->se.child_burst * dec->se.child_burst_cnt;
+			continue;
+		}
+		update_child_burst_topological(dec, now, depth - 1, &cnt, &sum);
+	}
+
+	__update_child_burst_cache(p, cnt, sum, now);
+	*acnt += cnt;
+	*asum += sum;
+}
+
+static inline u8 __inherit_burst_topological(struct task_struct *p, u64 now) {
+	struct task_struct *anc = p->real_parent;
+	u32 cnt = 0, sum = 0;
+
+	while (anc->real_parent != anc && count_child_tasks(anc) == 1)
+		anc = anc->real_parent;
+
+	if (child_burst_cache_expired(anc, now))
+		update_child_burst_topological(
+			anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum);
+
+	return anc->se.child_burst;
+}
+
+static inline void inherit_burst(struct task_struct *p) {
+	u8 burst_cache;
+	u64 now = ktime_get_ns();
+
+	read_lock(&tasklist_lock);
+	burst_cache = likely(sched_burst_fork_atavistic)?
+		__inherit_burst_topological(p, now):
+		__inherit_burst_direct(p, now);
+	read_unlock(&tasklist_lock);
+
+	p->se.prev_burst_penalty = max(p->se.prev_burst_penalty, burst_cache);
+}
+
+static inline void sched_post_fork_bore(struct task_struct *p) {
+	if (p->sched_class == &fair_sched_class && likely(sched_bore))
+		inherit_burst(p);
+	p->se.burst_penalty = p->se.prev_burst_penalty;
+}
+#endif // CONFIG_SCHED_BORE
+
 /*
  * Perform scheduler related setup for a newly forked process p.
  * p is forked by current.
Lines 4496-4501 static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
+#ifdef CONFIG_SCHED_BORE
+	sched_fork_bore(p);
+#endif // CONFIG_SCHED_BORE
 	p->se.vlag			= 0;
 	p->se.slice			= sysctl_sched_base_slice;
 	INIT_LIST_HEAD(&p->se.group_node);
Lines 4815-4820 void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 
 void sched_post_fork(struct task_struct *p)
 {
+#ifdef CONFIG_SCHED_BORE
+	sched_post_fork_bore(p);
+#endif // CONFIG_SCHED_BORE
 	uclamp_post_fork(p);
 }
 
Lines 9885-9890 void __init sched_init(void)
 	BUG_ON(&dl_sched_class != &stop_sched_class + 1);
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+	sched_init_bore();
+	printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 4.1.3 by Masahito Suzuki");
+#endif // CONFIG_SCHED_BORE
+
 	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
(-)a/kernel/sched/debug.c (-10 / +65 lines)
Lines 167-173 static const struct file_operations sched_feat_fops = {
 };
 
 #ifdef CONFIG_SMP
+#ifdef CONFIG_SCHED_BORE
+static ssize_t sched_min_base_slice_write(struct file *filp, const char __user *ubuf,
+				   size_t cnt, loff_t *ppos)
+{
+	char buf[16];
+	unsigned int value;
+
+	if (cnt > 15)
+		cnt = 15;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+	buf[cnt] = '\0';
+
+	if (kstrtouint(buf, 10, &value))
+		return -EINVAL;
 
+	if (!value)
+		return -EINVAL;
+
+	sysctl_sched_min_base_slice = value;
+	sched_update_min_base_slice();
+
+	*ppos += cnt;
+	return cnt;
+}
+
+static int sched_min_base_slice_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%d\n", sysctl_sched_min_base_slice);
+	return 0;
+}
+
+static int sched_min_base_slice_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_min_base_slice_show, NULL);
+}
+
+static const struct file_operations sched_min_base_slice_fops = {
+	.open		= sched_min_base_slice_open,
+	.write		= sched_min_base_slice_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#else // CONFIG_SCHED_BORE
 static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
 				   size_t cnt, loff_t *ppos)
 {
Lines 213-219 static const struct file_operations sched_scaling_fops = {
 	.llseek		= seq_lseek,
 	.release	= single_release,
 };
-
+#endif // CONFIG_SCHED_BORE
 #endif /* SMP */
 
 #ifdef CONFIG_PREEMPT_DYNAMIC
Lines 355-369
 #endif
 
 #ifndef CONFIG_SCHED_ALT
-	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
-
-	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
-	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
-
-#ifdef CONFIG_SMP
-	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
-	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
-	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
+#ifdef CONFIG_SCHED_BORE
+	debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops);
+	debugfs_create_u32("base_slice_ns", 0400, debugfs_sched, &sysctl_sched_base_slice);
+#else // CONFIG_SCHED_BORE
+ 	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
+#endif // CONFIG_SCHED_BORE
+ 
+ 	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
+ 	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
+ 
+ #ifdef CONFIG_SMP
+#if !defined(CONFIG_SCHED_BORE)
+ 	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
+#endif // CONFIG_SCHED_BORE
+ 	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
+ 	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
 
 	mutex_lock(&sched_domains_mutex);
 	update_sched_domain_debugfs();
Lines 595-600 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
 		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
 
+#ifdef CONFIG_SCHED_BORE
+	SEQ_printf(m, " %2d", p->se.slice_score);
+#endif // CONFIG_SCHED_BORE
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
 #endif
(-)a/kernel/sched/fair.c (-20 / +253 lines)
Lines 19-24
  *
  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
+ *
+ *  Burst-Oriented Response Enhancer (BORE) CPU Scheduler
+ *  Copyright (C) 2021-2024 Masahito Suzuki <firelzrd@gmail.com>
  */
 #include <linux/energy_model.h>
 #include <linux/mmap_lock.h>
Lines 64-83
  *   SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
  *   SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
  *
- * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
+ * (BORE  default SCHED_TUNABLESCALING_NONE = *1 constant)
+ * (EEVDF default SCHED_TUNABLESCALING_LOG  = *(1+ilog(ncpus))
  */
+#ifdef CONFIG_SCHED_BORE
+unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
+#else // CONFIG_SCHED_BORE
 unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
+#endif // CONFIG_SCHED_BORE
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
  *
- * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
+ * (BORE  default: max(1 sec / HZ, min_base_slice) constant, units: nanoseconds)
+ * (EEVDF default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
+#ifdef CONFIG_SCHED_BORE
+unsigned int            sysctl_sched_base_slice = 1000000000ULL / HZ;
+static unsigned int configured_sched_base_slice = 1000000000ULL / HZ;
+unsigned int        sysctl_sched_min_base_slice =    2000000ULL;
+#else // CONFIG_SCHED_BORE
 unsigned int sysctl_sched_base_slice			= 750000ULL;
 static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
+#endif // CONFIG_SCHED_BORE
 
 const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
 
+#ifdef CONFIG_SCHED_BORE
+bool __read_mostly sched_bore                   = 1;
+bool __read_mostly sched_burst_score_rounding   = 0;
+bool __read_mostly sched_burst_smoothness_long  = 1;
+bool __read_mostly sched_burst_smoothness_short = 0;
+u8   __read_mostly sched_burst_fork_atavistic   = 2;
+u8   __read_mostly sched_burst_penalty_offset   = 22;
+uint __read_mostly sched_burst_penalty_scale    = 1280;
+uint __read_mostly sched_burst_cache_lifetime   = 60000000;
+static u8   sixty_four     = 64;
+static uint maxval_12_bits = 4095;
+
+#define MAX_BURST_PENALTY (39U <<2)
+
+static inline u32 log2plus1_u64_u32f8(u64 v) {
+	u32 msb = fls64(v);
+	s32 excess_bits = msb - 9;
+    u8 fractional = (0 <= excess_bits)? v >> excess_bits: v << -excess_bits;
+	return msb << 8 | fractional;
+}
+
+static inline u32 calc_burst_penalty(u64 burst_time) {
+	u32 greed, tolerance, penalty, scaled_penalty;
+	
+	greed = log2plus1_u64_u32f8(burst_time);
+	tolerance = sched_burst_penalty_offset << 8;
+	penalty = max(0, (s32)greed - (s32)tolerance);
+	scaled_penalty = penalty * sched_burst_penalty_scale >> 16;
+
+	return min(MAX_BURST_PENALTY, scaled_penalty);
+}
+
+static inline void update_burst_penalty(struct sched_entity *se) {
+	se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
+	se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
+}
+
+static inline u64 scale_slice(u64 delta, struct sched_entity *se) {
+	return mul_u64_u32_shr(delta, sched_prio_to_wmult[se->slice_score], 22);
+}
+
+static inline u64 __unscale_slice(u64 delta, u8 score) {
+	return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);
+}
+
+static inline u64 unscale_slice(u64 delta, struct sched_entity *se) {
+	return __unscale_slice(delta, se->slice_score);
+}
+
+static void avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se);
+static void avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se);
+
+static void update_slice_score(struct sched_entity *se) {
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u8 prev_score = se->slice_score;
+	u32 penalty = se->burst_penalty;
+	if (sched_burst_score_rounding) penalty += 0x2U;
+	se->slice_score = penalty >> 2;
+
+	if (se->slice_score != prev_score && se->slice_load) {
+		avg_vruntime_sub(cfs_rq, se);
+		avg_vruntime_add(cfs_rq, se);
+	}
+}
+
+static inline u32 binary_smooth(u32 new, u32 old) {
+  int increment = new - old;
+  return (0 <= increment)?
+    old + ( increment >> (int)sched_burst_smoothness_long):
+    old - (-increment >> (int)sched_burst_smoothness_short);
+}
+
+static void restart_burst(struct sched_entity *se) {
+	se->burst_penalty = se->prev_burst_penalty =
+		binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
+	se->curr_burst_penalty = 0;
+	se->burst_time = 0;
+	update_slice_score(se);
+}
+
+static inline void restart_burst_rescale_deadline(struct sched_entity *se) {
+	u64 wremain, vremain = se->deadline - se->vruntime;
+	u8 prev_score = se->slice_score;
+	restart_burst(se);
+	if (prev_score > se->slice_score) {
+		wremain = __unscale_slice(vremain, prev_score);
+		se->deadline = se->vruntime + scale_slice(wremain, se);
+	}
+}
+#endif // CONFIG_SCHED_BORE
+
 int sched_thermal_decay_shift;
 static int __init setup_sched_thermal_decay_shift(char *str)
 {
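To see what the fixed-point math in the hunk above produces with its default tunables (offset 22, scale 1280), the following standalone userspace sketch mirrors log2plus1_u64_u32f8() and calc_burst_penalty(); the fls64() stand-in built on a compiler builtin is an assumption of this sketch, not part of the patch. With these defaults a 1 ms burst still yields penalty 0, while a 100 ms burst maps to penalty 27 (slice_score 6).

/* Userspace sketch of the BORE burst-penalty math above; illustration only. */
#include <stdio.h>
#include <stdint.h>

#define MAX_BURST_PENALTY (39U << 2)

static uint8_t  sched_burst_penalty_offset = 22;	/* defaults from the hunk above */
static uint32_t sched_burst_penalty_scale  = 1280;

static uint32_t fls64_sketch(uint64_t v)
{
	return v ? 64 - __builtin_clzll(v) : 0;	/* 1-based index of highest set bit */
}

/* log2(v) + 1 as a fixed-point value with 8 fractional bits */
static uint32_t log2plus1_u64_u32f8(uint64_t v)
{
	uint32_t msb = fls64_sketch(v);
	int32_t excess_bits = msb - 9;
	uint8_t fractional = (0 <= excess_bits) ? v >> excess_bits : v << -excess_bits;
	return msb << 8 | fractional;
}

static uint32_t calc_burst_penalty(uint64_t burst_time)
{
	uint32_t greed = log2plus1_u64_u32f8(burst_time);
	uint32_t tolerance = (uint32_t)sched_burst_penalty_offset << 8;
	uint32_t penalty = (greed > tolerance) ? greed - tolerance : 0;
	uint32_t scaled = penalty * sched_burst_penalty_scale >> 16;
	return scaled < MAX_BURST_PENALTY ? scaled : MAX_BURST_PENALTY;
}

int main(void)
{
	/* burst times in nanoseconds: 1 ms, 100 ms, 10 s */
	uint64_t samples[] = { 1000000ULL, 100000000ULL, 10000000000ULL };
	for (int i = 0; i < 3; i++) {
		uint32_t p = calc_burst_penalty(samples[i]);
		printf("burst %11llu ns -> penalty %3u, slice_score %2u\n",
		       (unsigned long long)samples[i], p, p >> 2);
	}
	return 0;
}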
Lines 137-142 static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536;
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table sched_fair_sysctls[] = {
+#ifdef CONFIG_SCHED_BORE
+	{
+		.procname	= "sched_bore",
+		.data		= &sched_bore,
+		.maxlen		= sizeof(bool),
+		.mode		= 0644,
+		.proc_handler	= &proc_dobool,
+	},
+	{
+		.procname	= "sched_burst_cache_lifetime",
+		.data		= &sched_burst_cache_lifetime,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler = proc_douintvec,
+	},
+	{
+		.procname	= "sched_burst_fork_atavistic",
+		.data		= &sched_burst_fork_atavistic,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= &proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_THREE,
+	},
+	{
+		.procname	= "sched_burst_penalty_offset",
+		.data		= &sched_burst_penalty_offset,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= &proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &sixty_four,
+	},
+	{
+		.procname	= "sched_burst_penalty_scale",
+		.data		= &sched_burst_penalty_scale,
+		.maxlen		= sizeof(uint),
+		.mode		= 0644,
+		.proc_handler	= &proc_douintvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &maxval_12_bits,
+	},
+	{
+		.procname	= "sched_burst_score_rounding",
+		.data		= &sched_burst_score_rounding,
+		.maxlen		= sizeof(bool),
+		.mode		= 0644,
+		.proc_handler	= &proc_dobool,
+	},
+	{
+		.procname	= "sched_burst_smoothness_long",
+		.data		= &sched_burst_smoothness_long,
+		.maxlen		= sizeof(bool),
+		.mode		= 0644,
+		.proc_handler	= &proc_dobool,
+	},
+	{
+		.procname	= "sched_burst_smoothness_short",
+		.data		= &sched_burst_smoothness_short,
+		.maxlen		= sizeof(bool),
+		.mode		= 0644,
+		.proc_handler	= &proc_dobool,
+	},
+#endif // CONFIG_SCHED_BORE
 #ifdef CONFIG_CFS_BANDWIDTH
 	{
 		.procname       = "sched_cfs_bandwidth_slice_us",
Lines 195-200 static inline void update_load_set(struct load_weight *lw, unsigned long w)
  *
  * This idea comes from the SD scheduler of Con Kolivas:
  */
+#ifdef CONFIG_SCHED_BORE
+static void update_sysctl(void) {
+	sysctl_sched_base_slice =
+		max(sysctl_sched_min_base_slice, configured_sched_base_slice);
+}
+void sched_update_min_base_slice(void) { update_sysctl(); }
+#else // CONFIG_SCHED_BORE
 static unsigned int get_update_sysctl_factor(void)
 {
 	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
Lines 225-230 static void update_sysctl(void)
 	SET_SYSCTL(sched_base_slice);
 #undef SET_SYSCTL
 }
+#endif // CONFIG_SCHED_BORE
 
 void __init sched_init_granularity(void)
 {
Lines 298-303 static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
 	if (unlikely(se->load.weight != NICE_0_LOAD))
 		delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
 
+#ifdef CONFIG_SCHED_BORE
+	if (likely(sched_bore)) delta = scale_slice(delta, se);
+#endif // CONFIG_SCHED_BORE
 	return delta;
 }
 
Lines 620-629 static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  * As measured, the max (key * weight) value was ~44 bits for a kernel build.
  */
+#if !defined(CONFIG_SCHED_BORE)
+#define entity_weight(se) scale_load_down(se->load.weight)
+#else // CONFIG_SCHED_BORE
+static unsigned long entity_weight(struct sched_entity *se) {
+	unsigned long weight = se->load.weight >> SCHED_AVG_LOAD_SHIFT;
+	if (likely(weight)) {
+		weight >>= SCHED_AVG_LOAD_SHIFT;
+		if (likely(sched_bore)) weight = unscale_slice(weight, se);
+		weight = max(2UL, weight);
+	}
+	return weight;
+}
+#endif // CONFIG_SCHED_BORE
+
 static void
 avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned long weight = scale_load_down(se->load.weight);
+	unsigned long weight = entity_weight(se);
+#ifdef CONFIG_SCHED_BORE
+	se->slice_load = weight;
+#endif // CONFIG_SCHED_BORE
 	s64 key = entity_key(cfs_rq, se);
 
 	cfs_rq->avg_vruntime += key * weight;
Lines 633-639 avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static void
 avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned long weight = scale_load_down(se->load.weight);
+	unsigned long weight;
+#if !defined(CONFIG_SCHED_BORE)
+	weight = scale_load_down(se->load.weight);
+#else // CONFIG_SCHED_BORE
+	weight = se->slice_load;
+	se->slice_load = 0;
+#endif // CONFIG_SCHED_BORE
 	s64 key = entity_key(cfs_rq, se);
 
 	cfs_rq->avg_vruntime -= key * weight;
Lines 653-666 void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
  * Specifically: avg_runtime() + 0 must result in entity_eligible() := true
  * For this to be so, the result of this function must have a left bias.
  */
-u64 avg_vruntime(struct cfs_rq *cfs_rq)
+static u64 avg_key(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	s64 avg = cfs_rq->avg_vruntime;
 	long load = cfs_rq->avg_load;
 
 	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
+		unsigned long weight = entity_weight(curr);
 
 		avg += entity_key(cfs_rq, curr) * weight;
 		load += weight;
Lines 673-679 u64 avg_vruntime(struct cfs_rq *cfs_rq)
 		avg = div_s64(avg, load);
 	}
 
-	return cfs_rq->min_vruntime + avg;
+	return avg;
+}
+
+inline u64 avg_vruntime(struct cfs_rq *cfs_rq) {
+	return cfs_rq->min_vruntime + avg_key(cfs_rq);
 }
 
 /*
Lines 694-706 u64 avg_vruntime(struct cfs_rq *cfs_rq)
  */
 static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	s64 lag, limit;
-
 	SCHED_WARN_ON(!se->on_rq);
-	lag = avg_vruntime(cfs_rq) - se->vruntime;
-
-	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
-	se->vlag = clamp(lag, -limit, limit);
+	se->vlag = avg_vruntime(cfs_rq) - se->vruntime;
 }
 
 /*
Lines 727-733 int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	long load = cfs_rq->avg_load;
 
 	if (curr && curr->on_rq) {
-		unsigned long weight = scale_load_down(curr->load.weight);
+		unsigned long weight = entity_weight(curr);
 
 		avg += entity_key(cfs_rq, curr) * weight;
 		load += weight;
Lines 981-986 struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  * Scheduling class statistics methods:
  */
 #ifdef CONFIG_SMP
+#if !defined(CONFIG_SCHED_BORE)
 int sched_update_scaling(void)
 {
 	unsigned int factor = get_update_sysctl_factor();
Lines 992-997 int sched_update_scaling(void)
 
 	return 0;
 }
+#endif // CONFIG_SCHED_BORE
 #endif
 #endif
 
Lines 1016-1021 static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * EEVDF: vd_i = ve_i + r_i / w_i
 	 */
+#ifdef CONFIG_SCHED_BORE
+	update_slice_score(se);
+#endif // CONFIG_SCHED_BORE
 	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
 
 	/*
Lines 1158-1164 static void update_curr(struct cfs_rq *cfs_rq)
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq->exec_clock, delta_exec);
 
-	curr->vruntime += calc_delta_fair(delta_exec, curr);
+#ifdef CONFIG_SCHED_BORE
+	curr->burst_time += delta_exec;
+	update_burst_penalty(curr);
+#endif // CONFIG_SCHED_BORE
+	curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr));
 	update_deadline(cfs_rq, curr);
 	update_min_vruntime(cfs_rq);
 
Lines 5131-5137 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		struct sched_entity *curr = cfs_rq->curr;
 		unsigned long load;
 
-		lag = se->vlag;
+		u64 limit = calc_delta_fair(max_t(u64, se->slice*2, TICK_NSEC), se);
+		s64 overmet = limit, undermet = limit;
+#ifdef CONFIG_SCHED_BORE
+		if (likely(sched_bore)) overmet = div_s64(overmet, 2);
+#endif // CONFIG_SCHED_BORE
+		lag = clamp(se->vlag, -overmet, undermet);
 
 		/*
 		 * If we want to place a task and preserve lag, we have to
Lines 5187-5195 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		 */
 		load = cfs_rq->avg_load;
 		if (curr && curr->on_rq)
-			load += scale_load_down(curr->load.weight);
+			load += entity_weight(curr);
 
-		lag *= load + scale_load_down(se->load.weight);
+		lag *= load + entity_weight(se);
 		if (WARN_ON_ONCE(!load))
 			load = 1;
 		lag = div_s64(lag, load);
Lines 6759-6764 static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	bool was_sched_idle = sched_idle_rq(rq);
 
 	util_est_dequeue(&rq->cfs, p);
+#ifdef CONFIG_SCHED_BORE
+	if (task_sleep) {
+		update_curr(cfs_rq_of(se));
+		restart_burst(se);
+	}
+#endif // CONFIG_SCHED_BORE
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
Lines 8494-8509 static void yield_task_fair(struct rq *rq)
 	/*
 	 * Are we the only task in the tree?
 	 */
+#ifdef CONFIG_SCHED_BORE
+	if (unlikely(!sched_bore))
+#endif // CONFIG_SCHED_BORE
 	if (unlikely(rq->nr_running == 1))
 		return;
 
-	clear_buddies(cfs_rq, se);
-
 	update_rq_clock(rq);
 	/*
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
+#ifdef CONFIG_SCHED_BORE
+	restart_burst_rescale_deadline(se);
+	if (unlikely(rq->nr_running == 1)) return;
+#endif // CONFIG_SCHED_BORE
 	/*
 	 * Tell update_rq_clock() that we've just updated,
 	 * so we don't do microscopic update in schedule()
Lines 8511-8516 static void yield_task_fair(struct rq *rq)
 	 */
 	rq_clock_skip_update(rq);
 
+	clear_buddies(cfs_rq, se);
+
 	se->deadline += calc_delta_fair(se->slice, se);
 }
 
Lines 12590-12595 static void task_fork_fair(struct task_struct *p)
 	curr = cfs_rq->curr;
 	if (curr)
 		update_curr(cfs_rq);
+#ifdef CONFIG_SCHED_BORE
+	update_slice_score(se);
+#endif // CONFIG_SCHED_BORE
 	place_entity(cfs_rq, se, ENQUEUE_INITIAL);
 	rq_unlock(rq, &rf);
 }
(-)a/kernel/sched/features.h (+4 lines)
Lines 6-12
  */
 SCHED_FEAT(PLACE_LAG, true)
 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
+#ifdef CONFIG_SCHED_BORE
+SCHED_FEAT(RUN_TO_PARITY, false)
+#else // CONFIG_SCHED_BORE
 SCHED_FEAT(RUN_TO_PARITY, true)
+#endif // CONFIG_SCHED_BORE
 
 /*
  * Prefer to schedule the task we woke last (assuming it failed
(-)a/kernel/sched/sched.h (-1 / +18 lines)
Lines 144-149 extern int sched_rr_timeslice;
 # define scale_load_down(w)	(w)
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+# ifdef CONFIG_64BIT
+#  define SCHED_AVG_LOAD_EXTRA_RESOLUTION 5
+#  define SCHED_AVG_LOAD_SHIFT \
+          (SCHED_FIXEDPOINT_SHIFT - SCHED_AVG_LOAD_EXTRA_RESOLUTION)
+# else // CONFIG_64BIT
+#  define SCHED_AVG_LOAD_EXTRA_RESOLUTION 0
+#  define SCHED_AVG_LOAD_SHIFT 0
+# endif // CONFIG_64BIT
+#endif // CONFIG_SCHED_BORE
+
 /*
  * Task weight (visible to users) and its load (invisible to users) have
  * independent resolution, but they should be well calibrated. We use
Lines 1929-1935 static inline void dirty_sched_domain_sysctl(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+extern void sched_update_min_base_slice(void);
+#else // CONFIG_SCHED_BORE
 extern int sched_update_scaling(void);
+#endif // CONFIG_SCHED_BORE
 
 static inline const struct cpumask *task_user_cpus(struct task_struct *p)
 {
Lines 2509-2514 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
 extern unsigned int sysctl_sched_base_slice;
+#ifdef CONFIG_SCHED_BORE
+extern unsigned int sysctl_sched_min_base_slice;
+#endif // CONFIG_SCHED_BORE
 
 #ifdef CONFIG_SCHED_DEBUG
 extern int sysctl_resched_latency_warn_ms;
-
(-)a/arch/arm/Kconfig (-1 / +3 lines)
Lines 35-40 config ARM
 	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
+	select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_USE_MEMTEST
Lines 74-80 config ARM
 	select HAS_IOPORT
 	select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
 	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
-	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
+	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
 	select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
 	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
 	select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
Lines 119-124 config ARM
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
 	select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
(-)a/arch/arm/mm/fault.c (+6 lines)
Lines 404-409 do_translation_fault(unsigned long addr, unsigned int fsr,
 	if (addr < TASK_SIZE)
 		return do_page_fault(addr, fsr, regs);
 
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
 	if (user_mode(regs))
 		goto bad_area;
 
Lines 474-479 do_translation_fault(unsigned long addr, unsigned int fsr,
 static int
 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
 	do_bad_area(addr, fsr, regs);
 	return 0;
 }
(-)a/arch/arm/vfp/vfpmodule.c (-21 / +53 lines)
Lines 55-60 extern unsigned int VFP_arch_feroceon __alias(VFP_arch);
  */
 union vfp_state *vfp_current_hw_state[NR_CPUS];
 
+/*
+ * Claim ownership of the VFP unit.
+ *
+ * The caller may change VFP registers until vfp_unlock() is called.
+ *
+ * local_bh_disable() is used to disable preemption and to disable VFP
+ * processing in softirq context. On PREEMPT_RT kernels local_bh_disable() is
+ * not sufficient because it only serializes soft interrupt related sections
+ * via a local lock, but stays preemptible. Disabling preemption is the right
+ * choice here as bottom half processing is always in thread context on RT
+ * kernels so it implicitly prevents bottom half processing as well.
+ */
+static void vfp_lock(void)
+{
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_bh_disable();
+	else
+		preempt_disable();
+}
+
+static void vfp_unlock(void)
+{
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		local_bh_enable();
+	else
+		preempt_enable();
+}
+
 /*
  * Is 'thread's most up to date state stored in this CPUs hardware?
  * Must be called from non-preemptible context.
Lines 240-246 static void vfp_panic(char *reason, u32 inst)
 /*
  * Process bitmask of exception conditions.
  */
-static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs)
+static int vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr)
 {
 	int si_code = 0;
 
Lines 248-255 static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
 
 	if (exceptions == VFP_EXCEPTION_ERROR) {
 		vfp_panic("unhandled bounce", inst);
-		vfp_raise_sigfpe(FPE_FLTINV, regs);
-		return;
+		return FPE_FLTINV;
 	}
 
 	/*
Lines 277-284 static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_
 	RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF);
 	RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV);
 
-	if (si_code)
-		vfp_raise_sigfpe(si_code, regs);
+	return si_code;
 }
 
 /*
Lines 324-329 static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs)
 static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 {
 	u32 fpscr, orig_fpscr, fpsid, exceptions;
+	int si_code2 = 0;
+	int si_code = 0;
 
 	pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);
 
Lines 369-376 static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 		 * unallocated VFP instruction but with FPSCR.IXE set and not
 		 * on VFP subarch 1.
 		 */
-		 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
-		return;
+		si_code = vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr);
+		goto exit;
 	}
 
 	/*
Lines 394-407 static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
 	 */
 	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
 	if (exceptions)
-		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
+		si_code2 = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
 
 	/*
 	 * If there isn't a second FP instruction, exit now. Note that
 	 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
 	 */
 	if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
-		return;
+		goto exit;
 
 	/*
 	 * The barrier() here prevents fpinst2 being read
Lines 413-419 static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
  emulate:
 	exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
 	if (exceptions)
-		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
+		si_code = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
+exit:
+	vfp_unlock();
+	if (si_code2)
+		vfp_raise_sigfpe(si_code2, regs);
+	if (si_code)
+		vfp_raise_sigfpe(si_code, regs);
 }
 
 static void vfp_enable(void *unused)
Lines 512-522 static inline void vfp_pm_init(void) { }
  */
 void vfp_sync_hwstate(struct thread_info *thread)
 {
-	unsigned int cpu = get_cpu();
+	vfp_lock();
 
-	local_bh_disable();
-
-	if (vfp_state_in_hw(cpu, thread)) {
+	if (vfp_state_in_hw(raw_smp_processor_id(), thread)) {
 		u32 fpexc = fmrx(FPEXC);
 
 		/*
Lines 527-534 void vfp_sync_hwstate(struct thread_info *thread)
 		fmxr(FPEXC, fpexc);
 	}
 
-	local_bh_enable();
-	put_cpu();
+	vfp_unlock();
 }
 
 /* Ensure that the thread reloads the hardware VFP state on the next use. */
Lines 683-689 static int vfp_support_entry(struct pt_regs *regs, u32 trigger)
 	if (!user_mode(regs))
 		return vfp_kmode_exception(regs, trigger);
 
-	local_bh_disable();
+	vfp_lock();
 	fpexc = fmrx(FPEXC);
 
 	/*
Lines 748-753 static int vfp_support_entry(struct pt_regs *regs, u32 trigger)
 		 * replay the instruction that trapped.
 		 */
 		fmxr(FPEXC, fpexc);
+		vfp_unlock();
 	} else {
 		/* Check for synchronous or asynchronous exceptions */
 		if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
Lines 762-778 static int vfp_support_entry(struct pt_regs *regs, u32 trigger)
 			if (!(fpscr & FPSCR_IXE)) {
 				if (!(fpscr & FPSCR_LENGTH_MASK)) {
 					pr_debug("not VFP\n");
-					local_bh_enable();
+					vfp_unlock();
 					return -ENOEXEC;
 				}
 				fpexc |= FPEXC_DEX;
 			}
 		}
 bounce:		regs->ARM_pc += 4;
+		/* VFP_bounce() will invoke vfp_unlock() */
 		VFP_bounce(trigger, fpexc, regs);
 	}
 
-	local_bh_enable();
 	return 0;
 }
 
Lines 819-825 void kernel_neon_begin(void)
 	unsigned int cpu;
 	u32 fpexc;
 
-	local_bh_disable();
+	vfp_lock();
 
 	/*
 	 * Kernel mode NEON is only allowed outside of hardirq context with
Lines 850-856 void kernel_neon_end(void)
 {
 	/* Disable the NEON/VFP unit. */
 	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
-	local_bh_enable();
+	vfp_unlock();
 }
 EXPORT_SYMBOL(kernel_neon_end);
 
(-)a/arch/arm64/Kconfig (+1 lines)
Lines 97-102 config ARM64
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_SUPPORTS_PAGE_TABLE_CHECK
 	select ARCH_SUPPORTS_PER_VMA_LOCK
+	select ARCH_SUPPORTS_RT
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
 	select ARCH_WANT_DEFAULT_BPF_JIT
Lines 166-171 config PPC Link Here
166
	select ARCH_STACKWALK
166
	select ARCH_STACKWALK
167
	select ARCH_SUPPORTS_ATOMIC_RMW
167
	select ARCH_SUPPORTS_ATOMIC_RMW
168
	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
168
	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
169
	select ARCH_SUPPORTS_RT			if HAVE_POSIX_CPU_TIMERS_TASK_WORK
169
	select ARCH_USE_BUILTIN_BSWAP
170
	select ARCH_USE_BUILTIN_BSWAP
170
	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
171
	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
171
	select ARCH_USE_MEMTEST
172
	select ARCH_USE_MEMTEST
Lines 269-274 config PPC Link Here
269
	select HAVE_PERF_USER_STACK_DUMP
270
	select HAVE_PERF_USER_STACK_DUMP
270
	select HAVE_REGS_AND_STACK_ACCESS_API
271
	select HAVE_REGS_AND_STACK_ACCESS_API
271
	select HAVE_RELIABLE_STACKTRACE
272
	select HAVE_RELIABLE_STACKTRACE
273
	select HAVE_POSIX_CPU_TIMERS_TASK_WORK	if !KVM
272
	select HAVE_RSEQ
274
	select HAVE_RSEQ
273
	select HAVE_SETUP_PER_CPU_AREA		if PPC64
275
	select HAVE_SETUP_PER_CPU_AREA		if PPC64
274
	select HAVE_SOFTIRQ_ON_OWN_STACK
276
	select HAVE_SOFTIRQ_ON_OWN_STACK
(-)a/arch/powerpc/include/asm/stackprotector.h (-1 / +6 lines)
Lines 19-26 Link Here
19
 */
19
 */
20
static __always_inline void boot_init_stack_canary(void)
20
static __always_inline void boot_init_stack_canary(void)
21
{
21
{
22
	unsigned long canary = get_random_canary();
22
	unsigned long canary;
23
23
24
#ifndef CONFIG_PREEMPT_RT
25
	canary = get_random_canary();
26
#else
27
	canary = ((unsigned long)&canary) & CANARY_MASK;
28
#endif
24
	current->stack_canary = canary;
29
	current->stack_canary = canary;
25
#ifdef CONFIG_PPC64
30
#ifdef CONFIG_PPC64
26
	get_paca()->canary = canary;
31
	get_paca()->canary = canary;
(-)a/arch/powerpc/kernel/traps.c (-1 / +6 lines)
Lines 261-272 static char *get_mmu_str(void) Link Here
261
261
262
static int __die(const char *str, struct pt_regs *regs, long err)
262
static int __die(const char *str, struct pt_regs *regs, long err)
263
{
263
{
264
	const char *pr = "";
265
264
	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
266
	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
265
267
268
	if (IS_ENABLED(CONFIG_PREEMPTION))
269
		pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
270
266
	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
271
	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
267
	       IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
272
	       IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
268
	       PAGE_SIZE / 1024, get_mmu_str(),
273
	       PAGE_SIZE / 1024, get_mmu_str(),
269
	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
274
	       pr,
270
	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
275
	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
271
	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
276
	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
272
	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
277
	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
(-)a/arch/powerpc/kvm/Kconfig (+1 lines)
Lines 224-229 config KVM_E500MC Link Here
224
config KVM_MPIC
224
config KVM_MPIC
225
	bool "KVM in-kernel MPIC emulation"
225
	bool "KVM in-kernel MPIC emulation"
226
	depends on KVM && PPC_E500
226
	depends on KVM && PPC_E500
227
	depends on !PREEMPT_RT
227
	select HAVE_KVM_IRQCHIP
228
	select HAVE_KVM_IRQCHIP
228
	select HAVE_KVM_IRQFD
229
	select HAVE_KVM_IRQFD
229
	select HAVE_KVM_IRQ_ROUTING
230
	select HAVE_KVM_IRQ_ROUTING
(-)a/arch/powerpc/platforms/pseries/Kconfig (+1 lines)
Lines 2-7 Link Here
2
config PPC_PSERIES
2
config PPC_PSERIES
3
	depends on PPC64 && PPC_BOOK3S
3
	depends on PPC64 && PPC_BOOK3S
4
	bool "IBM pSeries & new (POWER5-based) iSeries"
4
	bool "IBM pSeries & new (POWER5-based) iSeries"
5
	select GENERIC_ALLOCATOR
5
	select HAVE_PCSPKR_PLATFORM
6
	select HAVE_PCSPKR_PLATFORM
6
	select MPIC
7
	select MPIC
7
	select OF_DYNAMIC
8
	select OF_DYNAMIC
(-)a/arch/powerpc/platforms/pseries/iommu.c (-11 / +20 lines)
Lines 25-30 Link Here
25
#include <linux/of_address.h>
25
#include <linux/of_address.h>
26
#include <linux/iommu.h>
26
#include <linux/iommu.h>
27
#include <linux/rculist.h>
27
#include <linux/rculist.h>
28
#include <linux/local_lock.h>
28
#include <asm/io.h>
29
#include <asm/io.h>
29
#include <asm/prom.h>
30
#include <asm/prom.h>
30
#include <asm/rtas.h>
31
#include <asm/rtas.h>
Lines 206-212 static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, Link Here
206
	return ret;
207
	return ret;
207
}
208
}
208
209
209
static DEFINE_PER_CPU(__be64 *, tce_page);
210
struct tce_page {
211
	__be64 * page;
212
	local_lock_t lock;
213
};
214
static DEFINE_PER_CPU(struct tce_page, tce_page) = {
215
	.lock = INIT_LOCAL_LOCK(lock),
216
};
210
217
211
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
218
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
212
				     long npages, unsigned long uaddr,
219
				     long npages, unsigned long uaddr,
Lines 229-237 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, Link Here
229
		                           direction, attrs);
236
		                           direction, attrs);
230
	}
237
	}
231
238
232
	local_irq_save(flags);	/* to protect tcep and the page behind it */
239
	/* to protect tcep and the page behind it */
240
	local_lock_irqsave(&tce_page.lock, flags);
233
241
234
	tcep = __this_cpu_read(tce_page);
242
	tcep = __this_cpu_read(tce_page.page);
235
243
236
	/* This is safe to do since interrupts are off when we're called
244
	/* This is safe to do since interrupts are off when we're called
237
	 * from iommu_alloc{,_sg}()
245
	 * from iommu_alloc{,_sg}()
Lines 240-251 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, Link Here
240
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
248
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
241
		/* If allocation fails, fall back to the loop implementation */
249
		/* If allocation fails, fall back to the loop implementation */
242
		if (!tcep) {
250
		if (!tcep) {
243
			local_irq_restore(flags);
251
			local_unlock_irqrestore(&tce_page.lock, flags);
244
			return tce_build_pSeriesLP(tbl->it_index, tcenum,
252
			return tce_build_pSeriesLP(tbl->it_index, tcenum,
245
					tceshift,
253
					tceshift,
246
					npages, uaddr, direction, attrs);
254
					npages, uaddr, direction, attrs);
247
		}
255
		}
248
		__this_cpu_write(tce_page, tcep);
256
		__this_cpu_write(tce_page.page, tcep);
249
	}
257
	}
250
258
251
	rpn = __pa(uaddr) >> tceshift;
259
	rpn = __pa(uaddr) >> tceshift;
Lines 275-281 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, Link Here
275
		tcenum += limit;
283
		tcenum += limit;
276
	} while (npages > 0 && !rc);
284
	} while (npages > 0 && !rc);
277
285
278
	local_irq_restore(flags);
286
	local_unlock_irqrestore(&tce_page.lock, flags);
279
287
280
	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
288
	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
281
		ret = (int)rc;
289
		ret = (int)rc;
Lines 459-474 static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, Link Here
459
				DMA_BIDIRECTIONAL, 0);
467
				DMA_BIDIRECTIONAL, 0);
460
	}
468
	}
461
469
462
	local_irq_disable();	/* to protect tcep and the page behind it */
470
	/* to protect tcep and the page behind it */
463
	tcep = __this_cpu_read(tce_page);
471
	local_lock_irq(&tce_page.lock);
472
	tcep = __this_cpu_read(tce_page.page);
464
473
465
	if (!tcep) {
474
	if (!tcep) {
466
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
475
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
467
		if (!tcep) {
476
		if (!tcep) {
468
			local_irq_enable();
477
			local_unlock_irq(&tce_page.lock);
469
			return -ENOMEM;
478
			return -ENOMEM;
470
		}
479
		}
471
		__this_cpu_write(tce_page, tcep);
480
		__this_cpu_write(tce_page.page, tcep);
472
	}
481
	}
473
482
474
	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
483
	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
Lines 511-517 static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, Link Here
511
520
512
	/* error cleanup: caller will clear whole range */
521
	/* error cleanup: caller will clear whole range */
513
522
514
	local_irq_enable();
523
	local_unlock_irq(&tce_page.lock);
515
	return rc;
524
	return rc;
516
}
525
}
517
526
(-)a/arch/riscv/Kconfig (+2 lines)
Lines 49-54 config RISCV
 	select ARCH_SUPPORTS_HUGETLBFS if MMU
 	select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
 	select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
+	select ARCH_SUPPORTS_RT
 	select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
 	select ARCH_USE_MEMTEST
 	select ARCH_USE_QUEUED_RWLOCKS
Lines 137-142 config RISCV
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_PREEMPT_DYNAMIC_KEY if !XIP_KERNEL
+	select HAVE_PREEMPT_AUTO
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RETHOOK if !XIP_KERNEL
 	select HAVE_RSEQ
(-)a/arch/riscv/include/asm/thread_info.h (+2 lines)
Lines 95-100 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); Link Here
95
 * - pending work-to-be-done flags are in lowest half-word
95
 * - pending work-to-be-done flags are in lowest half-word
96
 * - other flags in upper half-word(s)
96
 * - other flags in upper half-word(s)
97
 */
97
 */
98
#define TIF_ARCH_RESCHED_LAZY	0	/* Lazy rescheduling */
98
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
99
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
99
#define TIF_SIGPENDING		2	/* signal pending */
100
#define TIF_SIGPENDING		2	/* signal pending */
100
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
101
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
Lines 109-114 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); Link Here
109
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
110
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
110
#define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
111
#define _TIF_NOTIFY_SIGNAL	(1 << TIF_NOTIFY_SIGNAL)
111
#define _TIF_UPROBE		(1 << TIF_UPROBE)
112
#define _TIF_UPROBE		(1 << TIF_UPROBE)
113
#define _TIF_ARCH_RESCHED_LAZY	(1 << TIF_ARCH_RESCHED_LAZY)
112
114
113
#define _TIF_WORK_MASK \
115
#define _TIF_WORK_MASK \
114
	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
116
	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
(-)a/arch/x86/Kconfig (-1 / +3 lines)
Lines 27-33 config X86_64 Link Here
27
	# Options that are inherently 64-bit kernel only:
27
	# Options that are inherently 64-bit kernel only:
28
	select ARCH_HAS_GIGANTIC_PAGE
28
	select ARCH_HAS_GIGANTIC_PAGE
29
	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
29
	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
30
	select ARCH_SUPPORTS_PER_VMA_LOCK
30
	select ARCH_SUPPORTS_RT
31
	select HAVE_ARCH_SOFT_DIRTY
31
	select HAVE_ARCH_SOFT_DIRTY
32
	select MODULES_USE_ELF_RELA
32
	select MODULES_USE_ELF_RELA
33
	select NEED_DMA_MAP_STATE
33
	select NEED_DMA_MAP_STATE
Lines 116-121 config X86 Link Here
116
	select ARCH_USES_CFI_TRAPS		if X86_64 && CFI_CLANG
116
	select ARCH_USES_CFI_TRAPS		if X86_64 && CFI_CLANG
117
	select ARCH_SUPPORTS_LTO_CLANG
117
	select ARCH_SUPPORTS_LTO_CLANG
118
	select ARCH_SUPPORTS_LTO_CLANG_THIN
118
	select ARCH_SUPPORTS_LTO_CLANG_THIN
119
	select ARCH_SUPPORTS_RT
119
	select ARCH_USE_BUILTIN_BSWAP
120
	select ARCH_USE_BUILTIN_BSWAP
120
	select ARCH_USE_CMPXCHG_LOCKREF		if X86_CMPXCHG64
121
	select ARCH_USE_CMPXCHG_LOCKREF		if X86_CMPXCHG64
121
	select ARCH_USE_MEMTEST
122
	select ARCH_USE_MEMTEST
Lines 271-276 config X86 Link Here
271
	select HAVE_STATIC_CALL
272
	select HAVE_STATIC_CALL
272
	select HAVE_STATIC_CALL_INLINE		if HAVE_OBJTOOL
273
	select HAVE_STATIC_CALL_INLINE		if HAVE_OBJTOOL
273
	select HAVE_PREEMPT_DYNAMIC_CALL
274
	select HAVE_PREEMPT_DYNAMIC_CALL
275
	select HAVE_PREEMPT_AUTO
274
	select HAVE_RSEQ
276
	select HAVE_RSEQ
275
	select HAVE_RUST			if X86_64
277
	select HAVE_RUST			if X86_64
276
	select HAVE_SYSCALL_TRACEPOINTS
278
	select HAVE_SYSCALL_TRACEPOINTS
(-)a/arch/x86/include/asm/thread_info.h (-2 / +4 lines)
Lines 81-88 struct thread_info { Link Here
81
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
81
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
82
#define TIF_SIGPENDING		2	/* signal pending */
82
#define TIF_SIGPENDING		2	/* signal pending */
83
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
83
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
84
#define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
84
#define TIF_ARCH_RESCHED_LAZY	4	/* Lazy rescheduling */
85
#define TIF_SSBD		5	/* Speculative store bypass disable */
85
#define TIF_SINGLESTEP		5	/* reenable singlestep on user return*/
86
#define TIF_SSBD		6	/* Speculative store bypass disable */
86
#define TIF_SPEC_IB		9	/* Indirect branch speculation mitigation */
87
#define TIF_SPEC_IB		9	/* Indirect branch speculation mitigation */
87
#define TIF_SPEC_L1D_FLUSH	10	/* Flush L1D on mm switches (processes) */
88
#define TIF_SPEC_L1D_FLUSH	10	/* Flush L1D on mm switches (processes) */
88
#define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
89
#define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
Lines 104-109 struct thread_info { Link Here
104
#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
105
#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
105
#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
106
#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
106
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
107
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
108
#define _TIF_ARCH_RESCHED_LAZY	(1 << TIF_ARCH_RESCHED_LAZY)
107
#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
109
#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
108
#define _TIF_SSBD		(1 << TIF_SSBD)
110
#define _TIF_SSBD		(1 << TIF_SSBD)
109
#define _TIF_SPEC_IB		(1 << TIF_SPEC_IB)
111
#define _TIF_SPEC_IB		(1 << TIF_SPEC_IB)
(-)a/drivers/acpi/processor_idle.c (-1 / +1 lines)
Lines 108-114 static const struct dmi_system_id processor_power_dmi_table[] = { Link Here
108
 */
108
 */
109
static void __cpuidle acpi_safe_halt(void)
109
static void __cpuidle acpi_safe_halt(void)
110
{
110
{
111
	if (!tif_need_resched()) {
111
	if (!need_resched()) {
112
		raw_safe_halt();
112
		raw_safe_halt();
113
		raw_local_irq_disable();
113
		raw_local_irq_disable();
114
	}
114
	}
(-)a/drivers/block/zram/zram_drv.c (+37 lines)
Lines 57-62 static void zram_free_page(struct zram *zram, size_t index); Link Here
57
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
57
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
58
			  struct bio *parent);
58
			  struct bio *parent);
59
59
60
#ifdef CONFIG_PREEMPT_RT
61
static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
62
{
63
	size_t index;
64
65
	for (index = 0; index < num_pages; index++)
66
		spin_lock_init(&zram->table[index].lock);
67
}
68
69
static int zram_slot_trylock(struct zram *zram, u32 index)
70
{
71
	int ret;
72
73
	ret = spin_trylock(&zram->table[index].lock);
74
	if (ret)
75
		__set_bit(ZRAM_LOCK, &zram->table[index].flags);
76
	return ret;
77
}
78
79
static void zram_slot_lock(struct zram *zram, u32 index)
80
{
81
	spin_lock(&zram->table[index].lock);
82
	__set_bit(ZRAM_LOCK, &zram->table[index].flags);
83
}
84
85
static void zram_slot_unlock(struct zram *zram, u32 index)
86
{
87
	__clear_bit(ZRAM_LOCK, &zram->table[index].flags);
88
	spin_unlock(&zram->table[index].lock);
89
}
90
91
#else
92
93
static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
94
60
static int zram_slot_trylock(struct zram *zram, u32 index)
95
static int zram_slot_trylock(struct zram *zram, u32 index)
61
{
96
{
62
	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
97
	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
Lines 71-76 static void zram_slot_unlock(struct zram *zram, u32 index) Link Here
71
{
106
{
72
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
107
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
73
}
108
}
109
#endif
74
110
75
static inline bool init_done(struct zram *zram)
111
static inline bool init_done(struct zram *zram)
76
{
112
{
Lines 1242-1247 static bool zram_meta_alloc(struct zram *zram, u64 disksize) Link Here
1242
1278
1243
	if (!huge_class_size)
1279
	if (!huge_class_size)
1244
		huge_class_size = zs_huge_class_size(zram->mem_pool);
1280
		huge_class_size = zs_huge_class_size(zram->mem_pool);
1281
	zram_meta_init_table_locks(zram, num_pages);
1245
	return true;
1282
	return true;
1246
}
1283
}
1247
1284
(-)a/drivers/block/zram/zram_drv.h (+3 lines)
Lines 69-74 struct zram_table_entry { Link Here
69
		unsigned long element;
69
		unsigned long element;
70
	};
70
	};
71
	unsigned long flags;
71
	unsigned long flags;
72
#ifdef CONFIG_PREEMPT_RT
73
	spinlock_t lock;
74
#endif
72
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
75
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
73
	ktime_t ac_time;
76
	ktime_t ac_time;
74
#endif
77
#endif
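Note: the zram hunks above give each table slot a real spinlock_t under CONFIG_PREEMPT_RT, because bit_spin_lock() busy-waits with preemption disabled and cannot serve as a sleeping lock on RT. An illustrative sketch of the same dual-locking pattern, with hypothetical names (slot, slot_lock) not taken from this patch:

	#include <linux/bit_spinlock.h>
	#include <linux/spinlock.h>

	#define SLOT_LOCK_BIT	0

	struct slot {
		unsigned long flags;
	#ifdef CONFIG_PREEMPT_RT
		spinlock_t lock;		/* rtmutex-based sleeping lock on RT */
	#endif
	};

	#ifdef CONFIG_PREEMPT_RT
	static void slot_lock(struct slot *s)
	{
		spin_lock(&s->lock);
		__set_bit(SLOT_LOCK_BIT, &s->flags);	/* keep the flag view consistent */
	}
	static void slot_unlock(struct slot *s)
	{
		__clear_bit(SLOT_LOCK_BIT, &s->flags);
		spin_unlock(&s->lock);
	}
	#else
	static void slot_lock(struct slot *s)
	{
		bit_spin_lock(SLOT_LOCK_BIT, &s->flags);	/* lock bit lives in the flags word */
	}
	static void slot_unlock(struct slot *s)
	{
		bit_spin_unlock(SLOT_LOCK_BIT, &s->flags);
	}
	#endif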
(-)a/drivers/gpu/drm/i915/Kconfig (-1 lines)
Lines 3-9 config DRM_I915 Link Here
3
	tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
3
	tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
4
	depends on DRM
4
	depends on DRM
5
	depends on X86 && PCI
5
	depends on X86 && PCI
6
	depends on !PREEMPT_RT
7
	select INTEL_GTT if X86
6
	select INTEL_GTT if X86
8
	select INTERVAL_TREE
7
	select INTERVAL_TREE
9
	# we need shmfs for the swappable backing store, and in particular
8
	# we need shmfs for the swappable backing store, and in particular
(-)a/drivers/gpu/drm/i915/display/intel_crtc.c (-5 / +10 lines)
Lines 573-579 void intel_pipe_update_start(struct intel_atomic_state *state, Link Here
573
	 */
573
	 */
574
	intel_psr_wait_for_idle_locked(new_crtc_state);
574
	intel_psr_wait_for_idle_locked(new_crtc_state);
575
575
576
	local_irq_disable();
576
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
577
		local_irq_disable();
577
578
578
	crtc->debug.min_vbl = min;
579
	crtc->debug.min_vbl = min;
579
	crtc->debug.max_vbl = max;
580
	crtc->debug.max_vbl = max;
Lines 598-608 void intel_pipe_update_start(struct intel_atomic_state *state, Link Here
598
			break;
599
			break;
599
		}
600
		}
600
601
601
		local_irq_enable();
602
		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
603
			local_irq_enable();
602
604
603
		timeout = schedule_timeout(timeout);
605
		timeout = schedule_timeout(timeout);
604
606
605
		local_irq_disable();
607
		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
608
			local_irq_disable();
606
	}
609
	}
607
610
608
	finish_wait(wq, &wait);
611
	finish_wait(wq, &wait);
Lines 635-641 void intel_pipe_update_start(struct intel_atomic_state *state, Link Here
635
	return;
638
	return;
636
639
637
irq_disable:
640
irq_disable:
638
	local_irq_disable();
641
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
642
		local_irq_disable();
639
}
643
}
640
644
641
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
645
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
Lines 737-743 void intel_pipe_update_end(struct intel_atomic_state *state, Link Here
737
	 */
741
	 */
738
	intel_vrr_send_push(new_crtc_state);
742
	intel_vrr_send_push(new_crtc_state);
739
743
740
	local_irq_enable();
744
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
745
		local_irq_enable();
741
746
742
	if (intel_vgpu_active(dev_priv))
747
	if (intel_vgpu_active(dev_priv))
743
		goto out;
748
		goto out;
(-)a/drivers/gpu/drm/i915/display/intel_vblank.c (-2 / +4 lines)
Lines 308-314 static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, Link Here
308
	 */
308
	 */
309
	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
309
	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
310
310
311
	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
311
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
312
		preempt_disable();
312
313
313
	/* Get optional system timestamp before query. */
314
	/* Get optional system timestamp before query. */
314
	if (stime)
315
	if (stime)
Lines 372-378 static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, Link Here
372
	if (etime)
373
	if (etime)
373
		*etime = ktime_get();
374
		*etime = ktime_get();
374
375
375
	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
376
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
377
		preempt_enable();
376
378
377
	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
379
	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
378
380
(-)a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c (-3 / +2 lines)
Lines 312-321 void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) Link Here
312
	/* Kick the work once more to drain the signalers, and disarm the irq */
312
	/* Kick the work once more to drain the signalers, and disarm the irq */
313
	irq_work_sync(&b->irq_work);
313
	irq_work_sync(&b->irq_work);
314
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
314
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
315
		local_irq_disable();
315
		irq_work_queue(&b->irq_work);
316
		signal_irq_work(&b->irq_work);
317
		local_irq_enable();
318
		cond_resched();
316
		cond_resched();
317
		irq_work_sync(&b->irq_work);
319
	}
318
	}
320
}
319
}
321
320
(-)a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c (-12 / +5 lines)
Lines 1303-1309 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1303
	 * and context switches) submission.
1303
	 * and context switches) submission.
1304
	 */
1304
	 */
1305
1305
1306
	spin_lock(&sched_engine->lock);
1306
	spin_lock_irq(&sched_engine->lock);
1307
1307
1308
	/*
1308
	/*
1309
	 * If the queue is higher priority than the last
1309
	 * If the queue is higher priority than the last
Lines 1403-1409 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1403
				 * Even if ELSP[1] is occupied and not worthy
1403
				 * Even if ELSP[1] is occupied and not worthy
1404
				 * of timeslices, our queue might be.
1404
				 * of timeslices, our queue might be.
1405
				 */
1405
				 */
1406
				spin_unlock(&sched_engine->lock);
1406
				spin_unlock_irq(&sched_engine->lock);
1407
				return;
1407
				return;
1408
			}
1408
			}
1409
		}
1409
		}
Lines 1429-1435 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1429
1429
1430
		if (last && !can_merge_rq(last, rq)) {
1430
		if (last && !can_merge_rq(last, rq)) {
1431
			spin_unlock(&ve->base.sched_engine->lock);
1431
			spin_unlock(&ve->base.sched_engine->lock);
1432
			spin_unlock(&engine->sched_engine->lock);
1432
			spin_unlock_irq(&engine->sched_engine->lock);
1433
			return; /* leave this for another sibling */
1433
			return; /* leave this for another sibling */
1434
		}
1434
		}
1435
1435
Lines 1591-1597 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1591
	 */
1591
	 */
1592
	sched_engine->queue_priority_hint = queue_prio(sched_engine);
1592
	sched_engine->queue_priority_hint = queue_prio(sched_engine);
1593
	i915_sched_engine_reset_on_empty(sched_engine);
1593
	i915_sched_engine_reset_on_empty(sched_engine);
1594
	spin_unlock(&sched_engine->lock);
1594
	spin_unlock_irq(&sched_engine->lock);
1595
1595
1596
	/*
1596
	/*
1597
	 * We can skip poking the HW if we ended up with exactly the same set
1597
	 * We can skip poking the HW if we ended up with exactly the same set
Lines 1617-1629 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1617
	}
1617
	}
1618
}
1618
}
1619
1619
1620
static void execlists_dequeue_irq(struct intel_engine_cs *engine)
1621
{
1622
	local_irq_disable(); /* Suspend interrupts across request submission */
1623
	execlists_dequeue(engine);
1624
	local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
1625
}
1626
1627
static void clear_ports(struct i915_request **ports, int count)
1620
static void clear_ports(struct i915_request **ports, int count)
1628
{
1621
{
1629
	memset_p((void **)ports, NULL, count);
1622
	memset_p((void **)ports, NULL, count);
Lines 2478-2484 static void execlists_submission_tasklet(struct tasklet_struct *t) Link Here
2478
	}
2471
	}
2479
2472
2480
	if (!engine->execlists.pending[0]) {
2473
	if (!engine->execlists.pending[0]) {
2481
		execlists_dequeue_irq(engine);
2474
		execlists_dequeue(engine);
2482
		start_timeslice(engine);
2475
		start_timeslice(engine);
2483
	}
2476
	}
2484
2477
(-)a/drivers/gpu/drm/i915/gt/uc/intel_guc.h (-1 / +1 lines)
Lines 349-355 static inline int intel_guc_send_busy_loop(struct intel_guc *guc, Link Here
349
{
349
{
350
	int err;
350
	int err;
351
	unsigned int sleep_period_ms = 1;
351
	unsigned int sleep_period_ms = 1;
352
	bool not_atomic = !in_atomic() && !irqs_disabled();
352
	bool not_atomic = !in_atomic() && !irqs_disabled() && !rcu_preempt_depth();
353
353
354
	/*
354
	/*
355
	 * FIXME: Have caller pass in if we are in an atomic context to avoid
355
	 * FIXME: Have caller pass in if we are in an atomic context to avoid
(-)a/drivers/gpu/drm/i915/i915_request.c (-2 lines)
Lines 609-615 bool __i915_request_submit(struct i915_request *request) Link Here
609
609
610
	RQ_TRACE(request, "\n");
610
	RQ_TRACE(request, "\n");
611
611
612
	GEM_BUG_ON(!irqs_disabled());
613
	lockdep_assert_held(&engine->sched_engine->lock);
612
	lockdep_assert_held(&engine->sched_engine->lock);
614
613
615
	/*
614
	/*
Lines 718-724 void __i915_request_unsubmit(struct i915_request *request) Link Here
718
	 */
717
	 */
719
	RQ_TRACE(request, "\n");
718
	RQ_TRACE(request, "\n");
720
719
721
	GEM_BUG_ON(!irqs_disabled());
722
	lockdep_assert_held(&engine->sched_engine->lock);
720
	lockdep_assert_held(&engine->sched_engine->lock);
723
721
724
	/*
722
	/*
(-)a/drivers/gpu/drm/i915/i915_trace.h (-1 / +5 lines)
Lines 6-11 Link Here
6
#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
6
#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
7
#define _I915_TRACE_H_
7
#define _I915_TRACE_H_
8
8
9
#ifdef CONFIG_PREEMPT_RT
10
#define NOTRACE
11
#endif
12
9
#include <linux/stringify.h>
13
#include <linux/stringify.h>
10
#include <linux/types.h>
14
#include <linux/types.h>
11
#include <linux/tracepoint.h>
15
#include <linux/tracepoint.h>
Lines 322-328 DEFINE_EVENT(i915_request, i915_request_add, Link Here
322
	     TP_ARGS(rq)
326
	     TP_ARGS(rq)
323
);
327
);
324
328
325
#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
329
#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE)
326
DEFINE_EVENT(i915_request, i915_request_guc_submit,
330
DEFINE_EVENT(i915_request, i915_request_guc_submit,
327
	     TP_PROTO(struct i915_request *rq),
331
	     TP_PROTO(struct i915_request *rq),
328
	     TP_ARGS(rq)
332
	     TP_ARGS(rq)
(-)a/drivers/gpu/drm/i915/i915_utils.h (-1 / +1 lines)
Lines 288-294 wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) Link Here
288
#define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
288
#define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
289
289
290
/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
290
/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
291
#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
291
#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
292
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
292
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
293
#else
293
#else
294
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
294
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
(-)a/drivers/tty/serial/8250/8250_core.c (-1 / +41 lines)
Lines 592-597 serial8250_register_ports(struct uart_driver *drv, struct device *dev) Link Here
592
592
593
#ifdef CONFIG_SERIAL_8250_CONSOLE
593
#ifdef CONFIG_SERIAL_8250_CONSOLE
594
594
595
#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE
595
static void univ8250_console_write(struct console *co, const char *s,
596
static void univ8250_console_write(struct console *co, const char *s,
596
				   unsigned int count)
597
				   unsigned int count)
597
{
598
{
Lines 599-604 static void univ8250_console_write(struct console *co, const char *s, Link Here
599
600
600
	serial8250_console_write(up, s, count);
601
	serial8250_console_write(up, s, count);
601
}
602
}
603
#else
604
static bool univ8250_console_write_atomic(struct console *co,
605
					  struct nbcon_write_context *wctxt)
606
{
607
	struct uart_8250_port *up = &serial8250_ports[co->index];
608
609
	return serial8250_console_write_atomic(up, wctxt);
610
}
611
612
static bool univ8250_console_write_thread(struct console *co,
613
					  struct nbcon_write_context *wctxt)
614
{
615
	struct uart_8250_port *up = &serial8250_ports[co->index];
616
617
	return serial8250_console_write_thread(up, wctxt);
618
}
619
620
static void univ8250_console_driver_enter(struct console *con, unsigned long *flags)
621
{
622
	struct uart_port *up = &serial8250_ports[con->index].port;
623
624
	__uart_port_lock_irqsave(up, flags);
625
}
626
627
static void univ8250_console_driver_exit(struct console *con, unsigned long flags)
628
{
629
	struct uart_port *up = &serial8250_ports[con->index].port;
630
631
	__uart_port_unlock_irqrestore(up, flags);
632
}
633
#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */
602
634
603
static int univ8250_console_setup(struct console *co, char *options)
635
static int univ8250_console_setup(struct console *co, char *options)
604
{
636
{
Lines 698-709 static int univ8250_console_match(struct console *co, char *name, int idx, Link Here
698
730
699
static struct console univ8250_console = {
731
static struct console univ8250_console = {
700
	.name		= "ttyS",
732
	.name		= "ttyS",
733
#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE
701
	.write		= univ8250_console_write,
734
	.write		= univ8250_console_write,
735
	.flags		= CON_PRINTBUFFER | CON_ANYTIME,
736
#else
737
	.write_atomic	= univ8250_console_write_atomic,
738
	.write_thread	= univ8250_console_write_thread,
739
	.driver_enter	= univ8250_console_driver_enter,
740
	.driver_exit	= univ8250_console_driver_exit,
741
	.flags		= CON_PRINTBUFFER | CON_ANYTIME | CON_NBCON,
742
#endif
702
	.device		= uart_console_device,
743
	.device		= uart_console_device,
703
	.setup		= univ8250_console_setup,
744
	.setup		= univ8250_console_setup,
704
	.exit		= univ8250_console_exit,
745
	.exit		= univ8250_console_exit,
705
	.match		= univ8250_console_match,
746
	.match		= univ8250_console_match,
706
	.flags		= CON_PRINTBUFFER | CON_ANYTIME,
707
	.index		= -1,
747
	.index		= -1,
708
	.data		= &serial8250_reg,
748
	.data		= &serial8250_reg,
709
};
749
};
(-)a/drivers/tty/serial/8250/8250_port.c (-2 / +157 lines)
Lines 550-555 static int serial8250_em485_init(struct uart_8250_port *p) Link Here
550
	if (!p->em485)
550
	if (!p->em485)
551
		return -ENOMEM;
551
		return -ENOMEM;
552
552
553
#ifndef CONFIG_SERIAL_8250_LEGACY_CONSOLE
554
	if (uart_console(&p->port))
555
		dev_warn(p->port.dev, "no atomic printing for rs485 consoles\n");
556
#endif
557
553
	hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC,
558
	hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC,
554
		     HRTIMER_MODE_REL);
559
		     HRTIMER_MODE_REL);
555
	hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC,
560
	hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC,
Lines 702-708 static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) Link Here
702
	serial8250_rpm_put(p);
707
	serial8250_rpm_put(p);
703
}
708
}
704
709
705
static void serial8250_clear_IER(struct uart_8250_port *up)
710
/*
711
 * Only to be used by write_atomic() and the legacy write(), which do not
712
 * require port lock.
713
 */
714
static void __serial8250_clear_IER(struct uart_8250_port *up)
706
{
715
{
707
	if (up->capabilities & UART_CAP_UUE)
716
	if (up->capabilities & UART_CAP_UUE)
708
		serial_out(up, UART_IER, UART_IER_UUE);
717
		serial_out(up, UART_IER, UART_IER_UUE);
Lines 710-715 static void serial8250_clear_IER(struct uart_8250_port *up) Link Here
710
		serial_out(up, UART_IER, 0);
719
		serial_out(up, UART_IER, 0);
711
}
720
}
712
721
722
static inline void serial8250_clear_IER(struct uart_8250_port *up)
723
{
724
	/* Port locked to synchronize UART_IER access against the console. */
725
	lockdep_assert_held_once(&up->port.lock);
726
727
	__serial8250_clear_IER(up);
728
}
729
713
#ifdef CONFIG_SERIAL_8250_RSA
730
#ifdef CONFIG_SERIAL_8250_RSA
714
/*
731
/*
715
 * Attempts to turn on the RSA FIFO.  Returns zero on failure.
732
 * Attempts to turn on the RSA FIFO.  Returns zero on failure.
Lines 3320-3325 static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) Link Here
3320
3337
3321
	wait_for_xmitr(up, UART_LSR_THRE);
3338
	wait_for_xmitr(up, UART_LSR_THRE);
3322
	serial_port_out(port, UART_TX, ch);
3339
	serial_port_out(port, UART_TX, ch);
3340
3341
	if (ch == '\n')
3342
		up->console_newline_needed = false;
3343
	else
3344
		up->console_newline_needed = true;
3323
}
3345
}
3324
3346
3325
/*
3347
/*
Lines 3348-3353 static void serial8250_console_restore(struct uart_8250_port *up) Link Here
3348
	serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
3370
	serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
3349
}
3371
}
3350
3372
3373
#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE
3351
/*
3374
/*
3352
 * Print a string to the serial port using the device FIFO
3375
 * Print a string to the serial port using the device FIFO
3353
 *
3376
 *
Lines 3406-3412 void serial8250_console_write(struct uart_8250_port *up, const char *s, Link Here
3406
	 *	First save the IER then disable the interrupts
3429
	 *	First save the IER then disable the interrupts
3407
	 */
3430
	 */
3408
	ier = serial_port_in(port, UART_IER);
3431
	ier = serial_port_in(port, UART_IER);
3409
	serial8250_clear_IER(up);
3432
	__serial8250_clear_IER(up);
3410
3433
3411
	/* check scratch reg to see if port powered off during system sleep */
3434
	/* check scratch reg to see if port powered off during system sleep */
3412
	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
3435
	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
Lines 3472-3477 void serial8250_console_write(struct uart_8250_port *up, const char *s, Link Here
3472
	if (locked)
3495
	if (locked)
3473
		uart_port_unlock_irqrestore(port, flags);
3496
		uart_port_unlock_irqrestore(port, flags);
3474
}
3497
}
3498
#else
3499
bool serial8250_console_write_thread(struct uart_8250_port *up,
3500
				     struct nbcon_write_context *wctxt)
3501
{
3502
	struct uart_8250_em485 *em485 = up->em485;
3503
	struct uart_port *port = &up->port;
3504
	bool done = false;
3505
	unsigned int ier;
3506
3507
	touch_nmi_watchdog();
3508
3509
	if (!nbcon_enter_unsafe(wctxt))
3510
		return false;
3511
3512
	/* First save IER then disable the interrupts. */
3513
	ier = serial_port_in(port, UART_IER);
3514
	serial8250_clear_IER(up);
3515
3516
	/* Check scratch reg if port powered off during system sleep. */
3517
	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
3518
		serial8250_console_restore(up);
3519
		up->canary = 0;
3520
	}
3521
3522
	if (em485) {
3523
		if (em485->tx_stopped)
3524
			up->rs485_start_tx(up);
3525
		mdelay(port->rs485.delay_rts_before_send);
3526
	}
3527
3528
	if (nbcon_exit_unsafe(wctxt)) {
3529
		int len = READ_ONCE(wctxt->len);
3530
		int i;
3531
3532
		/*
3533
		 * Write out the message. Toggle unsafe for each byte in order
3534
		 * to give another (higher priority) context the opportunity
3535
		 * for a friendly takeover. If such a takeover occurs, this
3536
		 * context must reacquire ownership in order to perform final
3537
		 * actions (such as re-enabling the interrupts).
3538
		 *
3539
		 * IMPORTANT: wctxt->outbuf and wctxt->len are no longer valid
3540
		 *	      after a reacquire so writing the message must be
3541
		 *	      aborted.
3542
		 */
3543
		for (i = 0; i < len; i++) {
3544
			if (!nbcon_enter_unsafe(wctxt)) {
3545
				nbcon_reacquire(wctxt);
3546
				break;
3547
			}
3548
3549
			uart_console_write(port, wctxt->outbuf + i, 1, serial8250_console_putchar);
3550
3551
			if (!nbcon_exit_unsafe(wctxt)) {
3552
				nbcon_reacquire(wctxt);
3553
				break;
3554
			}
3555
		}
3556
		done = (i == len);
3557
	} else {
3558
		nbcon_reacquire(wctxt);
3559
	}
3560
3561
	while (!nbcon_enter_unsafe(wctxt))
3562
		nbcon_reacquire(wctxt);
3563
3564
	/* Finally, wait for transmitter to become empty and restore IER. */
3565
	wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
3566
	if (em485) {
3567
		mdelay(port->rs485.delay_rts_after_send);
3568
		if (em485->tx_stopped)
3569
			up->rs485_stop_tx(up);
3570
	}
3571
	serial_port_out(port, UART_IER, ier);
3572
3573
	/*
3574
	 * The receive handling will happen properly because the receive ready
3575
	 * bit will still be set; it is not cleared on read.  However, modem
3576
	 * control will not, we must call it if we have saved something in the
3577
	 * saved flags while processing with interrupts off.
3578
	 */
3579
	if (up->msr_saved_flags)
3580
		serial8250_modem_status(up);
3581
3582
	/* Success if no handover/takeover and message fully printed. */
3583
	return (nbcon_exit_unsafe(wctxt) && done);
3584
}
3585
3586
bool serial8250_console_write_atomic(struct uart_8250_port *up,
3587
				     struct nbcon_write_context *wctxt)
3588
{
3589
	struct uart_port *port = &up->port;
3590
	unsigned int ier;
3591
3592
	/* Atomic console not supported for rs485 mode. */
3593
	if (up->em485)
3594
		return false;
3595
3596
	touch_nmi_watchdog();
3597
3598
	if (!nbcon_enter_unsafe(wctxt))
3599
		return false;
3600
3601
	/*
3602
	 * First save IER then disable the interrupts. The special variant to
3603
	 * clear IER is used because atomic printing may occur without holding
3604
	 * the port lock.
3605
	 */
3606
	ier = serial_port_in(port, UART_IER);
3607
	__serial8250_clear_IER(up);
3608
3609
	/* Check scratch reg if port powered off during system sleep. */
3610
	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
3611
		serial8250_console_restore(up);
3612
		up->canary = 0;
3613
	}
3614
3615
	if (up->console_newline_needed)
3616
		uart_console_write(port, "\n", 1, serial8250_console_putchar);
3617
	uart_console_write(port, wctxt->outbuf, wctxt->len, serial8250_console_putchar);
3618
3619
	/* Finally, wait for transmitter to become empty and restore IER. */
3620
	wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
3621
	serial_port_out(port, UART_IER, ier);
3622
3623
	/* Success if no handover/takeover. */
3624
	return nbcon_exit_unsafe(wctxt);
3625
}
3626
#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */
3475
3627
3476
static unsigned int probe_baud(struct uart_port *port)
3628
static unsigned int probe_baud(struct uart_port *port)
3477
{
3629
{
Lines 3490-3495 static unsigned int probe_baud(struct uart_port *port) Link Here
3490
3642
3491
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
3643
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
3492
{
3644
{
3645
	struct uart_8250_port *up = up_to_u8250p(port);
3493
	int baud = 9600;
3646
	int baud = 9600;
3494
	int bits = 8;
3647
	int bits = 8;
3495
	int parity = 'n';
3648
	int parity = 'n';
Lines 3499-3504 int serial8250_console_setup(struct uart_port *port, char *options, bool probe) Link Here
3499
	if (!port->iobase && !port->membase)
3652
	if (!port->iobase && !port->membase)
3500
		return -ENODEV;
3653
		return -ENODEV;
3501
3654
3655
	up->console_newline_needed = false;
3656
3502
	if (options)
3657
	if (options)
3503
		uart_parse_options(options, &baud, &parity, &bits, &flow);
3658
		uart_parse_options(options, &baud, &parity, &bits, &flow);
3504
	else if (probe)
3659
	else if (probe)
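Note: the 8250 hunks above split console output into write_thread()/write_atomic() nbcon callbacks that bracket every hardware access in nbcon_enter_unsafe()/nbcon_exit_unsafe() sections. A stripped-down skeleton of such a callback is sketched below; demo_console_write_atomic is a hypothetical name and the body is illustrative only.

	#include <linux/console.h>

	static bool demo_console_write_atomic(struct console *con,
					      struct nbcon_write_context *wctxt)
	{
		/* Returns false if a higher-priority context took the console. */
		if (!nbcon_enter_unsafe(wctxt))
			return false;

		/* ... mask device interrupts, emit wctxt->outbuf[0 .. wctxt->len),
		 *     wait for the transmitter to drain, restore interrupts ... */

		/* Report whether ownership was held until the end. */
		return nbcon_exit_unsafe(wctxt);
	}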
(-)a/drivers/tty/serial/amba-pl011.c (-8 / +4 lines)
Lines 2328-2340 pl011_console_write(struct console *co, const char *s, unsigned int count) Link Here
2328
2328
2329
	clk_enable(uap->clk);
2329
	clk_enable(uap->clk);
2330
2330
2331
	local_irq_save(flags);
2331
	if (uap->port.sysrq || oops_in_progress)
2332
	if (uap->port.sysrq)
2332
		locked = uart_port_trylock_irqsave(&uap->port, &flags);
2333
		locked = 0;
2334
	else if (oops_in_progress)
2335
		locked = uart_port_trylock(&uap->port);
2336
	else
2333
	else
2337
		uart_port_lock(&uap->port);
2334
		uart_port_lock_irqsave(&uap->port, &flags);
2338
2335
2339
	/*
2336
	/*
2340
	 *	First save the CR then disable the interrupts
2337
	 *	First save the CR then disable the interrupts
Lines 2360-2367 pl011_console_write(struct console *co, const char *s, unsigned int count) Link Here
2360
		pl011_write(old_cr, uap, REG_CR);
2357
		pl011_write(old_cr, uap, REG_CR);
2361
2358
2362
	if (locked)
2359
	if (locked)
2363
		uart_port_unlock(&uap->port);
2360
		uart_port_unlock_irqrestore(&uap->port, flags);
2364
	local_irq_restore(flags);
2365
2361
2366
	clk_disable(uap->clk);
2362
	clk_disable(uap->clk);
2367
}
2363
}
(-)a/drivers/tty/serial/omap-serial.c (-8 / +4 lines)
Lines 1212-1224 serial_omap_console_write(struct console *co, const char *s, Link Here
1212
	unsigned int ier;
1212
	unsigned int ier;
1213
	int locked = 1;
1213
	int locked = 1;
1214
1214
1215
	local_irq_save(flags);
1215
	if (up->port.sysrq || oops_in_progress)
1216
	if (up->port.sysrq)
1216
		locked = uart_port_trylock_irqsave(&up->port, &flags);
1217
		locked = 0;
1218
	else if (oops_in_progress)
1219
		locked = uart_port_trylock(&up->port);
1220
	else
1217
	else
1221
		uart_port_lock(&up->port);
1218
		uart_port_lock_irqsave(&up->port, &flags);
1222
1219
1223
	/*
1220
	/*
1224
	 * First save the IER then disable the interrupts
1221
	 * First save the IER then disable the interrupts
Lines 1245-1252 serial_omap_console_write(struct console *co, const char *s, Link Here
1245
		check_modem_status(up);
1242
		check_modem_status(up);
1246
1243
1247
	if (locked)
1244
	if (locked)
1248
		uart_port_unlock(&up->port);
1245
		uart_port_unlock_irqrestore(&up->port, flags);
1249
	local_irq_restore(flags);
1250
}
1246
}
1251
1247
1252
static int __init
1248
static int __init
(-)a/drivers/tty/tty_io.c (-2 / +9 lines)
Lines 3544-3551 static ssize_t show_cons_active(struct device *dev, Link Here
3544
	for_each_console(c) {
3544
	for_each_console(c) {
3545
		if (!c->device)
3545
		if (!c->device)
3546
			continue;
3546
			continue;
3547
		if (!c->write)
3547
		if (c->flags & CON_NBCON) {
3548
			continue;
3548
			if (!c->write_atomic &&
3549
			    !(c->write_thread && c->kthread)) {
3550
				continue;
3551
			}
3552
		} else {
3553
			if (!c->write)
3554
				continue;
3555
		}
3549
		if ((c->flags & CON_ENABLED) == 0)
3556
		if ((c->flags & CON_ENABLED) == 0)
3550
			continue;
3557
			continue;
3551
		cs[i++] = c;
3558
		cs[i++] = c;
(-)a/fs/proc/consoles.c (-3 / +11 lines)
Lines 21-32 static int show_console_dev(struct seq_file *m, void *v) Link Here
21
		{ CON_ENABLED,		'E' },
21
		{ CON_ENABLED,		'E' },
22
		{ CON_CONSDEV,		'C' },
22
		{ CON_CONSDEV,		'C' },
23
		{ CON_BOOT,		'B' },
23
		{ CON_BOOT,		'B' },
24
		{ CON_NBCON,		'N' },
24
		{ CON_PRINTBUFFER,	'p' },
25
		{ CON_PRINTBUFFER,	'p' },
25
		{ CON_BRL,		'b' },
26
		{ CON_BRL,		'b' },
26
		{ CON_ANYTIME,		'a' },
27
		{ CON_ANYTIME,		'a' },
27
	};
28
	};
28
	char flags[ARRAY_SIZE(con_flags) + 1];
29
	char flags[ARRAY_SIZE(con_flags) + 1];
29
	struct console *con = v;
30
	struct console *con = v;
31
	char con_write = '-';
30
	unsigned int a;
32
	unsigned int a;
31
	dev_t dev = 0;
33
	dev_t dev = 0;
32
34
Lines 57-65 static int show_console_dev(struct seq_file *m, void *v) Link Here
57
	seq_setwidth(m, 21 - 1);
59
	seq_setwidth(m, 21 - 1);
58
	seq_printf(m, "%s%d", con->name, con->index);
60
	seq_printf(m, "%s%d", con->name, con->index);
59
	seq_pad(m, ' ');
61
	seq_pad(m, ' ');
60
	seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-',
62
	if (con->flags & CON_NBCON) {
61
			con->write ? 'W' : '-', con->unblank ? 'U' : '-',
63
		if (con->write_atomic || con->write_thread)
62
			flags);
64
			con_write = 'W';
65
	} else {
66
		if (con->write)
67
			con_write = 'W';
68
	}
69
	seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con_write,
70
		   con->unblank ? 'U' : '-', flags);
63
	if (dev)
71
	if (dev)
64
		seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
72
		seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
65
73
(-)a/include/linux/bottom_half.h (+2 lines)
Lines 35-42 static inline void local_bh_enable(void) Link Here
35
35
36
#ifdef CONFIG_PREEMPT_RT
36
#ifdef CONFIG_PREEMPT_RT
37
extern bool local_bh_blocked(void);
37
extern bool local_bh_blocked(void);
38
extern void softirq_preempt(void);
38
#else
39
#else
39
static inline bool local_bh_blocked(void) { return false; }
40
static inline bool local_bh_blocked(void) { return false; }
41
static inline void softirq_preempt(void) { }
40
#endif
42
#endif
41
43
42
#endif /* _LINUX_BH_H */
44
#endif /* _LINUX_BH_H */
(-)a/include/linux/console.h (+23 lines)
Lines 16-22 Link Here
16
16
17
#include <linux/atomic.h>
17
#include <linux/atomic.h>
18
#include <linux/bits.h>
18
#include <linux/bits.h>
19
#include <linux/irq_work.h>
19
#include <linux/rculist.h>
20
#include <linux/rculist.h>
21
#include <linux/rcuwait.h>
20
#include <linux/types.h>
22
#include <linux/types.h>
21
23
22
struct vc_data;
24
struct vc_data;
Lines 303-311 struct nbcon_write_context { Link Here
303
 * @node:		hlist node for the console list
305
 * @node:		hlist node for the console list
304
 *
306
 *
305
 * @write_atomic:	Write callback for atomic context
307
 * @write_atomic:	Write callback for atomic context
308
 * @write_thread:	Write callback for non-atomic context
309
 * @driver_enter:	Callback to begin synchronization with driver code
310
 * @driver_exit:	Callback to finish synchronization with driver code
306
 * @nbcon_state:	State for nbcon consoles
311
 * @nbcon_state:	State for nbcon consoles
307
 * @nbcon_seq:		Sequence number of the next record for nbcon to print
312
 * @nbcon_seq:		Sequence number of the next record for nbcon to print
308
 * @pbufs:		Pointer to nbcon private buffer
313
 * @pbufs:		Pointer to nbcon private buffer
314
 * @locked_port:	True, if the port lock is locked by nbcon
315
 * @kthread:		Printer kthread for this console
316
 * @rcuwait:		RCU-safe wait object for @kthread waking
317
 * @irq_work:		Defer @kthread waking to IRQ work context
309
 */
318
 */
310
struct console {
319
struct console {
311
	char			name[16];
320
	char			name[16];
Lines 329-337 struct console { Link Here
329
	/* nbcon console specific members */
338
	/* nbcon console specific members */
330
	bool			(*write_atomic)(struct console *con,
339
	bool			(*write_atomic)(struct console *con,
331
						struct nbcon_write_context *wctxt);
340
						struct nbcon_write_context *wctxt);
341
	bool			(*write_thread)(struct console *con,
342
						struct nbcon_write_context *wctxt);
343
	void			(*driver_enter)(struct console *con, unsigned long *flags);
344
	void			(*driver_exit)(struct console *con, unsigned long flags);
332
	atomic_t		__private nbcon_state;
345
	atomic_t		__private nbcon_state;
333
	atomic_long_t		__private nbcon_seq;
346
	atomic_long_t		__private nbcon_seq;
334
	struct printk_buffers	*pbufs;
347
	struct printk_buffers	*pbufs;
348
	bool			locked_port;
349
	struct task_struct	*kthread;
350
	struct rcuwait		rcuwait;
351
	struct irq_work		irq_work;
335
};
352
};
336
353
337
#ifdef CONFIG_LOCKDEP
354
#ifdef CONFIG_LOCKDEP
Lines 459-471 static inline bool console_is_registered(const struct console *con) Link Here
459
	hlist_for_each_entry(con, &console_list, node)
476
	hlist_for_each_entry(con, &console_list, node)
460
477
461
#ifdef CONFIG_PRINTK
478
#ifdef CONFIG_PRINTK
479
extern void nbcon_cpu_emergency_enter(void);
480
extern void nbcon_cpu_emergency_exit(void);
462
extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt);
481
extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt);
463
extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt);
482
extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt);
464
extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt);
483
extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt);
484
extern void nbcon_reacquire(struct nbcon_write_context *wctxt);
465
#else
485
#else
486
static inline void nbcon_cpu_emergency_enter(void) { }
487
static inline void nbcon_cpu_emergency_exit(void) { }
466
static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; }
488
static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; }
467
static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; }
489
static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; }
468
static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; }
490
static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; }
491
static inline void nbcon_reacquire(struct nbcon_write_context *wctxt) { }
469
#endif
492
#endif
470
493
471
extern int console_set_on_cmdline;
494
extern int console_set_on_cmdline;
(-)a/include/linux/entry-common.h (-1 / +1 lines)
Lines 60-66 Link Here
60
#define EXIT_TO_USER_MODE_WORK						\
60
#define EXIT_TO_USER_MODE_WORK						\
61
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
61
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
62
	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
62
	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
63
	 ARCH_EXIT_TO_USER_MODE_WORK)
63
	 _TIF_NEED_RESCHED_LAZY | ARCH_EXIT_TO_USER_MODE_WORK)
64
64
65
/**
65
/**
66
 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
66
 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
(-)a/include/linux/entry-kvm.h (-1 / +1 lines)
Lines 18-24 Link Here
18
18
19
#define XFER_TO_GUEST_MODE_WORK						\
19
#define XFER_TO_GUEST_MODE_WORK						\
20
	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL |	\
20
	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL |	\
21
	 _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
21
	 _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED_LAZY | ARCH_XFER_TO_GUEST_MODE_WORK)
22
22
23
struct kvm_vcpu;
23
struct kvm_vcpu;
24
24
(-)a/include/linux/interrupt.h (+29 lines)
Lines 609-614 extern void __raise_softirq_irqoff(unsigned int nr); Link Here
609
extern void raise_softirq_irqoff(unsigned int nr);
609
extern void raise_softirq_irqoff(unsigned int nr);
610
extern void raise_softirq(unsigned int nr);
610
extern void raise_softirq(unsigned int nr);
611
611
612
#ifdef CONFIG_PREEMPT_RT
613
DECLARE_PER_CPU(struct task_struct *, timersd);
614
DECLARE_PER_CPU(unsigned long, pending_timer_softirq);
615
616
extern void raise_timer_softirq(void);
617
extern void raise_hrtimer_softirq(void);
618
619
static inline unsigned int local_pending_timers(void)
620
{
621
        return __this_cpu_read(pending_timer_softirq);
622
}
623
624
#else
625
static inline void raise_timer_softirq(void)
626
{
627
	raise_softirq(TIMER_SOFTIRQ);
628
}
629
630
static inline void raise_hrtimer_softirq(void)
631
{
632
	raise_softirq_irqoff(HRTIMER_SOFTIRQ);
633
}
634
635
static inline unsigned int local_pending_timers(void)
636
{
637
        return local_softirq_pending();
638
}
639
#endif
640
612
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
641
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
613
642
614
static inline struct task_struct *this_cpu_ksoftirqd(void)
643
static inline struct task_struct *this_cpu_ksoftirqd(void)
(-)a/include/linux/netdevice.h (+4 lines)
Lines 3288-3294 struct softnet_data { Link Here
3288
	int			defer_count;
3288
	int			defer_count;
3289
	int			defer_ipi_scheduled;
3289
	int			defer_ipi_scheduled;
3290
	struct sk_buff		*defer_list;
3290
	struct sk_buff		*defer_list;
3291
#ifndef CONFIG_PREEMPT_RT
3291
	call_single_data_t	defer_csd;
3292
	call_single_data_t	defer_csd;
3293
#else
3294
	struct work_struct	defer_work;
3295
#endif
3292
};
3296
};
3293
3297
3294
static inline void input_queue_head_incr(struct softnet_data *sd)
3298
static inline void input_queue_head_incr(struct softnet_data *sd)
(-)a/include/linux/preempt.h (-2 / +8 lines)
Lines 230-244 do { \ Link Here
230
#define preempt_enable() \
230
#define preempt_enable() \
231
do { \
231
do { \
232
	barrier(); \
232
	barrier(); \
233
	if (unlikely(preempt_count_dec_and_test())) \
233
	if (unlikely(preempt_count_dec_and_test())) { \
234
		instrumentation_begin(); \
234
		__preempt_schedule(); \
235
		__preempt_schedule(); \
236
		instrumentation_end(); \
237
	} \
235
} while (0)
238
} while (0)
236
239
237
#define preempt_enable_notrace() \
240
#define preempt_enable_notrace() \
238
do { \
241
do { \
239
	barrier(); \
242
	barrier(); \
240
	if (unlikely(__preempt_count_dec_and_test())) \
243
	if (unlikely(__preempt_count_dec_and_test())) { \
244
		instrumentation_begin(); \
241
		__preempt_schedule_notrace(); \
245
		__preempt_schedule_notrace(); \
246
		instrumentation_end(); \
247
	} \
242
} while (0)
248
} while (0)
243
249
244
#define preempt_check_resched() \
250
#define preempt_check_resched() \
(-)a/include/linux/printk.h (-2 / +28 lines)
Lines 9-14 Link Here
9
#include <linux/ratelimit_types.h>
9
#include <linux/ratelimit_types.h>
10
#include <linux/once_lite.h>
10
#include <linux/once_lite.h>
11
11
12
struct uart_port;
13
12
extern const char linux_banner[];
14
extern const char linux_banner[];
13
extern const char linux_proc_banner[];
15
extern const char linux_proc_banner[];
14
16
Lines 159-171 __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...); Link Here
159
161
160
extern void __printk_safe_enter(void);
162
extern void __printk_safe_enter(void);
161
extern void __printk_safe_exit(void);
163
extern void __printk_safe_exit(void);
164
extern void __printk_deferred_enter(void);
165
extern void __printk_deferred_exit(void);
166
162
/*
167
/*
163
 * The printk_deferred_enter/exit macros are available only as a hack for
168
 * The printk_deferred_enter/exit macros are available only as a hack for
164
 * some code paths that need to defer all printk console printing. Interrupts
169
 * some code paths that need to defer all printk console printing. Interrupts
165
 * must be disabled for the deferred duration.
170
 * must be disabled for the deferred duration.
166
 */
171
 */
167
#define printk_deferred_enter __printk_safe_enter
172
#define printk_deferred_enter() __printk_deferred_enter()
168
#define printk_deferred_exit __printk_safe_exit
173
#define printk_deferred_exit() __printk_deferred_exit()
169
174
170
/*
175
/*
171
 * Please don't use printk_ratelimit(), because it shares ratelimiting state
176
 * Please don't use printk_ratelimit(), because it shares ratelimiting state
Lines 192-197 void show_regs_print_info(const char *log_lvl); Link Here
192
extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
197
extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
193
extern asmlinkage void dump_stack(void) __cold;
198
extern asmlinkage void dump_stack(void) __cold;
194
void printk_trigger_flush(void);
199
void printk_trigger_flush(void);
200
void printk_legacy_allow_panic_sync(void);
201
extern void nbcon_acquire(struct uart_port *up);
202
extern void nbcon_release(struct uart_port *up);
203
void nbcon_atomic_flush_unsafe(void);
195
#else
204
#else
196
static inline __printf(1, 0)
205
static inline __printf(1, 0)
197
int vprintk(const char *s, va_list args)
206
int vprintk(const char *s, va_list args)
Lines 271-276 static inline void dump_stack(void) Link Here
271
static inline void printk_trigger_flush(void)
280
static inline void printk_trigger_flush(void)
272
{
281
{
273
}
282
}
283
284
static inline void printk_legacy_allow_panic_sync(void)
285
{
286
}
287
288
static inline void nbcon_acquire(struct uart_port *up)
289
{
290
}
291
292
static inline void nbcon_release(struct uart_port *up)
293
{
294
}
295
296
static inline void nbcon_atomic_flush_unsafe(void)
297
{
298
}
299
274
#endif
300
#endif
275
301
276
#ifdef CONFIG_SMP
302
#ifdef CONFIG_SMP
(-)a/include/linux/sched.h (-5 / +8 lines)
Lines 1910-1915 static inline int dl_task_check_affinity(struct task_struct *p, const struct cpu Link Here
1910
}
1910
}
1911
#endif
1911
#endif
1912
1912
1913
extern bool task_is_pi_boosted(const struct task_struct *p);
1913
extern int yield_to(struct task_struct *p, bool preempt);
1914
extern int yield_to(struct task_struct *p, bool preempt);
1914
extern void set_user_nice(struct task_struct *p, long nice);
1915
extern void set_user_nice(struct task_struct *p, long nice);
1915
extern int task_prio(const struct task_struct *p);
1916
extern int task_prio(const struct task_struct *p);
Lines 2054-2070 static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, Link Here
2054
	update_ti_thread_flag(task_thread_info(tsk), flag, value);
2055
	update_ti_thread_flag(task_thread_info(tsk), flag, value);
2055
}
2056
}
2056
2057
2057
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
2058
static inline bool test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
2058
{
2059
{
2059
	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
2060
	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
2060
}
2061
}
2061
2062
2062
static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
2063
static inline bool test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
2063
{
2064
{
2064
	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
2065
	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
2065
}
2066
}
2066
2067
2067
static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
2068
static inline bool test_tsk_thread_flag(struct task_struct *tsk, int flag)
2068
{
2069
{
2069
	return test_ti_thread_flag(task_thread_info(tsk), flag);
2070
	return test_ti_thread_flag(task_thread_info(tsk), flag);
2070
}
2071
}
Lines 2077-2085 static inline void set_tsk_need_resched(struct task_struct *tsk) Link Here
2077
static inline void clear_tsk_need_resched(struct task_struct *tsk)
2078
static inline void clear_tsk_need_resched(struct task_struct *tsk)
2078
{
2079
{
2079
	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2080
	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2081
	if (IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO))
2082
		clear_tsk_thread_flag(tsk, TIF_NEED_RESCHED_LAZY);
2080
}
2083
}
2081
2084
2082
static inline int test_tsk_need_resched(struct task_struct *tsk)
2085
static inline bool test_tsk_need_resched(struct task_struct *tsk)
2083
{
2086
{
2084
	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
2087
	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
2085
}
2088
}
Lines 2260-2266 static inline int rwlock_needbreak(rwlock_t *lock) Link Here
2260
2263
2261
static __always_inline bool need_resched(void)
2264
static __always_inline bool need_resched(void)
2262
{
2265
{
2263
	return unlikely(tif_need_resched());
2266
	return unlikely(tif_need_resched_lazy() || tif_need_resched());
2264
}
2267
}
2265
2268
2266
/*
2269
/*
(-)a/include/linux/sched/idle.h (-4 / +4 lines)
Lines 63-69 static __always_inline bool __must_check current_set_polling_and_test(void) Link Here
63
	 */
63
	 */
64
	smp_mb__after_atomic();
64
	smp_mb__after_atomic();
65
65
66
	return unlikely(tif_need_resched());
66
	return unlikely(need_resched());
67
}
67
}
68
68
69
static __always_inline bool __must_check current_clr_polling_and_test(void)
69
static __always_inline bool __must_check current_clr_polling_and_test(void)
Lines 76-82 static __always_inline bool __must_check current_clr_polling_and_test(void) Link Here
76
	 */
76
	 */
77
	smp_mb__after_atomic();
77
	smp_mb__after_atomic();
78
78
79
	return unlikely(tif_need_resched());
79
	return unlikely(need_resched());
80
}
80
}
81
81
82
#else
82
#else
Lines 85-95 static inline void __current_clr_polling(void) { } Link Here
85
85
86
static inline bool __must_check current_set_polling_and_test(void)
86
static inline bool __must_check current_set_polling_and_test(void)
87
{
87
{
88
	return unlikely(tif_need_resched());
88
	return unlikely(need_resched());
89
}
89
}
90
static inline bool __must_check current_clr_polling_and_test(void)
90
static inline bool __must_check current_clr_polling_and_test(void)
91
{
91
{
92
	return unlikely(tif_need_resched());
92
	return unlikely(need_resched());
93
}
93
}
94
#endif
94
#endif
95
95
(-)a/include/linux/serial_8250.h (+6 lines)
Lines 153-158 struct uart_8250_port { Link Here
153
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
153
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
154
	unsigned char		msr_saved_flags;
154
	unsigned char		msr_saved_flags;
155
155
156
	bool			console_newline_needed;
157
156
	struct uart_8250_dma	*dma;
158
	struct uart_8250_dma	*dma;
157
	const struct uart_8250_ops *ops;
159
	const struct uart_8250_ops *ops;
158
160
Lines 204-209 void serial8250_init_port(struct uart_8250_port *up); Link Here
204
void serial8250_set_defaults(struct uart_8250_port *up);
206
void serial8250_set_defaults(struct uart_8250_port *up);
205
void serial8250_console_write(struct uart_8250_port *up, const char *s,
207
void serial8250_console_write(struct uart_8250_port *up, const char *s,
206
			      unsigned int count);
208
			      unsigned int count);
209
bool serial8250_console_write_atomic(struct uart_8250_port *up,
210
				     struct nbcon_write_context *wctxt);
211
bool serial8250_console_write_thread(struct uart_8250_port *up,
212
				     struct nbcon_write_context *wctxt);
207
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
213
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
208
int serial8250_console_exit(struct uart_port *port);
214
int serial8250_console_exit(struct uart_port *port);
209
215
(-)a/include/linux/serial_core.h (-2 / +28 lines)
Lines 595-600 struct uart_port { Link Here
595
static inline void uart_port_lock(struct uart_port *up)
595
static inline void uart_port_lock(struct uart_port *up)
596
{
596
{
597
	spin_lock(&up->lock);
597
	spin_lock(&up->lock);
598
	nbcon_acquire(up);
598
}
599
}
599
600
600
/**
601
/**
Lines 604-609 static inline void uart_port_lock(struct uart_port *up) Link Here
604
static inline void uart_port_lock_irq(struct uart_port *up)
605
static inline void uart_port_lock_irq(struct uart_port *up)
605
{
606
{
606
	spin_lock_irq(&up->lock);
607
	spin_lock_irq(&up->lock);
608
	nbcon_acquire(up);
607
}
609
}
608
610
609
/**
611
/**
Lines 614-619 static inline void uart_port_lock_irq(struct uart_port *up) Link Here
614
static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
616
static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
615
{
617
{
616
	spin_lock_irqsave(&up->lock, *flags);
618
	spin_lock_irqsave(&up->lock, *flags);
619
	nbcon_acquire(up);
617
}
620
}
618
621
619
/**
622
/**
Lines 624-630 static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f Link Here
624
 */
627
 */
625
static inline bool uart_port_trylock(struct uart_port *up)
628
static inline bool uart_port_trylock(struct uart_port *up)
626
{
629
{
627
	return spin_trylock(&up->lock);
630
	if (!spin_trylock(&up->lock))
631
		return false;
632
633
	nbcon_acquire(up);
634
	return true;
628
}
635
}
629
636
630
/**
637
/**
Lines 636-642 static inline bool uart_port_trylock(struct uart_port *up) Link Here
636
 */
643
 */
637
static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags)
644
static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags)
638
{
645
{
639
	return spin_trylock_irqsave(&up->lock, *flags);
646
	if (!spin_trylock_irqsave(&up->lock, *flags))
647
		return false;
648
649
	nbcon_acquire(up);
650
	return true;
640
}
651
}
641
652
642
/**
653
/**
Lines 645-650 static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long Link Here
645
 */
656
 */
646
static inline void uart_port_unlock(struct uart_port *up)
657
static inline void uart_port_unlock(struct uart_port *up)
647
{
658
{
659
	nbcon_release(up);
648
	spin_unlock(&up->lock);
660
	spin_unlock(&up->lock);
649
}
661
}
650
662
Lines 654-659 static inline void uart_port_unlock(struct uart_port *up) Link Here
654
 */
666
 */
655
static inline void uart_port_unlock_irq(struct uart_port *up)
667
static inline void uart_port_unlock_irq(struct uart_port *up)
656
{
668
{
669
	nbcon_release(up);
657
	spin_unlock_irq(&up->lock);
670
	spin_unlock_irq(&up->lock);
658
}
671
}
659
672
Lines 663-668 static inline void uart_port_unlock_irq(struct uart_port *up) Link Here
663
 * @flags:	The saved interrupt flags for restore
676
 * @flags:	The saved interrupt flags for restore
664
 */
677
 */
665
static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
678
static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
679
{
680
	nbcon_release(up);
681
	spin_unlock_irqrestore(&up->lock, flags);
682
}
683
684
/* Only for use in the console->driver_enter() callback. */
685
static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
686
{
687
	spin_lock_irqsave(&up->lock, *flags);
688
}
689
690
/* Only for use in the console->driver_exit() callback. */
691
static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
666
{
692
{
667
	spin_unlock_irqrestore(&up->lock, flags);
693
	spin_unlock_irqrestore(&up->lock, flags);
668
}
694
}
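
The hunks above fold nbcon console ownership handling into the uart port-lock wrappers. As a rough, hedged illustration of what a caller sees (not part of the patch; the handler name and the FIFO comment are made up, only the wrapper calls come from the hunks above), a serial driver interrupt path written against these wrappers looks like this:

#include <linux/interrupt.h>
#include <linux/serial_core.h>

/*
 * Hypothetical UART interrupt handler. Taking the port lock now also
 * acquires nbcon ownership and marks the console unsafe; dropping the
 * lock marks the console safe again and releases ownership.
 */
static irqreturn_t demo_uart_isr(int irq, void *dev_id)
{
	struct uart_port *port = dev_id;
	unsigned long flags;

	uart_port_lock_irqsave(port, &flags);	/* spin_lock_irqsave() + nbcon_acquire() */

	/* ... drain the RX FIFO and push characters to the tty layer ... */

	uart_port_unlock_irqrestore(port, flags); /* nbcon_release() + spin_unlock_irqrestore() */

	return IRQ_HANDLED;
}

Existing drivers need no source change as long as they already go through the uart_port_lock*() wrappers instead of touching up->lock directly.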
(-)a/include/linux/thread_info.h (+24 lines)
Lines 59-64 enum syscall_work_bit { Link Here
59
59
60
#include <asm/thread_info.h>
60
#include <asm/thread_info.h>
61
61
62
#ifdef CONFIG_PREEMPT_BUILD_AUTO
63
# define TIF_NEED_RESCHED_LAZY		TIF_ARCH_RESCHED_LAZY
64
# define _TIF_NEED_RESCHED_LAZY		_TIF_ARCH_RESCHED_LAZY
65
# define TIF_NEED_RESCHED_LAZY_OFFSET	(TIF_NEED_RESCHED_LAZY - TIF_NEED_RESCHED)
66
#else
67
# define TIF_NEED_RESCHED_LAZY		TIF_NEED_RESCHED
68
# define _TIF_NEED_RESCHED_LAZY		_TIF_NEED_RESCHED
69
# define TIF_NEED_RESCHED_LAZY_OFFSET	0
70
#endif
71
62
#ifdef __KERNEL__
72
#ifdef __KERNEL__
63
73
64
#ifndef arch_set_restart_data
74
#ifndef arch_set_restart_data
Lines 185-190 static __always_inline bool tif_need_resched(void) Link Here
185
			     (unsigned long *)(&current_thread_info()->flags));
195
			     (unsigned long *)(&current_thread_info()->flags));
186
}
196
}
187
197
198
static __always_inline bool tif_need_resched_lazy(void)
199
{
200
	return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) &&
201
		arch_test_bit(TIF_NEED_RESCHED_LAZY,
202
			      (unsigned long *)(&current_thread_info()->flags));
203
}
204
188
#else
205
#else
189
206
190
static __always_inline bool tif_need_resched(void)
207
static __always_inline bool tif_need_resched(void)
Lines 193-198 static __always_inline bool tif_need_resched(void) Link Here
193
			(unsigned long *)(&current_thread_info()->flags));
210
			(unsigned long *)(&current_thread_info()->flags));
194
}
211
}
195
212
213
static __always_inline bool tif_need_resched_lazy(void)
214
{
215
	return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) &&
216
		test_bit(TIF_NEED_RESCHED_LAZY,
217
			 (unsigned long *)(&current_thread_info()->flags));
218
}
219
196
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
220
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
197
221
198
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
222
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
(-)a/include/linux/trace_events.h (-4 / +4 lines)
Lines 178-185 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); Link Here
178
178
179
enum trace_flag_type {
179
enum trace_flag_type {
180
	TRACE_FLAG_IRQS_OFF		= 0x01,
180
	TRACE_FLAG_IRQS_OFF		= 0x01,
181
	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
181
	TRACE_FLAG_NEED_RESCHED		= 0x02,
182
	TRACE_FLAG_NEED_RESCHED		= 0x04,
182
	TRACE_FLAG_NEED_RESCHED_LAZY	= 0x04,
183
	TRACE_FLAG_HARDIRQ		= 0x08,
183
	TRACE_FLAG_HARDIRQ		= 0x08,
184
	TRACE_FLAG_SOFTIRQ		= 0x10,
184
	TRACE_FLAG_SOFTIRQ		= 0x10,
185
	TRACE_FLAG_PREEMPT_RESCHED	= 0x20,
185
	TRACE_FLAG_PREEMPT_RESCHED	= 0x20,
Lines 205-215 static inline unsigned int tracing_gen_ctx(void) Link Here
205
205
206
static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
206
static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
207
{
207
{
208
	return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
208
	return tracing_gen_ctx_irq_test(0);
209
}
209
}
210
static inline unsigned int tracing_gen_ctx(void)
210
static inline unsigned int tracing_gen_ctx(void)
211
{
211
{
212
	return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
212
	return tracing_gen_ctx_irq_test(0);
213
}
213
}
214
#endif
214
#endif
215
215
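
Note that the hunk above retires TRACE_FLAG_IRQS_NOSUPPORT and moves NEED_RESCHED down to 0x02 so that 0x04 can carry the new lazy bit; tooling that decodes the raw flags byte must follow. A hedged sketch of such a decode (the function and the single-character encoding are illustrative, not the kernel's actual latency-format output):

#include <linux/trace_events.h>

/*
 * Illustrative decode of the remapped need-resched bits in a trace
 * entry's flags byte: 0x02 is now TRACE_FLAG_NEED_RESCHED and 0x04 is
 * TRACE_FLAG_NEED_RESCHED_LAZY.
 */
static char demo_resched_char(unsigned char flags)
{
	bool need = flags & TRACE_FLAG_NEED_RESCHED;
	bool lazy = flags & TRACE_FLAG_NEED_RESCHED_LAZY;

	if (need && lazy)
		return 'B';	/* both immediate and lazy requested */
	if (need)
		return 'N';	/* immediate reschedule requested */
	if (lazy)
		return 'L';	/* lazy reschedule requested */
	return '.';		/* nothing pending */
}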
(-)a/kernel/Kconfig.preempt (-1 / +16 lines)
Lines 11-16 config PREEMPT_BUILD Link Here
11
	select PREEMPTION
11
	select PREEMPTION
12
	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
12
	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
13
13
14
config PREEMPT_BUILD_AUTO
15
	bool
16
	select PREEMPT_BUILD
17
18
config HAVE_PREEMPT_AUTO
19
	bool
20
14
choice
21
choice
15
	prompt "Preemption Model"
22
	prompt "Preemption Model"
16
	default PREEMPT_NONE
23
	default PREEMPT_NONE
Lines 67-75 config PREEMPT Link Here
67
	  embedded system with latency requirements in the milliseconds
74
	  embedded system with latency requirements in the milliseconds
68
	  range.
75
	  range.
69
76
77
config PREEMPT_AUTO
78
	bool "Automagic preemption mode with runtime tweaking support"
79
	depends on HAVE_PREEMPT_AUTO
80
	select PREEMPT_BUILD_AUTO
81
	help
82
	  This option builds a fully preemptible kernel (via PREEMPT_BUILD_AUTO)
	  but lets the scheduler decide how urgently to reschedule: ordinary
	  wakeups only set the lazy TIF_NEED_RESCHED_LAZY flag, which is acted
	  upon at the next tick or return to user space, while time-critical
	  events still trigger immediate preemption. The effective behaviour
	  can be tweaked at runtime; this model is mutually exclusive with
	  PREEMPT_DYNAMIC.
83
70
config PREEMPT_RT
84
config PREEMPT_RT
71
	bool "Fully Preemptible Kernel (Real-Time)"
85
	bool "Fully Preemptible Kernel (Real-Time)"
72
	depends on EXPERT && ARCH_SUPPORTS_RT
86
	depends on EXPERT && ARCH_SUPPORTS_RT
87
	select PREEMPT_BUILD_AUTO if HAVE_PREEMPT_AUTO
73
	select PREEMPTION
88
	select PREEMPTION
74
	help
89
	help
75
	  This option turns the kernel into a real-time kernel by replacing
90
	  This option turns the kernel into a real-time kernel by replacing
Lines 95-101 config PREEMPTION Link Here
95
110
96
config PREEMPT_DYNAMIC
111
config PREEMPT_DYNAMIC
97
	bool "Preemption behaviour defined on boot"
112
	bool "Preemption behaviour defined on boot"
98
	depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
113
	depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT && !PREEMPT_AUTO
99
	select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
114
	select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
100
	select PREEMPT_BUILD
115
	select PREEMPT_BUILD
101
	default y if HAVE_PREEMPT_DYNAMIC_CALL
116
	default y if HAVE_PREEMPT_DYNAMIC_CALL
(-)a/kernel/entry/common.c (-2 / +2 lines)
Lines 155-161 static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, Link Here
155
155
156
		local_irq_enable_exit_to_user(ti_work);
156
		local_irq_enable_exit_to_user(ti_work);
157
157
158
		if (ti_work & _TIF_NEED_RESCHED)
158
		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
159
			schedule();
159
			schedule();
160
160
161
		if (ti_work & _TIF_UPROBE)
161
		if (ti_work & _TIF_UPROBE)
Lines 385-391 void raw_irqentry_exit_cond_resched(void) Link Here
385
		rcu_irq_exit_check_preempt();
385
		rcu_irq_exit_check_preempt();
386
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
386
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
387
			WARN_ON_ONCE(!on_thread_stack());
387
			WARN_ON_ONCE(!on_thread_stack());
388
		if (need_resched())
388
		if (test_tsk_need_resched(current))
389
			preempt_schedule_irq();
389
			preempt_schedule_irq();
390
	}
390
	}
391
}
391
}
(-)a/kernel/entry/kvm.c (-1 / +1 lines)
Lines 13-19 static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) Link Here
13
			return -EINTR;
13
			return -EINTR;
14
		}
14
		}
15
15
16
		if (ti_work & _TIF_NEED_RESCHED)
16
		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
17
			schedule();
17
			schedule();
18
18
19
		if (ti_work & _TIF_NOTIFY_RESUME)
19
		if (ti_work & _TIF_NOTIFY_RESUME)
(-)a/kernel/ksysfs.c (+12 lines)
Lines 179-184 KERNEL_ATTR_RO(crash_elfcorehdr_size); Link Here
179
179
180
#endif /* CONFIG_CRASH_CORE */
180
#endif /* CONFIG_CRASH_CORE */
181
181
182
#if defined(CONFIG_PREEMPT_RT)
183
static ssize_t realtime_show(struct kobject *kobj,
184
			     struct kobj_attribute *attr, char *buf)
185
{
186
	return sprintf(buf, "%d\n", 1);
187
}
188
KERNEL_ATTR_RO(realtime);
189
#endif
190
182
/* whether file capabilities are enabled */
191
/* whether file capabilities are enabled */
183
static ssize_t fscaps_show(struct kobject *kobj,
192
static ssize_t fscaps_show(struct kobject *kobj,
184
				  struct kobj_attribute *attr, char *buf)
193
				  struct kobj_attribute *attr, char *buf)
Lines 274-279 static struct attribute * kernel_attrs[] = { Link Here
274
#ifndef CONFIG_TINY_RCU
283
#ifndef CONFIG_TINY_RCU
275
	&rcu_expedited_attr.attr,
284
	&rcu_expedited_attr.attr,
276
	&rcu_normal_attr.attr,
285
	&rcu_normal_attr.attr,
286
#endif
287
#ifdef CONFIG_PREEMPT_RT
288
	&realtime_attr.attr,
277
#endif
289
#endif
278
	NULL
290
	NULL
279
};
291
};
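
The new /sys/kernel/realtime attribute gives userspace a simple, stable way to detect a PREEMPT_RT kernel: the file only exists when CONFIG_PREEMPT_RT is set, and then always reads 1. A small userspace sketch of the check (everything beyond the path itself is illustrative):

#include <stdbool.h>
#include <stdio.h>

/*
 * Returns true on a PREEMPT_RT kernel. The attribute is absent on
 * non-RT kernels, so a failed open is treated as "not realtime".
 */
static bool kernel_is_realtime(void)
{
	FILE *f = fopen("/sys/kernel/realtime", "r");
	int val = 0;

	if (!f)
		return false;
	if (fscanf(f, "%d", &val) != 1)
		val = 0;
	fclose(f);

	return val == 1;
}

int main(void)
{
	printf("PREEMPT_RT: %s\n", kernel_is_realtime() ? "yes" : "no");
	return 0;
}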
(-)a/kernel/locking/lockdep.c (+5 lines)
Lines 56-61 Link Here
56
#include <linux/kprobes.h>
56
#include <linux/kprobes.h>
57
#include <linux/lockdep.h>
57
#include <linux/lockdep.h>
58
#include <linux/context_tracking.h>
58
#include <linux/context_tracking.h>
59
#include <linux/console.h>
59
60
60
#include <asm/sections.h>
61
#include <asm/sections.h>
61
62
Lines 3971-3976 print_usage_bug(struct task_struct *curr, struct held_lock *this, Link Here
3971
	if (!debug_locks_off() || debug_locks_silent)
3972
	if (!debug_locks_off() || debug_locks_silent)
3972
		return;
3973
		return;
3973
3974
3975
	nbcon_cpu_emergency_enter();
3976
3974
	pr_warn("\n");
3977
	pr_warn("\n");
3975
	pr_warn("================================\n");
3978
	pr_warn("================================\n");
3976
	pr_warn("WARNING: inconsistent lock state\n");
3979
	pr_warn("WARNING: inconsistent lock state\n");
Lines 3999-4004 print_usage_bug(struct task_struct *curr, struct held_lock *this, Link Here
3999
4002
4000
	pr_warn("\nstack backtrace:\n");
4003
	pr_warn("\nstack backtrace:\n");
4001
	dump_stack();
4004
	dump_stack();
4005
4006
	nbcon_cpu_emergency_exit();
4002
}
4007
}
4003
4008
4004
/*
4009
/*
(-)a/kernel/panic.c (+17 lines)
Lines 370-375 void panic(const char *fmt, ...) Link Here
370
	 */
370
	 */
371
	atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
371
	atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
372
372
373
	printk_legacy_allow_panic_sync();
374
373
	panic_print_sys_info(false);
375
	panic_print_sys_info(false);
374
376
375
	kmsg_dump(KMSG_DUMP_PANIC);
377
	kmsg_dump(KMSG_DUMP_PANIC);
Lines 446-451 void panic(const char *fmt, ...) Link Here
446
448
447
	/* Do not scroll important messages printed above */
449
	/* Do not scroll important messages printed above */
448
	suppress_printk = 1;
450
	suppress_printk = 1;
451
452
	/*
453
	 * The final messages may not have been printed if in a context that
454
	 * defers printing (such as NMI) and irq_work is not available.
455
	 * Explicitly flush the kernel log buffer one last time.
456
	 */
457
	console_flush_on_panic(CONSOLE_FLUSH_PENDING);
458
	nbcon_atomic_flush_unsafe();
459
449
	local_irq_enable();
460
	local_irq_enable();
450
	for (i = 0; ; i += PANIC_TIMER_STEP) {
461
	for (i = 0; ; i += PANIC_TIMER_STEP) {
451
		touch_softlockup_watchdog();
462
		touch_softlockup_watchdog();
Lines 623-628 bool oops_may_print(void) Link Here
623
 */
634
 */
624
void oops_enter(void)
635
void oops_enter(void)
625
{
636
{
637
	nbcon_cpu_emergency_enter();
626
	tracing_off();
638
	tracing_off();
627
	/* can't trust the integrity of the kernel anymore: */
639
	/* can't trust the integrity of the kernel anymore: */
628
	debug_locks_off();
640
	debug_locks_off();
Lines 645-650 void oops_exit(void) Link Here
645
{
657
{
646
	do_oops_enter_exit();
658
	do_oops_enter_exit();
647
	print_oops_end_marker();
659
	print_oops_end_marker();
660
	nbcon_cpu_emergency_exit();
648
	kmsg_dump(KMSG_DUMP_OOPS);
661
	kmsg_dump(KMSG_DUMP_OOPS);
649
}
662
}
650
663
Lines 656-661 struct warn_args { Link Here
656
void __warn(const char *file, int line, void *caller, unsigned taint,
669
void __warn(const char *file, int line, void *caller, unsigned taint,
657
	    struct pt_regs *regs, struct warn_args *args)
670
	    struct pt_regs *regs, struct warn_args *args)
658
{
671
{
672
	nbcon_cpu_emergency_enter();
673
659
	disable_trace_on_warning();
674
	disable_trace_on_warning();
660
675
661
	if (file)
676
	if (file)
Lines 686-691 void __warn(const char *file, int line, void *caller, unsigned taint, Link Here
686
701
687
	/* Just a warning, don't kill lockdep. */
702
	/* Just a warning, don't kill lockdep. */
688
	add_taint(taint, LOCKDEP_STILL_OK);
703
	add_taint(taint, LOCKDEP_STILL_OK);
704
705
	nbcon_cpu_emergency_exit();
689
}
706
}
690
707
691
#ifdef CONFIG_BUG
708
#ifdef CONFIG_BUG
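
The lockdep and panic hunks all follow one pattern: bracket a multi-line diagnostic dump with nbcon_cpu_emergency_enter()/nbcon_cpu_emergency_exit() so the whole report is stored to the ringbuffer first and flushed once, when the outermost section ends. A hedged sketch of applying the same bracketing to some other report path (the function is hypothetical; the declarations are assumed to come via linux/console.h, as suggested by the new include in the lockdep.c hunk):

#include <linux/console.h>
#include <linux/printk.h>

/*
 * Hypothetical diagnostic dump using the emergency-section pattern of
 * this series: no console output happens on this CPU while inside the
 * section, so the report lands in the ringbuffer as one block and is
 * flushed when the outermost nbcon_cpu_emergency_exit() runs.
 */
static void demo_report_bad_state(const char *what)
{
	nbcon_cpu_emergency_enter();

	pr_warn("================================\n");
	pr_warn("WARNING: inconsistent demo state: %s\n", what);
	pr_warn("================================\n");
	dump_stack();

	nbcon_cpu_emergency_exit();
}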
(-)a/kernel/printk/internal.h (+90 lines)
Lines 44-49 enum printk_info_flags { Link Here
44
};
44
};
45
45
46
extern struct printk_ringbuffer *prb;
46
extern struct printk_ringbuffer *prb;
47
extern bool printk_threads_enabled;
48
extern bool have_legacy_console;
49
extern bool have_boot_console;
50
51
/*
52
 * Specifies if the console lock/unlock dance is needed for console
53
 * printing. If @have_boot_console is true, the nbcon consoles will
54
 * be printed serially along with the legacy consoles because nbcon
55
 * consoles cannot print simultaneously with boot consoles.
56
 */
57
#define printing_via_unlock (have_legacy_console || have_boot_console)
47
58
48
__printf(4, 0)
59
__printf(4, 0)
49
int vprintk_store(int facility, int level,
60
int vprintk_store(int facility, int level,
Lines 71-82 void defer_console_output(void); Link Here
71
82
72
u16 printk_parse_prefix(const char *text, int *level,
83
u16 printk_parse_prefix(const char *text, int *level,
73
			enum printk_info_flags *flags);
84
			enum printk_info_flags *flags);
85
void console_lock_spinning_enable(void);
86
int console_lock_spinning_disable_and_check(int cookie);
74
87
75
u64 nbcon_seq_read(struct console *con);
88
u64 nbcon_seq_read(struct console *con);
76
void nbcon_seq_force(struct console *con, u64 seq);
89
void nbcon_seq_force(struct console *con, u64 seq);
77
bool nbcon_alloc(struct console *con);
90
bool nbcon_alloc(struct console *con);
78
void nbcon_init(struct console *con);
91
void nbcon_init(struct console *con);
79
void nbcon_free(struct console *con);
92
void nbcon_free(struct console *con);
93
enum nbcon_prio nbcon_get_default_prio(void);
94
void nbcon_atomic_flush_all(void);
95
bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie);
96
void nbcon_kthread_create(struct console *con);
97
void nbcon_wake_threads(void);
98
void nbcon_legacy_kthread_create(void);
99
100
/*
101
 * Check if the given console is currently capable and allowed to print
102
 * records. Note that this function does not consider the current context,
103
 * which can also play a role in deciding if @con can be used to print
104
 * records.
105
 */
106
static inline bool console_is_usable(struct console *con, short flags, bool use_atomic)
107
{
108
	if (!(flags & CON_ENABLED))
109
		return false;
110
111
	if ((flags & CON_SUSPENDED))
112
		return false;
113
114
	if (flags & CON_NBCON) {
115
		if (use_atomic) {
116
			if (!con->write_atomic)
117
				return false;
118
		} else {
119
			if (!con->write_thread || !con->kthread)
120
				return false;
121
		}
122
	} else {
123
		if (!con->write)
124
			return false;
125
	}
126
127
	/*
128
	 * Console drivers may assume that per-cpu resources have been
129
	 * allocated. So unless they're explicitly marked as being able to
130
	 * cope (CON_ANYTIME) don't call them until this CPU is officially up.
131
	 */
132
	if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
133
		return false;
134
135
	return true;
136
}
137
138
/**
139
 * nbcon_kthread_wake - Wake up a printk thread
140
 * @con:        Console to operate on
141
 */
142
static inline void nbcon_kthread_wake(struct console *con)
143
{
144
	/*
145
	 * Guarantee any new records can be seen by tasks preparing to wait
146
	 * before this context checks if the rcuwait is empty.
147
	 *
148
	 * The full memory barrier in rcuwait_wake_up() pairs with the full
149
	 * memory barrier within set_current_state() of
150
	 * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait()
151
	 * adds the waiter but before it has checked the wait condition.
152
	 *
153
	 * This pairs with nbcon_kthread_func:A.
154
	 */
155
	rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */
156
}
80
157
81
#else
158
#else
82
159
Lines 84-89 void nbcon_free(struct console *con); Link Here
84
#define PRINTK_MESSAGE_MAX	0
161
#define PRINTK_MESSAGE_MAX	0
85
#define PRINTKRB_RECORD_MAX	0
162
#define PRINTKRB_RECORD_MAX	0
86
163
164
static inline void nbcon_kthread_wake(struct console *con) { }
165
static inline void nbcon_kthread_create(struct console *con) { }
166
#define printk_threads_enabled (false)
167
#define printing_via_unlock (false)
168
87
/*
169
/*
88
 * In !PRINTK builds we still export console_sem
170
 * In !PRINTK builds we still export console_sem
89
 * semaphore and some of console functions (console_unlock()/etc.), so
171
 * semaphore and some of console functions (console_unlock()/etc.), so
Lines 98-103 static inline void nbcon_seq_force(struct console *con, u64 seq) { } Link Here
98
static inline bool nbcon_alloc(struct console *con) { return false; }
180
static inline bool nbcon_alloc(struct console *con) { return false; }
99
static inline void nbcon_init(struct console *con) { }
181
static inline void nbcon_init(struct console *con) { }
100
static inline void nbcon_free(struct console *con) { }
182
static inline void nbcon_free(struct console *con) { }
183
static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; }
184
static inline void nbcon_atomic_flush_all(void) { }
185
static inline bool nbcon_atomic_emit_next_record(struct console *con, bool *handover,
186
						 int cookie) { return false; }
187
188
static inline bool console_is_usable(struct console *con, short flags,
189
				     bool use_atomic) { return false; }
101
190
102
#endif /* CONFIG_PRINTK */
191
#endif /* CONFIG_PRINTK */
103
192
Lines 130-135 struct printk_message { Link Here
130
};
219
};
131
220
132
bool other_cpu_in_panic(void);
221
bool other_cpu_in_panic(void);
222
bool this_cpu_in_panic(void);
133
bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
223
bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
134
			     bool is_extended, bool may_supress);
224
			     bool is_extended, bool may_supress);
135
225
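
console_is_usable() above expects to run under the console-list SRCU read lock, with the flags snapshot taken through console_srcu_read_flags(). A condensed sketch of that calling pattern, modelled on the iteration the nbcon.c hunks below use (the counting itself is only illustrative, and as a user of internal.h this would live under kernel/printk/):

#include <linux/console.h>

#include "internal.h"

/*
 * Count how many registered consoles could currently emit through their
 * atomic path. Mirrors the SRCU read lock + flags snapshot pattern used
 * by the flush and kthread code in this series.
 */
static int demo_count_atomic_usable(void)
{
	struct console *con;
	int usable = 0;
	int cookie;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		short flags = console_srcu_read_flags(con);

		if (console_is_usable(con, flags, /* use_atomic */ true))
			usable++;
	}
	console_srcu_read_unlock(cookie);

	return usable;
}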
(-)a/kernel/printk/nbcon.c (-48 / +683 lines)
Lines 5-11 Link Here
5
#include <linux/kernel.h>
5
#include <linux/kernel.h>
6
#include <linux/console.h>
6
#include <linux/console.h>
7
#include <linux/delay.h>
7
#include <linux/delay.h>
8
#include <linux/kthread.h>
8
#include <linux/slab.h>
9
#include <linux/slab.h>
10
#include <linux/serial_core.h>
11
#include <linux/syscore_ops.h>
12
#include "printk_ringbuffer.h"
9
#include "internal.h"
13
#include "internal.h"
10
/*
14
/*
11
 * Printk console printing implementation for consoles which does not depend
15
 * Printk console printing implementation for consoles which does not depend
Lines 140-178 static inline bool nbcon_state_try_cmpxchg(struct console *con, struct nbcon_sta Link Here
140
	return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom);
144
	return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom);
141
}
145
}
142
146
143
#ifdef CONFIG_64BIT
144
145
#define __seq_to_nbcon_seq(seq) (seq)
146
#define __nbcon_seq_to_seq(seq) (seq)
147
148
#else /* CONFIG_64BIT */
149
150
#define __seq_to_nbcon_seq(seq) ((u32)seq)
151
152
static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq)
153
{
154
	u64 seq;
155
	u64 rb_next_seq;
156
157
	/*
158
	 * The provided sequence is only the lower 32 bits of the ringbuffer
159
	 * sequence. It needs to be expanded to 64bit. Get the next sequence
160
	 * number from the ringbuffer and fold it.
161
	 *
162
	 * Having a 32bit representation in the console is sufficient.
163
	 * If a console ever gets more than 2^31 records behind
164
	 * the ringbuffer then this is the least of the problems.
165
	 *
166
	 * Also the access to the ring buffer is always safe.
167
	 */
168
	rb_next_seq = prb_next_seq(prb);
169
	seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq);
170
171
	return seq;
172
}
173
174
#endif /* CONFIG_64BIT */
175
176
/**
147
/**
177
 * nbcon_seq_read - Read the current console sequence
148
 * nbcon_seq_read - Read the current console sequence
178
 * @con:	Console to read the sequence of
149
 * @con:	Console to read the sequence of
Lines 183-189 u64 nbcon_seq_read(struct console *con) Link Here
183
{
154
{
184
	unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq));
155
	unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq));
185
156
186
	return __nbcon_seq_to_seq(nbcon_seq);
157
	return __ulseq_to_u64seq(prb, nbcon_seq);
187
}
158
}
188
159
189
/**
160
/**
Lines 204-210 void nbcon_seq_force(struct console *con, u64 seq) Link Here
204
	 */
175
	 */
205
	u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));
176
	u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));
206
177
207
	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq));
178
	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq));
208
179
209
	/* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */
180
	/* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */
210
	con->seq = 0;
181
	con->seq = 0;
Lines 223-239 void nbcon_seq_force(struct console *con, u64 seq) Link Here
223
 */
194
 */
224
static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
195
static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
225
{
196
{
226
	unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq);
197
	unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq);
227
	struct console *con = ctxt->console;
198
	struct console *con = ctxt->console;
228
199
229
	if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq,
200
	if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq,
230
				    __seq_to_nbcon_seq(new_seq))) {
201
				    __u64seq_to_ulseq(new_seq))) {
231
		ctxt->seq = new_seq;
202
		ctxt->seq = new_seq;
232
	} else {
203
	} else {
233
		ctxt->seq = nbcon_seq_read(con);
204
		ctxt->seq = nbcon_seq_read(con);
234
	}
205
	}
235
}
206
}
236
207
208
bool printk_threads_enabled __ro_after_init;
209
237
/**
210
/**
238
 * nbcon_context_try_acquire_direct - Try to acquire directly
211
 * nbcon_context_try_acquire_direct - Try to acquire directly
239
 * @ctxt:	The context of the caller
212
 * @ctxt:	The context of the caller
Lines 564-569 static struct printk_buffers panic_nbcon_pbufs; Link Here
564
 * nbcon_context_try_acquire - Try to acquire nbcon console
537
 * nbcon_context_try_acquire - Try to acquire nbcon console
565
 * @ctxt:	The context of the caller
538
 * @ctxt:	The context of the caller
566
 *
539
 *
540
 * Context:	Any context which could not be migrated to another CPU.
567
 * Return:	True if the console was acquired. False otherwise.
541
 * Return:	True if the console was acquired. False otherwise.
568
 *
542
 *
569
 * If the caller allowed an unsafe hostile takeover, on success the
543
 * If the caller allowed an unsafe hostile takeover, on success the
Lines 571-577 static struct printk_buffers panic_nbcon_pbufs; Link Here
571
 * in an unsafe state. Otherwise, on success the caller may assume
545
 * in an unsafe state. Otherwise, on success the caller may assume
572
 * the console is not in an unsafe state.
546
 * the console is not in an unsafe state.
573
 */
547
 */
574
__maybe_unused
575
static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
548
static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
576
{
549
{
577
	unsigned int cpu = smp_processor_id();
550
	unsigned int cpu = smp_processor_id();
Lines 857-865 bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) Link Here
857
}
830
}
858
EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
831
EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
859
832
833
/**
834
 * nbcon_reacquire - Reacquire a console after losing ownership
835
 * @wctxt:	The write context that was handed to the write function
836
 *
837
 * Since ownership can be lost at any time due to handover or takeover, a
838
 * printing context _should_ be prepared to back out immediately and
839
 * carefully. However, there are many scenarios where the context _must_
840
 * reacquire ownership in order to finalize or revert hardware changes.
841
 *
842
 * This function allows a context to reacquire ownership using the same
843
 * priority as its previous ownership.
844
 *
845
 * Note that for printing contexts, after a successful reacquire the
846
 * context will have no output buffer because that has been lost. This
847
 * function cannot be used to resume printing.
848
 */
849
void nbcon_reacquire(struct nbcon_write_context *wctxt)
850
{
851
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
852
	struct console *con = ctxt->console;
853
	struct nbcon_state cur;
854
855
	while (!nbcon_context_try_acquire(ctxt))
856
		cpu_relax();
857
858
	wctxt->outbuf = NULL;
859
	wctxt->len = 0;
860
	nbcon_state_read(con, &cur);
861
	wctxt->unsafe_takeover = cur.unsafe_takeover;
862
}
863
EXPORT_SYMBOL_GPL(nbcon_reacquire);
864
860
/**
865
/**
861
 * nbcon_emit_next_record - Emit a record in the acquired context
866
 * nbcon_emit_next_record - Emit a record in the acquired context
862
 * @wctxt:	The write context that will be handed to the write function
867
 * @wctxt:	The write context that will be handed to the write function
868
 * @use_atomic:	True if the write_atomic callback is to be used
863
 *
869
 *
864
 * Return:	True if this context still owns the console. False if
870
 * Return:	True if this context still owns the console. False if
865
 *		ownership was handed over or taken.
871
 *		ownership was handed over or taken.
Lines 873-880 EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); Link Here
873
 * When true is returned, @wctxt->ctxt.backlog indicates whether there are
879
 * When true is returned, @wctxt->ctxt.backlog indicates whether there are
874
 * still records pending in the ringbuffer,
880
 * still records pending in the ringbuffer,
875
 */
881
 */
876
__maybe_unused
882
static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic)
877
static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
878
{
883
{
879
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
884
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
880
	struct console *con = ctxt->console;
885
	struct console *con = ctxt->console;
Lines 885-891 static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) Link Here
885
	unsigned long con_dropped;
890
	unsigned long con_dropped;
886
	struct nbcon_state cur;
891
	struct nbcon_state cur;
887
	unsigned long dropped;
892
	unsigned long dropped;
888
	bool done;
893
	bool done = false;
889
894
890
	/*
895
	/*
891
	 * The printk buffers are filled within an unsafe section. This
896
	 * The printk buffers are filled within an unsafe section. This
Lines 924-940 static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) Link Here
924
	nbcon_state_read(con, &cur);
929
	nbcon_state_read(con, &cur);
925
	wctxt->unsafe_takeover = cur.unsafe_takeover;
930
	wctxt->unsafe_takeover = cur.unsafe_takeover;
926
931
927
	if (con->write_atomic) {
932
	if (use_atomic &&
933
	    con->write_atomic) {
928
		done = con->write_atomic(con, wctxt);
934
		done = con->write_atomic(con, wctxt);
929
	} else {
935
930
		nbcon_context_release(ctxt);
936
	} else if (!use_atomic &&
931
		WARN_ON_ONCE(1);
937
		   con->write_thread &&
932
		done = false;
938
		   con->kthread) {
939
		WARN_ON_ONCE(con->kthread != current);
940
		done = con->write_thread(con, wctxt);
933
	}
941
	}
934
942
935
	/* If not done, the emit was aborted. */
943
	if (!done) {
936
	if (!done)
944
		/*
945
		 * The emit was aborted, probably due to a loss of ownership.
946
		 * Ensure ownership was lost or released before reporting the
947
		 * loss.
948
		 */
949
		nbcon_context_release(ctxt);
937
		return false;
950
		return false;
951
	}
938
952
939
	/*
953
	/*
940
	 * Since any dropped message was successfully output, reset the
954
	 * Since any dropped message was successfully output, reset the
Lines 961-966 static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) Link Here
961
	return nbcon_context_exit_unsafe(ctxt);
975
	return nbcon_context_exit_unsafe(ctxt);
962
}
976
}
963
977
978
/**
979
 * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup
980
 * @con:	Console to operate on
981
 * @ctxt:	The acquire context that contains the state
982
 *		at console_acquire()
983
 *
984
 * Return:	True if the thread should shutdown or if the console is
985
 *		allowed to print and a record is available. False otherwise.
986
 *
987
 * After the thread wakes up, it must first check if it should shutdown before
988
 * attempting any printing.
989
 */
990
static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt)
991
{
992
	bool is_usable;
993
	short flags;
994
	int cookie;
995
996
	if (kthread_should_stop())
997
		return true;
998
999
	cookie = console_srcu_read_lock();
1000
	flags = console_srcu_read_flags(con);
1001
	is_usable = console_is_usable(con, flags, false);
1002
	console_srcu_read_unlock(cookie);
1003
1004
	if (!is_usable)
1005
		return false;
1006
1007
	/* Bring the sequence in @ctxt up to date */
1008
	ctxt->seq = nbcon_seq_read(con);
1009
1010
	return prb_read_valid(prb, ctxt->seq, NULL);
1011
}
1012
1013
/**
1014
 * nbcon_kthread_func - The printer thread function
1015
 * @__console:	Console to operate on
1016
 */
1017
static int nbcon_kthread_func(void *__console)
1018
{
1019
	struct console *con = __console;
1020
	struct nbcon_write_context wctxt = {
1021
		.ctxt.console	= con,
1022
		.ctxt.prio	= NBCON_PRIO_NORMAL,
1023
	};
1024
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1025
	unsigned long flags;
1026
	short con_flags;
1027
	bool backlog;
1028
	int cookie;
1029
	int ret;
1030
1031
wait_for_event:
1032
	/*
1033
	 * Guarantee this task is visible on the rcuwait before
1034
	 * checking the wake condition.
1035
	 *
1036
	 * The full memory barrier within set_current_state() of
1037
	 * ___rcuwait_wait_event() pairs with the full memory
1038
	 * barrier within rcuwait_has_sleeper().
1039
	 *
1040
	 * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A.
1041
	 */
1042
	ret = rcuwait_wait_event(&con->rcuwait,
1043
				 nbcon_kthread_should_wakeup(con, ctxt),
1044
				 TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */
1045
1046
	if (kthread_should_stop())
1047
		return 0;
1048
1049
	/* Wait was interrupted by a spurious signal, go back to sleep. */
1050
	if (ret)
1051
		goto wait_for_event;
1052
1053
	do {
1054
		backlog = false;
1055
1056
		cookie = console_srcu_read_lock();
1057
1058
		con_flags = console_srcu_read_flags(con);
1059
1060
		if (console_is_usable(con, con_flags, false)) {
1061
			con->driver_enter(con, &flags);
1062
1063
			/*
1064
			 * Ensure this stays on the CPU to make handover and
1065
			 * takeover possible.
1066
			 */
1067
			cant_migrate();
1068
1069
			if (nbcon_context_try_acquire(ctxt)) {
1070
				/*
1071
				 * If the emit fails, this context is no
1072
				 * longer the owner.
1073
				 */
1074
				if (nbcon_emit_next_record(&wctxt, false)) {
1075
					nbcon_context_release(ctxt);
1076
					backlog = ctxt->backlog;
1077
				}
1078
			}
1079
1080
			con->driver_exit(con, flags);
1081
		}
1082
1083
		console_srcu_read_unlock(cookie);
1084
1085
	} while (backlog);
1086
1087
	goto wait_for_event;
1088
}
1089
1090
/**
1091
 * nbcon_irq_work - irq work to wake printk thread
1092
 * @irq_work:	The irq work to operate on
1093
 */
1094
static void nbcon_irq_work(struct irq_work *irq_work)
1095
{
1096
	struct console *con = container_of(irq_work, struct console, irq_work);
1097
1098
	nbcon_kthread_wake(con);
1099
}
1100
1101
static inline bool rcuwait_has_sleeper(struct rcuwait *w)
1102
{
1103
	bool has_sleeper;
1104
1105
	rcu_read_lock();
1106
	/*
1107
	 * Guarantee any new records can be seen by tasks preparing to wait
1108
	 * before this context checks if the rcuwait is empty.
1109
	 *
1110
	 * This full memory barrier pairs with the full memory barrier within
1111
	 * set_current_state() of ___rcuwait_wait_event(), which is called
1112
	 * after prepare_to_rcuwait() adds the waiter but before it has
1113
	 * checked the wait condition.
1114
	 *
1115
	 * This pairs with nbcon_kthread_func:A.
1116
	 */
1117
	smp_mb(); /* LMM(rcuwait_has_sleeper:A) */
1118
	has_sleeper = !!rcu_dereference(w->task);
1119
	rcu_read_unlock();
1120
1121
	return has_sleeper;
1122
}
1123
1124
/**
1125
 * nbcon_wake_threads - Wake up printing threads using irq_work
1126
 */
1127
void nbcon_wake_threads(void)
1128
{
1129
	struct console *con;
1130
	int cookie;
1131
1132
	cookie = console_srcu_read_lock();
1133
	for_each_console_srcu(con) {
1134
		/*
1135
		 * Only schedule irq_work if the printing thread is
1136
		 * actively waiting. If not waiting, the thread will
1137
		 * notice by itself that it has work to do.
1138
		 */
1139
		if (con->kthread && rcuwait_has_sleeper(&con->rcuwait))
1140
			irq_work_queue(&con->irq_work);
1141
	}
1142
	console_srcu_read_unlock(cookie);
1143
}
1144
1145
/* Track the nbcon emergency nesting per CPU. */
1146
static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
1147
static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
1148
1149
/**
1150
 * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer
1151
 *
1152
 * Return:	Either a pointer to the per CPU emergency nesting counter of
1153
 *		the current CPU or to the init data during early boot.
1154
 */
1155
static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
1156
{
1157
	/*
1158
	 * The value of __printk_percpu_data_ready gets set in normal
1159
	 * context and before SMP initialization. As a result it could
1160
	 * never change while inside an nbcon emergency section.
1161
	 */
1162
	if (!printk_percpu_data_ready())
1163
		return &early_nbcon_pcpu_emergency_nesting;
1164
1165
	return this_cpu_ptr(&nbcon_pcpu_emergency_nesting);
1166
}
1167
1168
/**
1169
 * nbcon_atomic_emit_one - Print one record for an nbcon console using the
1170
 *				write_atomic() callback
1171
 * @wctxt:	An initialized write context struct to use
1172
 *		for this context
1173
 *
1174
 * Return:	False if the given console could not print a record or there
1175
 *		are no more records to print, otherwise true.
1176
 *
1177
 * This is an internal helper to handle the locking of the console before
1178
 * calling nbcon_emit_next_record().
1179
 */
1180
static bool nbcon_atomic_emit_one(struct nbcon_write_context *wctxt)
1181
{
1182
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
1183
1184
	if (!nbcon_context_try_acquire(ctxt))
1185
		return false;
1186
1187
	/*
1188
	 * nbcon_emit_next_record() returns false when the console was
1189
	 * handed over or taken over. In both cases the context is no
1190
	 * longer valid.
1191
	 */
1192
	if (!nbcon_emit_next_record(wctxt, true))
1193
		return false;
1194
1195
	nbcon_context_release(ctxt);
1196
1197
	return ctxt->backlog;
1198
}
1199
1200
/**
1201
 * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon
1202
 *				printing on the current CPU
1203
 *
1204
 * Context:	Any context which could not be migrated to another CPU.
1205
 * Return:	The nbcon_prio to use for acquiring an nbcon console in this
1206
 *		context for printing.
1207
 */
1208
enum nbcon_prio nbcon_get_default_prio(void)
1209
{
1210
	unsigned int *cpu_emergency_nesting;
1211
1212
	if (this_cpu_in_panic())
1213
		return NBCON_PRIO_PANIC;
1214
1215
	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1216
	if (*cpu_emergency_nesting)
1217
		return NBCON_PRIO_EMERGENCY;
1218
1219
	return NBCON_PRIO_NORMAL;
1220
}
1221
1222
/**
1223
 * nbcon_atomic_emit_next_record - Print one record for an nbcon console
1224
 *					using the write_atomic() callback
1225
 * @con:	The console to print on
1226
 * @handover:	Will be set to true if a printk waiter has taken over the
1227
 *		console_lock, in which case the caller is no longer holding
1228
 *		both the console_lock and the SRCU read lock. Otherwise it
1229
 *		is set to false.
1230
 * @cookie:	The cookie from the SRCU read lock.
1231
 *
1232
 * Context:	Any context which could not be migrated to another CPU.
1233
 * Return:	True if a record could be printed, otherwise false.
1234
 *
1235
 * This function is meant to be called by console_flush_all() to print records
1236
 * on nbcon consoles using the write_atomic() callback. Essentially it is the
1237
 * nbcon version of console_emit_next_record().
1238
 */
1239
bool nbcon_atomic_emit_next_record(struct console *con, bool *handover, int cookie)
1240
{
1241
	struct nbcon_write_context wctxt = { };
1242
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1243
	unsigned long driver_flags;
1244
	bool progress = false;
1245
	unsigned long flags;
1246
1247
	*handover = false;
1248
1249
	/* Use the same locking order as console_emit_next_record(). */
1250
	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
1251
		printk_safe_enter_irqsave(flags);
1252
		console_lock_spinning_enable();
1253
		stop_critical_timings();
1254
	}
1255
1256
	con->driver_enter(con, &driver_flags);
1257
	cant_migrate();
1258
1259
	ctxt->console	= con;
1260
	ctxt->prio	= nbcon_get_default_prio();
1261
1262
	progress = nbcon_atomic_emit_one(&wctxt);
1263
1264
	con->driver_exit(con, driver_flags);
1265
1266
	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
1267
		start_critical_timings();
1268
		*handover = console_lock_spinning_disable_and_check(cookie);
1269
		printk_safe_exit_irqrestore(flags);
1270
	}
1271
1272
	return progress;
1273
}
1274
1275
/**
1276
 * __nbcon_atomic_flush_all - Flush all nbcon consoles using their
1277
 *					write_atomic() callback
1278
 * @stop_seq:			Flush up until this record
1279
 * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1280
 */
1281
static void __nbcon_atomic_flush_all(u64 stop_seq, bool allow_unsafe_takeover)
1282
{
1283
	struct nbcon_write_context wctxt = { };
1284
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1285
	struct console *con;
1286
	bool any_progress;
1287
	int cookie;
1288
1289
	do {
1290
		any_progress = false;
1291
1292
		cookie = console_srcu_read_lock();
1293
		for_each_console_srcu(con) {
1294
			short flags = console_srcu_read_flags(con);
1295
			unsigned long irq_flags;
1296
1297
			if (!(flags & CON_NBCON))
1298
				continue;
1299
1300
			if (!console_is_usable(con, flags, true))
1301
				continue;
1302
1303
			if (nbcon_seq_read(con) >= stop_seq)
1304
				continue;
1305
1306
			memset(ctxt, 0, sizeof(*ctxt));
1307
			ctxt->console			= con;
1308
			ctxt->spinwait_max_us		= 2000;
1309
			ctxt->allow_unsafe_takeover	= allow_unsafe_takeover;
1310
1311
			/*
1312
			 * Atomic flushing does not use console driver
1313
			 * synchronization (i.e. it does not hold the port
1314
			 * lock for uart consoles). Therefore IRQs must be
1315
			 * disabled to avoid being interrupted and then
1316
			 * calling into a driver that will deadlock trying
1317
			 * to acquire console ownership.
1318
			 *
1319
			 * This also disables migration in order to get the
1320
			 * current CPU priority.
1321
			 */
1322
			local_irq_save(irq_flags);
1323
1324
			ctxt->prio = nbcon_get_default_prio();
1325
1326
			any_progress |= nbcon_atomic_emit_one(&wctxt);
1327
1328
			local_irq_restore(irq_flags);
1329
		}
1330
		console_srcu_read_unlock(cookie);
1331
	} while (any_progress);
1332
}
1333
1334
/**
1335
 * nbcon_atomic_flush_all - Flush all nbcon consoles using their
1336
 *				write_atomic() callback
1337
 *
1338
 * Flush the backlog up through the currently newest record. Any new
1339
 * records added while flushing will not be flushed. This is to avoid
1340
 * one CPU printing unbounded because other CPUs continue to add records.
1341
 */
1342
void nbcon_atomic_flush_all(void)
1343
{
1344
	__nbcon_atomic_flush_all(prb_next_reserve_seq(prb), false);
1345
}
1346
1347
/**
1348
 * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their
1349
 *	write_atomic() callback and allowing unsafe hostile takeovers
1350
 *
1351
 * Flush the backlog up through the currently newest record. Unsafe hostile
1352
 * takeovers will be performed, if necessary.
1353
 */
1354
void nbcon_atomic_flush_unsafe(void)
1355
{
1356
	__nbcon_atomic_flush_all(prb_next_reserve_seq(prb), true);
1357
}
1358
1359
/**
1360
 * nbcon_cpu_emergency_enter - Enter an emergency section where printk()
1361
 *	messages for that CPU are only stored
1362
 *
1363
 * Upon exiting the emergency section, all stored messages are flushed.
1364
 *
1365
 * Context:	Any context. Disables preemption.
1366
 *
1367
 * When within an emergency section, no printing occurs on that CPU. This
1368
 * is to allow all emergency messages to be dumped into the ringbuffer before
1369
 * flushing the ringbuffer. The actual printing occurs when exiting the
1370
 * outermost emergency section.
1371
 */
1372
void nbcon_cpu_emergency_enter(void)
1373
{
1374
	unsigned int *cpu_emergency_nesting;
1375
1376
	preempt_disable();
1377
1378
	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1379
	(*cpu_emergency_nesting)++;
1380
}
1381
1382
/**
1383
 * nbcon_cpu_emergency_exit - Exit an emergency section and flush the
1384
 *	stored messages
1385
 *
1386
 * Flushing only occurs when exiting all nesting for the CPU.
1387
 *
1388
 * Context:	Any context. Enables preemption.
1389
 */
1390
void nbcon_cpu_emergency_exit(void)
1391
{
1392
	unsigned int *cpu_emergency_nesting;
1393
	bool do_trigger_flush = false;
1394
1395
	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1396
1397
	WARN_ON_ONCE(*cpu_emergency_nesting == 0);
1398
1399
	if (*cpu_emergency_nesting == 1)
1400
		do_trigger_flush = true;
1401
1402
	/* Undo the nesting count of nbcon_cpu_emergency_enter(). */
1403
	(*cpu_emergency_nesting)--;
1404
1405
	preempt_enable();
1406
1407
	if (do_trigger_flush)
1408
		printk_trigger_flush();
1409
}
1410
1411
/**
1412
 * nbcon_kthread_stop - Stop a printer thread
1413
 * @con:	Console to operate on
1414
 */
1415
static void nbcon_kthread_stop(struct console *con)
1416
{
1417
	lockdep_assert_console_list_lock_held();
1418
1419
	if (!con->kthread)
1420
		return;
1421
1422
	kthread_stop(con->kthread);
1423
	con->kthread = NULL;
1424
}
1425
1426
/**
1427
 * nbcon_kthread_create - Create a printer thread
1428
 * @con:	Console to operate on
1429
 *
1430
 * If it fails, let the console proceed. The atomic part might
1431
 * be usable and useful.
1432
 */
1433
void nbcon_kthread_create(struct console *con)
1434
{
1435
	struct task_struct *kt;
1436
1437
	lockdep_assert_console_list_lock_held();
1438
1439
	if (!(con->flags & CON_NBCON) || !con->write_thread)
1440
		return;
1441
1442
	if (!printk_threads_enabled || con->kthread)
1443
		return;
1444
1445
	/*
1446
	 * Printer threads cannot be started as long as any boot console is
1447
	 * registered because there is no way to synchronize the hardware
1448
	 * registers between boot console code and regular console code.
1449
	 */
1450
	if (have_boot_console)
1451
		return;
1452
1453
	kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
1454
	if (IS_ERR(kt)) {
1455
		con_printk(KERN_ERR, con, "failed to start printing thread\n");
1456
		return;
1457
	}
1458
1459
	con->kthread = kt;
1460
1461
	/*
1462
	 * It is important that console printing threads are scheduled
1463
	 * shortly after a printk call and with generous runtime budgets.
1464
	 */
1465
	sched_set_normal(con->kthread, -20);
1466
}
1467
1468
static int __init printk_setup_threads(void)
1469
{
1470
	struct console *con;
1471
1472
	console_list_lock();
1473
	printk_threads_enabled = true;
1474
	for_each_console(con)
1475
		nbcon_kthread_create(con);
1476
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && printing_via_unlock)
1477
		nbcon_legacy_kthread_create();
1478
	console_list_unlock();
1479
	return 0;
1480
}
1481
early_initcall(printk_setup_threads);
1482
964
/**
1483
/**
965
 * nbcon_alloc - Allocate buffers needed by the nbcon console
1484
 * nbcon_alloc - Allocate buffers needed by the nbcon console
966
 * @con:	Console to allocate buffers for
1485
 * @con:	Console to allocate buffers for
Lines 1007-1014 void nbcon_init(struct console *con) Link Here
1007
	/* nbcon_alloc() must have been called and successful! */
1526
	/* nbcon_alloc() must have been called and successful! */
1008
	BUG_ON(!con->pbufs);
1527
	BUG_ON(!con->pbufs);
1009
1528
1529
	rcuwait_init(&con->rcuwait);
1530
	init_irq_work(&con->irq_work, nbcon_irq_work);
1010
	nbcon_seq_force(con, con->seq);
1531
	nbcon_seq_force(con, con->seq);
1011
	nbcon_state_set(con, &state);
1532
	nbcon_state_set(con, &state);
1533
	nbcon_kthread_create(con);
1012
}
1534
}
1013
1535
1014
/**
1536
/**
Lines 1019-1024 void nbcon_free(struct console *con) Link Here
1019
{
1541
{
1020
	struct nbcon_state state = { };
1542
	struct nbcon_state state = { };
1021
1543
1544
	nbcon_kthread_stop(con);
1022
	nbcon_state_set(con, &state);
1545
	nbcon_state_set(con, &state);
1023
1546
1024
	/* Boot consoles share global printk buffers. */
1547
	/* Boot consoles share global printk buffers. */
Lines 1027-1029 void nbcon_free(struct console *con) Link Here
1027
1550
1028
	con->pbufs = NULL;
1551
	con->pbufs = NULL;
1029
}
1552
}
1553
1554
static inline bool uart_is_nbcon(struct uart_port *up)
1555
{
1556
	int cookie;
1557
	bool ret;
1558
1559
	if (!uart_console(up))
1560
		return false;
1561
1562
	cookie = console_srcu_read_lock();
1563
	ret = (console_srcu_read_flags(up->cons) & CON_NBCON);
1564
	console_srcu_read_unlock(cookie);
1565
	return ret;
1566
}
1567
1568
/**
1569
 * nbcon_acquire - The second half of the port locking wrapper
1570
 * @up:		The uart port whose @lock was locked
1571
 *
1572
 * The uart_port_lock() wrappers will first lock the spin_lock @up->lock.
1573
 * Then this function is called to implement nbcon-specific processing.
1574
 *
1575
 * If @up is an nbcon console, this console will be acquired and marked as
1576
 * unsafe. Otherwise this function does nothing.
1577
 *
1578
 * nbcon consoles acquired via the port lock wrapper always use priority
1579
 * NBCON_PRIO_NORMAL.
1580
 */
1581
void nbcon_acquire(struct uart_port *up)
1582
{
1583
	struct console *con = up->cons;
1584
	struct nbcon_context ctxt;
1585
1586
	if (!uart_is_nbcon(up))
1587
		return;
1588
1589
	WARN_ON_ONCE(con->locked_port);
1590
1591
	do {
1592
		do {
1593
			memset(&ctxt, 0, sizeof(ctxt));
1594
			ctxt.console	= con;
1595
			ctxt.prio	= NBCON_PRIO_NORMAL;
1596
		} while (!nbcon_context_try_acquire(&ctxt));
1597
1598
	} while (!nbcon_context_enter_unsafe(&ctxt));
1599
1600
	con->locked_port = true;
1601
}
1602
EXPORT_SYMBOL_GPL(nbcon_acquire);
1603
1604
/**
1605
 * nbcon_release - The first half of the port unlocking wrapper
1606
 * @up:		The uart port whose @lock is about to be unlocked
1607
 *
1608
 * The uart_port_unlock() wrappers will first call this function to implement
1609
 * nbcon-specific processing. Then afterwards the uart_port_unlock() wrappers
1610
 * will unlock the spin_lock @up->lock.
1611
 *
1612
 * If @up is an nbcon console, the console will be marked as safe and
1613
 * released. Otherwise this function does nothing.
1614
 *
1615
 * nbcon consoles acquired via the port lock wrapper always use priority
1616
 * NBCON_PRIO_NORMAL.
1617
 */
1618
void nbcon_release(struct uart_port *up)
1619
{
1620
	struct console *con = up->cons;
1621
	struct nbcon_context ctxt = {
1622
		.console	= con,
1623
		.prio		= NBCON_PRIO_NORMAL,
1624
	};
1625
1626
	if (!con->locked_port)
1627
		return;
1628
1629
	if (nbcon_context_exit_unsafe(&ctxt))
1630
		nbcon_context_release(&ctxt);
1631
1632
	con->locked_port = false;
1633
}
1634
EXPORT_SYMBOL_GPL(nbcon_release);
1635
1636
/**
1637
 * printk_kthread_shutdown - shutdown all threaded printers
1638
 *
1639
 * On system shutdown all threaded printers are stopped. This allows printk
1640
 * to transition back to atomic printing, thus providing a robust mechanism
1641
 * for the final shutdown/reboot messages to be output.
1642
 */
1643
static void printk_kthread_shutdown(void)
1644
{
1645
	struct console *con;
1646
1647
	console_list_lock();
1648
	for_each_console(con) {
1649
		if (con->flags & CON_NBCON)
1650
			nbcon_kthread_stop(con);
1651
	}
1652
	console_list_unlock();
1653
}
1654
1655
static struct syscore_ops printk_syscore_ops = {
1656
	.shutdown = printk_kthread_shutdown,
1657
};
1658
1659
static int __init printk_init_ops(void)
1660
{
1661
	register_syscore_ops(&printk_syscore_ops);
1662
	return 0;
1663
}
1664
device_initcall(printk_init_ops);
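
For completeness, a rough sketch of what a console driver's write_thread() callback looks like from the driver side, inferred only from how con->write_thread(con, wctxt) is invoked in nbcon_emit_next_record() above. nbcon_exit_unsafe() appears in the hunks; the matching nbcon_enter_unsafe() does not and is assumed here, and the hardware access is elided:

#include <linux/console.h>

/*
 * Hypothetical write_thread() implementation. Hardware access is wrapped
 * in an unsafe section so that a higher-priority context cannot take the
 * console over in the middle of a register sequence.
 */
static bool demo_console_write_thread(struct console *con,
				      struct nbcon_write_context *wctxt)
{
	if (!nbcon_enter_unsafe(wctxt))
		return false;		/* ownership already lost */

	/* ... emit wctxt->len bytes from wctxt->outbuf to the device ... */

	/* Returns false if ownership was lost while printing. */
	return nbcon_exit_unsafe(wctxt);
}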
(-)a/kernel/printk/printk.c (-146 / +498 lines)
Lines 282-287 EXPORT_SYMBOL(console_list_unlock); Link Here
282
 * Return: A cookie to pass to console_srcu_read_unlock().
282
 * Return: A cookie to pass to console_srcu_read_unlock().
283
 */
283
 */
284
int console_srcu_read_lock(void)
284
int console_srcu_read_lock(void)
285
	__acquires(&console_srcu)
285
{
286
{
286
	return srcu_read_lock_nmisafe(&console_srcu);
287
	return srcu_read_lock_nmisafe(&console_srcu);
287
}
288
}
Lines 295-300 EXPORT_SYMBOL(console_srcu_read_lock); Link Here
295
 * Counterpart to console_srcu_read_lock()
296
 * Counterpart to console_srcu_read_lock()
296
 */
297
 */
297
void console_srcu_read_unlock(int cookie)
298
void console_srcu_read_unlock(int cookie)
299
	__releases(&console_srcu)
298
{
300
{
299
	srcu_read_unlock_nmisafe(&console_srcu, cookie);
301
	srcu_read_unlock_nmisafe(&console_srcu, cookie);
300
}
302
}
Lines 347-352 static bool panic_in_progress(void) Link Here
347
	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
349
	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
348
}
350
}
349
351
352
/* Return true if a panic is in progress on the current CPU. */
353
bool this_cpu_in_panic(void)
354
{
355
	/*
356
	 * We can use raw_smp_processor_id() here because it is impossible for
357
	 * the task to be migrated to the panic_cpu, or away from it. If
358
	 * panic_cpu has already been set, and we're not currently executing on
359
	 * that CPU, then we never will be.
360
	 */
361
	return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
362
}
363
364
/*
365
 * Return true if a panic is in progress on a remote CPU.
366
 *
367
 * On true, the local CPU should immediately release any printing resources
368
 * that may be needed by the panic CPU.
369
 */
370
bool other_cpu_in_panic(void)
371
{
372
	return (panic_in_progress() && !this_cpu_in_panic());
373
}
374
350
/*
375
/*
351
 * This is used for debugging the mess that is the VT code by
376
 * This is used for debugging the mess that is the VT code by
352
 * keeping track if we have the console semaphore held. It's
377
 * keeping track if we have the console semaphore held. It's
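
this_cpu_in_panic()/other_cpu_in_panic() above split the old panic_in_progress() check by CPU, and the later hunks use the latter as a bail-out guard. A minimal sketch of that guard pattern (the helper is illustrative; only other_cpu_in_panic() comes from this hunk):

#include "internal.h"

/*
 * Illustrative guard: a CPU that is not the panic CPU should stop
 * competing for console resources as soon as a panic starts elsewhere,
 * which is how vprintk_emit() and the console-lock paths below use it.
 */
static bool demo_may_touch_consoles(void)
{
	if (other_cpu_in_panic())
		return false;	/* leave the consoles to the panic CPU */

	return true;
}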
Lines 438-451 static int console_msg_format = MSG_FORMAT_DEFAULT; Link Here
438
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
463
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
439
static DEFINE_MUTEX(syslog_lock);
464
static DEFINE_MUTEX(syslog_lock);
440
465
441
#ifdef CONFIG_PRINTK
442
/*
466
/*
443
 * During panic, heavy printk by other CPUs can delay the
467
 * Specifies if a legacy console is registered. If legacy consoles are
444
 * panic and risk deadlock on console resources.
468
 * present, it is necessary to perform the console_lock/console_unlock dance
469
 * whenever console flushing should occur.
445
 */
470
 */
446
static int __read_mostly suppress_panic_printk;
471
bool have_legacy_console;
447
472
473
/*
474
 * Specifies if an nbcon console is registered. If nbcon consoles are present,
475
 * synchronous printing of legacy consoles will not occur during panic until
476
 * the backtrace has been stored to the ringbuffer.
477
 */
478
bool have_nbcon_console;
479
480
/*
481
 * Specifies if a boot console is registered. If boot consoles are present,
482
 * nbcon consoles cannot print simultaneously and must be synchronized by
483
 * the console lock. This is because boot consoles and nbcon consoles may
484
 * have mapped the same hardware.
485
 */
486
bool have_boot_console;
487
488
#ifdef CONFIG_PRINTK
448
DECLARE_WAIT_QUEUE_HEAD(log_wait);
489
DECLARE_WAIT_QUEUE_HEAD(log_wait);
490
491
static DECLARE_WAIT_QUEUE_HEAD(legacy_wait);
492
449
/* All 3 protected by @syslog_lock. */
493
/* All 3 protected by @syslog_lock. */
450
/* the next printk record to read by syslog(READ) or /proc/kmsg */
494
/* the next printk record to read by syslog(READ) or /proc/kmsg */
451
static u64 syslog_seq;
495
static u64 syslog_seq;
Lines 1844-1855 static bool console_waiter; Link Here
1844
 * there may be a waiter spinning (like a spinlock). Also it must be
1888
 * there may be a waiter spinning (like a spinlock). Also it must be
1845
 * ready to hand over the lock at the end of the section.
1889
 * ready to hand over the lock at the end of the section.
1846
 */
1890
 */
1847
static void console_lock_spinning_enable(void)
1891
void console_lock_spinning_enable(void)
1848
{
1892
{
1893
	/*
1894
	 * Do not use spinning in panic(). The panic CPU wants to keep the lock.
1895
	 * Non-panic CPUs abandon the flush anyway.
1896
	 *
1897
	 * Just keep the lockdep annotation. The panic-CPU should avoid
1898
	 * taking console_owner_lock because it might cause a deadlock.
1899
	 * This looks like the easiest way how to prevent false lockdep
1900
	 * reports without handling races a lockless way.
1901
	 */
1902
	if (panic_in_progress())
1903
		goto lockdep;
1904
1849
	raw_spin_lock(&console_owner_lock);
1905
	raw_spin_lock(&console_owner_lock);
1850
	console_owner = current;
1906
	console_owner = current;
1851
	raw_spin_unlock(&console_owner_lock);
1907
	raw_spin_unlock(&console_owner_lock);
1852
1908
1909
lockdep:
1853
	/* The waiter may spin on us after setting console_owner */
1910
	/* The waiter may spin on us after setting console_owner */
1854
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
1911
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
1855
}
1912
}
Lines 1870-1879 static void console_lock_spinning_enable(void) Link Here
1870
 *
1927
 *
1871
 * Return: 1 if the lock rights were passed, 0 otherwise.
1928
 * Return: 1 if the lock rights were passed, 0 otherwise.
1872
 */
1929
 */
1873
static int console_lock_spinning_disable_and_check(int cookie)
1930
int console_lock_spinning_disable_and_check(int cookie)
1874
{
1931
{
1875
	int waiter;
1932
	int waiter;
1876
1933
1934
	/*
1935
	 * Ignore spinning waiters during panic() because they might get stopped
1936
	 * or blocked at any time,
1937
	 *
1938
	 * It is safe because nobody is allowed to start spinning during panic
1939
	 * in the first place. If there has been a waiter then non panic CPUs
1940
	 * might stay spinning. They would get stopped anyway. The panic context
1941
	 * will never start spinning and an interrupted spin on panic CPU will
1942
	 * never continue.
1943
	 */
1944
	if (panic_in_progress()) {
1945
		/* Keep lockdep happy. */
1946
		spin_release(&console_owner_dep_map, _THIS_IP_);
1947
		return 0;
1948
	}
1949
1877
	raw_spin_lock(&console_owner_lock);
1950
	raw_spin_lock(&console_owner_lock);
1878
	waiter = READ_ONCE(console_waiter);
1951
	waiter = READ_ONCE(console_waiter);
1879
	console_owner = NULL;
1952
	console_owner = NULL;
Lines 2259-2313 int vprintk_store(int facility, int level,
 	return ret;
 }
 
+static bool legacy_allow_panic_sync;
+
+/*
+ * This acts as a one-way switch to allow legacy consoles to print from
+ * the printk() caller context on a panic CPU.
+ */
+void printk_legacy_allow_panic_sync(void)
+{
+	legacy_allow_panic_sync = true;
+}
+
 asmlinkage int vprintk_emit(int facility, int level,
 			    const struct dev_printk_info *dev_info,
 			    const char *fmt, va_list args)
 {
+	bool do_trylock_unlock = printing_via_unlock &&
+				 !IS_ENABLED(CONFIG_PREEMPT_RT);
 	int printed_len;
-	bool in_sched = false;
 
 	/* Suppress unimportant messages after panic happens */
 	if (unlikely(suppress_printk))
 		return 0;
 
-	if (unlikely(suppress_panic_printk) &&
-	    atomic_read(&panic_cpu) != raw_smp_processor_id())
+	/*
+	 * The messages on the panic CPU are the most important. If
+	 * non-panic CPUs are generating any messages, they will be
+	 * silently dropped.
+	 */
+	if (other_cpu_in_panic())
 		return 0;
 
 	if (level == LOGLEVEL_SCHED) {
 		level = LOGLEVEL_DEFAULT;
-		in_sched = true;
+		/* If called from the scheduler, we can not call up(). */
+		do_trylock_unlock = false;
 	}
 
 	printk_delay(level);
 
 	printed_len = vprintk_store(facility, level, dev_info, fmt, args);
 
-	/* If called from the scheduler, we can not call up(). */
-	if (!in_sched) {
+	if (!have_boot_console && have_nbcon_console) {
+		bool is_panic_context = this_cpu_in_panic();
+
+		/*
+		 * In panic, the legacy consoles are not allowed to print from
+		 * the printk calling context unless explicitly allowed. This
+		 * gives the safe nbcon consoles a chance to print out all the
+		 * panic messages first. This restriction only applies if
+		 * there are nbcon consoles registered.
+		 */
+		if (is_panic_context)
+			do_trylock_unlock &= legacy_allow_panic_sync;
+
+		/*
+		 * There are situations where nbcon atomic printing should
+		 * happen in the printk() caller context:
+		 *
+		 * - When this CPU is in panic.
+		 *
+		 * - When booting, before the printing threads have been
+		 *   started.
+		 *
+		 * - During shutdown, since the printing threads may not get
+		 *   a chance to print the final messages.
+		 *
+		 * Note that if boot consoles are registered, the
+		 * console_lock/console_unlock dance must be relied upon
+		 * instead because nbcon consoles cannot print simultaneously
+		 * with boot consoles.
+		 */
+		if (is_panic_context ||
+		    !printk_threads_enabled ||
+		    (system_state > SYSTEM_RUNNING)) {
+			nbcon_atomic_flush_all();
+		}
+	}
+
+	nbcon_wake_threads();
+
+	if (do_trylock_unlock) {
 		/*
 		 * The caller may be holding system-critical or
 		 * timing-sensitive locks. Disable preemption during
 		 * printing of all remaining records to all consoles so that
 		 * this context can return as soon as possible. Hopefully
 		 * another printk() caller will take over the printing.
+		 *
+		 * Also, nbcon_get_default_prio() requires migration disabled.
 		 */
 		preempt_disable();
+
 		/*
-		 * Try to acquire and then immediately release the console
-		 * semaphore. The release will print out buffers. With the
-		 * spinning variant, this context tries to take over the
-		 * printing from another printing context.
+		 * Do not emit for EMERGENCY priority. The console will be
+		 * explicitly flushed when exiting the emergency section.
 		 */
-		if (console_trylock_spinning())
-			console_unlock();
+		if (nbcon_get_default_prio() == NBCON_PRIO_EMERGENCY) {
+			do_trylock_unlock = false;
+		} else {
+			/*
+			 * Try to acquire and then immediately release the
+			 * console semaphore. The release will print out
+			 * buffers. With the spinning variant, this context
+			 * tries to take over the printing from another
+			 * printing context.
+			 */
+			if (console_trylock_spinning())
+				console_unlock();
+		}
+
 		preempt_enable();
 	}
 
-	if (in_sched)
-		defer_console_output();
-	else
+	if (do_trylock_unlock)
 		wake_up_klogd();
+	else
+		defer_console_output();
 
 	return printed_len;
 }
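For review purposes, the decision logic that the hunk above introduces into vprintk_emit() can be exercised outside the kernel. The following is a minimal userspace sketch, not kernel code: the struct fields and caller_does_legacy_flush() are hypothetical stand-ins for the kernel flags and helpers referenced in the diff (printing_via_unlock, this_cpu_in_panic(), printk_threads_enabled, nbcon_atomic_flush_all(), and so on).

/* Hypothetical userspace model of the vprintk_emit() gating above. */
#include <stdbool.h>
#include <stdio.h>

struct ctx {
	bool preempt_rt;            /* IS_ENABLED(CONFIG_PREEMPT_RT) */
	bool printing_via_unlock;   /* legacy or boot consoles registered */
	bool have_boot_console;
	bool have_nbcon_console;
	bool this_cpu_in_panic;
	bool legacy_allow_panic_sync;
	bool threads_enabled;       /* printk_threads_enabled */
	bool shutting_down;         /* system_state > SYSTEM_RUNNING */
	bool sched_context;         /* LOGLEVEL_SCHED caller */
};

/* Returns true when the caller itself should flush the legacy consoles. */
static bool caller_does_legacy_flush(const struct ctx *c, bool *atomic_flush)
{
	bool do_unlock = c->printing_via_unlock && !c->preempt_rt;

	if (c->sched_context)
		do_unlock = false;	/* cannot call up() from the scheduler */

	*atomic_flush = false;
	if (!c->have_boot_console && c->have_nbcon_console) {
		if (c->this_cpu_in_panic)
			do_unlock &= c->legacy_allow_panic_sync;
		if (c->this_cpu_in_panic || !c->threads_enabled || c->shutting_down)
			*atomic_flush = true;	/* nbcon_atomic_flush_all() */
	}
	return do_unlock;
}

int main(void)
{
	struct ctx c = { .printing_via_unlock = true, .have_nbcon_console = true,
			 .this_cpu_in_panic = true };
	bool atomic_flush;
	bool direct = caller_does_legacy_flush(&c, &atomic_flush);

	/* In panic, without printk_legacy_allow_panic_sync(): nbcon flushes, legacy defers. */
	printf("direct legacy flush: %d, nbcon atomic flush: %d\n", direct, atomic_flush);
	return 0;
}

Toggling the fields shows which contexts would print synchronously and which would rely on the printer threads or deferred output.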
Lines 2335-2340 EXPORT_SYMBOL(_printk);
 static bool pr_flush(int timeout_ms, bool reset_on_progress);
 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
 
+static struct task_struct *nbcon_legacy_kthread;
+
+static inline void wake_up_legacy_kthread(void)
+{
+	if (nbcon_legacy_kthread)
+		wake_up_interruptible(&legacy_wait);
+}
+
 #else /* CONFIG_PRINTK */
 
 #define printk_time		false
Lines 2348-2353 static u64 syslog_seq;
 static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; }
 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
 
+static inline void nbcon_legacy_kthread_create(void) { }
+static inline void wake_up_legacy_kthread(void) { }
 #endif /* CONFIG_PRINTK */
 
 #ifdef CONFIG_EARLY_PRINTK
Lines 2563-2568 void suspend_console(void)
 void resume_console(void)
 {
 	struct console *con;
+	short flags;
+	int cookie;
 
 	if (!console_suspend_enabled)
 		return;
Lines 2579-2584 void resume_console(void)
 	 */
 	synchronize_srcu(&console_srcu);
 
+	/*
+	 * Since this runs in task context, wake the threaded printers
+	 * directly rather than scheduling irq_work to do it.
+	 */
+	cookie = console_srcu_read_lock();
+	for_each_console_srcu(con) {
+		flags = console_srcu_read_flags(con);
+		if (flags & CON_NBCON)
+			nbcon_kthread_wake(con);
+	}
+	console_srcu_read_unlock(cookie);
+
+	wake_up_legacy_kthread();
+
 	pr_flush(1000, true);
 }
 
Lines 2593-2599 void resume_console(void)
  */
 static int console_cpu_notify(unsigned int cpu)
 {
-	if (!cpuhp_tasks_frozen) {
+	if (!cpuhp_tasks_frozen && printing_via_unlock &&
+	    !IS_ENABLED(CONFIG_PREEMPT_RT)) {
 		/* If trylock fails, someone else is doing the printing */
 		if (console_trylock())
 			console_unlock();
Lines 2601-2626 static int console_cpu_notify(unsigned int cpu)
 	return 0;
 }
 
-/*
- * Return true if a panic is in progress on a remote CPU.
- *
- * On true, the local CPU should immediately release any printing resources
- * that may be needed by the panic CPU.
- */
-bool other_cpu_in_panic(void)
-{
-	if (!panic_in_progress())
-		return false;
-
-	/*
-	 * We can use raw_smp_processor_id() here because it is impossible for
-	 * the task to be migrated to the panic_cpu, or away from it. If
-	 * panic_cpu has already been set, and we're not currently executing on
-	 * that CPU, then we never will be.
-	 */
-	return atomic_read(&panic_cpu) != raw_smp_processor_id();
-}
-
 /**
  * console_lock - block the console subsystem from printing
  *
Lines 2670-2711 int is_console_locked(void)
 }
 EXPORT_SYMBOL(is_console_locked);
 
-/*
- * Check if the given console is currently capable and allowed to print
- * records.
- *
- * Requires the console_srcu_read_lock.
- */
-static inline bool console_is_usable(struct console *con)
-{
-	short flags = console_srcu_read_flags(con);
-
-	if (!(flags & CON_ENABLED))
-		return false;
-
-	if ((flags & CON_SUSPENDED))
-		return false;
-
-	if (!con->write)
-		return false;
-
-	/*
-	 * Console drivers may assume that per-cpu resources have been
-	 * allocated. So unless they're explicitly marked as being able to
-	 * cope (CON_ANYTIME) don't call them until this CPU is officially up.
-	 */
-	if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
-		return false;
-
-	return true;
-}
-
 static void __console_unlock(void)
 {
 	console_locked = 0;
 	up_console_sem();
 }
 
+static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP);
+
 #ifdef CONFIG_PRINTK
 
 /*
Lines 2776-2783 void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
 bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
 			     bool is_extended, bool may_suppress)
 {
-	static int panic_console_dropped;
-
 	struct printk_buffers *pbufs = pmsg->pbufs;
 	const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
 	const size_t outbuf_sz = sizeof(pbufs->outbuf);
Lines 2805-2821 bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
 	pmsg->seq = r.info->seq;
 	pmsg->dropped = r.info->seq - seq;
 
-	/*
-	 * Check for dropped messages in panic here so that printk
-	 * suppression can occur as early as possible if necessary.
-	 */
-	if (pmsg->dropped &&
-	    panic_in_progress() &&
-	    panic_console_dropped++ > 10) {
-		suppress_panic_printk = 1;
-		pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
-	}
-
 	/* Skip record that has level above the console loglevel. */
 	if (may_suppress && suppress_message_printing(r.info->level))
 		goto out;
Lines 2881-2911 static bool console_emit_next_record(struct console *con, bool *handover, int co
 		con->dropped = 0;
 	}
 
-	/*
-	 * While actively printing out messages, if another printk()
-	 * were to occur on another CPU, it may wait for this one to
-	 * finish. This task can not be preempted if there is a
-	 * waiter waiting to take over.
-	 *
-	 * Interrupts are disabled because the hand over to a waiter
-	 * must not be interrupted until the hand over is completed
-	 * (@console_waiter is cleared).
-	 */
-	printk_safe_enter_irqsave(flags);
-	console_lock_spinning_enable();
-
-	/* Do not trace print latency. */
-	stop_critical_timings();
-
 	/* Write everything out to the hardware. */
-	con->write(con, outbuf, pmsg.outbuf_len);
 
-	start_critical_timings();
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		/*
+		 * On PREEMPT_RT this function is either in a thread or
+		 * panic context. So there is no need for concern about
+		 * printk reentrance, handovers, or lockdep complaints.
+		 */
 
-	con->seq = pmsg.seq + 1;
+		con->write(con, outbuf, pmsg.outbuf_len);
+		con->seq = pmsg.seq + 1;
+	} else {
+		/*
+		 * While actively printing out messages, if another printk()
+		 * were to occur on another CPU, it may wait for this one to
+		 * finish. This task can not be preempted if there is a
+		 * waiter waiting to take over.
+		 *
+		 * Interrupts are disabled because the hand over to a waiter
+		 * must not be interrupted until the hand over is completed
+		 * (@console_waiter is cleared).
+		 */
+		printk_safe_enter_irqsave(flags);
+		console_lock_spinning_enable();
 
-	*handover = console_lock_spinning_disable_and_check(cookie);
-	printk_safe_exit_irqrestore(flags);
+		/* Do not trace print latency. */
+		stop_critical_timings();
+
+		lock_map_acquire_try(&printk_legacy_map);
+		con->write(con, outbuf, pmsg.outbuf_len);
+		lock_map_release(&printk_legacy_map);
+
+		start_critical_timings();
+
+		con->seq = pmsg.seq + 1;
+
+		*handover = console_lock_spinning_disable_and_check(cookie);
+		printk_safe_exit_irqrestore(flags);
+	}
 skip:
 	return true;
 }
Lines 2958-2970 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
 
 		cookie = console_srcu_read_lock();
 		for_each_console_srcu(con) {
+			short flags = console_srcu_read_flags(con);
+			u64 printk_seq;
 			bool progress;
 
-			if (!console_is_usable(con))
+			/*
+			 * console_flush_all() is only for legacy consoles,
+			 * unless the nbcon console has no kthread printer.
+			 */
+			if ((flags & CON_NBCON) && con->kthread)
+				continue;
+
+			if (!console_is_usable(con, flags, true))
 				continue;
 			any_usable = true;
 
-			progress = console_emit_next_record(con, handover, cookie);
+			if (flags & CON_NBCON) {
+
+				lock_map_acquire_try(&printk_legacy_map);
+				progress = nbcon_atomic_emit_next_record(con, handover, cookie);
+				lock_map_release(&printk_legacy_map);
+
+				printk_seq = nbcon_seq_read(con);
+			} else {
+				progress = console_emit_next_record(con, handover, cookie);
+
+				printk_seq = con->seq;
+			}
 
 			/*
 			 * If a handover has occurred, the SRCU read lock
Lines 2974-2981 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
 				return false;
 
 			/* Track the next of the highest seq flushed. */
-			if (con->seq > *next_seq)
-				*next_seq = con->seq;
+			if (printk_seq > *next_seq)
+				*next_seq = printk_seq;
 
 			if (!progress)
 				continue;
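For reference, here is a small userspace model (not kernel code) of the bookkeeping this hunk adds to console_flush_all(): nbcon consoles that own a printer kthread are skipped, and *next_seq tracks the highest sequence number flushed regardless of console type. The struct con fields and flush_pass() are hypothetical; the real code obtains progress and sequence numbers through console_emit_next_record(), nbcon_atomic_emit_next_record() and nbcon_seq_read().

/* Hypothetical model of the per-console bookkeeping in console_flush_all(). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct con {
	bool nbcon;
	bool has_kthread;
	uint64_t seq;		/* next record this console will print */
};

/* Returns the next sequence number beyond everything flushed in this pass. */
static uint64_t flush_pass(struct con *cons, int n, uint64_t next_seq)
{
	for (int i = 0; i < n; i++) {
		/* Legacy flushing skips nbcon consoles that have their own printer thread. */
		if (cons[i].nbcon && cons[i].has_kthread)
			continue;
		cons[i].seq++;			/* pretend one record was emitted */
		if (cons[i].seq > next_seq)
			next_seq = cons[i].seq;	/* track the highest seq flushed */
	}
	return next_seq;
}

int main(void)
{
	struct con cons[] = { { false, false, 10 }, { true, true, 7 }, { true, false, 9 } };

	/* The nbcon console with a kthread is left alone; the others advance. */
	printf("next_seq = %llu\n", (unsigned long long)flush_pass(cons, 3, 0));
	return 0;
}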
Lines 2998-3016 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
 	return false;
 }
 
-/**
- * console_unlock - unblock the console subsystem from printing
- *
- * Releases the console_lock which the caller holds to block printing of
- * the console subsystem.
- *
- * While the console_lock was held, console output may have been buffered
- * by printk().  If this is the case, console_unlock(); emits
- * the output prior to releasing the lock.
- *
- * console_unlock(); may be called from any context.
- */
-void console_unlock(void)
+static void console_flush_and_unlock(void)
 {
 	bool do_cond_resched;
 	bool handover;
Lines 3054-3059 void console_unlock(void)
 		 */
 	} while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
 }
+
+/**
+ * console_unlock - unblock the console subsystem from printing
+ *
+ * Releases the console_lock which the caller holds to block printing of
+ * the console subsystem.
+ *
+ * While the console_lock was held, console output may have been buffered
+ * by printk().  If this is the case, console_unlock(); emits
+ * the output prior to releasing the lock.
+ *
+ * console_unlock(); may be called from any context.
+ */
+void console_unlock(void)
+{
+	/*
+	 * PREEMPT_RT relies on kthread and atomic consoles for printing.
+	 * It never attempts to print from console_unlock().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		__console_unlock();
+		return;
+	}
+
+	console_flush_and_unlock();
+}
 EXPORT_SYMBOL(console_unlock);
 
 /**
Lines 3187-3193 void console_flush_on_panic(enum con_flush_mode mode)
 		console_srcu_read_unlock(cookie);
 	}
 
-	console_flush_all(false, &next_seq, &handover);
+	nbcon_atomic_flush_all();
+
+	if (printing_via_unlock)
+		console_flush_all(false, &next_seq, &handover);
 }
 
 /*
Lines 3244-3256 EXPORT_SYMBOL(console_stop);
 
 void console_start(struct console *console)
 {
+	short flags;
+
 	console_list_lock();
 	console_srcu_write_flags(console, console->flags | CON_ENABLED);
+	flags = console->flags;
 	console_list_unlock();
+
+	/*
+	 * Ensure that all SRCU list walks have completed. The related
+	 * printing context must be able to see it is enabled so that
+	 * it is guaranteed to wake up and resume printing.
+	 */
+	synchronize_srcu(&console_srcu);
+
+	if (flags & CON_NBCON)
+		nbcon_kthread_wake(console);
+	else
+		wake_up_legacy_kthread();
+
 	__pr_flush(console, 1000, true);
 }
 EXPORT_SYMBOL(console_start);
 
+#ifdef CONFIG_PRINTK
+static bool printer_should_wake(void)
+{
+	bool available = false;
+	struct console *con;
+	int cookie;
+
+	if (kthread_should_stop())
+		return true;
+
+	cookie = console_srcu_read_lock();
+	for_each_console_srcu(con) {
+		short flags = console_srcu_read_flags(con);
+		u64 printk_seq;
+
+		/*
+		 * The legacy printer thread is only for legacy consoles,
+		 * unless the nbcon console has no kthread printer.
+		 */
+		if ((flags & CON_NBCON) && con->kthread)
+			continue;
+
+		if (!console_is_usable(con, flags, true))
+			continue;
+
+		if (flags & CON_NBCON) {
+			printk_seq = nbcon_seq_read(con);
+		} else {
+			/*
+			 * It is safe to read @seq because only this
+			 * thread context updates @seq.
+			 */
+			printk_seq = con->seq;
+		}
+
+		if (prb_read_valid(prb, printk_seq, NULL)) {
+			available = true;
+			break;
+		}
+	}
+	console_srcu_read_unlock(cookie);
+
+	return available;
+}
+
+static int nbcon_legacy_kthread_func(void *unused)
+{
+	int error;
+
+	for (;;) {
+		error = wait_event_interruptible(legacy_wait, printer_should_wake());
+
+		if (kthread_should_stop())
+			break;
+
+		if (error)
+			continue;
+
+		console_lock();
+		console_flush_and_unlock();
+	}
+
+	return 0;
+}
+
+void nbcon_legacy_kthread_create(void)
+{
+	struct task_struct *kt;
+
+	lockdep_assert_held(&console_mutex);
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		return;
+
+	if (!printk_threads_enabled || nbcon_legacy_kthread)
+		return;
+
+	kt = kthread_run(nbcon_legacy_kthread_func, NULL, "pr/legacy");
+	if (IS_ERR(kt)) {
+		pr_err("unable to start legacy printing thread\n");
+		return;
+	}
+
+	nbcon_legacy_kthread = kt;
+
+	/*
+	 * It is important that console printing threads are scheduled
+	 * shortly after a printk call and with generous runtime budgets.
+	 */
+	sched_set_normal(nbcon_legacy_kthread, -20);
+}
+#endif /* CONFIG_PRINTK */
+
 static int __read_mostly keep_bootcon;
 
 static int __init keep_bootcon_setup(char *str)
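The new legacy printer kthread sleeps until printer_should_wake() reports pending work. A rough userspace analogue of that predicate is shown below; it is illustrative only, with hypothetical types standing in for struct console, the SRCU iteration, and prb_read_valid().

/* Hypothetical model of the printer_should_wake() check added above. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct con {
	bool nbcon;
	bool has_kthread;
	bool usable;
	uint64_t next_seq;	/* next record this console would print */
};

static bool printer_should_wake(const struct con *cons, int n, uint64_t newest_seq)
{
	for (int i = 0; i < n; i++) {
		if (cons[i].nbcon && cons[i].has_kthread)
			continue;	/* that console has its own printer thread */
		if (!cons[i].usable)
			continue;
		if (cons[i].next_seq < newest_seq)
			return true;	/* a record is pending for this console */
	}
	return false;
}

int main(void)
{
	struct con cons[] = { { false, false, true, 5 }, { true, true, true, 2 } };

	/* The legacy console is behind the newest record, so the thread wakes. */
	printf("wake: %d\n", printer_should_wake(cons, 2, 6));
	return 0;
}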
Lines 3382-3392 static void console_init_seq(struct console *newcon, bool bootcon_registered)
 
 				newcon->seq = prb_next_seq(prb);
 				for_each_console(con) {
-					if ((con->flags & CON_BOOT) &&
-					    (con->flags & CON_ENABLED) &&
-					    con->seq < newcon->seq) {
-						newcon->seq = con->seq;
+					u64 seq;
+
+					if (!((con->flags & CON_BOOT) &&
+					      (con->flags & CON_ENABLED))) {
+						continue;
 					}
+
+					if (con->flags & CON_NBCON)
+						seq = nbcon_seq_read(con);
+					else
+						seq = con->seq;
+
+					if (seq < newcon->seq)
+						newcon->seq = seq;
 				}
 			}
 
Lines 3503-3510 void register_console(struct console *newcon)
 	newcon->dropped = 0;
 	console_init_seq(newcon, bootcon_registered);
 
-	if (newcon->flags & CON_NBCON)
+	if (newcon->flags & CON_NBCON) {
+		have_nbcon_console = true;
 		nbcon_init(newcon);
+	} else {
+		have_legacy_console = true;
+		nbcon_legacy_kthread_create();
+	}
+
+	if (newcon->flags & CON_BOOT)
+		have_boot_console = true;
 
 	/*
 	 * Put this console in the list - keep the
Lines 3558-3563 EXPORT_SYMBOL(register_console);
 /* Must be called under console_list_lock(). */
 static int unregister_console_locked(struct console *console)
 {
+	bool is_boot_con = (console->flags & CON_BOOT);
+	bool found_legacy_con = false;
+	bool found_nbcon_con = false;
+	bool found_boot_con = false;
+	struct console *c;
 	int res;
 
 	lockdep_assert_console_list_lock_held();
Lines 3605-3610 static int unregister_console_locked(struct console *console)
 	if (console->exit)
 		res = console->exit(console);
 
+	/*
+	 * With this console gone, the global flags tracking registered
+	 * console types may have changed. Update them.
+	 */
+	for_each_console(c) {
+		if (c->flags & CON_BOOT)
+			found_boot_con = true;
+
+		if (c->flags & CON_NBCON)
+			found_nbcon_con = true;
+		else
+			found_legacy_con = true;
+	}
+	if (!found_boot_con)
+		have_boot_console = false;
+	if (!found_legacy_con)
+		have_legacy_console = false;
+	if (!found_nbcon_con)
+		have_nbcon_console = false;
+
+	/*
+	 * When the last boot console unregisters, start up the
+	 * printing threads.
+	 */
+	if (is_boot_con && !have_boot_console) {
+		for_each_console(c)
+			nbcon_kthread_create(c);
+	}
+
+#ifdef CONFIG_PRINTK
+	if (!printing_via_unlock && nbcon_legacy_kthread) {
+		kthread_stop(nbcon_legacy_kthread);
+		nbcon_legacy_kthread = NULL;
+	}
+#endif
+
 	return res;
 }
 
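The unregister path above recomputes the global console-type flags from whatever consoles remain on the list. A self-contained sketch of that recount, with plain illustrative types rather than the kernel's list and flag macros:

/* Hypothetical model of recomputing the console-type flags at unregister time. */
#include <stdbool.h>
#include <stdio.h>

struct con { bool boot; bool nbcon; };

struct have_flags { bool boot, legacy, nbcon; };

static struct have_flags recount(const struct con *cons, int n)
{
	struct have_flags f = { false, false, false };

	for (int i = 0; i < n; i++) {
		if (cons[i].boot)
			f.boot = true;
		if (cons[i].nbcon)
			f.nbcon = true;
		else
			f.legacy = true;
	}
	return f;
}

int main(void)
{
	/* After the last boot console is gone, only an nbcon console remains. */
	struct con remaining[] = { { .boot = false, .nbcon = true } };
	struct have_flags f = recount(remaining, 1);

	printf("boot=%d legacy=%d nbcon=%d\n", f.boot, f.legacy, f.nbcon);
	return 0;
}

Deriving the flags from scratch, instead of toggling them incrementally, is what lets the patch decide when the legacy printer thread can be stopped and when the nbcon kthreads should be started.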
Lines 3755-3785 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
 	u64 last_diff = 0;
 	u64 printk_seq;
 	short flags;
+	bool locked;
 	int cookie;
 	u64 diff;
 	u64 seq;
 
 	might_sleep();
 
-	seq = prb_next_seq(prb);
+	seq = prb_next_reserve_seq(prb);
 
-	/* Flush the consoles so that records up to @seq are printed. */
-	console_lock();
-	console_unlock();
+	/*
+	 * Flush the consoles so that records up to @seq are printed.
+	 * Otherwise this function will just wait for the threaded printers
+	 * to print up to @seq.
+	 */
+	if (printing_via_unlock && !IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		console_lock();
+		console_unlock();
+	}
 
 	for (;;) {
 		unsigned long begin_jiffies;
 		unsigned long slept_jiffies;
 
+		locked = false;
 		diff = 0;
 
-		/*
-		 * Hold the console_lock to guarantee safe access to
-		 * console->seq. Releasing console_lock flushes more
-		 * records in case @seq is still not printed on all
-		 * usable consoles.
-		 */
-		console_lock();
+		if (printing_via_unlock) {
+			/*
+			 * Hold the console_lock to guarantee safe access to
+			 * console->seq. Releasing console_lock flushes more
+			 * records in case @seq is still not printed on all
+			 * usable consoles.
+			 */
+			console_lock();
+			locked = true;
+		}
 
 		cookie = console_srcu_read_lock();
 		for_each_console_srcu(c) {
Lines 3793-3804 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
 			 * that they make forward progress, so only increment
 			 * @diff for usable consoles.
 			 */
-			if (!console_is_usable(c))
+			if (!console_is_usable(c, flags, true) &&
+			    !console_is_usable(c, flags, false)) {
 				continue;
+			}
 
 			if (flags & CON_NBCON) {
 				printk_seq = nbcon_seq_read(c);
 			} else {
+				WARN_ON_ONCE(!locked);
 				printk_seq = c->seq;
 			}
 
Lines 3810-3816 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre
 		if (diff != last_diff && reset_on_progress)
 			remaining_jiffies = timeout_jiffies;
 
-		console_unlock();
+		if (locked)
+			console_unlock();
 
 		/* Note: @diff is 0 if there are no usable consoles. */
 		if (diff == 0 || remaining_jiffies == 0)
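__pr_flush() keeps waiting as long as the consoles make progress and only gives up once a full timeout passes with no movement (when reset_on_progress is set). Below is a compact userspace model of that loop; it is not kernel code, and ticks/steps_per_tick are made-up stand-ins for jiffies and for however many records get printed per sleep.

/* Hypothetical model of the timeout handling in __pr_flush(). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool wait_for_flush(uint64_t *behind, int steps_per_tick,
			   int timeout_ticks, bool reset_on_progress)
{
	int remaining = timeout_ticks;
	uint64_t last_diff = 0;

	while (*behind) {
		uint64_t diff = *behind;

		if (diff != last_diff && reset_on_progress)
			remaining = timeout_ticks;	/* progress: restart the clock */
		if (remaining == 0)
			return false;			/* timed out */
		*behind -= (diff < (uint64_t)steps_per_tick) ? diff : steps_per_tick;
		last_diff = diff;
		remaining--;				/* one tick slept */
	}
	return true;
}

int main(void)
{
	uint64_t behind = 25;	/* records not yet printed on some console */

	/* Slow but steady progress still completes because the budget keeps resetting. */
	printf("flushed: %d\n", wait_for_flush(&behind, 3, 5, true));
	return 0;
}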
Lines 3862-3870 static void wake_up_klogd_work_func(struct irq_work *irq_work)
 	int pending = this_cpu_xchg(printk_pending, 0);
 
 	if (pending & PRINTK_PENDING_OUTPUT) {
-		/* If trylock fails, someone else is doing the printing */
-		if (console_trylock())
-			console_unlock();
+		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+			wake_up_interruptible(&legacy_wait);
+		} else {
+			/*
+			 * If trylock fails, some other context
+			 * will do the printing.
+			 */
+			if (console_trylock())
+				console_unlock();
+		}
 	}
 
 	if (pending & PRINTK_PENDING_WAKEUP)
Lines 3932-3942 void defer_console_output(void)
 	 * New messages may have been added directly to the ringbuffer
 	 * using vprintk_store(), so wake any waiters as well.
 	 */
-	__wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
+	int val = PRINTK_PENDING_WAKEUP;
+
+	if (printing_via_unlock)
+		val |= PRINTK_PENDING_OUTPUT;
+	__wake_up_klogd(val);
 }
 
 void printk_trigger_flush(void)
 {
+	nbcon_wake_threads();
 	defer_console_output();
 }
 
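defer_console_output() now raises PRINTK_PENDING_OUTPUT only when a legacy or boot console is registered to consume it. A tiny standalone model of that flag selection follows; the macro values here are illustrative, not the kernel's.

/* Hypothetical model of the PRINTK_PENDING_* selection in defer_console_output(). */
#include <stdbool.h>
#include <stdio.h>

#define PENDING_WAKEUP 0x01	/* wake klogd readers */
#define PENDING_OUTPUT 0x02	/* ask another context to flush legacy consoles */

static int deferred_pending(bool printing_via_unlock)
{
	int val = PENDING_WAKEUP;

	/* Only request a legacy flush when legacy/boot consoles can use it. */
	if (printing_via_unlock)
		val |= PENDING_OUTPUT;
	return val;
}

int main(void)
{
	printf("nbcon-only system: 0x%x\n", deferred_pending(false));	/* 0x1 */
	printf("legacy consoles:   0x%x\n", deferred_pending(true));	/* 0x3 */
	return 0;
}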
(-)a/kernel/printk/printk_ringbuffer.c (-57 / +305 lines)
Lines 6-11
 #include <linux/errno.h>
 #include <linux/bug.h>
 #include "printk_ringbuffer.h"
+#include "internal.h"
 
 /**
  * DOC: printk_ringbuffer overview
Lines 303-308
  *
  *   desc_push_tail:B / desc_reserve:D
  *     set descriptor reusable (state), then push descriptor tail (id)
+ *
+ *   desc_update_last_finalized:A / desc_last_finalized_seq:A
+ *     store finalized record, then set new highest finalized sequence number
  */
 
 #define DATA_SIZE(data_ring)		_DATA_SIZE((data_ring)->size_bits)
Lines 1030-1038 static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size, Link Here
1030
	unsigned long next_lpos;
1034
	unsigned long next_lpos;
1031
1035
1032
	if (size == 0) {
1036
	if (size == 0) {
1033
		/* Specify a data-less block. */
1037
		/*
1034
		blk_lpos->begin = NO_LPOS;
1038
		 * Data blocks are not created for empty lines. Instead, the
1035
		blk_lpos->next = NO_LPOS;
1039
		 * reader will recognize these special lpos values and handle
1040
		 * it appropriately.
1041
		 */
1042
		blk_lpos->begin = EMPTY_LINE_LPOS;
1043
		blk_lpos->next = EMPTY_LINE_LPOS;
1036
		return NULL;
1044
		return NULL;
1037
	}
1045
	}
1038
1046
Lines 1210-1219 static const char *get_data(struct prb_data_ring *data_ring, Link Here
1210
1218
1211
	/* Data-less data block description. */
1219
	/* Data-less data block description. */
1212
	if (BLK_DATALESS(blk_lpos)) {
1220
	if (BLK_DATALESS(blk_lpos)) {
1213
		if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) {
1221
		/*
1222
		 * Records that are just empty lines are also valid, even
1223
		 * though they do not have a data block. For such records
1224
		 * explicitly return empty string data to signify success.
1225
		 */
1226
		if (blk_lpos->begin == EMPTY_LINE_LPOS &&
1227
		    blk_lpos->next == EMPTY_LINE_LPOS) {
1214
			*data_size = 0;
1228
			*data_size = 0;
1215
			return "";
1229
			return "";
1216
		}
1230
		}
1231
1232
		/* Data lost, invalid, or otherwise unavailable. */
1217
		return NULL;
1233
		return NULL;
1218
	}
1234
	}
1219
1235
Lines 1441-1460 bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer Link Here
1441
	return false;
1457
	return false;
1442
}
1458
}
1443
1459
1460
/*
1461
 * @last_finalized_seq value guarantees that all records up to and including
1462
 * this sequence number are finalized and can be read. The only exception are
1463
 * too old records which have already been overwritten.
1464
 *
1465
 * It is also guaranteed that @last_finalized_seq only increases.
1466
 *
1467
 * Be aware that finalized records following non-finalized records are not
1468
 * reported because they are not yet available to the reader. For example,
1469
 * a new record stored via printk() will not be available to a printer if
1470
 * it follows a record that has not been finalized yet. However, once that
1471
 * non-finalized record becomes finalized, @last_finalized_seq will be
1472
 * appropriately updated and the full set of finalized records will be
1473
 * available to the printer. And since each printk() caller will either
1474
 * directly print or trigger deferred printing of all available unprinted
1475
 * records, all printk() messages will get printed.
1476
 */
1477
static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb)
1478
{
1479
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
1480
	unsigned long ulseq;
1481
1482
	/*
1483
	 * Guarantee the sequence number is loaded before loading the
1484
	 * associated record in order to guarantee that the record can be
1485
	 * seen by this CPU. This pairs with desc_update_last_finalized:A.
1486
	 */
1487
	ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq
1488
					); /* LMM(desc_last_finalized_seq:A) */
1489
1490
	return __ulseq_to_u64seq(rb, ulseq);
1491
}
1492
1493
static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
1494
			    struct printk_record *r, unsigned int *line_count);
1495
1496
/*
1497
 * Check if there are records directly following @last_finalized_seq that are
1498
 * finalized. If so, update @last_finalized_seq to the latest of these
1499
 * records. It is not allowed to skip over records that are not yet finalized.
1500
 */
1501
static void desc_update_last_finalized(struct printk_ringbuffer *rb)
1502
{
1503
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
1504
	u64 old_seq = desc_last_finalized_seq(rb);
1505
	unsigned long oldval;
1506
	unsigned long newval;
1507
	u64 finalized_seq;
1508
	u64 try_seq;
1509
1510
try_again:
1511
	finalized_seq = old_seq;
1512
	try_seq = finalized_seq + 1;
1513
1514
	/* Try to find later finalized records. */
1515
	while (_prb_read_valid(rb, &try_seq, NULL, NULL)) {
1516
		finalized_seq = try_seq;
1517
		try_seq++;
1518
	}
1519
1520
	/* No update needed if no later finalized record was found. */
1521
	if (finalized_seq == old_seq)
1522
		return;
1523
1524
	oldval = __u64seq_to_ulseq(old_seq);
1525
	newval = __u64seq_to_ulseq(finalized_seq);
1526
1527
	/*
1528
	 * Set the sequence number of a later finalized record that has been
1529
	 * seen.
1530
	 *
1531
	 * Guarantee the record data is visible to other CPUs before storing
1532
	 * its sequence number. This pairs with desc_last_finalized_seq:A.
1533
	 *
1534
	 * Memory barrier involvement:
1535
	 *
1536
	 * If desc_last_finalized_seq:A reads from
1537
	 * desc_update_last_finalized:A, then desc_read:A reads from
1538
	 * _prb_commit:B.
1539
	 *
1540
	 * Relies on:
1541
	 *
1542
	 * RELEASE from _prb_commit:B to desc_update_last_finalized:A
1543
	 *    matching
1544
	 * ACQUIRE from desc_last_finalized_seq:A to desc_read:A
1545
	 *
1546
	 * Note: _prb_commit:B and desc_update_last_finalized:A can be
1547
	 *       different CPUs. However, the desc_update_last_finalized:A
1548
	 *       CPU (which performs the release) must have previously seen
1549
	 *       _prb_commit:B.
1550
	 */
1551
	if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq,
1552
				&oldval, newval)) { /* LMM(desc_update_last_finalized:A) */
1553
		old_seq = __ulseq_to_u64seq(rb, oldval);
1554
		goto try_again;
1555
	}
1556
}
1557
1444
/*
1558
/*
1445
 * Attempt to finalize a specified descriptor. If this fails, the descriptor
1559
 * Attempt to finalize a specified descriptor. If this fails, the descriptor
1446
 * is either already final or it will finalize itself when the writer commits.
1560
 * is either already final or it will finalize itself when the writer commits.
1447
 */
1561
 */
1448
static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id)
1562
static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id)
1449
{
1563
{
1564
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
1450
	unsigned long prev_state_val = DESC_SV(id, desc_committed);
1565
	unsigned long prev_state_val = DESC_SV(id, desc_committed);
1451
	struct prb_desc *d = to_desc(desc_ring, id);
1566
	struct prb_desc *d = to_desc(desc_ring, id);
1452
1567
1453
	atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val,
1568
	if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val,
1454
			DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */
1569
			DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */
1455
1570
		desc_update_last_finalized(rb);
1456
	/* Best effort to remember the last finalized @id. */
1571
	}
1457
	atomic_long_set(&desc_ring->last_finalized_id, id);
1458
}
1572
}
1459
1573
1460
/**
1574
/**
Lines 1550-1556 bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, Link Here
1550
	 * readers. (For seq==0 there is no previous descriptor.)
1664
	 * readers. (For seq==0 there is no previous descriptor.)
1551
	 */
1665
	 */
1552
	if (info->seq > 0)
1666
	if (info->seq > 0)
1553
		desc_make_final(desc_ring, DESC_ID(id - 1));
1667
		desc_make_final(rb, DESC_ID(id - 1));
1554
1668
1555
	r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id);
1669
	r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id);
1556
	/* If text data allocation fails, a data-less record is committed. */
1670
	/* If text data allocation fails, a data-less record is committed. */
Lines 1643-1649 void prb_commit(struct prb_reserved_entry *e) Link Here
1643
	 */
1757
	 */
1644
	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */
1758
	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */
1645
	if (head_id != e->id)
1759
	if (head_id != e->id)
1646
		desc_make_final(desc_ring, e->id);
1760
		desc_make_final(e->rb, e->id);
1647
}
1761
}
1648
1762
1649
/**
1763
/**
Lines 1663-1674 void prb_commit(struct prb_reserved_entry *e) Link Here
1663
 */
1777
 */
1664
void prb_final_commit(struct prb_reserved_entry *e)
1778
void prb_final_commit(struct prb_reserved_entry *e)
1665
{
1779
{
1666
	struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
1667
1668
	_prb_commit(e, desc_finalized);
1780
	_prb_commit(e, desc_finalized);
1669
1781
1670
	/* Best effort to remember the last finalized @id. */
1782
	desc_update_last_finalized(e->rb);
1671
	atomic_long_set(&desc_ring->last_finalized_id, e->id);
1672
}
1783
}
1673
1784
1674
/*
1785
/*
Lines 1746-1751 static bool copy_data(struct prb_data_ring *data_ring, Link Here
1746
 * descriptor. However, it also verifies that the record is finalized and has
1857
 * descriptor. However, it also verifies that the record is finalized and has
1747
 * the sequence number @seq. On success, 0 is returned.
1858
 * the sequence number @seq. On success, 0 is returned.
1748
 *
1859
 *
1860
 * For the panic CPU, committed descriptors are also considered finalized.
1861
 *
1749
 * Error return values:
1862
 * Error return values:
1750
 * -EINVAL: A finalized record with sequence number @seq does not exist.
1863
 * -EINVAL: A finalized record with sequence number @seq does not exist.
1751
 * -ENOENT: A finalized record with sequence number @seq exists, but its data
1864
 * -ENOENT: A finalized record with sequence number @seq exists, but its data
Lines 1764-1779 static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring, Link Here
1764
1877
1765
	/*
1878
	/*
1766
	 * An unexpected @id (desc_miss) or @seq mismatch means the record
1879
	 * An unexpected @id (desc_miss) or @seq mismatch means the record
1767
	 * does not exist. A descriptor in the reserved or committed state
1880
	 * does not exist. A descriptor in the reserved state means the
1768
	 * means the record does not yet exist for the reader.
1881
	 * record does not yet exist for the reader.
1769
	 */
1882
	 */
1770
	if (d_state == desc_miss ||
1883
	if (d_state == desc_miss ||
1771
	    d_state == desc_reserved ||
1884
	    d_state == desc_reserved ||
1772
	    d_state == desc_committed ||
1773
	    s != seq) {
1885
	    s != seq) {
1774
		return -EINVAL;
1886
		return -EINVAL;
1775
	}
1887
	}
1776
1888
1889
	/*
1890
	 * A descriptor in the committed state means the record does not yet
1891
	 * exist for the reader. However, for the panic CPU, committed
1892
	 * records are also handled as finalized records since they contain
1893
	 * message data in a consistent state and may contain additional
1894
	 * hints as to the cause of the panic.
1895
	 */
1896
	if (d_state == desc_committed && !this_cpu_in_panic())
1897
		return -EINVAL;
1898
1777
	/*
1899
	/*
1778
	 * A descriptor in the reusable state may no longer have its data
1900
	 * A descriptor in the reusable state may no longer have its data
1779
	 * available; report it as existing but with lost data. Or the record
1901
	 * available; report it as existing but with lost data. Or the record
Lines 1832-1838 static int prb_read(struct printk_ringbuffer *rb, u64 seq, Link Here
1832
}
1954
}
1833
1955
1834
/* Get the sequence number of the tail descriptor. */
1956
/* Get the sequence number of the tail descriptor. */
1835
static u64 prb_first_seq(struct printk_ringbuffer *rb)
1957
u64 prb_first_seq(struct printk_ringbuffer *rb)
1836
{
1958
{
1837
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
1959
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
1838
	enum desc_state d_state;
1960
	enum desc_state d_state;
Lines 1875-1886 static u64 prb_first_seq(struct printk_ringbuffer *rb) Link Here
1875
	return seq;
1997
	return seq;
1876
}
1998
}
1877
1999
1878
/*
2000
/**
1879
 * Non-blocking read of a record. Updates @seq to the last finalized record
2001
 * prb_next_reserve_seq() - Get the sequence number after the most recently
1880
 * (which may have no data available).
2002
 *                  reserved record.
1881
 *
2003
 *
1882
 * See the description of prb_read_valid() and prb_read_valid_info()
2004
 * @rb:  The ringbuffer to get the sequence number from.
1883
 * for details.
2005
 *
2006
 * This is the public function available to readers to see what sequence
2007
 * number will be assigned to the next reserved record.
2008
 *
2009
 * Note that depending on the situation, this value can be equal to or
2010
 * higher than the sequence number returned by prb_next_seq().
2011
 *
2012
 * Context: Any context.
2013
 * Return: The sequence number that will be assigned to the next record
2014
 *         reserved.
2015
 */
2016
u64 prb_next_reserve_seq(struct printk_ringbuffer *rb)
2017
{
2018
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
2019
	unsigned long last_finalized_id;
2020
	atomic_long_t *state_var;
2021
	u64 last_finalized_seq;
2022
	unsigned long head_id;
2023
	struct prb_desc desc;
2024
	unsigned long diff;
2025
	struct prb_desc *d;
2026
	int err;
2027
2028
	/*
2029
	 * It may not be possible to read a sequence number for @head_id.
2030
	 * So the ID of @last_finailzed_seq is used to calculate what the
2031
	 * sequence number of @head_id will be.
2032
	 */
2033
2034
try_again:
2035
	last_finalized_seq = desc_last_finalized_seq(rb);
2036
2037
	/*
2038
	 * @head_id is loaded after @last_finalized_seq to ensure that it is
2039
	 * at or beyond @last_finalized_seq.
2040
	 *
2041
	 * Memory barrier involvement:
2042
	 *
2043
	 * If desc_last_finalized_seq:A reads from
2044
	 * desc_update_last_finalized:A, then
2045
	 * prb_next_reserve_seq:A reads from desc_reserve:D.
2046
	 *
2047
	 * Relies on:
2048
	 *
2049
	 * RELEASE from desc_reserve:D to desc_update_last_finalized:A
2050
	 *    matching
2051
	 * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A
2052
	 *
2053
	 * Note: desc_reserve:D and desc_update_last_finalized:A can be
2054
	 *       different CPUs. However, the desc_update_last_finalized:A CPU
2055
	 *       (which performs the release) must have previously seen
2056
	 *       desc_read:C, which implies desc_reserve:D can be seen.
2057
	 */
2058
	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */
2059
2060
	d = to_desc(desc_ring, last_finalized_seq);
2061
	state_var = &d->state_var;
2062
2063
	/* Extract the ID, used to specify the descriptor to read. */
2064
	last_finalized_id = DESC_ID(atomic_long_read(state_var));
2065
2066
	/* Ensure @last_finalized_id is correct. */
2067
	err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc);
2068
2069
	if (err == -EINVAL) {
2070
		if (last_finalized_seq == 0) {
2071
			/*
2072
			 * @last_finalized_seq still contains its initial
2073
			 * value. Probably no record has been finalized yet.
2074
			 * This means the ringbuffer is not yet full and the
2075
			 * @head_id value can be used directly (subtracting
2076
			 * off the id value corresponding to seq=0).
2077
			 */
2078
2079
			/*
2080
			 * Because of hack#2 of the bootstrapping phase, the
2081
			 * @head_id initial value must be handled separately.
2082
			 */
2083
			if (head_id == DESC0_ID(desc_ring->count_bits))
2084
				return 0;
2085
2086
			/*
2087
			 * The @head_id is initialized such that the first
2088
			 * increment will yield the first record (seq=0).
2089
			 * Therefore use the initial value +1 as the base to
2090
			 * subtract from @head_id.
2091
			 */
2092
			last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1;
2093
		} else {
2094
			/* Record must have been overwritten. Try again. */
2095
			goto try_again;
2096
		}
2097
	}
2098
2099
	/*
2100
	 * @diff is the number of records beyond the last record available
2101
	 * to readers.
2102
	 */
2103
	diff = head_id - last_finalized_id;
2104
2105
	/*
2106
	 * @head_id points to the most recently reserved record, but this
2107
	 * function returns the sequence number that will be assigned to the
2108
	 * next (not yet reserved) record. Thus +1 is needed.
2109
	 */
2110
	return (last_finalized_seq + diff + 1);
2111
}
2112
2113
/*
2114
 * Non-blocking read of a record.
2115
 *
2116
 * On success @seq is updated to the record that was read and (if provided)
2117
 * @r and @line_count will contain the read/calculated data.
2118
 *
2119
 * On failure @seq is updated to a record that is not yet available to the
2120
 * reader, but it will be the next record available to the reader.
2121
 *
2122
 * Note: When the current CPU is in panic, this function will skip over any
2123
 *       non-existent/non-finalized records in order to allow the panic CPU
2124
 *       to print any and all records that have been finalized.
1884
 */
2125
 */
1885
static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
2126
static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
1886
			    struct printk_record *r, unsigned int *line_count)
2127
			    struct printk_record *r, unsigned int *line_count)
Lines 1899-1910 static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, Link Here
1899
			*seq = tail_seq;
2140
			*seq = tail_seq;
1900
2141
1901
		} else if (err == -ENOENT) {
2142
		} else if (err == -ENOENT) {
1902
			/* Record exists, but no data available. Skip. */
2143
			/* Record exists, but the data was lost. Skip. */
1903
			(*seq)++;
2144
			(*seq)++;
1904
2145
1905
		} else {
2146
		} else {
1906
			/* Non-existent/non-finalized record. Must stop. */
2147
			/*
1907
			return false;
2148
			 * Non-existent/non-finalized record. Must stop.
2149
			 *
2150
			 * For panic situations it cannot be expected that
2151
			 * non-finalized records will become finalized. But
2152
			 * there may be other finalized records beyond that
2153
			 * need to be printed for a panic situation. If this
2154
			 * is the panic CPU, skip this
2155
			 * non-existent/non-finalized record unless it is
2156
			 * at or beyond the head, in which case it is not
2157
			 * possible to continue.
2158
			 *
2159
			 * Note that new messages printed on panic CPU are
2160
			 * finalized when we are here. The only exception
2161
			 * might be the last message without trailing newline.
2162
			 * But it would have the sequence number returned
2163
			 * by "prb_next_reserve_seq() - 1".
2164
			 */
2165
			if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb)))
2166
				(*seq)++;
2167
			else
2168
				return false;
1908
		}
2169
		}
1909
	}
2170
	}
1910
2171
Lines 1932-1938 static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, Link Here
1932
 * On success, the reader must check r->info.seq to see which record was
2193
 * On success, the reader must check r->info.seq to see which record was
1933
 * actually read. This allows the reader to detect dropped records.
2194
 * actually read. This allows the reader to detect dropped records.
1934
 *
2195
 *
1935
 * Failure means @seq refers to a not yet written record.
2196
 * Failure means @seq refers to a record not yet available to the reader.
1936
 */
2197
 */
1937
bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
2198
bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
1938
		    struct printk_record *r)
2199
		    struct printk_record *r)
Lines 1962-1968 bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, Link Here
1962
 * On success, the reader must check info->seq to see which record meta data
2223
 * On success, the reader must check info->seq to see which record meta data
1963
 * was actually read. This allows the reader to detect dropped records.
2224
 * was actually read. This allows the reader to detect dropped records.
1964
 *
2225
 *
1965
 * Failure means @seq refers to a not yet written record.
2226
 * Failure means @seq refers to a record not yet available to the reader.
1966
 */
2227
 */
1967
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
2228
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
1968
			 struct printk_info *info, unsigned int *line_count)
2229
			 struct printk_info *info, unsigned int *line_count)
Lines 2008-2014 u64 prb_first_valid_seq(struct printk_ringbuffer *rb) Link Here
2008
 * newest sequence number available to readers will be.
2269
 * newest sequence number available to readers will be.
2009
 *
2270
 *
2010
 * This provides readers a sequence number to jump to if all currently
2271
 * This provides readers a sequence number to jump to if all currently
2011
 * available records should be skipped.
2272
 * available records should be skipped. It is guaranteed that all records
2273
 * previous to the returned value have been finalized and are (or were)
2274
 * available to the reader.
2012
 *
2275
 *
2013
 * Context: Any context.
2276
 * Context: Any context.
2014
 * Return: The sequence number of the next newest (not yet available) record
2277
 * Return: The sequence number of the next newest (not yet available) record
Lines 2016-2049 u64 prb_first_valid_seq(struct printk_ringbuffer *rb) Link Here
2016
 */
2279
 */
2017
u64 prb_next_seq(struct printk_ringbuffer *rb)
2280
u64 prb_next_seq(struct printk_ringbuffer *rb)
2018
{
2281
{
2019
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
2020
	enum desc_state d_state;
2021
	unsigned long id;
2022
	u64 seq;
2282
	u64 seq;
2023
2283
2024
	/* Check if the cached @id still points to a valid @seq. */
2284
	seq = desc_last_finalized_seq(rb);
2025
	id = atomic_long_read(&desc_ring->last_finalized_id);
2026
	d_state = desc_read(desc_ring, id, NULL, &seq, NULL);
2027
2285
2028
	if (d_state == desc_finalized || d_state == desc_reusable) {
2286
	/*
2029
		/*
2287
	 * Begin searching after the last finalized record.
2030
		 * Begin searching after the last finalized record.
2288
	 *
2031
		 *
2289
	 * On 0, the search must begin at 0 because of hack#2
2032
		 * On 0, the search must begin at 0 because of hack#2
2290
	 * of the bootstrapping phase it is not known if a
2033
		 * of the bootstrapping phase it is not known if a
2291
	 * record at index 0 exists.
2034
		 * record at index 0 exists.
2292
	 */
2035
		 */
2293
	if (seq != 0)
2036
		if (seq != 0)
2294
		seq++;
2037
			seq++;
2038
	} else {
2039
		/*
2040
		 * The information about the last finalized sequence number
2041
		 * has gone. It should happen only when there is a flood of
2042
		 * new messages and the ringbuffer is rapidly recycled.
2043
		 * Give up and start from the beginning.
2044
		 */
2045
		seq = 0;
2046
	}
2047
2295
2048
	/*
2296
	/*
2049
	 * The information about the last finalized @seq might be inaccurate.
2297
	 * The information about the last finalized @seq might be inaccurate.
Lines 2085-2091 void prb_init(struct printk_ringbuffer *rb,
 	rb->desc_ring.infos = infos;
 	atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits));
 	atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits));
-	atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits));
+	atomic_long_set(&rb->desc_ring.last_finalized_seq, 0);
 
 	rb->text_data_ring.size_bits = textbits;
 	rb->text_data_ring.data = text_buf;
(-)a/kernel/printk/printk_ringbuffer.h (-3 / +51 lines)
Lines 75-81 struct prb_desc_ring { Link Here
75
	struct printk_info	*infos;
75
	struct printk_info	*infos;
76
	atomic_long_t		head_id;
76
	atomic_long_t		head_id;
77
	atomic_long_t		tail_id;
77
	atomic_long_t		tail_id;
78
	atomic_long_t		last_finalized_id;
78
	atomic_long_t		last_finalized_seq;
79
};
79
};
80
80
81
/*
81
/*
Lines 127-134 enum desc_state { Link Here
127
#define DESC_SV(id, state)	(((unsigned long)state << DESC_FLAGS_SHIFT) | id)
127
#define DESC_SV(id, state)	(((unsigned long)state << DESC_FLAGS_SHIFT) | id)
128
#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
128
#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
129
#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
129
#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
130
131
/*
132
 * Special data block logical position values (for fields of
133
 * @prb_desc.text_blk_lpos).
134
 *
135
 * - Bit0 is used to identify if the record has no data block. (Implemented in
136
 *   the LPOS_DATALESS() macro.)
137
 *
138
 * - Bit1 specifies the reason for not having a data block.
139
 *
140
 * These special values could never be real lpos values because of the
141
 * meta data and alignment padding of data blocks. (See to_blk_size() for
142
 * details.)
143
 */
130
#define FAILED_LPOS		0x1
144
#define FAILED_LPOS		0x1
131
#define NO_LPOS			0x3
145
#define EMPTY_LINE_LPOS		0x3
132
146
133
#define FAILED_BLK_LPOS	\
147
#define FAILED_BLK_LPOS	\
134
{				\
148
{				\
Lines 259-265 static struct printk_ringbuffer name = { \ Link Here
259
		.infos		= &_##name##_infos[0],						\
273
		.infos		= &_##name##_infos[0],						\
260
		.head_id	= ATOMIC_INIT(DESC0_ID(descbits)),				\
274
		.head_id	= ATOMIC_INIT(DESC0_ID(descbits)),				\
261
		.tail_id	= ATOMIC_INIT(DESC0_ID(descbits)),				\
275
		.tail_id	= ATOMIC_INIT(DESC0_ID(descbits)),				\
262
		.last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)),				\
276
		.last_finalized_seq = ATOMIC_INIT(0),						\
263
	},											\
277
	},											\
264
	.text_data_ring = {									\
278
	.text_data_ring = {									\
265
		.size_bits	= (avgtextbits) + (descbits),					\
279
		.size_bits	= (avgtextbits) + (descbits),					\
Lines 378-384 bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, Link Here
378
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
392
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
379
			 struct printk_info *info, unsigned int *line_count);
393
			 struct printk_info *info, unsigned int *line_count);
380
394
395
u64 prb_first_seq(struct printk_ringbuffer *rb);
381
u64 prb_first_valid_seq(struct printk_ringbuffer *rb);
396
u64 prb_first_valid_seq(struct printk_ringbuffer *rb);
382
u64 prb_next_seq(struct printk_ringbuffer *rb);
397
u64 prb_next_seq(struct printk_ringbuffer *rb);
398
u64 prb_next_reserve_seq(struct printk_ringbuffer *rb);
399
400
#ifdef CONFIG_64BIT
401
402
#define __u64seq_to_ulseq(u64seq) (u64seq)
403
#define __ulseq_to_u64seq(rb, ulseq) (ulseq)
404
405
#else /* CONFIG_64BIT */
406
407
#define __u64seq_to_ulseq(u64seq) ((u32)u64seq)
408
409
static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq)
410
{
411
	u64 rb_first_seq = prb_first_seq(rb);
412
	u64 seq;
413
414
	/*
415
	 * The provided sequence is only the lower 32 bits of the ringbuffer
416
	 * sequence. It needs to be expanded to 64bit. Get the first sequence
417
	 * number from the ringbuffer and fold it.
418
	 *
419
	 * Having a 32bit representation in the console is sufficient.
420
	 * If a console ever gets more than 2^31 records behind
421
	 * the ringbuffer then this is the least of the problems.
422
	 *
423
	 * Also the access to the ring buffer is always safe.
424
	 */
425
	seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq);
426
427
	return seq;
428
}
429
430
#endif /* CONFIG_64BIT */
383
431
384
#endif /* _KERNEL_PRINTK_RINGBUFFER_H */
432
#endif /* _KERNEL_PRINTK_RINGBUFFER_H */
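Note on the 32-bit sequence folding above: the expression seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq) recovers the upper 32 bits of a console sequence by taking a signed 32-bit delta against a known nearby 64-bit sequence. A minimal user-space sketch of the same arithmetic, not part of the patch; stdint types stand in for the kernel's u64/u32/s32 and a plain variable stands in for prb_first_seq(rb):

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for __ulseq_to_u64seq(): expand a truncated
 * 32-bit console sequence back to 64 bits by folding it against a
 * nearby 64-bit reference. The signed cast keeps the result correct
 * as long as the console is within 2^31 records of the reference.
 */
static uint64_t ulseq_to_u64seq(uint64_t rb_first_seq, uint32_t ulseq)
{
	return rb_first_seq - (int32_t)((uint32_t)rb_first_seq - ulseq);
}

int main(void)
{
	uint64_t first = 0x100000005ULL;	/* 64-bit reference sequence */
	uint32_t console = 0x00000002;		/* truncated 32-bit sequence */

	/* Prints 0x100000002: the upper 32 bits are recovered. */
	printf("0x%llx\n", (unsigned long long)ulseq_to_u64seq(first, console));
	return 0;
}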
(-)a/kernel/printk/printk_safe.c (+12 lines)
Lines 26-31 void __printk_safe_exit(void) Link Here
26
	this_cpu_dec(printk_context);
26
	this_cpu_dec(printk_context);
27
}
27
}
28
28
29
void __printk_deferred_enter(void)
30
{
31
	cant_migrate();
32
	this_cpu_inc(printk_context);
33
}
34
35
void __printk_deferred_exit(void)
36
{
37
	cant_migrate();
38
	this_cpu_dec(printk_context);
39
}
40
29
asmlinkage int vprintk(const char *fmt, va_list args)
41
asmlinkage int vprintk(const char *fmt, va_list args)
30
{
42
{
31
#ifdef CONFIG_KGDB_KDB
43
#ifdef CONFIG_KGDB_KDB
(-)a/kernel/rcu/rcutorture.c (+6 lines)
Lines 2409-2414 static int rcutorture_booster_init(unsigned int cpu) Link Here
2409
		WARN_ON_ONCE(!t);
2409
		WARN_ON_ONCE(!t);
2410
		sp.sched_priority = 2;
2410
		sp.sched_priority = 2;
2411
		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
2411
		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
2412
#ifdef CONFIG_PREEMPT_RT
2413
		t = per_cpu(timersd, cpu);
2414
		WARN_ON_ONCE(!t);
2415
		sp.sched_priority = 2;
2416
		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
2417
#endif
2412
	}
2418
	}
2413
2419
2414
	/* Don't allow time recalculation while creating a new task. */
2420
	/* Don't allow time recalculation while creating a new task. */
(-)a/kernel/rcu/tree_stall.h (+5 lines)
Lines 9-14 Link Here
9
9
10
#include <linux/kvm_para.h>
10
#include <linux/kvm_para.h>
11
#include <linux/rcu_notifier.h>
11
#include <linux/rcu_notifier.h>
12
#include <linux/console.h>
12
13
13
//////////////////////////////////////////////////////////////////////////////
14
//////////////////////////////////////////////////////////////////////////////
14
//
15
//
Lines 604-609 static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) Link Here
604
	if (rcu_stall_is_suppressed())
605
	if (rcu_stall_is_suppressed())
605
		return;
606
		return;
606
607
608
	nbcon_cpu_emergency_enter();
609
607
	/*
610
	/*
608
	 * OK, time to rat on our buddy...
611
	 * OK, time to rat on our buddy...
609
	 * See Documentation/RCU/stallwarn.rst for info on how to debug
612
	 * See Documentation/RCU/stallwarn.rst for info on how to debug
Lines 658-663 static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) Link Here
658
	panic_on_rcu_stall();
661
	panic_on_rcu_stall();
659
662
660
	rcu_force_quiescent_state();  /* Kick them all. */
663
	rcu_force_quiescent_state();  /* Kick them all. */
664
665
	nbcon_cpu_emergency_exit();
661
}
666
}
662
667
663
static void print_cpu_stall(unsigned long gps)
668
static void print_cpu_stall(unsigned long gps)
(-)a/kernel/sched/core.c (-15 / +50 lines)
Lines 898-911 static inline void hrtick_rq_init(struct rq *rq) Link Here
898
898
899
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
899
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
900
/*
900
/*
901
 * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
901
 * Atomically set TIF_NEED_RESCHED[_LAZY] and test for TIF_POLLING_NRFLAG,
902
 * this avoids any races wrt polling state changes and thereby avoids
902
 * this avoids any races wrt polling state changes and thereby avoids
903
 * spurious IPIs.
903
 * spurious IPIs.
904
 */
904
 */
905
static inline bool set_nr_and_not_polling(struct task_struct *p)
905
static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit)
906
{
906
{
907
	struct thread_info *ti = task_thread_info(p);
907
	struct thread_info *ti = task_thread_info(p);
908
	return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
908
909
	return !(fetch_or(&ti->flags, 1 << tif_bit) & _TIF_POLLING_NRFLAG);
909
}
910
}
910
911
911
/*
912
/*
Lines 922-928 static bool set_nr_if_polling(struct task_struct *p) Link Here
922
	do {
923
	do {
923
		if (!(val & _TIF_POLLING_NRFLAG))
924
		if (!(val & _TIF_POLLING_NRFLAG))
924
			return false;
925
			return false;
925
		if (val & _TIF_NEED_RESCHED)
926
		if (val & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
926
			return true;
927
			return true;
927
	} while (!try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED));
928
	} while (!try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED));
928
929
Lines 930-938 static bool set_nr_if_polling(struct task_struct *p) Link Here
930
}
931
}
931
932
932
#else
933
#else
933
static inline bool set_nr_and_not_polling(struct task_struct *p)
934
static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit)
934
{
935
{
935
	set_tsk_need_resched(p);
936
	set_tsk_thread_flag(p, tif_bit);
936
	return true;
937
	return true;
937
}
938
}
938
939
Lines 1037-1064 void wake_up_q(struct wake_q_head *head) Link Here
1037
 * might also involve a cross-CPU call to trigger the scheduler on
1038
 * might also involve a cross-CPU call to trigger the scheduler on
1038
 * the target CPU.
1039
 * the target CPU.
1039
 */
1040
 */
1040
void resched_curr(struct rq *rq)
1041
static void __resched_curr(struct rq *rq, int lazy)
1041
{
1042
{
1043
	int cpu, tif_bit = TIF_NEED_RESCHED + lazy;
1042
	struct task_struct *curr = rq->curr;
1044
	struct task_struct *curr = rq->curr;
1043
	int cpu;
1044
1045
1045
	lockdep_assert_rq_held(rq);
1046
	lockdep_assert_rq_held(rq);
1046
1047
1047
	if (test_tsk_need_resched(curr))
1048
	if (unlikely(test_tsk_thread_flag(curr, tif_bit)))
1048
		return;
1049
		return;
1049
1050
1050
	cpu = cpu_of(rq);
1051
	cpu = cpu_of(rq);
1051
1052
1052
	if (cpu == smp_processor_id()) {
1053
	if (cpu == smp_processor_id()) {
1053
		set_tsk_need_resched(curr);
1054
		set_tsk_thread_flag(curr, tif_bit);
1054
		set_preempt_need_resched();
1055
		if (!lazy)
1056
			set_preempt_need_resched();
1055
		return;
1057
		return;
1056
	}
1058
	}
1057
1059
1058
	if (set_nr_and_not_polling(curr))
1060
	if (set_nr_and_not_polling(curr, tif_bit)) {
1059
		smp_send_reschedule(cpu);
1061
		if (!lazy)
1060
	else
1062
			smp_send_reschedule(cpu);
1063
	} else {
1061
		trace_sched_wake_idle_without_ipi(cpu);
1064
		trace_sched_wake_idle_without_ipi(cpu);
1065
	}
1066
}
1067
1068
void resched_curr(struct rq *rq)
1069
{
1070
	__resched_curr(rq, 0);
1071
}
1072
1073
void resched_curr_lazy(struct rq *rq)
1074
{
1075
	int lazy = IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) && !sched_feat(FORCE_NEED_RESCHED) ?
1076
		TIF_NEED_RESCHED_LAZY_OFFSET : 0;
1077
1078
	if (lazy && unlikely(test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED)))
1079
		return;
1080
1081
	__resched_curr(rq, lazy);
1062
}
1082
}
1063
1083
1064
void resched_cpu(int cpu)
1084
void resched_cpu(int cpu)
Lines 1131-1137 static void wake_up_idle_cpu(int cpu) Link Here
1131
	if (cpu == smp_processor_id())
1151
	if (cpu == smp_processor_id())
1132
		return;
1152
		return;
1133
1153
1134
	if (set_nr_and_not_polling(rq->idle))
1154
	if (set_nr_and_not_polling(rq->idle, TIF_NEED_RESCHED))
1135
		smp_send_reschedule(cpu);
1155
		smp_send_reschedule(cpu);
1136
	else
1156
	else
1137
		trace_sched_wake_idle_without_ipi(cpu);
1157
		trace_sched_wake_idle_without_ipi(cpu);
Lines 8865-8870 static inline void preempt_dynamic_init(void) { } Link Here
8865
8885
8866
#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
8886
#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
8867
8887
8888
/*
8889
 * task_is_pi_boosted - Check if task has been PI boosted.
8890
 * @p:	Task to check.
8891
 *
8892
 * Return true if task is subject to priority inheritance.
8893
 */
8894
bool task_is_pi_boosted(const struct task_struct *p)
8895
{
8896
	int prio = p->prio;
8897
8898
	if (!rt_prio(prio))
8899
		return false;
8900
	return prio != p->normal_prio;
8901
}
8902
8868
/**
8903
/**
8869
 * yield - yield the current processor to other threads.
8904
 * yield - yield the current processor to other threads.
8870
 *
8905
 *
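Note on the TIF_NEED_RESCHED + lazy arithmetic in __resched_curr() and resched_curr_lazy() above: it relies on TIF_NEED_RESCHED_LAZY sitting exactly TIF_NEED_RESCHED_LAZY_OFFSET bits above TIF_NEED_RESCHED, so a single integer offset selects either flag. A self-contained sketch of that selection; the bit positions here are invented for illustration, the real values are per-architecture thread_info flags:

#include <stdio.h>

/* Hypothetical thread-info flag layout: the only property the patch
 * depends on is that the lazy bit is located at
 * TIF_NEED_RESCHED + TIF_NEED_RESCHED_LAZY_OFFSET.
 */
#define TIF_NEED_RESCHED		3
#define TIF_NEED_RESCHED_LAZY_OFFSET	1
#define TIF_NEED_RESCHED_LAZY		(TIF_NEED_RESCHED + TIF_NEED_RESCHED_LAZY_OFFSET)

#define _TIF_NEED_RESCHED	(1UL << TIF_NEED_RESCHED)
#define _TIF_NEED_RESCHED_LAZY	(1UL << TIF_NEED_RESCHED_LAZY)

static unsigned long flags;

/* Mirrors the tif_bit handling in __resched_curr(): lazy == 0 sets the
 * immediate bit, lazy == TIF_NEED_RESCHED_LAZY_OFFSET sets the lazy bit. */
static void resched_flag(int lazy)
{
	int tif_bit = TIF_NEED_RESCHED + lazy;

	flags |= 1UL << tif_bit;
}

int main(void)
{
	resched_flag(0);
	printf("immediate: %d lazy: %d\n",
	       !!(flags & _TIF_NEED_RESCHED), !!(flags & _TIF_NEED_RESCHED_LAZY));

	flags = 0;
	resched_flag(TIF_NEED_RESCHED_LAZY_OFFSET);
	printf("immediate: %d lazy: %d\n",
	       !!(flags & _TIF_NEED_RESCHED), !!(flags & _TIF_NEED_RESCHED_LAZY));
	return 0;
}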
(-)a/kernel/sched/debug.c (+19 lines)
Lines 333-338 static const struct file_operations sched_debug_fops = { Link Here
333
	.release	= seq_release,
333
	.release	= seq_release,
334
};
334
};
335
335
336
static ssize_t sched_hog_write(struct file *filp, const char __user *ubuf,
337
			       size_t cnt, loff_t *ppos)
338
{
339
	unsigned long end = jiffies + 60 * HZ;
340
341
	for (; time_before(jiffies, end) && !signal_pending(current);)
342
		cpu_relax();
343
344
	return cnt;
345
}
346
347
static const struct file_operations sched_hog_fops = {
348
	.write		= sched_hog_write,
349
	.open		= simple_open,
350
	.llseek		= default_llseek,
351
};
352
336
static struct dentry *debugfs_sched;
353
static struct dentry *debugfs_sched;
337
354
338
static __init int sched_init_debug(void)
355
static __init int sched_init_debug(void)
Lines 374-379 static __init int sched_init_debug(void) Link Here
374
391
375
	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
392
	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
376
393
394
	debugfs_create_file("hog", 0200, debugfs_sched, NULL, &sched_hog_fops);
395
377
	return 0;
396
	return 0;
378
}
397
}
379
late_initcall(sched_init_debug);
398
late_initcall(sched_init_debug);
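Usage note for the "hog" debugfs file added above: any write to it busy-loops the writing task for roughly 60 seconds (60 * HZ jiffies) or until a signal arrives, which is handy for exercising lazy preemption under load. A small sketch, assuming the usual /sys/kernel/debug mount point and the pre-existing "sched" debugfs directory; run as root:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Occupy one CPU for about a minute via the patched kernel's
 * /sys/kernel/debug/sched/hog file (path assumes the standard
 * debugfs mount). The write() only returns once sched_hog_write()
 * finishes spinning or the process receives a signal.
 */
int main(void)
{
	int fd = open("/sys/kernel/debug/sched/hog", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) < 0)
		perror("write");
	close(fd);
	return 0;
}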
(-)a/kernel/sched/fair.c (-15 / +31 lines)
Lines 1001-1008 static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se); Link Here
1001
 * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
1001
 * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
1002
 * this is probably good enough.
1002
 * this is probably good enough.
1003
 */
1003
 */
1004
static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
1004
static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se, bool tick)
1005
{
1005
{
1006
	struct rq *rq = rq_of(cfs_rq);
1007
1006
	if ((s64)(se->vruntime - se->deadline) < 0)
1008
	if ((s64)(se->vruntime - se->deadline) < 0)
1007
		return;
1009
		return;
1008
1010
Lines 1021-1030 static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se) Link Here
1021
	/*
1023
	/*
1022
	 * The task has consumed its request, reschedule.
1024
	 * The task has consumed its request, reschedule.
1023
	 */
1025
	 */
1024
	if (cfs_rq->nr_running > 1) {
1026
	if (cfs_rq->nr_running < 2)
1025
		resched_curr(rq_of(cfs_rq));
1027
		return;
1026
		clear_buddies(cfs_rq, se);
1028
1029
	if (!IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) || sched_feat(FORCE_NEED_RESCHED)) {
1030
		resched_curr(rq);
1031
	} else {
1032
		/* Did the task ignore the lazy reschedule request? */
1033
		if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
1034
			resched_curr(rq);
1035
		else
1036
			resched_curr_lazy(rq);
1027
	}
1037
	}
1038
	clear_buddies(cfs_rq, se);
1028
}
1039
}
1029
1040
1030
#include "pelt.h"
1041
#include "pelt.h"
Lines 1132-1138 static void update_tg_load_avg(struct cfs_rq *cfs_rq) Link Here
1132
/*
1143
/*
1133
 * Update the current task's runtime statistics.
1144
 * Update the current task's runtime statistics.
1134
 */
1145
 */
1135
static void update_curr(struct cfs_rq *cfs_rq)
1146
static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
1136
{
1147
{
1137
	struct sched_entity *curr = cfs_rq->curr;
1148
	struct sched_entity *curr = cfs_rq->curr;
1138
	u64 now = rq_clock_task(rq_of(cfs_rq));
1149
	u64 now = rq_clock_task(rq_of(cfs_rq));
Lines 1371-1377 Link Here
1371
	update_burst_penalty(curr);
1371
	update_burst_penalty(curr);
1372
#endif // CONFIG_SCHED_BORE
1372
#endif // CONFIG_SCHED_BORE
1373
	curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr));
1373
	curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr));
1374
	update_deadline(cfs_rq, curr);
1374
	update_deadline(cfs_rq, curr, tick);
1375
	update_min_vruntime(cfs_rq);
1375
	update_min_vruntime(cfs_rq);
1376
1376
1377
	if (entity_is_task(curr)) {
1377
	if (entity_is_task(curr)) {
Lines 1173-1178 static void update_curr(struct cfs_rq *cfs_rq) Link Here
1173
	account_cfs_rq_runtime(cfs_rq, delta_exec);
1184
	account_cfs_rq_runtime(cfs_rq, delta_exec);
1174
}
1185
}
1175
1186
1187
static inline void update_curr(struct cfs_rq *cfs_rq)
1188
{
1189
	__update_curr(cfs_rq, false);
1190
}
1191
1176
static void update_curr_fair(struct rq *rq)
1192
static void update_curr_fair(struct rq *rq)
1177
{
1193
{
1178
	update_curr(cfs_rq_of(&rq->curr->se));
1194
	update_curr(cfs_rq_of(&rq->curr->se));
Lines 5449-5455 entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) Link Here
5449
	/*
5465
	/*
5450
	 * Update run-time statistics of the 'current'.
5466
	 * Update run-time statistics of the 'current'.
5451
	 */
5467
	 */
5452
	update_curr(cfs_rq);
5468
	__update_curr(cfs_rq, true);
5453
5469
5454
	/*
5470
	/*
5455
	 * Ensure that runnable average is periodically updated.
5471
	 * Ensure that runnable average is periodically updated.
Lines 5463-5469 entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) Link Here
5463
	 * validating it and just reschedule.
5479
	 * validating it and just reschedule.
5464
	 */
5480
	 */
5465
	if (queued) {
5481
	if (queued) {
5466
		resched_curr(rq_of(cfs_rq));
5482
		resched_curr_lazy(rq_of(cfs_rq));
5467
		return;
5483
		return;
5468
	}
5484
	}
5469
	/*
5485
	/*
Lines 5609-5615 static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) Link Here
5609
	 * hierarchy can be throttled
5625
	 * hierarchy can be throttled
5610
	 */
5626
	 */
5611
	if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
5627
	if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
5612
		resched_curr(rq_of(cfs_rq));
5628
		resched_curr_lazy(rq_of(cfs_rq));
5613
}
5629
}
5614
5630
5615
static __always_inline
5631
static __always_inline
Lines 5869-5875 void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) Link Here
5869
5885
5870
	/* Determine whether we need to wake up potentially idle CPU: */
5886
	/* Determine whether we need to wake up potentially idle CPU: */
5871
	if (rq->curr == rq->idle && rq->cfs.nr_running)
5887
	if (rq->curr == rq->idle && rq->cfs.nr_running)
5872
		resched_curr(rq);
5888
		resched_curr_lazy(rq);
5873
}
5889
}
5874
5890
5875
#ifdef CONFIG_SMP
5891
#ifdef CONFIG_SMP
Lines 6584-6590 static void hrtick_start_fair(struct rq *rq, struct task_struct *p) Link Here
6584
6600
6585
		if (delta < 0) {
6601
		if (delta < 0) {
6586
			if (task_current(rq, p))
6602
			if (task_current(rq, p))
6587
				resched_curr(rq);
6603
				resched_curr_lazy(rq);
6588
			return;
6604
			return;
6589
		}
6605
		}
6590
		hrtick_start(rq, delta);
6606
		hrtick_start(rq, delta);
Lines 8240-8246 static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int Link Here
8240
	 * prevents us from potentially nominating it as a false LAST_BUDDY
8256
	 * prevents us from potentially nominating it as a false LAST_BUDDY
8241
	 * below.
8257
	 * below.
8242
	 */
8258
	 */
8243
	if (test_tsk_need_resched(curr))
8259
	if (need_resched())
8244
		return;
8260
		return;
8245
8261
8246
	/* Idle tasks are by definition preempted by non-idle tasks. */
8262
	/* Idle tasks are by definition preempted by non-idle tasks. */
Lines 8282-8288 static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int Link Here
8282
	return;
8298
	return;
8283
8299
8284
preempt:
8300
preempt:
8285
	resched_curr(rq);
8301
	resched_curr_lazy(rq);
8286
}
8302
}
8287
8303
8288
#ifdef CONFIG_SMP
8304
#ifdef CONFIG_SMP
Lines 12449-12455 static inline void task_tick_core(struct rq *rq, struct task_struct *curr) Link Here
12449
	 */
12465
	 */
12450
	if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 &&
12466
	if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 &&
12451
	    __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
12467
	    __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
12452
		resched_curr(rq);
12468
		resched_curr_lazy(rq);
12453
}
12469
}
12454
12470
12455
/*
12471
/*
Lines 12614-12620 prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) Link Here
12614
	 */
12630
	 */
12615
	if (task_current(rq, p)) {
12631
	if (task_current(rq, p)) {
12616
		if (p->prio > oldprio)
12632
		if (p->prio > oldprio)
12617
			resched_curr(rq);
12633
			resched_curr_lazy(rq);
12618
	} else
12634
	} else
12619
		wakeup_preempt(rq, p, 0);
12635
		wakeup_preempt(rq, p, 0);
12620
}
12636
}
(-)a/kernel/sched/features.h (+2 lines)
Lines 88-90 SCHED_FEAT(UTIL_EST_FASTUP, true) Link Here
88
SCHED_FEAT(LATENCY_WARN, false)
88
SCHED_FEAT(LATENCY_WARN, false)
89
89
90
SCHED_FEAT(HZ_BW, true)
90
SCHED_FEAT(HZ_BW, true)
91
92
SCHED_FEAT(FORCE_NEED_RESCHED, false)
(-)a/kernel/sched/idle.c (-2 / +1 lines)
Lines 57-64 static noinline int __cpuidle cpu_idle_poll(void) Link Here
57
	ct_cpuidle_enter();
57
	ct_cpuidle_enter();
58
58
59
	raw_local_irq_enable();
59
	raw_local_irq_enable();
60
	while (!tif_need_resched() &&
60
	while (!need_resched() && (cpu_idle_force_poll || tick_check_broadcast_expired()))
61
	       (cpu_idle_force_poll || tick_check_broadcast_expired()))
62
		cpu_relax();
61
		cpu_relax();
63
	raw_local_irq_disable();
62
	raw_local_irq_disable();
64
63
(-)a/kernel/sched/rt.c (-1 / +4 lines)
Lines 2203-2210 static int rto_next_cpu(struct root_domain *rd) Link Here
2203
2203
2204
		rd->rto_cpu = cpu;
2204
		rd->rto_cpu = cpu;
2205
2205
2206
		if (cpu < nr_cpu_ids)
2206
		if (cpu < nr_cpu_ids) {
2207
			if (!has_pushable_tasks(cpu_rq(cpu)))
2208
				continue;
2207
			return cpu;
2209
			return cpu;
2210
		}
2208
2211
2209
		rd->rto_cpu = -1;
2212
		rd->rto_cpu = -1;
2210
2213
(-)a/kernel/sched/sched.h (+1 lines)
Lines 2419-2424 extern void init_sched_fair_class(void); Link Here
2419
extern void reweight_task(struct task_struct *p, int prio);
2419
extern void reweight_task(struct task_struct *p, int prio);
2420
2420
2421
extern void resched_curr(struct rq *rq);
2421
extern void resched_curr(struct rq *rq);
2422
extern void resched_curr_lazy(struct rq *rq);
2422
extern void resched_cpu(int cpu);
2423
extern void resched_cpu(int cpu);
2423
2424
2424
extern struct rt_bandwidth def_rt_bandwidth;
2425
extern struct rt_bandwidth def_rt_bandwidth;
(-)a/kernel/softirq.c (-1 / +94 lines)
Lines 247-252 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) Link Here
247
}
247
}
248
EXPORT_SYMBOL(__local_bh_enable_ip);
248
EXPORT_SYMBOL(__local_bh_enable_ip);
249
249
250
void softirq_preempt(void)
251
{
252
	if (WARN_ON_ONCE(!preemptible()))
253
		return;
254
255
	if (WARN_ON_ONCE(__this_cpu_read(softirq_ctrl.cnt) != SOFTIRQ_OFFSET))
256
		return;
257
258
	__local_bh_enable(SOFTIRQ_OFFSET, true);
259
	/* preemption point */
260
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
261
}
262
250
/*
263
/*
251
 * Invoked from ksoftirqd_run() outside of the interrupt disabled section
264
 * Invoked from ksoftirqd_run() outside of the interrupt disabled section
252
 * to acquire the per CPU local lock for reentrancy protection.
265
 * to acquire the per CPU local lock for reentrancy protection.
Lines 619-624 static inline void tick_irq_exit(void) Link Here
619
#endif
632
#endif
620
}
633
}
621
634
635
#ifdef CONFIG_PREEMPT_RT
636
DEFINE_PER_CPU(struct task_struct *, timersd);
637
DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
638
639
static void wake_timersd(void)
640
{
641
        struct task_struct *tsk = __this_cpu_read(timersd);
642
643
        if (tsk)
644
                wake_up_process(tsk);
645
}
646
647
#else
648
649
static inline void wake_timersd(void) { }
650
651
#endif
652
622
static inline void __irq_exit_rcu(void)
653
static inline void __irq_exit_rcu(void)
623
{
654
{
624
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
655
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
Lines 631-636 static inline void __irq_exit_rcu(void) Link Here
631
	if (!in_interrupt() && local_softirq_pending())
662
	if (!in_interrupt() && local_softirq_pending())
632
		invoke_softirq();
663
		invoke_softirq();
633
664
665
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() &&
666
	    !(in_nmi() | in_hardirq()))
667
		wake_timersd();
668
634
	tick_irq_exit();
669
	tick_irq_exit();
635
}
670
}
636
671
Lines 963-974 static struct smp_hotplug_thread softirq_threads = { Link Here
963
	.thread_comm		= "ksoftirqd/%u",
998
	.thread_comm		= "ksoftirqd/%u",
964
};
999
};
965
1000
1001
#ifdef CONFIG_PREEMPT_RT
1002
static void timersd_setup(unsigned int cpu)
1003
{
1004
        sched_set_fifo_low(current);
1005
}
1006
1007
static int timersd_should_run(unsigned int cpu)
1008
{
1009
        return local_pending_timers();
1010
}
1011
1012
static void run_timersd(unsigned int cpu)
1013
{
1014
	unsigned int timer_si;
1015
1016
	ksoftirqd_run_begin();
1017
1018
	timer_si = local_pending_timers();
1019
	__this_cpu_write(pending_timer_softirq, 0);
1020
	or_softirq_pending(timer_si);
1021
1022
	__do_softirq();
1023
1024
	ksoftirqd_run_end();
1025
}
1026
1027
static void raise_ktimers_thread(unsigned int nr)
1028
{
1029
	trace_softirq_raise(nr);
1030
	__this_cpu_or(pending_timer_softirq, 1 << nr);
1031
}
1032
1033
void raise_hrtimer_softirq(void)
1034
{
1035
	raise_ktimers_thread(HRTIMER_SOFTIRQ);
1036
}
1037
1038
void raise_timer_softirq(void)
1039
{
1040
	unsigned long flags;
1041
1042
	local_irq_save(flags);
1043
	raise_ktimers_thread(TIMER_SOFTIRQ);
1044
	wake_timersd();
1045
	local_irq_restore(flags);
1046
}
1047
1048
static struct smp_hotplug_thread timer_threads = {
1049
        .store                  = &timersd,
1050
        .setup                  = timersd_setup,
1051
        .thread_should_run      = timersd_should_run,
1052
        .thread_fn              = run_timersd,
1053
        .thread_comm            = "ktimers/%u",
1054
};
1055
#endif
1056
966
static __init int spawn_ksoftirqd(void)
1057
static __init int spawn_ksoftirqd(void)
967
{
1058
{
968
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
1059
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
969
				  takeover_tasklets);
1060
				  takeover_tasklets);
970
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
1061
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
971
1062
#ifdef CONFIG_PREEMPT_RT
1063
	BUG_ON(smpboot_register_percpu_thread(&timer_threads));
1064
#endif
972
	return 0;
1065
	return 0;
973
}
1066
}
974
early_initcall(spawn_ksoftirqd);
1067
early_initcall(spawn_ksoftirqd);
(-)a/kernel/time/hrtimer.c (-2 / +2 lines)
Lines 1808-1814 void hrtimer_interrupt(struct clock_event_device *dev) Link Here
1808
	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1808
	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1809
		cpu_base->softirq_expires_next = KTIME_MAX;
1809
		cpu_base->softirq_expires_next = KTIME_MAX;
1810
		cpu_base->softirq_activated = 1;
1810
		cpu_base->softirq_activated = 1;
1811
		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1811
		raise_hrtimer_softirq();
1812
	}
1812
	}
1813
1813
1814
	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1814
	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
Lines 1921-1927 void hrtimer_run_queues(void) Link Here
1921
	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1921
	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1922
		cpu_base->softirq_expires_next = KTIME_MAX;
1922
		cpu_base->softirq_expires_next = KTIME_MAX;
1923
		cpu_base->softirq_activated = 1;
1923
		cpu_base->softirq_activated = 1;
1924
		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1924
		raise_hrtimer_softirq();
1925
	}
1925
	}
1926
1926
1927
	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1927
	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
(-)a/kernel/time/tick-sched.c (-1 / +1 lines)
Lines 796-802 static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) Link Here
796
796
797
static inline bool local_timer_softirq_pending(void)
797
static inline bool local_timer_softirq_pending(void)
798
{
798
{
799
	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
799
	return local_pending_timers() & BIT(TIMER_SOFTIRQ);
800
}
800
}
801
801
802
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
802
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
(-)a/kernel/time/timer.c (-2 / +9 lines)
Lines 1470-1478 static inline void timer_base_unlock_expiry(struct timer_base *base) Link Here
1470
 */
1470
 */
1471
static void timer_sync_wait_running(struct timer_base *base)
1471
static void timer_sync_wait_running(struct timer_base *base)
1472
{
1472
{
1473
	if (atomic_read(&base->timer_waiters)) {
1473
	bool need_preempt;
1474
1475
	need_preempt = task_is_pi_boosted(current);
1476
	if (need_preempt || atomic_read(&base->timer_waiters)) {
1474
		raw_spin_unlock_irq(&base->lock);
1477
		raw_spin_unlock_irq(&base->lock);
1475
		spin_unlock(&base->expiry_lock);
1478
		spin_unlock(&base->expiry_lock);
1479
1480
		if (need_preempt)
1481
			softirq_preempt();
1482
1476
		spin_lock(&base->expiry_lock);
1483
		spin_lock(&base->expiry_lock);
1477
		raw_spin_lock_irq(&base->lock);
1484
		raw_spin_lock_irq(&base->lock);
1478
	}
1485
	}
Lines 2054-2060 static void run_local_timers(void) Link Here
2054
		if (time_before(jiffies, base->next_expiry))
2061
		if (time_before(jiffies, base->next_expiry))
2055
			return;
2062
			return;
2056
	}
2063
	}
2057
	raise_softirq(TIMER_SOFTIRQ);
2064
	raise_timer_softirq();
2058
}
2065
}
2059
2066
2060
/*
2067
/*
(-)a/kernel/trace/trace.c (+2 lines)
Lines 2695-2700 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) Link Here
2695
2695
2696
	if (tif_need_resched())
2696
	if (tif_need_resched())
2697
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2697
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2698
	if (tif_need_resched_lazy())
2699
		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2698
	if (test_preempt_need_resched())
2700
	if (test_preempt_need_resched())
2699
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2701
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2700
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2702
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
(-)a/kernel/trace/trace_output.c (-2 / +14 lines)
Lines 460-476 int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) Link Here
460
		(entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
460
		(entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
461
		(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
461
		(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
462
		bh_off ? 'b' :
462
		bh_off ? 'b' :
463
		(entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
463
		!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_SUPPORT) ? 'X' :
464
		'.';
464
		'.';
465
465
466
	switch (entry->flags & (TRACE_FLAG_NEED_RESCHED |
466
	switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY |
467
				TRACE_FLAG_PREEMPT_RESCHED)) {
467
				TRACE_FLAG_PREEMPT_RESCHED)) {
468
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
469
		need_resched = 'B';
470
		break;
468
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
471
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
469
		need_resched = 'N';
472
		need_resched = 'N';
470
		break;
473
		break;
474
	case TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
475
		need_resched = 'L';
476
		break;
477
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY:
478
		need_resched = 'b';
479
		break;
471
	case TRACE_FLAG_NEED_RESCHED:
480
	case TRACE_FLAG_NEED_RESCHED:
472
		need_resched = 'n';
481
		need_resched = 'n';
473
		break;
482
		break;
483
	case TRACE_FLAG_NEED_RESCHED_LAZY:
484
		need_resched = 'l';
485
		break;
474
	case TRACE_FLAG_PREEMPT_RESCHED:
486
	case TRACE_FLAG_PREEMPT_RESCHED:
475
		need_resched = 'p';
487
		need_resched = 'p';
476
		break;
488
		break;
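For readers of the latency format: the need-resched column above now also encodes the lazy flag. The following stand-alone sketch merely restates the switch as a legend; the '.' entry is the function's pre-existing default when no flag is set:

#include <stdio.h>

/* Legend for trace_print_lat_fmt(): which combination of
 * NEED_RESCHED (n), NEED_RESCHED_LAZY (l) and PREEMPT_RESCHED (p)
 * yields which latency-format character.
 */
struct lat_char { const char *flags; char c; };

static const struct lat_char legend[] = {
	{ "n + l + p", 'B' },
	{ "n + p",     'N' },
	{ "l + p",     'L' },
	{ "n + l",     'b' },
	{ "n",         'n' },
	{ "l",         'l' },
	{ "p",         'p' },
	{ "(none)",    '.' },
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(legend) / sizeof(legend[0]); i++)
		printf("%-9s -> %c\n", legend[i].flags, legend[i].c);
	return 0;
}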
(-)a/localversion-rt (+1 lines)
Line 0 Link Here
1
-rt6
(-)a/net/core/dev.c (-9 / +30 lines)
Lines 4682-4696 static void rps_trigger_softirq(void *data) Link Here
4682
4682
4683
#endif /* CONFIG_RPS */
4683
#endif /* CONFIG_RPS */
4684
4684
4685
/* Called from hardirq (IPI) context */
4686
static void trigger_rx_softirq(void *data)
4687
{
4688
	struct softnet_data *sd = data;
4689
4690
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
4691
	smp_store_release(&sd->defer_ipi_scheduled, 0);
4692
}
4693
4694
/*
4685
/*
4695
 * After we queued a packet into sd->input_pkt_queue,
4686
 * After we queued a packet into sd->input_pkt_queue,
4696
 * we need to make sure this queue is serviced soon.
4687
 * we need to make sure this queue is serviced soon.
Lines 6661-6666 static void skb_defer_free_flush(struct softnet_data *sd) Link Here
6661
	}
6652
	}
6662
}
6653
}
6663
6654
6655
#ifndef CONFIG_PREEMPT_RT
6656
6657
/* Called from hardirq (IPI) context */
6658
static void trigger_rx_softirq(void *data)
6659
{
6660
	struct softnet_data *sd = data;
6661
6662
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
6663
	smp_store_release(&sd->defer_ipi_scheduled, 0);
6664
}
6665
6666
#else
6667
6668
static void trigger_rx_softirq(struct work_struct *defer_work)
6669
{
6670
	struct softnet_data *sd;
6671
6672
	sd = container_of(defer_work, struct softnet_data, defer_work);
6673
	smp_store_release(&sd->defer_ipi_scheduled, 0);
6674
	local_bh_disable();
6675
	skb_defer_free_flush(sd);
6676
	local_bh_enable();
6677
}
6678
6679
#endif
6680
6664
static int napi_threaded_poll(void *data)
6681
static int napi_threaded_poll(void *data)
6665
{
6682
{
6666
	struct napi_struct *napi = data;
6683
	struct napi_struct *napi = data;
Lines 11624-11630 static int __init net_dev_init(void) Link Here
11624
		INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
11641
		INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
11625
		sd->cpu = i;
11642
		sd->cpu = i;
11626
#endif
11643
#endif
11644
#ifndef CONFIG_PREEMPT_RT
11627
		INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
11645
		INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
11646
#else
11647
		INIT_WORK(&sd->defer_work, trigger_rx_softirq);
11648
#endif
11628
		spin_lock_init(&sd->defer_lock);
11649
		spin_lock_init(&sd->defer_lock);
11629
11650
11630
		init_gro_hash(&sd->backlog);
11651
		init_gro_hash(&sd->backlog);
(-)a/net/core/skbuff.c (-1 / +6 lines)
Lines 6861-6868 nodefer: __kfree_skb(skb); Link Here
6861
	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
6861
	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
6862
	 * if we are unlucky enough (this seems very unlikely).
6862
	 * if we are unlucky enough (this seems very unlikely).
6863
	 */
6863
	 */
6864
	if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
6864
	if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) {
6865
#ifndef CONFIG_PREEMPT_RT
6865
		smp_call_function_single_async(cpu, &sd->defer_csd);
6866
		smp_call_function_single_async(cpu, &sd->defer_csd);
6867
#else
6868
		schedule_work_on(cpu, &sd->defer_work);
6869
#endif
6870
	}
6866
}
6871
}
6867
6872
6868
static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
6873
static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
