(-)a/include/linux/sched.h (+10 lines)
Lines 547-552 struct sched_entity {
 	u64				sum_exec_runtime;
 	u64				prev_sum_exec_runtime;
 	u64				vruntime;
+#ifdef CONFIG_SCHED_BORE
+	u64				burst_time;
+	u8				prev_burst_penalty;
+	u8				curr_burst_penalty;
+	u8				burst_penalty;
+	u8				burst_score;
+	u8				child_burst;
+	u32				child_burst_cnt;
+	u64				child_burst_last_cached;
+#endif // CONFIG_SCHED_BORE
 	s64				vlag;
 	u64				slice;
 
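The fields above are BORE's entire per-task state. As a quick reference, here is a userspace C sketch of the same fields with comments summarizing how the rest of this patch uses them; the comments are an interpretation of the hunks below, not kernel documentation.

#include <stdint.h>
#include <stdio.h>

/* Userspace sketch of the BORE fields added to struct sched_entity.
 * Comments reflect how later hunks in this patch use each field. */
struct bore_entity_state {
	uint64_t burst_time;              /* runtime accumulated since the task last slept or yielded */
	uint8_t  prev_burst_penalty;      /* smoothed penalty carried over from earlier bursts */
	uint8_t  curr_burst_penalty;      /* penalty computed from the burst in progress */
	uint8_t  burst_penalty;           /* max(prev, curr); feeds burst_score */
	uint8_t  burst_score;             /* burst_penalty >> 2, used as a 0..39 weight offset */
	uint8_t  child_burst;             /* cached average penalty of this task's children */
	uint32_t child_burst_cnt;         /* how many children contributed to the cache */
	uint64_t child_burst_last_cached; /* timestamp (ns) of the last cache refresh */
};

int main(void)
{
	/* Nothing clever here; just confirm the sketch compiles and show its footprint. */
	printf("sketch size: %zu bytes\n", sizeof(struct bore_entity_state));
	return 0;
}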
(-)a/init/Kconfig (+17 lines)
Lines 1299-1304 config CHECKPOINT_RESTORE
 
 	  If unsure, say N here.
 
+config SCHED_BORE
+	bool "Burst-Oriented Response Enhancer"
+	default y
+	help
+	  In desktop and mobile computing, one may prefer interactive
+	  tasks to stay responsive regardless of what runs in the background.
+
+	  Enabling this feature modifies the scheduler to discriminate
+	  tasks by their burst time (runtime since they last slept or
+	  yielded) and to prioritize the less bursty ones.
+	  Such tasks typically include window compositors, widget back ends,
+	  terminal emulators, video playback, games, and so on.
+	  At a small cost to scheduling fairness, it may improve
+	  responsiveness, especially under heavy background workloads.
+
+	  If unsure, say Y here.
+
 config SCHED_AUTOGROUP
 	bool "Automatic process group scheduling"
 	select CGROUPS
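The option defaults to y, and the fair.c hunks below expose the runtime knobs under /proc/sys/kernel/ (note that sched_bore itself is clamped to 1 by its sysctl bounds in this revision, so the build-time option is the real switch). A minimal userspace check, assuming procfs is mounted at /proc and the kernel was built with CONFIG_SCHED_BORE:

#include <stdio.h>

/* Minimal check of whether a BORE kernel is running, by reading the
 * sched_bore sysctl this patch registers in kernel/sched/fair.c.
 * On a non-BORE kernel the file simply does not exist. */
int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_bore", "r");
	int value;

	if (!f) {
		perror("sched_bore sysctl not available (CONFIG_SCHED_BORE off?)");
		return 1;
	}
	if (fscanf(f, "%d", &value) == 1)
		printf("sched_bore = %d\n", value);
	fclose(f);
	return 0;
}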
(-)a/kernel/sched/core.c (+143 lines)
Lines 4516-4521 int wake_up_state(struct task_struct *p, unsigned int state) Link Here
4516
	return try_to_wake_up(p, state, 0);
4516
	return try_to_wake_up(p, state, 0);
4517
}
4517
}
4518
4518
4519
#ifdef CONFIG_SCHED_BORE
4520
extern u8   sched_burst_fork_atavistic;
4521
extern uint sched_burst_cache_lifetime;
4522
4523
static void __init sched_init_bore(void) {
4524
	init_task.se.burst_time = 0;
4525
	init_task.se.prev_burst_penalty = 0;
4526
	init_task.se.curr_burst_penalty = 0;
4527
	init_task.se.burst_penalty = 0;
4528
	init_task.se.burst_score = 0;
4529
	init_task.se.child_burst_last_cached = 0;
4530
}
4531
4532
void inline sched_fork_bore(struct task_struct *p) {
4533
	p->se.burst_time = 0;
4534
	p->se.curr_burst_penalty = 0;
4535
	p->se.burst_score = 0;
4536
	p->se.child_burst_last_cached = 0;
4537
}
4538
4539
static u32 count_child_tasks(struct task_struct *p) {
4540
	struct task_struct *child;
4541
	u32 cnt = 0;
4542
	list_for_each_entry(child, &p->children, sibling) {cnt++;}
4543
	return cnt;
4544
}
4545
4546
static inline bool task_is_inheritable(struct task_struct *p) {
4547
	return (p->sched_class == &fair_sched_class);
4548
}
4549
4550
static inline bool child_burst_cache_expired(struct task_struct *p, u64 now) {
4551
	u64 expiration_time =
4552
		p->se.child_burst_last_cached + sched_burst_cache_lifetime;
4553
	return ((s64)(expiration_time - now) < 0);
4554
}
4555
4556
static void __update_child_burst_cache(
4557
	struct task_struct *p, u32 cnt, u32 sum, u64 now) {
4558
	u8 avg = 0;
4559
	if (cnt) avg = sum / cnt;
4560
	p->se.child_burst = max(avg, p->se.burst_penalty);
4561
	p->se.child_burst_cnt = cnt;
4562
	p->se.child_burst_last_cached = now;
4563
}
4564
4565
static inline void update_child_burst_direct(struct task_struct *p, u64 now) {
4566
	struct task_struct *child;
4567
	u32 cnt = 0;
4568
	u32 sum = 0;
4569
4570
	list_for_each_entry(child, &p->children, sibling) {
4571
		if (!task_is_inheritable(child)) continue;
4572
		cnt++;
4573
		sum += child->se.burst_penalty;
4574
	}
4575
4576
	__update_child_burst_cache(p, cnt, sum, now);
4577
}
4578
4579
static inline u8 __inherit_burst_direct(struct task_struct *p, u64 now) {
4580
	struct task_struct *parent = p->real_parent;
4581
	if (child_burst_cache_expired(parent, now))
4582
		update_child_burst_direct(parent, now);
4583
4584
	return parent->se.child_burst;
4585
}
4586
4587
static void update_child_burst_topological(
4588
	struct task_struct *p, u64 now, u32 depth, u32 *acnt, u32 *asum) {
4589
	struct task_struct *child, *dec;
4590
	u32 cnt = 0, dcnt = 0;
4591
	u32 sum = 0;
4592
4593
	list_for_each_entry(child, &p->children, sibling) {
4594
		dec = child;
4595
		while ((dcnt = count_child_tasks(dec)) == 1)
4596
			dec = list_first_entry(&dec->children, struct task_struct, sibling);
4597
		
4598
		if (!dcnt || !depth) {
4599
			if (!task_is_inheritable(dec)) continue;
4600
			cnt++;
4601
			sum += dec->se.burst_penalty;
4602
			continue;
4603
		}
4604
		if (!child_burst_cache_expired(dec, now)) {
4605
			cnt += dec->se.child_burst_cnt;
4606
			sum += (u32)dec->se.child_burst * dec->se.child_burst_cnt;
4607
			continue;
4608
		}
4609
		update_child_burst_topological(dec, now, depth - 1, &cnt, &sum);
4610
	}
4611
4612
	__update_child_burst_cache(p, cnt, sum, now);
4613
	*acnt += cnt;
4614
	*asum += sum;
4615
}
4616
4617
static inline u8 __inherit_burst_topological(struct task_struct *p, u64 now) {
4618
	struct task_struct *anc = p->real_parent;
4619
	u32 cnt = 0, sum = 0;
4620
4621
	while (anc->real_parent != anc && count_child_tasks(anc) == 1)
4622
		anc = anc->real_parent;
4623
4624
	if (child_burst_cache_expired(anc, now))
4625
		update_child_burst_topological(
4626
			anc, now, sched_burst_fork_atavistic - 1, &cnt, &sum);
4627
4628
	return anc->se.child_burst;
4629
}
4630
4631
static inline void inherit_burst(struct task_struct *p) {
4632
	u8 burst_cache;
4633
	u64 now = ktime_get_ns();
4634
4635
	read_lock(&tasklist_lock);
4636
	burst_cache = likely(sched_burst_fork_atavistic)?
4637
		__inherit_burst_topological(p, now):
4638
		__inherit_burst_direct(p, now);
4639
	read_unlock(&tasklist_lock);
4640
4641
	p->se.prev_burst_penalty = max(p->se.prev_burst_penalty, burst_cache);
4642
}
4643
4644
static void sched_post_fork_bore(struct task_struct *p) {
4645
	if (p->sched_class == &fair_sched_class)
4646
		inherit_burst(p);
4647
	p->se.burst_penalty = p->se.prev_burst_penalty;
4648
}
4649
#endif // CONFIG_SCHED_BORE
4650
4519
/*
4651
/*
4520
 * Perform scheduler related setup for a newly forked process p.
4652
 * Perform scheduler related setup for a newly forked process p.
4521
 * p is forked by current.
4653
 * p is forked by current.
Lines 4532-4537 static void __sched_fork(unsigned long clone_flags, struct task_struct *p) Link Here
4532
	p->se.prev_sum_exec_runtime	= 0;
4664
	p->se.prev_sum_exec_runtime	= 0;
4533
	p->se.nr_migrations		= 0;
4665
	p->se.nr_migrations		= 0;
4534
	p->se.vruntime			= 0;
4666
	p->se.vruntime			= 0;
4667
#ifdef CONFIG_SCHED_BORE
4668
	sched_fork_bore(p);
4669
#endif // CONFIG_SCHED_BORE
4535
	p->se.vlag			= 0;
4670
	p->se.vlag			= 0;
4536
	p->se.slice			= sysctl_sched_base_slice;
4671
	p->se.slice			= sysctl_sched_base_slice;
4537
	INIT_LIST_HEAD(&p->se.group_node);
4672
	INIT_LIST_HEAD(&p->se.group_node);
Lines 4848-4853 void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) Link Here
4848
4983
4849
void sched_post_fork(struct task_struct *p)
4984
void sched_post_fork(struct task_struct *p)
4850
{
4985
{
4986
#ifdef CONFIG_SCHED_BORE
4987
	sched_post_fork_bore(p);
4988
#endif // CONFIG_SCHED_BORE
4851
	uclamp_post_fork(p);
4989
	uclamp_post_fork(p);
4852
}
4990
}
4853
4991
Lines 9931-9936 void __init sched_init(void) Link Here
9931
	BUG_ON(&dl_sched_class != &stop_sched_class + 1);
10069
	BUG_ON(&dl_sched_class != &stop_sched_class + 1);
9932
#endif
10070
#endif
9933
10071
10072
#ifdef CONFIG_SCHED_BORE
10073
	sched_init_bore();
10074
	printk(KERN_INFO "BORE (Burst-Oriented Response Enhancer) CPU Scheduler modification 5.1.0 by Masahito Suzuki");
10075
#endif // CONFIG_SCHED_BORE
10076
9934
	wait_bit_init();
10077
	wait_bit_init();
9935
10078
9936
#ifdef CONFIG_FAIR_GROUP_SCHED
10079
#ifdef CONFIG_FAIR_GROUP_SCHED
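Two details in the core.c hunk above are worth spelling out: child_burst_cache_expired() uses a signed difference so the comparison stays valid even if the u64 clock values wrap, and __update_child_burst_cache() stores the children's average penalty but never lets it drop below the parent's own penalty. A small userspace model of just that arithmetic, using the 60 ms sched_burst_cache_lifetime default:

#include <stdint.h>
#include <stdio.h>

/* Userspace model of the child-burst cache arithmetic from the helpers above.
 * Names mirror the patch; everything else here is a stand-in. */

static uint32_t cache_lifetime_ns = 60000000u; /* sched_burst_cache_lifetime default */

/* Wrap-safe expiry check: same signed-difference trick as child_burst_cache_expired(). */
static int cache_expired(uint64_t last_cached, uint64_t now)
{
	uint64_t expiration = last_cached + cache_lifetime_ns;
	return (int64_t)(expiration - now) < 0;
}

/* __update_child_burst_cache(): average of the children, never below the parent. */
static uint8_t cached_child_burst(uint8_t parent_penalty, const uint8_t *child, uint32_t cnt)
{
	uint32_t sum = 0, i;
	uint8_t avg = 0;

	for (i = 0; i < cnt; i++)
		sum += child[i];
	if (cnt)
		avg = sum / cnt;
	return avg > parent_penalty ? avg : parent_penalty;
}

int main(void)
{
	uint8_t kids[] = { 40, 60, 80 };

	printf("expired after 59ms: %d, after 61ms: %d\n",
	       cache_expired(0, 59000000ull), cache_expired(0, 61000000ull));
	printf("cached burst (parent 70): %u\n", cached_child_burst(70, kids, 3));
	return 0;
}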
(-)a/kernel/sched/debug.c (-1 / +59 lines)
Lines 167-173 static const struct file_operations sched_feat_fops = { Link Here
167
};
167
};
168
168
169
#ifdef CONFIG_SMP
169
#ifdef CONFIG_SMP
170
#ifdef CONFIG_SCHED_BORE
171
static ssize_t sched_min_base_slice_write(struct file *filp, const char __user *ubuf,
172
				   size_t cnt, loff_t *ppos)
173
{
174
	char buf[16];
175
	unsigned int value;
176
177
	if (cnt > 15)
178
		cnt = 15;
179
180
	if (copy_from_user(&buf, ubuf, cnt))
181
		return -EFAULT;
182
	buf[cnt] = '\0';
183
184
	if (kstrtouint(buf, 10, &value))
185
		return -EINVAL;
170
186
187
	if (!value)
188
		return -EINVAL;
189
190
	sysctl_sched_min_base_slice = value;
191
	sched_update_min_base_slice();
192
193
	*ppos += cnt;
194
	return cnt;
195
}
196
197
static int sched_min_base_slice_show(struct seq_file *m, void *v)
198
{
199
	seq_printf(m, "%d\n", sysctl_sched_min_base_slice);
200
	return 0;
201
}
202
203
static int sched_min_base_slice_open(struct inode *inode, struct file *filp)
204
{
205
	return single_open(filp, sched_min_base_slice_show, NULL);
206
}
207
208
static const struct file_operations sched_min_base_slice_fops = {
209
	.open		= sched_min_base_slice_open,
210
	.write		= sched_min_base_slice_write,
211
	.read		= seq_read,
212
	.llseek		= seq_lseek,
213
	.release	= single_release,
214
};
215
#else // !CONFIG_SCHED_BORE
171
static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
216
static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
172
				   size_t cnt, loff_t *ppos)
217
				   size_t cnt, loff_t *ppos)
173
{
218
{
Lines 213-219 static const struct file_operations sched_scaling_fops = { Link Here
213
	.llseek		= seq_lseek,
258
	.llseek		= seq_lseek,
214
	.release	= single_release,
259
	.release	= single_release,
215
};
260
};
216
261
#endif // CONFIG_SCHED_BORE
217
#endif /* SMP */
262
#endif /* SMP */
218
263
219
#ifdef CONFIG_PREEMPT_DYNAMIC
264
#ifdef CONFIG_PREEMPT_DYNAMIC
Lines 347-359 static __init int sched_init_debug(void) Link Here
347
	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
392
	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
348
#endif
393
#endif
349
394
395
#ifdef CONFIG_SCHED_BORE
396
	debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops);
397
	debugfs_create_u32("base_slice_ns", 0400, debugfs_sched, &sysctl_sched_base_slice);
398
#else // !CONFIG_SCHED_BORE
350
	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
399
	debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
400
#endif // CONFIG_SCHED_BORE
351
401
352
	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
402
	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
353
	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
403
	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
354
404
355
#ifdef CONFIG_SMP
405
#ifdef CONFIG_SMP
406
#if !defined(CONFIG_SCHED_BORE)
356
	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
407
	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
408
#endif // CONFIG_SCHED_BORE
357
	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
409
	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
358
	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
410
	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
359
411
Lines 595-600 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) Link Here
595
		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
647
		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
596
		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
648
		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
597
649
650
#ifdef CONFIG_SCHED_BORE
651
	SEQ_printf(m, " %2d", p->se.burst_score);
652
#endif // CONFIG_SCHED_BORE
598
#ifdef CONFIG_NUMA_BALANCING
653
#ifdef CONFIG_NUMA_BALANCING
599
	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
654
	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
600
#endif
655
#endif
Lines 1068-1073 void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, Link Here
1068
1123
1069
	P(se.load.weight);
1124
	P(se.load.weight);
1070
#ifdef CONFIG_SMP
1125
#ifdef CONFIG_SMP
1126
#ifdef CONFIG_SCHED_BORE
1127
	P(se.burst_score);
1128
#endif // CONFIG_SCHED_BORE
1071
	P(se.avg.load_sum);
1129
	P(se.avg.load_sum);
1072
	P(se.avg.runnable_sum);
1130
	P(se.avg.runnable_sum);
1073
	P(se.avg.util_sum);
1131
	P(se.avg.util_sum);
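With BORE enabled, the debug.c hunk swaps the EEVDF tunable_scaling knob for a min_base_slice_ns file in the sched debugfs directory; writes update sysctl_sched_min_base_slice and re-derive base_slice_ns, which becomes read-only (0400), and a value of zero is rejected. A hedged userspace sketch of using it, assuming debugfs is mounted at /sys/kernel/debug and the caller is root:

#include <stdio.h>

/* Hedged sketch: set BORE's minimum base slice via the debugfs file the hunk
 * above creates. Assumes debugfs at /sys/kernel/debug and root privileges. */
int main(void)
{
	const char *path = "/sys/kernel/debug/sched/min_base_slice_ns";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* 2 ms, matching the patch's sysctl_sched_min_base_slice default. */
	fprintf(f, "%u\n", 2000000u);
	fclose(f);

	/* base_slice_ns is now 0400, so it can only be read back, not written. */
	f = fopen("/sys/kernel/debug/sched/base_slice_ns", "r");
	if (f) {
		unsigned int v;
		if (fscanf(f, "%u", &v) == 1)
			printf("effective base_slice_ns = %u\n", v);
		fclose(f);
	}
	return 0;
}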
(-)a/kernel/sched/fair.c (-2 / +215 lines)
Lines 19-24 Link Here
19
 *
19
 *
20
 *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
20
 *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
21
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
21
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
22
 *
23
 *  Burst-Oriented Response Enhancer (BORE) CPU Scheduler
24
 *  Copyright (C) 2021-2024 Masahito Suzuki <firelzrd@gmail.com>
22
 */
25
 */
23
#include <linux/energy_model.h>
26
#include <linux/energy_model.h>
24
#include <linux/mmap_lock.h>
27
#include <linux/mmap_lock.h>
Lines 64-83 Link Here
64
 *   SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
67
 *   SCHED_TUNABLESCALING_LOG - scaled logarithmical, *1+ilog(ncpus)
65
 *   SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
68
 *   SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus
66
 *
69
 *
67
 * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
70
 * (BORE  default SCHED_TUNABLESCALING_NONE = *1 constant)
71
 * (EEVDF default SCHED_TUNABLESCALING_LOG  = *(1+ilog(ncpus))
68
 */
72
 */
73
#ifdef CONFIG_SCHED_BORE
74
unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
75
#else // !CONFIG_SCHED_BORE
69
unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
76
unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
77
#endif // CONFIG_SCHED_BORE
70
78
71
/*
79
/*
72
 * Minimal preemption granularity for CPU-bound tasks:
80
 * Minimal preemption granularity for CPU-bound tasks:
73
 *
81
 *
74
 * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
82
 * (BORE  default: max(1 sec / HZ, min_base_slice) constant, units: nanoseconds)
83
 * (EEVDF default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
75
 */
84
 */
85
#ifdef CONFIG_SCHED_BORE
86
unsigned int            sysctl_sched_base_slice = 1000000000ULL / HZ;
87
static unsigned int configured_sched_base_slice = 1000000000ULL / HZ;
88
unsigned int        sysctl_sched_min_base_slice =    2000000ULL;
89
#else // !CONFIG_SCHED_BORE
76
unsigned int sysctl_sched_base_slice			= 750000ULL;
90
unsigned int sysctl_sched_base_slice			= 750000ULL;
77
static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
91
static unsigned int normalized_sysctl_sched_base_slice	= 750000ULL;
92
#endif // CONFIG_SCHED_BORE
78
93
79
const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
94
const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
80
95
96
#ifdef CONFIG_SCHED_BORE
97
u8   __read_mostly sched_bore                   = 1;
98
u8   __read_mostly sched_burst_smoothness_long  = 1;
99
u8   __read_mostly sched_burst_smoothness_short = 0;
100
u8   __read_mostly sched_burst_fork_atavistic   = 2;
101
u8   __read_mostly sched_burst_penalty_offset   = 22;
102
uint __read_mostly sched_burst_penalty_scale    = 1280;
103
uint __read_mostly sched_burst_cache_lifetime   = 60000000;
104
static int __maybe_unused sixty_four     = 64;
105
static int __maybe_unused maxval_12_bits = 4095;
106
107
#define MAX_BURST_PENALTY (39U <<2)
108
109
static inline u32 log2plus1_u64_u32f8(u64 v) {
110
	u32 msb = fls64(v);
111
	s32 excess_bits = msb - 9;
112
    u8 fractional = (0 <= excess_bits)? v >> excess_bits: v << -excess_bits;
113
	return msb << 8 | fractional;
114
}
115
116
static inline u32 calc_burst_penalty(u64 burst_time) {
117
	u32 greed, tolerance, penalty, scaled_penalty;
118
	
119
	greed = log2plus1_u64_u32f8(burst_time);
120
	tolerance = sched_burst_penalty_offset << 8;
121
	penalty = max(0, (s32)greed - (s32)tolerance);
122
	scaled_penalty = penalty * sched_burst_penalty_scale >> 16;
123
124
	return min(MAX_BURST_PENALTY, scaled_penalty);
125
}
126
127
static inline u64 scale_slice(u64 delta, struct sched_entity *se) {
128
	return mul_u64_u32_shr(delta, sched_prio_to_wmult[se->burst_score], 22);
129
}
130
131
static inline u64 __unscale_slice(u64 delta, u8 score) {
132
	return mul_u64_u32_shr(delta, sched_prio_to_weight[score], 10);
133
}
134
135
static inline u64 unscale_slice(u64 delta, struct sched_entity *se) {
136
	return __unscale_slice(delta, se->burst_score);
137
}
138
139
void reweight_task(struct task_struct *p, int prio);
140
141
static void update_burst_score(struct sched_entity *se) {
142
	if (!entity_is_task(se)) return;
143
	struct task_struct *p = task_of(se);
144
	u8 prio = p->static_prio - MAX_RT_PRIO;
145
	u8 prev_prio = min(39, prio + se->burst_score);
146
147
	se->burst_score = se->burst_penalty >> 2;
148
149
	u8 new_prio = min(39, prio + se->burst_score);
150
	if (new_prio != prev_prio)
151
		reweight_task(p, new_prio);
152
}
153
154
static void update_burst_penalty(struct sched_entity *se) {
155
	se->curr_burst_penalty = calc_burst_penalty(se->burst_time);
156
	se->burst_penalty = max(se->prev_burst_penalty, se->curr_burst_penalty);
157
	update_burst_score(se);
158
}
159
160
static inline u32 binary_smooth(u32 new, u32 old) {
161
  int increment = new - old;
162
  return (0 <= increment)?
163
    old + ( increment >> (int)sched_burst_smoothness_long):
164
    old - (-increment >> (int)sched_burst_smoothness_short);
165
}
166
167
static void restart_burst(struct sched_entity *se) {
168
	se->burst_penalty = se->prev_burst_penalty =
169
		binary_smooth(se->curr_burst_penalty, se->prev_burst_penalty);
170
	se->curr_burst_penalty = 0;
171
	se->burst_time = 0;
172
	update_burst_score(se);
173
}
174
175
static void restart_burst_rescale_deadline(struct sched_entity *se) {
176
	s64 vscaled, wremain, vremain = se->deadline - se->vruntime;
177
	u8 prev_score = se->burst_score;
178
	restart_burst(se);
179
	if (prev_score > se->burst_score) {
180
		wremain = __unscale_slice(abs(vremain), prev_score);
181
		vscaled = scale_slice(wremain, se);
182
		if (unlikely(vremain < 0))
183
			vscaled = -vscaled;
184
		se->deadline = se->vruntime + vscaled;
185
	}
186
}
187
#endif // CONFIG_SCHED_BORE
188
81
int sched_thermal_decay_shift;
189
int sched_thermal_decay_shift;
82
static int __init setup_sched_thermal_decay_shift(char *str)
190
static int __init setup_sched_thermal_decay_shift(char *str)
83
{
191
{
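The penalty math in the hunk above is all fixed point: log2plus1_u64_u32f8() returns log2(burst_time)+1 in 24.8 format, calc_burst_penalty() subtracts the offset (default 22, shifted into the same format), scales by sched_burst_penalty_scale/2^16 and clamps at 39<<2, and binary_smooth() carries the penalty across bursts (rises are halved, drops are taken in full with the default smoothness values). The following userspace model transcribes that arithmetic with the patch defaults so the numbers can be checked directly; it is a model, not the kernel code:

#include <stdint.h>
#include <stdio.h>

/* Userspace model of the BORE penalty math above, using the patch defaults:
 * offset 22, scale 1280, smoothness_long 1, smoothness_short 0. */

#define MAX_BURST_PENALTY (39U << 2)

static uint32_t fls64_model(uint64_t v)     /* 1-based index of the highest set bit; 0 for v==0 */
{
	return v ? 64u - (uint32_t)__builtin_clzll(v) : 0u;
}

/* log2plus1_u64_u32f8(): log2(v)+1 as a 24.8 fixed-point value */
static uint32_t log2plus1_u64_u32f8(uint64_t v)
{
	uint32_t msb = fls64_model(v);
	int32_t excess_bits = (int32_t)msb - 9;
	uint8_t fractional = (excess_bits >= 0) ? (uint8_t)(v >> excess_bits)
						: (uint8_t)(v << -excess_bits);
	return msb << 8 | fractional;
}

/* calc_burst_penalty() with the default offset (22) and scale (1280) */
static uint32_t calc_burst_penalty(uint64_t burst_time_ns)
{
	uint32_t greed = log2plus1_u64_u32f8(burst_time_ns);
	uint32_t tolerance = 22u << 8;
	uint32_t penalty = (greed > tolerance) ? greed - tolerance : 0;
	uint32_t scaled = penalty * 1280u >> 16;

	return scaled < MAX_BURST_PENALTY ? scaled : MAX_BURST_PENALTY;
}

/* binary_smooth(): rises halved (shift 1), drops applied in full (shift 0) by default */
static uint32_t binary_smooth(uint32_t newv, uint32_t oldv)
{
	int inc = (int)newv - (int)oldv;
	return inc >= 0 ? oldv + (inc >> 1) : oldv - (-inc >> 0);
}

int main(void)
{
	uint64_t ns;

	/* Penalty stays 0 until a burst exceeds roughly 2^21 ns (~2.1 ms, from the
	 * offset of 22), then grows with log2 of the burst length up to the 156 cap. */
	for (ns = 1000000ull; ns <= 1000000000ull; ns *= 10)
		printf("burst %10llu ns -> penalty %3u (score %u)\n",
		       (unsigned long long)ns, calc_burst_penalty(ns),
		       calc_burst_penalty(ns) >> 2);
	printf("smooth(0 after 120): %u\n", binary_smooth(0, 120));
	return 0;
}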
Lines 137-142 static unsigned int sysctl_numa_balancing_promote_rate_limit = 65536; Link Here
137
245
138
#ifdef CONFIG_SYSCTL
246
#ifdef CONFIG_SYSCTL
139
static struct ctl_table sched_fair_sysctls[] = {
247
static struct ctl_table sched_fair_sysctls[] = {
248
#ifdef CONFIG_SCHED_BORE
249
	{
250
		.procname	= "sched_bore",
251
		.data		= &sched_bore,
252
		.maxlen		= sizeof(u8),
253
		.mode		= 0644,
254
		.proc_handler = proc_dou8vec_minmax,
255
		.extra1		= SYSCTL_ONE,
256
		.extra2		= SYSCTL_ONE,
257
	},
258
	{
259
		.procname	= "sched_burst_smoothness_long",
260
		.data		= &sched_burst_smoothness_long,
261
		.maxlen		= sizeof(u8),
262
		.mode		= 0644,
263
		.proc_handler = proc_dou8vec_minmax,
264
		.extra1		= SYSCTL_ZERO,
265
		.extra2		= SYSCTL_ONE,
266
	},
267
	{
268
		.procname	= "sched_burst_smoothness_short",
269
		.data		= &sched_burst_smoothness_short,
270
		.maxlen		= sizeof(u8),
271
		.mode		= 0644,
272
		.proc_handler = proc_dou8vec_minmax,
273
		.extra1		= SYSCTL_ZERO,
274
		.extra2		= SYSCTL_ONE,
275
	},
276
	{
277
		.procname	= "sched_burst_fork_atavistic",
278
		.data		= &sched_burst_fork_atavistic,
279
		.maxlen		= sizeof(u8),
280
		.mode		= 0644,
281
		.proc_handler = proc_dou8vec_minmax,
282
		.extra1		= SYSCTL_ZERO,
283
		.extra2		= SYSCTL_THREE,
284
	},
285
	{
286
		.procname	= "sched_burst_penalty_offset",
287
		.data		= &sched_burst_penalty_offset,
288
		.maxlen		= sizeof(u8),
289
		.mode		= 0644,
290
		.proc_handler = proc_dou8vec_minmax,
291
		.extra1		= SYSCTL_ZERO,
292
		.extra2		= &sixty_four,
293
	},
294
	{
295
		.procname	= "sched_burst_penalty_scale",
296
		.data		= &sched_burst_penalty_scale,
297
		.maxlen		= sizeof(uint),
298
		.mode		= 0644,
299
		.proc_handler = proc_douintvec_minmax,
300
		.extra1		= SYSCTL_ZERO,
301
		.extra2		= &maxval_12_bits,
302
	},
303
	{
304
		.procname	= "sched_burst_cache_lifetime",
305
		.data		= &sched_burst_cache_lifetime,
306
		.maxlen		= sizeof(uint),
307
		.mode		= 0644,
308
		.proc_handler = proc_douintvec,
309
	},
310
#endif // CONFIG_SCHED_BORE
140
#ifdef CONFIG_CFS_BANDWIDTH
311
#ifdef CONFIG_CFS_BANDWIDTH
141
	{
312
	{
142
		.procname       = "sched_cfs_bandwidth_slice_us",
313
		.procname       = "sched_cfs_bandwidth_slice_us",
Lines 195-200 static inline void update_load_set(struct load_weight *lw, unsigned long w) Link Here
195
 *
366
 *
196
 * This idea comes from the SD scheduler of Con Kolivas:
367
 * This idea comes from the SD scheduler of Con Kolivas:
197
 */
368
 */
369
#ifdef CONFIG_SCHED_BORE
370
static void update_sysctl(void) {
371
	sysctl_sched_base_slice =
372
		max(sysctl_sched_min_base_slice, configured_sched_base_slice);
373
}
374
void sched_update_min_base_slice(void) { update_sysctl(); }
375
#else // !CONFIG_SCHED_BORE
198
static unsigned int get_update_sysctl_factor(void)
376
static unsigned int get_update_sysctl_factor(void)
199
{
377
{
200
	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
378
	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
Lines 225-230 static void update_sysctl(void) Link Here
225
	SET_SYSCTL(sched_base_slice);
403
	SET_SYSCTL(sched_base_slice);
226
#undef SET_SYSCTL
404
#undef SET_SYSCTL
227
}
405
}
406
#endif // CONFIG_SCHED_BORE
228
407
229
void __init sched_init_granularity(void)
408
void __init sched_init_granularity(void)
230
{
409
{
Lines 702-707 static s64 entity_lag(u64 avruntime, struct sched_entity *se) Link Here
702
881
703
	vlag = avruntime - se->vruntime;
882
	vlag = avruntime - se->vruntime;
704
	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
883
	limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
884
#ifdef CONFIG_SCHED_BORE
885
	limit >>= 1;
886
#endif // CONFIG_SCHED_BORE
705
887
706
	return clamp(vlag, -limit, limit);
888
	return clamp(vlag, -limit, limit);
707
}
889
}
Lines 961-966 struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) Link Here
961
 * Scheduling class statistics methods:
1143
 * Scheduling class statistics methods:
962
 */
1144
 */
963
#ifdef CONFIG_SMP
1145
#ifdef CONFIG_SMP
1146
#if !defined(CONFIG_SCHED_BORE)
964
int sched_update_scaling(void)
1147
int sched_update_scaling(void)
965
{
1148
{
966
	unsigned int factor = get_update_sysctl_factor();
1149
	unsigned int factor = get_update_sysctl_factor();
Lines 972-977 int sched_update_scaling(void) Link Here
972
1155
973
	return 0;
1156
	return 0;
974
}
1157
}
1158
#endif // CONFIG_SCHED_BORE
975
#endif
1159
#endif
976
#endif
1160
#endif
977
1161
Lines 1171-1177 static void update_curr(struct cfs_rq *cfs_rq) Link Here
1171
	if (unlikely(delta_exec <= 0))
1355
	if (unlikely(delta_exec <= 0))
1172
		return;
1356
		return;
1173
1357
1358
#ifdef CONFIG_SCHED_BORE
1359
	curr->burst_time += delta_exec;
1360
	update_burst_penalty(curr);
1361
	curr->vruntime += max(1ULL, calc_delta_fair(delta_exec, curr));
1362
#else // !CONFIG_SCHED_BORE
1174
	curr->vruntime += calc_delta_fair(delta_exec, curr);
1363
	curr->vruntime += calc_delta_fair(delta_exec, curr);
1364
#endif // CONFIG_SCHED_BORE
1175
	update_deadline(cfs_rq, curr);
1365
	update_deadline(cfs_rq, curr);
1176
	update_min_vruntime(cfs_rq);
1366
	update_min_vruntime(cfs_rq);
1177
1367
Lines 5183-5188 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) Link Here
5183
	 *
5373
	 *
5184
	 * EEVDF: placement strategy #1 / #2
5374
	 * EEVDF: placement strategy #1 / #2
5185
	 */
5375
	 */
5376
#ifdef CONFIG_SCHED_BORE
5377
	if (se->vlag)
5378
#endif // CONFIG_SCHED_BORE
5186
	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
5379
	if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
5187
		struct sched_entity *curr = cfs_rq->curr;
5380
		struct sched_entity *curr = cfs_rq->curr;
5188
		unsigned long load;
5381
		unsigned long load;
Lines 6815-6820 static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) Link Here
6815
	bool was_sched_idle = sched_idle_rq(rq);
7008
	bool was_sched_idle = sched_idle_rq(rq);
6816
7009
6817
	util_est_dequeue(&rq->cfs, p);
7010
	util_est_dequeue(&rq->cfs, p);
7011
#ifdef CONFIG_SCHED_BORE
7012
	if (task_sleep) {
7013
		cfs_rq = cfs_rq_of(se);
7014
		if (cfs_rq->curr == se)
7015
			update_curr(cfs_rq);
7016
		restart_burst(se);
7017
	}
7018
#endif // CONFIG_SCHED_BORE
6818
7019
6819
	for_each_sched_entity(se) {
7020
	for_each_sched_entity(se) {
6820
		cfs_rq = cfs_rq_of(se);
7021
		cfs_rq = cfs_rq_of(se);
Lines 8570-8585 static void yield_task_fair(struct rq *rq) Link Here
8570
	/*
8771
	/*
8571
	 * Are we the only task in the tree?
8772
	 * Are we the only task in the tree?
8572
	 */
8773
	 */
8774
#if !defined(CONFIG_SCHED_BORE)
8573
	if (unlikely(rq->nr_running == 1))
8775
	if (unlikely(rq->nr_running == 1))
8574
		return;
8776
		return;
8575
8777
8576
	clear_buddies(cfs_rq, se);
8778
	clear_buddies(cfs_rq, se);
8779
#endif // CONFIG_SCHED_BORE
8577
8780
8578
	update_rq_clock(rq);
8781
	update_rq_clock(rq);
8579
	/*
8782
	/*
8580
	 * Update run-time statistics of the 'current'.
8783
	 * Update run-time statistics of the 'current'.
8581
	 */
8784
	 */
8582
	update_curr(cfs_rq);
8785
	update_curr(cfs_rq);
8786
#ifdef CONFIG_SCHED_BORE
8787
	restart_burst_rescale_deadline(se);
8788
	if (unlikely(rq->nr_running == 1))
8789
		return;
8790
8791
	clear_buddies(cfs_rq, se);
8792
#endif // CONFIG_SCHED_BORE
8583
	/*
8793
	/*
8584
	 * Tell update_rq_clock() that we've just updated,
8794
	 * Tell update_rq_clock() that we've just updated,
8585
	 * so we don't do microscopic update in schedule()
8795
	 * so we don't do microscopic update in schedule()
Lines 12645-12650 static void task_fork_fair(struct task_struct *p) Link Here
12645
	curr = cfs_rq->curr;
12855
	curr = cfs_rq->curr;
12646
	if (curr)
12856
	if (curr)
12647
		update_curr(cfs_rq);
12857
		update_curr(cfs_rq);
12858
#ifdef CONFIG_SCHED_BORE
12859
	update_burst_score(se);
12860
#endif // CONFIG_SCHED_BORE
12648
	place_entity(cfs_rq, se, ENQUEUE_INITIAL);
12861
	place_entity(cfs_rq, se, ENQUEUE_INITIAL);
12649
	rq_unlock(rq, &rf);
12862
	rq_unlock(rq, &rf);
12650
}
12863
}
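Tying the pieces together, update_burst_score() treats burst_score (burst_penalty >> 2, so 0..39) as an offset on top of the task's nice-derived index into the 0..39 weight table, clamps it, and calls reweight_task() only when the effective index changes. A small model of that index arithmetic; the kernel's sched_prio_to_weight[] lookup and reweight_task() themselves are not reproduced here:

#include <stdio.h>

/* Model of how update_burst_score() maps a task onto the 0..39 weight table:
 * index = min(39, (static_prio - MAX_RT_PRIO) + burst_score). */
static unsigned int effective_weight_index(int nice, unsigned int burst_score)
{
	unsigned int prio = (unsigned int)(nice + 20);   /* static_prio - MAX_RT_PRIO for nice -20..19 */
	unsigned int idx = prio + burst_score;

	return idx < 39 ? idx : 39;
}

int main(void)
{
	/* A nice-0 task with a saturated penalty (score 39) is pushed to the
	 * tail of the table, i.e. treated roughly like a nice +19 task. */
	printf("nice 0, score 0  -> index %u\n", effective_weight_index(0, 0));
	printf("nice 0, score 10 -> index %u\n", effective_weight_index(0, 10));
	printf("nice 0, score 39 -> index %u\n", effective_weight_index(0, 39));
	return 0;
}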
(-)a/kernel/sched/features.h (+4 lines)
Lines 6-12
  */
 SCHED_FEAT(PLACE_LAG, true)
 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
+#ifdef CONFIG_SCHED_BORE
+SCHED_FEAT(RUN_TO_PARITY, false)
+#else // !CONFIG_SCHED_BORE
 SCHED_FEAT(RUN_TO_PARITY, true)
+#endif // CONFIG_SCHED_BORE
 
 /*
  * Prefer to schedule the task we woke last (assuming it failed
(-)a/kernel/sched/sched.h (-1 / +7 lines)
Lines 1967-1973 static inline void dirty_sched_domain_sysctl(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_BORE
+extern void sched_update_min_base_slice(void);
+#else // !CONFIG_SCHED_BORE
 extern int sched_update_scaling(void);
+#endif // CONFIG_SCHED_BORE
 
 static inline const struct cpumask *task_user_cpus(struct task_struct *p)
 {
Lines 2554-2559 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
 extern unsigned int sysctl_sched_base_slice;
+#ifdef CONFIG_SCHED_BORE
+extern unsigned int sysctl_sched_min_base_slice;
+#endif // CONFIG_SCHED_BORE
 
 #ifdef CONFIG_SCHED_DEBUG
 extern int sysctl_resched_latency_warn_ms;
-
(-)a/Documentation/admin-guide/kernel-parameters.txt (+12 lines)
Lines 6552-6557
 			Force threading of all interrupt handlers except those
 			marked explicitly IRQF_NO_THREAD.
 
+	threadprintk	[KNL]
+			Force threaded printing of all legacy consoles. Be
+			aware that with this option, the shutdown, reboot, and
+			panic messages may not be printed on the legacy
+			consoles. Also, earlycon/earlyprintk printing will be
+			delayed until a regular console or the kthread is
+			available.
+
+			Users can view /proc/consoles to see if their console
+			driver is legacy or not. Non-legacy (NBCON) console
+			drivers are already threaded and are shown with 'N'.
+
 	topology=	[S390,EARLY]
 			Format: {off | on}
 			Specify if the kernel should make use of the cpu
(-)a/arch/arm/Kconfig (-2 / +4 lines)
Lines 37-42 config ARM Link Here
37
	select ARCH_SUPPORTS_ATOMIC_RMW
37
	select ARCH_SUPPORTS_ATOMIC_RMW
38
	select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
38
	select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
39
	select ARCH_SUPPORTS_PER_VMA_LOCK
39
	select ARCH_SUPPORTS_PER_VMA_LOCK
40
	select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK
40
	select ARCH_USE_BUILTIN_BSWAP
41
	select ARCH_USE_BUILTIN_BSWAP
41
	select ARCH_USE_CMPXCHG_LOCKREF
42
	select ARCH_USE_CMPXCHG_LOCKREF
42
	select ARCH_USE_MEMTEST
43
	select ARCH_USE_MEMTEST
Lines 76-82 config ARM Link Here
76
	select HAS_IOPORT
77
	select HAS_IOPORT
77
	select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
78
	select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
78
	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
79
	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
79
	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
80
	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT
80
	select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
81
	select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL
81
	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
82
	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
82
	select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
83
	select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
Lines 99-105 config ARM Link Here
99
	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
100
	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
100
	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
101
	select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
101
	select HAVE_EXIT_THREAD
102
	select HAVE_EXIT_THREAD
102
	select HAVE_FAST_GUP if ARM_LPAE
103
	select HAVE_FAST_GUP if ARM_LPAE && !(PREEMPT_RT && HIGHPTE)
103
	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
104
	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
104
	select HAVE_FUNCTION_ERROR_INJECTION
105
	select HAVE_FUNCTION_ERROR_INJECTION
105
	select HAVE_FUNCTION_GRAPH_TRACER
106
	select HAVE_FUNCTION_GRAPH_TRACER
Lines 122-127 config ARM Link Here
122
	select HAVE_PERF_EVENTS
123
	select HAVE_PERF_EVENTS
123
	select HAVE_PERF_REGS
124
	select HAVE_PERF_REGS
124
	select HAVE_PERF_USER_STACK_DUMP
125
	select HAVE_PERF_USER_STACK_DUMP
126
	select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM
125
	select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
127
	select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE
126
	select HAVE_REGS_AND_STACK_ACCESS_API
128
	select HAVE_REGS_AND_STACK_ACCESS_API
127
	select HAVE_RSEQ
129
	select HAVE_RSEQ
(-)a/arch/arm/mm/fault.c (+6 lines)
Lines 443-448 do_translation_fault(unsigned long addr, unsigned int fsr, Link Here
443
	if (addr < TASK_SIZE)
443
	if (addr < TASK_SIZE)
444
		return do_page_fault(addr, fsr, regs);
444
		return do_page_fault(addr, fsr, regs);
445
445
446
	if (interrupts_enabled(regs))
447
		local_irq_enable();
448
446
	if (user_mode(regs))
449
	if (user_mode(regs))
447
		goto bad_area;
450
		goto bad_area;
448
451
Lines 513-518 do_translation_fault(unsigned long addr, unsigned int fsr, Link Here
513
static int
516
static int
514
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
517
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
515
{
518
{
519
	if (interrupts_enabled(regs))
520
		local_irq_enable();
521
516
	do_bad_area(addr, fsr, regs);
522
	do_bad_area(addr, fsr, regs);
517
	return 0;
523
	return 0;
518
}
524
}
(-)a/arch/arm/vfp/vfpmodule.c (-21 / +53 lines)
Lines 55-60 extern unsigned int VFP_arch_feroceon __alias(VFP_arch); Link Here
55
 */
55
 */
56
union vfp_state *vfp_current_hw_state[NR_CPUS];
56
union vfp_state *vfp_current_hw_state[NR_CPUS];
57
57
58
/*
59
 * Claim ownership of the VFP unit.
60
 *
61
 * The caller may change VFP registers until vfp_unlock() is called.
62
 *
63
 * local_bh_disable() is used to disable preemption and to disable VFP
64
 * processing in softirq context. On PREEMPT_RT kernels local_bh_disable() is
65
 * not sufficient because it only serializes soft interrupt related sections
66
 * via a local lock, but stays preemptible. Disabling preemption is the right
67
 * choice here as bottom half processing is always in thread context on RT
68
 * kernels so it implicitly prevents bottom half processing as well.
69
 */
70
static void vfp_lock(void)
71
{
72
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
73
		local_bh_disable();
74
	else
75
		preempt_disable();
76
}
77
78
static void vfp_unlock(void)
79
{
80
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
81
		local_bh_enable();
82
	else
83
		preempt_enable();
84
}
85
58
/*
86
/*
59
 * Is 'thread's most up to date state stored in this CPUs hardware?
87
 * Is 'thread's most up to date state stored in this CPUs hardware?
60
 * Must be called from non-preemptible context.
88
 * Must be called from non-preemptible context.
Lines 240-246 static void vfp_panic(char *reason, u32 inst) Link Here
240
/*
268
/*
241
 * Process bitmask of exception conditions.
269
 * Process bitmask of exception conditions.
242
 */
270
 */
243
static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_regs *regs)
271
static int vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr)
244
{
272
{
245
	int si_code = 0;
273
	int si_code = 0;
246
274
Lines 248-255 static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ Link Here
248
276
249
	if (exceptions == VFP_EXCEPTION_ERROR) {
277
	if (exceptions == VFP_EXCEPTION_ERROR) {
250
		vfp_panic("unhandled bounce", inst);
278
		vfp_panic("unhandled bounce", inst);
251
		vfp_raise_sigfpe(FPE_FLTINV, regs);
279
		return FPE_FLTINV;
252
		return;
253
	}
280
	}
254
281
255
	/*
282
	/*
Lines 277-284 static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ Link Here
277
	RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF);
304
	RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF);
278
	RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV);
305
	RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV);
279
306
280
	if (si_code)
307
	return si_code;
281
		vfp_raise_sigfpe(si_code, regs);
282
}
308
}
283
309
284
/*
310
/*
Lines 324-329 static u32 vfp_emulate_instruction(u32 inst, u32 fpscr, struct pt_regs *regs) Link Here
324
static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
350
static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
325
{
351
{
326
	u32 fpscr, orig_fpscr, fpsid, exceptions;
352
	u32 fpscr, orig_fpscr, fpsid, exceptions;
353
	int si_code2 = 0;
354
	int si_code = 0;
327
355
328
	pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);
356
	pr_debug("VFP: bounce: trigger %08x fpexc %08x\n", trigger, fpexc);
329
357
Lines 369-376 static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) Link Here
369
		 * unallocated VFP instruction but with FPSCR.IXE set and not
397
		 * unallocated VFP instruction but with FPSCR.IXE set and not
370
		 * on VFP subarch 1.
398
		 * on VFP subarch 1.
371
		 */
399
		 */
372
		 vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr, regs);
400
		si_code = vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr);
373
		return;
401
		goto exit;
374
	}
402
	}
375
403
376
	/*
404
	/*
Lines 394-407 static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) Link Here
394
	 */
422
	 */
395
	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
423
	exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
396
	if (exceptions)
424
	if (exceptions)
397
		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
425
		si_code2 = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
398
426
399
	/*
427
	/*
400
	 * If there isn't a second FP instruction, exit now. Note that
428
	 * If there isn't a second FP instruction, exit now. Note that
401
	 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
429
	 * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
402
	 */
430
	 */
403
	if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
431
	if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
404
		return;
432
		goto exit;
405
433
406
	/*
434
	/*
407
	 * The barrier() here prevents fpinst2 being read
435
	 * The barrier() here prevents fpinst2 being read
Lines 413-419 static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs) Link Here
413
 emulate:
441
 emulate:
414
	exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
442
	exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
415
	if (exceptions)
443
	if (exceptions)
416
		vfp_raise_exceptions(exceptions, trigger, orig_fpscr, regs);
444
		si_code = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
445
exit:
446
	vfp_unlock();
447
	if (si_code2)
448
		vfp_raise_sigfpe(si_code2, regs);
449
	if (si_code)
450
		vfp_raise_sigfpe(si_code, regs);
417
}
451
}
418
452
419
static void vfp_enable(void *unused)
453
static void vfp_enable(void *unused)
Lines 512-522 static inline void vfp_pm_init(void) { } Link Here
512
 */
546
 */
513
void vfp_sync_hwstate(struct thread_info *thread)
547
void vfp_sync_hwstate(struct thread_info *thread)
514
{
548
{
515
	unsigned int cpu = get_cpu();
549
	vfp_lock();
516
550
517
	local_bh_disable();
551
	if (vfp_state_in_hw(raw_smp_processor_id(), thread)) {
518
519
	if (vfp_state_in_hw(cpu, thread)) {
520
		u32 fpexc = fmrx(FPEXC);
552
		u32 fpexc = fmrx(FPEXC);
521
553
522
		/*
554
		/*
Lines 527-534 void vfp_sync_hwstate(struct thread_info *thread) Link Here
527
		fmxr(FPEXC, fpexc);
559
		fmxr(FPEXC, fpexc);
528
	}
560
	}
529
561
530
	local_bh_enable();
562
	vfp_unlock();
531
	put_cpu();
532
}
563
}
533
564
534
/* Ensure that the thread reloads the hardware VFP state on the next use. */
565
/* Ensure that the thread reloads the hardware VFP state on the next use. */
Lines 683-689 static int vfp_support_entry(struct pt_regs *regs, u32 trigger) Link Here
683
	if (!user_mode(regs))
714
	if (!user_mode(regs))
684
		return vfp_kmode_exception(regs, trigger);
715
		return vfp_kmode_exception(regs, trigger);
685
716
686
	local_bh_disable();
717
	vfp_lock();
687
	fpexc = fmrx(FPEXC);
718
	fpexc = fmrx(FPEXC);
688
719
689
	/*
720
	/*
Lines 748-753 static int vfp_support_entry(struct pt_regs *regs, u32 trigger) Link Here
748
		 * replay the instruction that trapped.
779
		 * replay the instruction that trapped.
749
		 */
780
		 */
750
		fmxr(FPEXC, fpexc);
781
		fmxr(FPEXC, fpexc);
782
		vfp_unlock();
751
	} else {
783
	} else {
752
		/* Check for synchronous or asynchronous exceptions */
784
		/* Check for synchronous or asynchronous exceptions */
753
		if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
785
		if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
Lines 762-778 static int vfp_support_entry(struct pt_regs *regs, u32 trigger) Link Here
762
			if (!(fpscr & FPSCR_IXE)) {
794
			if (!(fpscr & FPSCR_IXE)) {
763
				if (!(fpscr & FPSCR_LENGTH_MASK)) {
795
				if (!(fpscr & FPSCR_LENGTH_MASK)) {
764
					pr_debug("not VFP\n");
796
					pr_debug("not VFP\n");
765
					local_bh_enable();
797
					vfp_unlock();
766
					return -ENOEXEC;
798
					return -ENOEXEC;
767
				}
799
				}
768
				fpexc |= FPEXC_DEX;
800
				fpexc |= FPEXC_DEX;
769
			}
801
			}
770
		}
802
		}
771
bounce:		regs->ARM_pc += 4;
803
bounce:		regs->ARM_pc += 4;
804
		/* VFP_bounce() will invoke vfp_unlock() */
772
		VFP_bounce(trigger, fpexc, regs);
805
		VFP_bounce(trigger, fpexc, regs);
773
	}
806
	}
774
807
775
	local_bh_enable();
776
	return 0;
808
	return 0;
777
}
809
}
778
810
Lines 837-843 void kernel_neon_begin(void) Link Here
837
	unsigned int cpu;
869
	unsigned int cpu;
838
	u32 fpexc;
870
	u32 fpexc;
839
871
840
	local_bh_disable();
872
	vfp_lock();
841
873
842
	/*
874
	/*
843
	 * Kernel mode NEON is only allowed outside of hardirq context with
875
	 * Kernel mode NEON is only allowed outside of hardirq context with
Lines 868-874 void kernel_neon_end(void) Link Here
868
{
900
{
869
	/* Disable the NEON/VFP unit. */
901
	/* Disable the NEON/VFP unit. */
870
	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
902
	fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
871
	local_bh_enable();
903
	vfp_unlock();
872
}
904
}
873
EXPORT_SYMBOL(kernel_neon_end);
905
EXPORT_SYMBOL(kernel_neon_end);
874
906
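The vfpmodule.c changes replace bare local_bh_disable()/local_bh_enable() around VFP state with vfp_lock()/vfp_unlock(), which fall back to preempt_disable()/preempt_enable() on PREEMPT_RT, where bottom halves run in preemptible threads and BH-off alone no longer excludes preemption. A userspace sketch of just the compile-time selection, with the kernel primitives replaced by stubs (build with -DCONFIG_PREEMPT_RT to take the RT branch):

#include <stdio.h>

/* Userspace sketch of the vfp_lock()/vfp_unlock() selection pattern above.
 * The kernel primitives are replaced by printouts; only the compile-time
 * choice between the two pairs is illustrated. */
#ifdef CONFIG_PREEMPT_RT
#define IS_ENABLED_PREEMPT_RT 1
#else
#define IS_ENABLED_PREEMPT_RT 0
#endif

static void local_bh_disable_stub(void) { puts("local_bh_disable()"); }
static void local_bh_enable_stub(void)  { puts("local_bh_enable()"); }
static void preempt_disable_stub(void)  { puts("preempt_disable()"); }
static void preempt_enable_stub(void)   { puts("preempt_enable()"); }

static void vfp_lock(void)
{
	if (!IS_ENABLED_PREEMPT_RT)
		local_bh_disable_stub();  /* non-RT: BH off also implies non-preemptible */
	else
		preempt_disable_stub();   /* RT: BH-off stays preemptible, so disable preemption directly */
}

static void vfp_unlock(void)
{
	if (!IS_ENABLED_PREEMPT_RT)
		local_bh_enable_stub();
	else
		preempt_enable_stub();
}

int main(void)
{
	vfp_lock();
	puts("... touch VFP state ...");
	vfp_unlock();
	return 0;
}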
(-)a/arch/arm64/Kconfig (+1 lines)
Lines 98-103 config ARM64
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_SUPPORTS_PAGE_TABLE_CHECK
 	select ARCH_SUPPORTS_PER_VMA_LOCK
+	select ARCH_SUPPORTS_RT
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
 	select ARCH_WANT_DEFAULT_BPF_JIT
(-)a/arch/powerpc/Kconfig (+2 lines)
Lines 166-171 config PPC Link Here
166
	select ARCH_STACKWALK
166
	select ARCH_STACKWALK
167
	select ARCH_SUPPORTS_ATOMIC_RMW
167
	select ARCH_SUPPORTS_ATOMIC_RMW
168
	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
168
	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
169
	select ARCH_SUPPORTS_RT			if HAVE_POSIX_CPU_TIMERS_TASK_WORK
169
	select ARCH_USE_BUILTIN_BSWAP
170
	select ARCH_USE_BUILTIN_BSWAP
170
	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
171
	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
171
	select ARCH_USE_MEMTEST
172
	select ARCH_USE_MEMTEST
Lines 270-275 config PPC Link Here
270
	select HAVE_PERF_USER_STACK_DUMP
271
	select HAVE_PERF_USER_STACK_DUMP
271
	select HAVE_REGS_AND_STACK_ACCESS_API
272
	select HAVE_REGS_AND_STACK_ACCESS_API
272
	select HAVE_RELIABLE_STACKTRACE
273
	select HAVE_RELIABLE_STACKTRACE
274
	select HAVE_POSIX_CPU_TIMERS_TASK_WORK	if !KVM
273
	select HAVE_RSEQ
275
	select HAVE_RSEQ
274
	select HAVE_SETUP_PER_CPU_AREA		if PPC64
276
	select HAVE_SETUP_PER_CPU_AREA		if PPC64
275
	select HAVE_SOFTIRQ_ON_OWN_STACK
277
	select HAVE_SOFTIRQ_ON_OWN_STACK
(-)a/arch/powerpc/include/asm/stackprotector.h (-1 / +6 lines)
Lines 19-26 Link Here
19
 */
19
 */
20
static __always_inline void boot_init_stack_canary(void)
20
static __always_inline void boot_init_stack_canary(void)
21
{
21
{
22
	unsigned long canary = get_random_canary();
22
	unsigned long canary;
23
23
24
#ifndef CONFIG_PREEMPT_RT
25
	canary = get_random_canary();
26
#else
27
	canary = ((unsigned long)&canary) & CANARY_MASK;
28
#endif
24
	current->stack_canary = canary;
29
	current->stack_canary = canary;
25
#ifdef CONFIG_PPC64
30
#ifdef CONFIG_PPC64
26
	get_paca()->canary = canary;
31
	get_paca()->canary = canary;
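On PREEMPT_RT this hunk avoids calling get_random_canary() from boot_init_stack_canary() (presumably because it is not safe that early in the RT configuration) and instead derives a weak canary from the address of a stack variable masked with CANARY_MASK. A userspace illustration of that fallback; the CANARY_MASK value below is a placeholder, not the kernel's definition:

#include <stdint.h>
#include <stdio.h>

/* Illustration of the PREEMPT_RT fallback in boot_init_stack_canary() above:
 * instead of asking the RNG, it masks the address of a stack variable.
 * CANARY_MASK here is a stand-in value for demonstration only. */
#define CANARY_MASK 0xffffffffffffff00UL

int main(void)
{
	unsigned long canary;

	canary = ((unsigned long)&canary) & CANARY_MASK;
	printf("stack-derived canary: 0x%lx\n", canary);
	return 0;
}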
(-)a/arch/powerpc/kernel/traps.c (-1 / +6 lines)
Lines 261-272 static char *get_mmu_str(void) Link Here
261
261
262
static int __die(const char *str, struct pt_regs *regs, long err)
262
static int __die(const char *str, struct pt_regs *regs, long err)
263
{
263
{
264
	const char *pr = "";
265
264
	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
266
	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
265
267
268
	if (IS_ENABLED(CONFIG_PREEMPTION))
269
		pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
270
266
	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
271
	printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
267
	       IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
272
	       IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
268
	       PAGE_SIZE / 1024, get_mmu_str(),
273
	       PAGE_SIZE / 1024, get_mmu_str(),
269
	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
274
	       pr,
270
	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
275
	       IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
271
	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
276
	       IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
272
	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
277
	       debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
(-)a/arch/powerpc/kvm/Kconfig (+1 lines)
Lines 221-226 config KVM_E500MC Link Here
221
config KVM_MPIC
221
config KVM_MPIC
222
	bool "KVM in-kernel MPIC emulation"
222
	bool "KVM in-kernel MPIC emulation"
223
	depends on KVM && PPC_E500
223
	depends on KVM && PPC_E500
224
	depends on !PREEMPT_RT
224
	select HAVE_KVM_IRQCHIP
225
	select HAVE_KVM_IRQCHIP
225
	select HAVE_KVM_IRQ_ROUTING
226
	select HAVE_KVM_IRQ_ROUTING
226
	select HAVE_KVM_MSI
227
	select HAVE_KVM_MSI
(-)a/arch/powerpc/platforms/pseries/Kconfig (+1 lines)
Lines 2-7 Link Here
2
config PPC_PSERIES
2
config PPC_PSERIES
3
	depends on PPC64 && PPC_BOOK3S
3
	depends on PPC64 && PPC_BOOK3S
4
	bool "IBM pSeries & new (POWER5-based) iSeries"
4
	bool "IBM pSeries & new (POWER5-based) iSeries"
5
	select GENERIC_ALLOCATOR
5
	select HAVE_PCSPKR_PLATFORM
6
	select HAVE_PCSPKR_PLATFORM
6
	select MPIC
7
	select MPIC
7
	select OF_DYNAMIC
8
	select OF_DYNAMIC
(-)a/arch/powerpc/platforms/pseries/iommu.c (-11 / +20 lines)
Lines 25-30 Link Here
25
#include <linux/of_address.h>
25
#include <linux/of_address.h>
26
#include <linux/iommu.h>
26
#include <linux/iommu.h>
27
#include <linux/rculist.h>
27
#include <linux/rculist.h>
28
#include <linux/local_lock.h>
28
#include <asm/io.h>
29
#include <asm/io.h>
29
#include <asm/prom.h>
30
#include <asm/prom.h>
30
#include <asm/rtas.h>
31
#include <asm/rtas.h>
Lines 206-212 static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, Link Here
206
	return ret;
207
	return ret;
207
}
208
}
208
209
209
static DEFINE_PER_CPU(__be64 *, tce_page);
210
struct tce_page {
211
	__be64 * page;
212
	local_lock_t lock;
213
};
214
static DEFINE_PER_CPU(struct tce_page, tce_page) = {
215
	.lock = INIT_LOCAL_LOCK(lock),
216
};
210
217
211
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
218
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
212
				     long npages, unsigned long uaddr,
219
				     long npages, unsigned long uaddr,
Lines 229-237 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, Link Here
229
		                           direction, attrs);
236
		                           direction, attrs);
230
	}
237
	}
231
238
232
	local_irq_save(flags);	/* to protect tcep and the page behind it */
239
	/* to protect tcep and the page behind it */
240
	local_lock_irqsave(&tce_page.lock, flags);
233
241
234
	tcep = __this_cpu_read(tce_page);
242
	tcep = __this_cpu_read(tce_page.page);
235
243
236
	/* This is safe to do since interrupts are off when we're called
244
	/* This is safe to do since interrupts are off when we're called
237
	 * from iommu_alloc{,_sg}()
245
	 * from iommu_alloc{,_sg}()
Lines 240-251 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, Link Here
240
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
248
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
241
		/* If allocation fails, fall back to the loop implementation */
249
		/* If allocation fails, fall back to the loop implementation */
242
		if (!tcep) {
250
		if (!tcep) {
243
			local_irq_restore(flags);
251
			local_unlock_irqrestore(&tce_page.lock, flags);
244
			return tce_build_pSeriesLP(tbl->it_index, tcenum,
252
			return tce_build_pSeriesLP(tbl->it_index, tcenum,
245
					tceshift,
253
					tceshift,
246
					npages, uaddr, direction, attrs);
254
					npages, uaddr, direction, attrs);
247
		}
255
		}
248
		__this_cpu_write(tce_page, tcep);
256
		__this_cpu_write(tce_page.page, tcep);
249
	}
257
	}
250
258
251
	rpn = __pa(uaddr) >> tceshift;
259
	rpn = __pa(uaddr) >> tceshift;
Lines 275-281 static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, Link Here
275
		tcenum += limit;
283
		tcenum += limit;
276
	} while (npages > 0 && !rc);
284
	} while (npages > 0 && !rc);
277
285
278
	local_irq_restore(flags);
286
	local_unlock_irqrestore(&tce_page.lock, flags);
279
287
280
	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
288
	if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
281
		ret = (int)rc;
289
		ret = (int)rc;
Lines 459-474 static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, Link Here
459
				DMA_BIDIRECTIONAL, 0);
467
				DMA_BIDIRECTIONAL, 0);
460
	}
468
	}
461
469
462
	local_irq_disable();	/* to protect tcep and the page behind it */
470
	/* to protect tcep and the page behind it */
463
	tcep = __this_cpu_read(tce_page);
471
	local_lock_irq(&tce_page.lock);
472
	tcep = __this_cpu_read(tce_page.page);
464
473
465
	if (!tcep) {
474
	if (!tcep) {
466
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
475
		tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
467
		if (!tcep) {
476
		if (!tcep) {
468
			local_irq_enable();
477
			local_unlock_irq(&tce_page.lock);
469
			return -ENOMEM;
478
			return -ENOMEM;
470
		}
479
		}
471
		__this_cpu_write(tce_page, tcep);
480
		__this_cpu_write(tce_page.page, tcep);
472
	}
481
	}
473
482
474
	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
483
	proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
Lines 511-517 static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, Link Here
511
520
512
	/* error cleanup: caller will clear whole range */
521
	/* error cleanup: caller will clear whole range */
513
522
514
	local_irq_enable();
523
	local_unlock_irq(&tce_page.lock);
515
	return rc;
524
	return rc;
516
}
525
}
517
526
(-)a/arch/riscv/Kconfig (+2 lines)
Lines 56-61 config RISCV Link Here
56
	select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000
56
	select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000
57
	select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
57
	select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
58
	select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
58
	select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
59
	select ARCH_SUPPORTS_RT
59
	select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
60
	select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
60
	select ARCH_USE_MEMTEST
61
	select ARCH_USE_MEMTEST
61
	select ARCH_USE_QUEUED_RWLOCKS
62
	select ARCH_USE_QUEUED_RWLOCKS
Lines 152-157 config RISCV Link Here
152
	select HAVE_PERF_USER_STACK_DUMP
153
	select HAVE_PERF_USER_STACK_DUMP
153
	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
154
	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
154
	select HAVE_PREEMPT_DYNAMIC_KEY if !XIP_KERNEL
155
	select HAVE_PREEMPT_DYNAMIC_KEY if !XIP_KERNEL
156
	select HAVE_PREEMPT_AUTO
155
	select HAVE_REGS_AND_STACK_ACCESS_API
157
	select HAVE_REGS_AND_STACK_ACCESS_API
156
	select HAVE_RETHOOK if !XIP_KERNEL
158
	select HAVE_RETHOOK if !XIP_KERNEL
157
	select HAVE_RSEQ
159
	select HAVE_RSEQ
(-)a/arch/riscv/include/asm/thread_info.h (+2 lines)
Lines 94-99 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); Link Here
94
 * - pending work-to-be-done flags are in lowest half-word
94
 * - pending work-to-be-done flags are in lowest half-word
95
 * - other flags in upper half-word(s)
95
 * - other flags in upper half-word(s)
96
 */
96
 */
97
#define TIF_ARCH_RESCHED_LAZY	0	/* Lazy rescheduling */
97
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
98
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
98
#define TIF_SIGPENDING		2	/* signal pending */
99
#define TIF_SIGPENDING		2	/* signal pending */
99
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
100
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
Lines 104-109 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); Link Here
104
#define TIF_32BIT		11	/* compat-mode 32bit process */
105
#define TIF_32BIT		11	/* compat-mode 32bit process */
105
#define TIF_RISCV_V_DEFER_RESTORE	12 /* restore Vector before returing to user */
106
#define TIF_RISCV_V_DEFER_RESTORE	12 /* restore Vector before returing to user */
106
107
108
#define _TIF_ARCH_RESCHED_LAZY	(1 << TIF_ARCH_RESCHED_LAZY)
107
#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
109
#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
108
#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
110
#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
109
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
111
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
(-)a/arch/x86/Kconfig (+3 lines)
Lines 28-33 config X86_64 Link Here
28
	select ARCH_HAS_GIGANTIC_PAGE
28
	select ARCH_HAS_GIGANTIC_PAGE
29
	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
29
	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
30
	select ARCH_SUPPORTS_PER_VMA_LOCK
30
	select ARCH_SUPPORTS_PER_VMA_LOCK
31
	select ARCH_SUPPORTS_RT
31
	select HAVE_ARCH_SOFT_DIRTY
32
	select HAVE_ARCH_SOFT_DIRTY
32
	select MODULES_USE_ELF_RELA
33
	select MODULES_USE_ELF_RELA
33
	select NEED_DMA_MAP_STATE
34
	select NEED_DMA_MAP_STATE
Lines 120-125 config X86 Link Here
120
	select ARCH_USES_CFI_TRAPS		if X86_64 && CFI_CLANG
121
	select ARCH_USES_CFI_TRAPS		if X86_64 && CFI_CLANG
121
	select ARCH_SUPPORTS_LTO_CLANG
122
	select ARCH_SUPPORTS_LTO_CLANG
122
	select ARCH_SUPPORTS_LTO_CLANG_THIN
123
	select ARCH_SUPPORTS_LTO_CLANG_THIN
124
	select ARCH_SUPPORTS_RT
123
	select ARCH_USE_BUILTIN_BSWAP
125
	select ARCH_USE_BUILTIN_BSWAP
124
	select ARCH_USE_CMPXCHG_LOCKREF		if X86_CMPXCHG64
126
	select ARCH_USE_CMPXCHG_LOCKREF		if X86_CMPXCHG64
125
	select ARCH_USE_MEMTEST
127
	select ARCH_USE_MEMTEST
Lines 277-282 config X86 Link Here
277
	select HAVE_STATIC_CALL
279
	select HAVE_STATIC_CALL
278
	select HAVE_STATIC_CALL_INLINE		if HAVE_OBJTOOL
280
	select HAVE_STATIC_CALL_INLINE		if HAVE_OBJTOOL
279
	select HAVE_PREEMPT_DYNAMIC_CALL
281
	select HAVE_PREEMPT_DYNAMIC_CALL
282
	select HAVE_PREEMPT_AUTO
280
	select HAVE_RSEQ
283
	select HAVE_RSEQ
281
	select HAVE_RUST			if X86_64
284
	select HAVE_RUST			if X86_64
282
	select HAVE_SYSCALL_TRACEPOINTS
285
	select HAVE_SYSCALL_TRACEPOINTS
(-)a/arch/x86/include/asm/thread_info.h (-2 / +4 lines)
Lines 87-94 struct thread_info { Link Here
87
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
87
#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
88
#define TIF_SIGPENDING		2	/* signal pending */
88
#define TIF_SIGPENDING		2	/* signal pending */
89
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
89
#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
90
#define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
90
#define TIF_ARCH_RESCHED_LAZY	4	/* Lazy rescheduling */
91
#define TIF_SSBD		5	/* Speculative store bypass disable */
91
#define TIF_SINGLESTEP		5	/* reenable singlestep on user return*/
92
#define TIF_SSBD		6	/* Speculative store bypass disable */
92
#define TIF_SPEC_IB		9	/* Indirect branch speculation mitigation */
93
#define TIF_SPEC_IB		9	/* Indirect branch speculation mitigation */
93
#define TIF_SPEC_L1D_FLUSH	10	/* Flush L1D on mm switches (processes) */
94
#define TIF_SPEC_L1D_FLUSH	10	/* Flush L1D on mm switches (processes) */
94
#define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
95
#define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
Lines 110-115 struct thread_info { Link Here
110
#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
111
#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
111
#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
112
#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
112
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
113
#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
114
#define _TIF_ARCH_RESCHED_LAZY	(1 << TIF_ARCH_RESCHED_LAZY)
113
#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
115
#define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
114
#define _TIF_SSBD		(1 << TIF_SSBD)
116
#define _TIF_SSBD		(1 << TIF_SSBD)
115
#define _TIF_SPEC_IB		(1 << TIF_SPEC_IB)
117
#define _TIF_SPEC_IB		(1 << TIF_SPEC_IB)
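The x86 hunk above reserves TIF_ARCH_RESCHED_LAZY (bit 4) next to TIF_NEED_RESCHED and renumbers TIF_SINGLESTEP/TIF_SSBD to make room. A rough sketch of how the two flags are meant to be consumed on the return-to-user path (the helper below is illustrative only; the real handling lives in the generic entry code, which is not part of this excerpt):

static void exit_to_user_resched_sketch(void)
{
	/*
	 * TIF_NEED_RESCHED still requests an immediate reschedule.
	 * TIF_ARCH_RESCHED_LAZY only asks for one at the next convenient
	 * point, such as this return to user space, so kernel code is not
	 * preempted in the middle of its work.
	 */
	if (test_thread_flag(TIF_NEED_RESCHED) ||
	    test_thread_flag(TIF_ARCH_RESCHED_LAZY))
		schedule();
}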
(-)a/drivers/acpi/processor_idle.c (-1 / +1 lines)
Lines 108-114 static const struct dmi_system_id processor_power_dmi_table[] = { Link Here
108
 */
108
 */
109
static void __cpuidle acpi_safe_halt(void)
109
static void __cpuidle acpi_safe_halt(void)
110
{
110
{
111
	if (!tif_need_resched()) {
111
	if (!need_resched()) {
112
		raw_safe_halt();
112
		raw_safe_halt();
113
		raw_local_irq_disable();
113
		raw_local_irq_disable();
114
	}
114
	}
(-)a/drivers/block/zram/zram_drv.c (+37 lines)
Lines 57-62 static void zram_free_page(struct zram *zram, size_t index); Link Here
57
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
57
static int zram_read_page(struct zram *zram, struct page *page, u32 index,
58
			  struct bio *parent);
58
			  struct bio *parent);
59
59
60
#ifdef CONFIG_PREEMPT_RT
61
static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages)
62
{
63
	size_t index;
64
65
	for (index = 0; index < num_pages; index++)
66
		spin_lock_init(&zram->table[index].lock);
67
}
68
69
static int zram_slot_trylock(struct zram *zram, u32 index)
70
{
71
	int ret;
72
73
	ret = spin_trylock(&zram->table[index].lock);
74
	if (ret)
75
		__set_bit(ZRAM_LOCK, &zram->table[index].flags);
76
	return ret;
77
}
78
79
static void zram_slot_lock(struct zram *zram, u32 index)
80
{
81
	spin_lock(&zram->table[index].lock);
82
	__set_bit(ZRAM_LOCK, &zram->table[index].flags);
83
}
84
85
static void zram_slot_unlock(struct zram *zram, u32 index)
86
{
87
	__clear_bit(ZRAM_LOCK, &zram->table[index].flags);
88
	spin_unlock(&zram->table[index].lock);
89
}
90
91
#else
92
93
static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { }
94
60
static int zram_slot_trylock(struct zram *zram, u32 index)
95
static int zram_slot_trylock(struct zram *zram, u32 index)
61
{
96
{
62
	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
97
	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
Lines 71-76 static void zram_slot_unlock(struct zram *zram, u32 index) Link Here
71
{
106
{
72
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
107
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
73
}
108
}
109
#endif
74
110
75
static inline bool init_done(struct zram *zram)
111
static inline bool init_done(struct zram *zram)
76
{
112
{
Lines 1241-1246 static bool zram_meta_alloc(struct zram *zram, u64 disksize) Link Here
1241
1277
1242
	if (!huge_class_size)
1278
	if (!huge_class_size)
1243
		huge_class_size = zs_huge_class_size(zram->mem_pool);
1279
		huge_class_size = zs_huge_class_size(zram->mem_pool);
1280
	zram_meta_init_table_locks(zram, num_pages);
1244
	return true;
1281
	return true;
1245
}
1282
}
1246
1283
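With the zram change above, PREEMPT_RT builds protect each table slot with a real spinlock_t instead of the ZRAM_LOCK bit spinlock, while callers keep using the same zram_slot_lock()/zram_slot_unlock() helpers. A minimal usage sketch (the helper itself is hypothetical and not part of the driver):

static unsigned long zram_slot_peek_element(struct zram *zram, u32 index)
{
	unsigned long element;

	zram_slot_lock(zram, index);
	element = zram->table[index].element;
	zram_slot_unlock(zram, index);

	return element;
}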
(-)a/drivers/block/zram/zram_drv.h (+3 lines)
Lines 69-74 struct zram_table_entry { Link Here
69
		unsigned long element;
69
		unsigned long element;
70
	};
70
	};
71
	unsigned long flags;
71
	unsigned long flags;
72
#ifdef CONFIG_PREEMPT_RT
73
	spinlock_t lock;
74
#endif
72
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
75
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
73
	ktime_t ac_time;
76
	ktime_t ac_time;
74
#endif
77
#endif
(-)a/drivers/gpu/drm/i915/Kconfig (-1 lines)
Lines 3-9 config DRM_I915 Link Here
3
	tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
3
	tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics"
4
	depends on DRM
4
	depends on DRM
5
	depends on X86 && PCI
5
	depends on X86 && PCI
6
	depends on !PREEMPT_RT
7
	select INTEL_GTT if X86
6
	select INTEL_GTT if X86
8
	select INTERVAL_TREE
7
	select INTERVAL_TREE
9
	# we need shmfs for the swappable backing store, and in particular
8
	# we need shmfs for the swappable backing store, and in particular
(-)a/drivers/gpu/drm/i915/display/intel_crtc.c (-3 / +6 lines)
Lines 512-518 void intel_pipe_update_start(struct intel_atomic_state *state, Link Here
512
	 */
512
	 */
513
	intel_psr_wait_for_idle_locked(new_crtc_state);
513
	intel_psr_wait_for_idle_locked(new_crtc_state);
514
514
515
	local_irq_disable();
515
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
516
		local_irq_disable();
516
517
517
	crtc->debug.min_vbl = evade.min;
518
	crtc->debug.min_vbl = evade.min;
518
	crtc->debug.max_vbl = evade.max;
519
	crtc->debug.max_vbl = evade.max;
Lines 530-536 void intel_pipe_update_start(struct intel_atomic_state *state, Link Here
530
	return;
531
	return;
531
532
532
irq_disable:
533
irq_disable:
533
	local_irq_disable();
534
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
535
		local_irq_disable();
534
}
536
}
535
537
536
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
538
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE)
Lines 632-638 void intel_pipe_update_end(struct intel_atomic_state *state, Link Here
632
	 */
634
	 */
633
	intel_vrr_send_push(new_crtc_state);
635
	intel_vrr_send_push(new_crtc_state);
634
636
635
	local_irq_enable();
637
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
638
		local_irq_enable();
636
639
637
	if (intel_vgpu_active(dev_priv))
640
	if (intel_vgpu_active(dev_priv))
638
		goto out;
641
		goto out;
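The intel_crtc.c hunks keep hard interrupts enabled across the pipe update on PREEMPT_RT. Reduced to its core, the pattern is (illustrative sketch only):

static void pipe_update_critical_section_sketch(void)
{
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_irq_disable();

	/* timing-sensitive register programming runs here */

	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		local_irq_enable();
}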
(-)a/drivers/gpu/drm/i915/display/intel_display_trace.h (+4 lines)
Lines 9-14 Link Here
9
#if !defined(__INTEL_DISPLAY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
9
#if !defined(__INTEL_DISPLAY_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ)
10
#define __INTEL_DISPLAY_TRACE_H__
10
#define __INTEL_DISPLAY_TRACE_H__
11
11
12
#if defined(CONFIG_PREEMPT_RT) && !defined(NOTRACE)
13
#define NOTRACE
14
#endif
15
12
#include <linux/string_helpers.h>
16
#include <linux/string_helpers.h>
13
#include <linux/types.h>
17
#include <linux/types.h>
14
#include <linux/tracepoint.h>
18
#include <linux/tracepoint.h>
(-)a/drivers/gpu/drm/i915/display/intel_vblank.c (-12 / +32 lines)
Lines 276-281 int intel_crtc_scanline_to_hw(struct intel_crtc *crtc, int scanline) Link Here
276
 * all register accesses to the same cacheline to be serialized,
276
 * all register accesses to the same cacheline to be serialized,
277
 * otherwise they may hang.
277
 * otherwise they may hang.
278
 */
278
 */
279
static void intel_vblank_section_enter_irqsave(struct drm_i915_private *i915, unsigned long *flags)
280
	__acquires(i915->uncore.lock)
281
{
282
#ifdef I915
283
	spin_lock_irqsave(&i915->uncore.lock, *flags);
284
#else
285
	*flags = 0;
286
#endif
287
}
288
289
static void intel_vblank_section_exit_irqrestore(struct drm_i915_private *i915, unsigned long flags)
290
	__releases(i915->uncore.lock)
291
{
292
#ifdef I915
293
	spin_unlock_irqrestore(&i915->uncore.lock, flags);
294
#else
295
	if (flags)
296
		return;
297
#endif
298
}
279
static void intel_vblank_section_enter(struct drm_i915_private *i915)
299
static void intel_vblank_section_enter(struct drm_i915_private *i915)
280
	__acquires(i915->uncore.lock)
300
	__acquires(i915->uncore.lock)
281
{
301
{
Lines 333-342 static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, Link Here
333
	 * timing critical raw register reads, potentially with
353
	 * timing critical raw register reads, potentially with
334
	 * preemption disabled, so the following code must not block.
354
	 * preemption disabled, so the following code must not block.
335
	 */
355
	 */
336
	local_irq_save(irqflags);
356
	intel_vblank_section_enter_irqsave(dev_priv, &irqflags);
337
	intel_vblank_section_enter(dev_priv);
338
357
339
	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
358
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
359
		preempt_disable();
340
360
341
	/* Get optional system timestamp before query. */
361
	/* Get optional system timestamp before query. */
342
	if (stime)
362
	if (stime)
Lines 400-409 static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, Link Here
400
	if (etime)
420
	if (etime)
401
		*etime = ktime_get();
421
		*etime = ktime_get();
402
422
403
	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
423
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
424
		preempt_enable();
404
425
405
	intel_vblank_section_exit(dev_priv);
426
	intel_vblank_section_exit_irqrestore(dev_priv, irqflags);
406
	local_irq_restore(irqflags);
407
427
408
	/*
428
	/*
409
	 * While in vblank, position will be negative
429
	 * While in vblank, position will be negative
Lines 441-453 int intel_get_crtc_scanline(struct intel_crtc *crtc) Link Here
441
	unsigned long irqflags;
461
	unsigned long irqflags;
442
	int position;
462
	int position;
443
463
444
	local_irq_save(irqflags);
464
	intel_vblank_section_enter_irqsave(dev_priv, &irqflags);
445
	intel_vblank_section_enter(dev_priv);
446
465
447
	position = __intel_get_crtc_scanline(crtc);
466
	position = __intel_get_crtc_scanline(crtc);
448
467
449
	intel_vblank_section_exit(dev_priv);
468
	intel_vblank_section_exit_irqrestore(dev_priv, irqflags);
450
	local_irq_restore(irqflags);
451
469
452
	return position;
470
	return position;
453
}
471
}
Lines 682-692 int intel_vblank_evade(struct intel_vblank_evade_ctx *evade) Link Here
682
			break;
700
			break;
683
		}
701
		}
684
702
685
		local_irq_enable();
703
		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
704
			local_irq_enable();
686
705
687
		timeout = schedule_timeout(timeout);
706
		timeout = schedule_timeout(timeout);
688
707
689
		local_irq_disable();
708
		if (!IS_ENABLED(CONFIG_PREEMPT_RT))
709
			local_irq_disable();
690
	}
710
	}
691
711
692
	finish_wait(wq, &wait);
712
	finish_wait(wq, &wait);
(-)a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c (-3 / +2 lines)
Lines 317-326 void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) Link Here
317
	/* Kick the work once more to drain the signalers, and disarm the irq */
317
	/* Kick the work once more to drain the signalers, and disarm the irq */
318
	irq_work_sync(&b->irq_work);
318
	irq_work_sync(&b->irq_work);
319
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
319
	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
320
		local_irq_disable();
320
		irq_work_queue(&b->irq_work);
321
		signal_irq_work(&b->irq_work);
322
		local_irq_enable();
323
		cond_resched();
321
		cond_resched();
322
		irq_work_sync(&b->irq_work);
324
	}
323
	}
325
}
324
}
326
325
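In the breadcrumbs change above, the irq_work handler is no longer called directly inside a local_irq_disable() section (which PREEMPT_RT cannot tolerate); the work is queued and then waited for. The resulting pattern in isolation (sketch, surrounding driver state omitted):

static void kick_and_wait_sketch(struct irq_work *work)
{
	irq_work_queue(work);	/* let the handler run in its normal context */
	cond_resched();
	irq_work_sync(work);	/* wait for it to finish before re-checking */
}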
(-)a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c (-12 / +5 lines)
Lines 1303-1309 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1303
	 * and context switches) submission.
1303
	 * and context switches) submission.
1304
	 */
1304
	 */
1305
1305
1306
	spin_lock(&sched_engine->lock);
1306
	spin_lock_irq(&sched_engine->lock);
1307
1307
1308
	/*
1308
	/*
1309
	 * If the queue is higher priority than the last
1309
	 * If the queue is higher priority than the last
Lines 1403-1409 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1403
				 * Even if ELSP[1] is occupied and not worthy
1403
				 * Even if ELSP[1] is occupied and not worthy
1404
				 * of timeslices, our queue might be.
1404
				 * of timeslices, our queue might be.
1405
				 */
1405
				 */
1406
				spin_unlock(&sched_engine->lock);
1406
				spin_unlock_irq(&sched_engine->lock);
1407
				return;
1407
				return;
1408
			}
1408
			}
1409
		}
1409
		}
Lines 1429-1435 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1429
1429
1430
		if (last && !can_merge_rq(last, rq)) {
1430
		if (last && !can_merge_rq(last, rq)) {
1431
			spin_unlock(&ve->base.sched_engine->lock);
1431
			spin_unlock(&ve->base.sched_engine->lock);
1432
			spin_unlock(&engine->sched_engine->lock);
1432
			spin_unlock_irq(&engine->sched_engine->lock);
1433
			return; /* leave this for another sibling */
1433
			return; /* leave this for another sibling */
1434
		}
1434
		}
1435
1435
Lines 1591-1597 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1591
	 */
1591
	 */
1592
	sched_engine->queue_priority_hint = queue_prio(sched_engine);
1592
	sched_engine->queue_priority_hint = queue_prio(sched_engine);
1593
	i915_sched_engine_reset_on_empty(sched_engine);
1593
	i915_sched_engine_reset_on_empty(sched_engine);
1594
	spin_unlock(&sched_engine->lock);
1594
	spin_unlock_irq(&sched_engine->lock);
1595
1595
1596
	/*
1596
	/*
1597
	 * We can skip poking the HW if we ended up with exactly the same set
1597
	 * We can skip poking the HW if we ended up with exactly the same set
Lines 1617-1629 static void execlists_dequeue(struct intel_engine_cs *engine) Link Here
1617
	}
1617
	}
1618
}
1618
}
1619
1619
1620
static void execlists_dequeue_irq(struct intel_engine_cs *engine)
1621
{
1622
	local_irq_disable(); /* Suspend interrupts across request submission */
1623
	execlists_dequeue(engine);
1624
	local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */
1625
}
1626
1627
static void clear_ports(struct i915_request **ports, int count)
1620
static void clear_ports(struct i915_request **ports, int count)
1628
{
1621
{
1629
	memset_p((void **)ports, NULL, count);
1622
	memset_p((void **)ports, NULL, count);
Lines 2478-2484 static void execlists_submission_tasklet(struct tasklet_struct *t) Link Here
2478
	}
2471
	}
2479
2472
2480
	if (!engine->execlists.pending[0]) {
2473
	if (!engine->execlists.pending[0]) {
2481
		execlists_dequeue_irq(engine);
2474
		execlists_dequeue(engine);
2482
		start_timeslice(engine);
2475
		start_timeslice(engine);
2483
	}
2476
	}
2484
2477
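The execlists change drops the local_irq_disable()/local_irq_enable() wrapper and takes the engine lock with its irq-disabling variant instead, which is also what spinlock users are expected to do on PREEMPT_RT. The locking change reduces to this (sketch with a generic lock):

static void dequeue_locking_sketch(spinlock_t *lock)
{
	/*
	 * Before: local_irq_disable(); spin_lock(lock); ...; spin_unlock(lock); local_irq_enable();
	 * After:  the lock primitive manages the interrupt state itself.
	 */
	spin_lock_irq(lock);
	/* ... dequeue work serialized by the engine lock ... */
	spin_unlock_irq(lock);
}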
(-)a/drivers/gpu/drm/i915/gt/uc/intel_guc.h (-1 / +1 lines)
Lines 360-366 static inline int intel_guc_send_busy_loop(struct intel_guc *guc, Link Here
360
{
360
{
361
	int err;
361
	int err;
362
	unsigned int sleep_period_ms = 1;
362
	unsigned int sleep_period_ms = 1;
363
	bool not_atomic = !in_atomic() && !irqs_disabled();
363
	bool not_atomic = !in_atomic() && !irqs_disabled() && !rcu_preempt_depth();
364
364
365
	/*
365
	/*
366
	 * FIXME: Have caller pass in if we are in an atomic context to avoid
366
	 * FIXME: Have caller pass in if we are in an atomic context to avoid
(-)a/drivers/gpu/drm/i915/i915_request.c (-2 lines)
Lines 608-614 bool __i915_request_submit(struct i915_request *request) Link Here
608
608
609
	RQ_TRACE(request, "\n");
609
	RQ_TRACE(request, "\n");
610
610
611
	GEM_BUG_ON(!irqs_disabled());
612
	lockdep_assert_held(&engine->sched_engine->lock);
611
	lockdep_assert_held(&engine->sched_engine->lock);
613
612
614
	/*
613
	/*
Lines 717-723 void __i915_request_unsubmit(struct i915_request *request) Link Here
717
	 */
716
	 */
718
	RQ_TRACE(request, "\n");
717
	RQ_TRACE(request, "\n");
719
718
720
	GEM_BUG_ON(!irqs_disabled());
721
	lockdep_assert_held(&engine->sched_engine->lock);
719
	lockdep_assert_held(&engine->sched_engine->lock);
722
720
723
	/*
721
	/*
(-)a/drivers/gpu/drm/i915/i915_trace.h (+4 lines)
Lines 6-11 Link Here
6
#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
6
#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
7
#define _I915_TRACE_H_
7
#define _I915_TRACE_H_
8
8
9
#if defined(CONFIG_PREEMPT_RT) && !defined(NOTRACE)
10
#define NOTRACE
11
#endif
12
9
#include <linux/stringify.h>
13
#include <linux/stringify.h>
10
#include <linux/types.h>
14
#include <linux/types.h>
11
#include <linux/tracepoint.h>
15
#include <linux/tracepoint.h>
(-)a/drivers/gpu/drm/i915/i915_utils.h (-1 / +1 lines)
Lines 288-294 wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) Link Here
288
#define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
288
#define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
289
289
290
/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
290
/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
291
#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
291
#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT)
292
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
292
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
293
#else
293
#else
294
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
294
# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
(-)a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c (-1 / +7 lines)
Lines 18-23 Link Here
18
18
19
#define BO_SIZE		SZ_8K
19
#define BO_SIZE		SZ_8K
20
20
21
#ifdef CONFIG_PREEMPT_RT
22
#define ww_mutex_base_lock(b)			rt_mutex_lock(b)
23
#else
24
#define ww_mutex_base_lock(b)			mutex_lock(b)
25
#endif
26
21
struct ttm_bo_test_case {
27
struct ttm_bo_test_case {
22
	const char *description;
28
	const char *description;
23
	bool interruptible;
29
	bool interruptible;
Lines 142-148 static void ttm_bo_reserve_deadlock(struct kunit *test) Link Here
142
	bo2 = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
148
	bo2 = ttm_bo_kunit_init(test, test->priv, BO_SIZE);
143
149
144
	ww_acquire_init(&ctx1, &reservation_ww_class);
150
	ww_acquire_init(&ctx1, &reservation_ww_class);
145
	mutex_lock(&bo2->base.resv->lock.base);
151
	ww_mutex_base_lock(&bo2->base.resv->lock.base);
146
152
147
	/* The deadlock will be caught by WW mutex, don't warn about it */
153
	/* The deadlock will be caught by WW mutex, don't warn about it */
148
	lock_release(&bo2->base.resv->lock.base.dep_map, 1);
154
	lock_release(&bo2->base.resv->lock.base.dep_map, 1);
(-)a/drivers/tty/serial/8250/8250_core.c (-4 / +44 lines)
Lines 592-597 serial8250_register_ports(struct uart_driver *drv, struct device *dev) Link Here
592
592
593
#ifdef CONFIG_SERIAL_8250_CONSOLE
593
#ifdef CONFIG_SERIAL_8250_CONSOLE
594
594
595
#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE
595
static void univ8250_console_write(struct console *co, const char *s,
596
static void univ8250_console_write(struct console *co, const char *s,
596
				   unsigned int count)
597
				   unsigned int count)
597
{
598
{
Lines 599-604 static void univ8250_console_write(struct console *co, const char *s, Link Here
599
600
600
	serial8250_console_write(up, s, count);
601
	serial8250_console_write(up, s, count);
601
}
602
}
603
#else
604
static void univ8250_console_write_atomic(struct console *co,
605
					  struct nbcon_write_context *wctxt)
606
{
607
	struct uart_8250_port *up = &serial8250_ports[co->index];
608
609
	serial8250_console_write_atomic(up, wctxt);
610
}
611
612
static void univ8250_console_write_thread(struct console *co,
613
					  struct nbcon_write_context *wctxt)
614
{
615
	struct uart_8250_port *up = &serial8250_ports[co->index];
616
617
	serial8250_console_write_thread(up, wctxt);
618
}
619
620
static void univ8250_console_device_lock(struct console *con, unsigned long *flags)
621
{
622
	struct uart_port *up = &serial8250_ports[con->index].port;
623
624
	__uart_port_lock_irqsave(up, flags);
625
}
626
627
static void univ8250_console_device_unlock(struct console *con, unsigned long flags)
628
{
629
	struct uart_port *up = &serial8250_ports[con->index].port;
630
631
	__uart_port_unlock_irqrestore(up, flags);
632
}
633
#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */
602
634
603
static int univ8250_console_setup(struct console *co, char *options)
635
static int univ8250_console_setup(struct console *co, char *options)
604
{
636
{
Lines 627-637 static int univ8250_console_setup(struct console *co, char *options) Link Here
627
659
628
	port = &serial8250_ports[co->index].port;
660
	port = &serial8250_ports[co->index].port;
629
	/* link port to console */
661
	/* link port to console */
630
	port->cons = co;
662
	uart_port_set_cons(port, co);
631
663
632
	retval = serial8250_console_setup(port, options, false);
664
	retval = serial8250_console_setup(port, options, false);
633
	if (retval != 0)
665
	if (retval != 0)
634
		port->cons = NULL;
666
		uart_port_set_cons(port, NULL);
635
	return retval;
667
	return retval;
636
}
668
}
637
669
Lines 689-695 static int univ8250_console_match(struct console *co, char *name, int idx, Link Here
689
			continue;
721
			continue;
690
722
691
		co->index = i;
723
		co->index = i;
692
		port->cons = co;
724
		uart_port_set_cons(port, co);
693
		return serial8250_console_setup(port, options, true);
725
		return serial8250_console_setup(port, options, true);
694
	}
726
	}
695
727
Lines 698-709 static int univ8250_console_match(struct console *co, char *name, int idx, Link Here
698
730
699
static struct console univ8250_console = {
731
static struct console univ8250_console = {
700
	.name		= "ttyS",
732
	.name		= "ttyS",
733
#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE
701
	.write		= univ8250_console_write,
734
	.write		= univ8250_console_write,
735
	.flags		= CON_PRINTBUFFER | CON_ANYTIME,
736
#else
737
	.write_atomic	= univ8250_console_write_atomic,
738
	.write_thread	= univ8250_console_write_thread,
739
	.device_lock	= univ8250_console_device_lock,
740
	.device_unlock	= univ8250_console_device_unlock,
741
	.flags		= CON_PRINTBUFFER | CON_ANYTIME | CON_NBCON,
742
#endif
702
	.device		= uart_console_device,
743
	.device		= uart_console_device,
703
	.setup		= univ8250_console_setup,
744
	.setup		= univ8250_console_setup,
704
	.exit		= univ8250_console_exit,
745
	.exit		= univ8250_console_exit,
705
	.match		= univ8250_console_match,
746
	.match		= univ8250_console_match,
706
	.flags		= CON_PRINTBUFFER | CON_ANYTIME,
707
	.index		= -1,
747
	.index		= -1,
708
	.data		= &serial8250_reg,
748
	.data		= &serial8250_reg,
709
};
749
};
(-)a/drivers/tty/serial/8250/8250_port.c (-2 / +155 lines)
Lines 546-551 static int serial8250_em485_init(struct uart_8250_port *p) Link Here
546
	if (!p->em485)
546
	if (!p->em485)
547
		return -ENOMEM;
547
		return -ENOMEM;
548
548
549
#ifndef CONFIG_SERIAL_8250_LEGACY_CONSOLE
550
	if (uart_console(&p->port)) {
551
		dev_warn(p->port.dev, "no atomic printing for rs485 consoles\n");
552
		p->port.cons->write_atomic = NULL;
553
	}
554
#endif
555
549
	hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC,
556
	hrtimer_init(&p->em485->stop_tx_timer, CLOCK_MONOTONIC,
550
		     HRTIMER_MODE_REL);
557
		     HRTIMER_MODE_REL);
551
	hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC,
558
	hrtimer_init(&p->em485->start_tx_timer, CLOCK_MONOTONIC,
Lines 698-704 static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) Link Here
698
	serial8250_rpm_put(p);
705
	serial8250_rpm_put(p);
699
}
706
}
700
707
701
static void serial8250_clear_IER(struct uart_8250_port *up)
708
/*
709
 * Only to be used by write_atomic() and the legacy write(), which do not
710
 * require port lock.
711
 */
712
static void __serial8250_clear_IER(struct uart_8250_port *up)
702
{
713
{
703
	if (up->capabilities & UART_CAP_UUE)
714
	if (up->capabilities & UART_CAP_UUE)
704
		serial_out(up, UART_IER, UART_IER_UUE);
715
		serial_out(up, UART_IER, UART_IER_UUE);
Lines 706-711 static void serial8250_clear_IER(struct uart_8250_port *up) Link Here
706
		serial_out(up, UART_IER, 0);
717
		serial_out(up, UART_IER, 0);
707
}
718
}
708
719
720
static inline void serial8250_clear_IER(struct uart_8250_port *up)
721
{
722
	/* Port locked to synchronize UART_IER access against the console. */
723
	lockdep_assert_held_once(&up->port.lock);
724
725
	__serial8250_clear_IER(up);
726
}
727
709
#ifdef CONFIG_SERIAL_8250_RSA
728
#ifdef CONFIG_SERIAL_8250_RSA
710
/*
729
/*
711
 * Attempts to turn on the RSA FIFO.  Returns zero on failure.
730
 * Attempts to turn on the RSA FIFO.  Returns zero on failure.
Lines 3272-3277 static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) Link Here
3272
3291
3273
	wait_for_xmitr(up, UART_LSR_THRE);
3292
	wait_for_xmitr(up, UART_LSR_THRE);
3274
	serial_port_out(port, UART_TX, ch);
3293
	serial_port_out(port, UART_TX, ch);
3294
3295
	if (ch == '\n')
3296
		up->console_newline_needed = false;
3297
	else
3298
		up->console_newline_needed = true;
3275
}
3299
}
3276
3300
3277
/*
3301
/*
Lines 3300-3305 static void serial8250_console_restore(struct uart_8250_port *up) Link Here
3300
	serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
3324
	serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS);
3301
}
3325
}
3302
3326
3327
#ifdef CONFIG_SERIAL_8250_LEGACY_CONSOLE
3303
/*
3328
/*
3304
 * Print a string to the serial port using the device FIFO
3329
 * Print a string to the serial port using the device FIFO
3305
 *
3330
 *
Lines 3358-3364 void serial8250_console_write(struct uart_8250_port *up, const char *s, Link Here
3358
	 *	First save the IER then disable the interrupts
3383
	 *	First save the IER then disable the interrupts
3359
	 */
3384
	 */
3360
	ier = serial_port_in(port, UART_IER);
3385
	ier = serial_port_in(port, UART_IER);
3361
	serial8250_clear_IER(up);
3386
	__serial8250_clear_IER(up);
3362
3387
3363
	/* check scratch reg to see if port powered off during system sleep */
3388
	/* check scratch reg to see if port powered off during system sleep */
3364
	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
3389
	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
Lines 3424-3429 void serial8250_console_write(struct uart_8250_port *up, const char *s, Link Here
3424
	if (locked)
3449
	if (locked)
3425
		uart_port_unlock_irqrestore(port, flags);
3450
		uart_port_unlock_irqrestore(port, flags);
3426
}
3451
}
3452
#else
3453
void serial8250_console_write_thread(struct uart_8250_port *up,
3454
				     struct nbcon_write_context *wctxt)
3455
{
3456
	struct uart_8250_em485 *em485 = up->em485;
3457
	struct uart_port *port = &up->port;
3458
	unsigned int ier;
3459
3460
	touch_nmi_watchdog();
3461
3462
	if (!nbcon_enter_unsafe(wctxt))
3463
		return;
3464
3465
	/* First save IER then disable the interrupts. */
3466
	ier = serial_port_in(port, UART_IER);
3467
	serial8250_clear_IER(up);
3468
3469
	/* Check scratch reg if port powered off during system sleep. */
3470
	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
3471
		serial8250_console_restore(up);
3472
		up->canary = 0;
3473
	}
3474
3475
	if (em485) {
3476
		if (em485->tx_stopped)
3477
			up->rs485_start_tx(up);
3478
		mdelay(port->rs485.delay_rts_before_send);
3479
	}
3480
3481
	if (nbcon_exit_unsafe(wctxt)) {
3482
		int len = READ_ONCE(wctxt->len);
3483
		int i;
3484
3485
		/*
3486
		 * Write out the message. Toggle unsafe for each byte in order
3487
		 * to give another (higher priority) context the opportunity
3488
		 * for a friendly takeover. If such a takeover occurs, this
3489
		 * context must reacquire ownership in order to perform final
3490
		 * actions (such as re-enabling the interrupts).
3491
		 *
3492
		 * IMPORTANT: wctxt->outbuf and wctxt->len are no longer valid
3493
		 *	      after a reacquire so writing the message must be
3494
		 *	      aborted.
3495
		 */
3496
		for (i = 0; i < len; i++) {
3497
			if (!nbcon_enter_unsafe(wctxt)) {
3498
				nbcon_reacquire(wctxt);
3499
				break;
3500
			}
3501
3502
			uart_console_write(port, wctxt->outbuf + i, 1, serial8250_console_putchar);
3503
3504
			if (!nbcon_exit_unsafe(wctxt)) {
3505
				nbcon_reacquire(wctxt);
3506
				break;
3507
			}
3508
		}
3509
	} else {
3510
		nbcon_reacquire(wctxt);
3511
	}
3512
3513
	while (!nbcon_enter_unsafe(wctxt))
3514
		nbcon_reacquire(wctxt);
3515
3516
	/* Finally, wait for transmitter to become empty and restore IER. */
3517
	wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
3518
	if (em485) {
3519
		mdelay(port->rs485.delay_rts_after_send);
3520
		if (em485->tx_stopped)
3521
			up->rs485_stop_tx(up);
3522
	}
3523
	serial_port_out(port, UART_IER, ier);
3524
3525
	/*
3526
	 * The receive handling will happen properly because the receive ready
3527
	 * bit will still be set; it is not cleared on read.  However, modem
3528
	 * control will not, we must call it if we have saved something in the
3529
	 * saved flags while processing with interrupts off.
3530
	 */
3531
	if (up->msr_saved_flags)
3532
		serial8250_modem_status(up);
3533
3534
	nbcon_exit_unsafe(wctxt);
3535
}
3536
3537
void serial8250_console_write_atomic(struct uart_8250_port *up,
3538
				     struct nbcon_write_context *wctxt)
3539
{
3540
	struct uart_port *port = &up->port;
3541
	unsigned int ier;
3542
3543
	/* Atomic console not supported for rs485 mode. */
3544
	if (WARN_ON_ONCE(up->em485))
3545
		return;
3546
3547
	touch_nmi_watchdog();
3548
3549
	if (!nbcon_enter_unsafe(wctxt))
3550
		return;
3551
3552
	/*
3553
	 * First save IER then disable the interrupts. The special variant to
3554
	 * clear IER is used because atomic printing may occur without holding
3555
	 * the port lock.
3556
	 */
3557
	ier = serial_port_in(port, UART_IER);
3558
	__serial8250_clear_IER(up);
3559
3560
	/* Check scratch reg if port powered off during system sleep. */
3561
	if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
3562
		serial8250_console_restore(up);
3563
		up->canary = 0;
3564
	}
3565
3566
	if (up->console_newline_needed)
3567
		uart_console_write(port, "\n", 1, serial8250_console_putchar);
3568
	uart_console_write(port, wctxt->outbuf, wctxt->len, serial8250_console_putchar);
3569
3570
	/* Finally, wait for transmitter to become empty and restore IER. */
3571
	wait_for_xmitr(up, UART_LSR_BOTH_EMPTY);
3572
	serial_port_out(port, UART_IER, ier);
3573
3574
	nbcon_exit_unsafe(wctxt);
3575
}
3576
#endif /* CONFIG_SERIAL_8250_LEGACY_CONSOLE */
3427
3577
3428
static unsigned int probe_baud(struct uart_port *port)
3578
static unsigned int probe_baud(struct uart_port *port)
3429
{
3579
{
Lines 3442-3447 static unsigned int probe_baud(struct uart_port *port) Link Here
3442
3592
3443
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
3593
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
3444
{
3594
{
3595
	struct uart_8250_port *up = up_to_u8250p(port);
3445
	int baud = 9600;
3596
	int baud = 9600;
3446
	int bits = 8;
3597
	int bits = 8;
3447
	int parity = 'n';
3598
	int parity = 'n';
Lines 3451-3456 int serial8250_console_setup(struct uart_port *port, char *options, bool probe) Link Here
3451
	if (!port->iobase && !port->membase)
3602
	if (!port->iobase && !port->membase)
3452
		return -ENODEV;
3603
		return -ENODEV;
3453
3604
3605
	up->console_newline_needed = false;
3606
3454
	if (options)
3607
	if (options)
3455
		uart_parse_options(options, &baud, &parity, &bits, &flow);
3608
		uart_parse_options(options, &baud, &parity, &bits, &flow);
3456
	else if (probe)
3609
	else if (probe)
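serial8250_console_write_thread() above toggles the unsafe window around every character so a higher-priority printer can take over between bytes. The inner loop reduces to this skeleton (emit_char() stands in for the real uart_console_write() call and is not an actual function):

static void emit_unsafe_per_char_sketch(struct nbcon_write_context *wctxt)
{
	int len = READ_ONCE(wctxt->len);
	int i;

	for (i = 0; i < len; i++) {
		if (!nbcon_enter_unsafe(wctxt)) {
			nbcon_reacquire(wctxt);	/* ownership lost: stop printing */
			return;
		}

		emit_char(wctxt->outbuf[i]);	/* hypothetical hardware write */

		if (!nbcon_exit_unsafe(wctxt)) {
			nbcon_reacquire(wctxt);
			return;
		}
	}
}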
(-)a/drivers/tty/serial/amba-pl011.c (-1 / +1 lines)
Lines 2488-2494 static int pl011_console_match(struct console *co, char *name, int idx, Link Here
2488
			continue;
2488
			continue;
2489
2489
2490
		co->index = i;
2490
		co->index = i;
2491
		port->cons = co;
2491
		uart_port_set_cons(port, co);
2492
		return pl011_console_setup(co, options);
2492
		return pl011_console_setup(co, options);
2493
	}
2493
	}
2494
2494
(-)a/drivers/tty/serial/serial_core.c (-8 / +8 lines)
Lines 3172-3179 static int serial_core_add_one_port(struct uart_driver *drv, struct uart_port *u Link Here
3172
	state->uart_port = uport;
3172
	state->uart_port = uport;
3173
	uport->state = state;
3173
	uport->state = state;
3174
3174
3175
	/*
3176
	 * If this port is in use as a console then the spinlock is already
3177
	 * initialised.
3178
	 */
3179
	if (!uart_console_registered(uport))
3180
		uart_port_spin_lock_init(uport);
3181
3175
	state->pm_state = UART_PM_STATE_UNDEFINED;
3182
	state->pm_state = UART_PM_STATE_UNDEFINED;
3176
	uport->cons = drv->cons;
3183
	uart_port_set_cons(uport, drv->cons);
3177
	uport->minor = drv->tty_driver->minor_start + uport->line;
3184
	uport->minor = drv->tty_driver->minor_start + uport->line;
3178
	uport->name = kasprintf(GFP_KERNEL, "%s%d", drv->dev_name,
3185
	uport->name = kasprintf(GFP_KERNEL, "%s%d", drv->dev_name,
3179
				drv->tty_driver->name_base + uport->line);
3186
				drv->tty_driver->name_base + uport->line);
Lines 3182-3194 static int serial_core_add_one_port(struct uart_driver *drv, struct uart_port *u Link Here
3182
		goto out;
3189
		goto out;
3183
	}
3190
	}
3184
3191
3185
	/*
3186
	 * If this port is in use as a console then the spinlock is already
3187
	 * initialised.
3188
	 */
3189
	if (!uart_console_registered(uport))
3190
		uart_port_spin_lock_init(uport);
3191
3192
	if (uport->cons && uport->dev)
3192
	if (uport->cons && uport->dev)
3193
		of_console_check(uport->dev->of_node, uport->cons->name, uport->line);
3193
		of_console_check(uport->dev->of_node, uport->cons->name, uport->line);
3194
3194
(-)a/drivers/tty/tty_io.c (-2 / +7 lines)
Lines 3567-3574 static ssize_t show_cons_active(struct device *dev, Link Here
3567
	for_each_console(c) {
3567
	for_each_console(c) {
3568
		if (!c->device)
3568
		if (!c->device)
3569
			continue;
3569
			continue;
3570
		if (!c->write)
3570
		if (c->flags & CON_NBCON) {
3571
			continue;
3571
			if (!c->write_atomic && !c->write_thread)
3572
				continue;
3573
		} else {
3574
			if (!c->write)
3575
				continue;
3576
		}
3572
		if ((c->flags & CON_ENABLED) == 0)
3577
		if ((c->flags & CON_ENABLED) == 0)
3573
			continue;
3578
			continue;
3574
		cs[i++] = c;
3579
		cs[i++] = c;
(-)a/fs/proc/consoles.c (-3 / +13 lines)
Lines 21-32 static int show_console_dev(struct seq_file *m, void *v) Link Here
21
		{ CON_ENABLED,		'E' },
21
		{ CON_ENABLED,		'E' },
22
		{ CON_CONSDEV,		'C' },
22
		{ CON_CONSDEV,		'C' },
23
		{ CON_BOOT,		'B' },
23
		{ CON_BOOT,		'B' },
24
		{ CON_NBCON,		'N' },
24
		{ CON_PRINTBUFFER,	'p' },
25
		{ CON_PRINTBUFFER,	'p' },
25
		{ CON_BRL,		'b' },
26
		{ CON_BRL,		'b' },
26
		{ CON_ANYTIME,		'a' },
27
		{ CON_ANYTIME,		'a' },
27
	};
28
	};
28
	char flags[ARRAY_SIZE(con_flags) + 1];
29
	char flags[ARRAY_SIZE(con_flags) + 1];
29
	struct console *con = v;
30
	struct console *con = v;
31
	char con_write = '-';
30
	unsigned int a;
32
	unsigned int a;
31
	dev_t dev = 0;
33
	dev_t dev = 0;
32
34
Lines 57-65 static int show_console_dev(struct seq_file *m, void *v) Link Here
57
	seq_setwidth(m, 21 - 1);
59
	seq_setwidth(m, 21 - 1);
58
	seq_printf(m, "%s%d", con->name, con->index);
60
	seq_printf(m, "%s%d", con->name, con->index);
59
	seq_pad(m, ' ');
61
	seq_pad(m, ' ');
60
	seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-',
62
	if (con->flags & CON_NBCON) {
61
			con->write ? 'W' : '-', con->unblank ? 'U' : '-',
63
		if (con->write_atomic || con->write_thread)
62
			flags);
64
			con_write = 'W';
65
	} else {
66
		if (con->write)
67
			con_write = 'W';
68
	}
69
	seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con_write,
70
		   con->unblank ? 'U' : '-', flags);
63
	if (dev)
71
	if (dev)
64
		seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
72
		seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
65
73
Lines 68-73 static int show_console_dev(struct seq_file *m, void *v) Link Here
68
}
76
}
69
77
70
static void *c_start(struct seq_file *m, loff_t *pos)
78
static void *c_start(struct seq_file *m, loff_t *pos)
79
	__acquires(&console_mutex)
71
{
80
{
72
	struct console *con;
81
	struct console *con;
73
	loff_t off = 0;
82
	loff_t off = 0;
Lines 94-99 static void *c_next(struct seq_file *m, void *v, loff_t *pos) Link Here
94
}
103
}
95
104
96
static void c_stop(struct seq_file *m, void *v)
105
static void c_stop(struct seq_file *m, void *v)
106
	__releases(&console_mutex)
97
{
107
{
98
	console_list_unlock();
108
	console_list_unlock();
99
}
109
}
(-)a/include/linux/bottom_half.h (+2 lines)
Lines 35-42 static inline void local_bh_enable(void) Link Here
35
35
36
#ifdef CONFIG_PREEMPT_RT
36
#ifdef CONFIG_PREEMPT_RT
37
extern bool local_bh_blocked(void);
37
extern bool local_bh_blocked(void);
38
extern void softirq_preempt(void);
38
#else
39
#else
39
static inline bool local_bh_blocked(void) { return false; }
40
static inline bool local_bh_blocked(void) { return false; }
41
static inline void softirq_preempt(void) { }
40
#endif
42
#endif
41
43
42
#endif /* _LINUX_BH_H */
44
#endif /* _LINUX_BH_H */
(-)a/include/linux/console.h (-15 / +136 lines)
Lines 16-22 Link Here
16
16
17
#include <linux/atomic.h>
17
#include <linux/atomic.h>
18
#include <linux/bits.h>
18
#include <linux/bits.h>
19
#include <linux/irq_work.h>
19
#include <linux/rculist.h>
20
#include <linux/rculist.h>
21
#include <linux/rcuwait.h>
20
#include <linux/types.h>
22
#include <linux/types.h>
21
#include <linux/vesa.h>
23
#include <linux/vesa.h>
22
24
Lines 303-309 struct nbcon_write_context { Link Here
303
/**
305
/**
304
 * struct console - The console descriptor structure
306
 * struct console - The console descriptor structure
305
 * @name:		The name of the console driver
307
 * @name:		The name of the console driver
306
 * @write:		Write callback to output messages (Optional)
308
 * @write:		Legacy write callback to output messages (Optional)
307
 * @read:		Read callback for console input (Optional)
309
 * @read:		Read callback for console input (Optional)
308
 * @device:		The underlying TTY device driver (Optional)
310
 * @device:		The underlying TTY device driver (Optional)
309
 * @unblank:		Callback to unblank the console (Optional)
311
 * @unblank:		Callback to unblank the console (Optional)
Lines 320-329 struct nbcon_write_context { Link Here
320
 * @data:		Driver private data
322
 * @data:		Driver private data
321
 * @node:		hlist node for the console list
323
 * @node:		hlist node for the console list
322
 *
324
 *
323
 * @write_atomic:	Write callback for atomic context
324
 * @nbcon_state:	State for nbcon consoles
325
 * @nbcon_state:	State for nbcon consoles
325
 * @nbcon_seq:		Sequence number of the next record for nbcon to print
326
 * @nbcon_seq:		Sequence number of the next record for nbcon to print
327
 * @nbcon_driver_ctxt:	Context available for driver non-printing operations
328
 * @nbcon_prev_seq:	Seq num the previous nbcon owner was assigned to print
326
 * @pbufs:		Pointer to nbcon private buffer
329
 * @pbufs:		Pointer to nbcon private buffer
330
 * @kthread:		Printer kthread for this console
331
 * @rcuwait:		RCU-safe wait object for @kthread waking
332
 * @irq_work:		Defer @kthread waking to IRQ work context
327
 */
333
 */
328
struct console {
334
struct console {
329
	char			name[16];
335
	char			name[16];
Lines 345-355 struct console { Link Here
345
	struct hlist_node	node;
351
	struct hlist_node	node;
346
352
347
	/* nbcon console specific members */
353
	/* nbcon console specific members */
348
	bool			(*write_atomic)(struct console *con,
354
349
						struct nbcon_write_context *wctxt);
355
	/**
356
	 * @write_atomic:
357
	 *
358
	 * NBCON callback to write out text in any context.
359
	 *
360
	 * This callback is called with the console already acquired. However,
361
	 * a higher priority context is allowed to take it over by default.
362
	 *
363
	 * The callback must call nbcon_enter_unsafe() and nbcon_exit_unsafe()
364
	 * around any code where the takeover is not safe, for example, when
365
	 * manipulating the serial port registers.
366
	 *
367
	 * nbcon_enter_unsafe() will fail if the context has lost the console
368
	 * ownership in the meantime. In this case, the callback is no longer
369
	 * allowed to go forward. It must back out immediately and carefully.
370
	 * The buffer content is also no longer trusted since it no longer
371
	 * belongs to the context.
372
	 *
373
	 * The callback should allow the takeover whenever it is safe. It
374
	 * increases the chance to see messages when the system is in trouble.
375
	 *
376
	 * If the driver must reacquire ownership in order to finalize or
377
	 * revert hardware changes, nbcon_reacquire() can be used. However,
378
	 * on reacquire the buffer content is no longer available. A
379
	 * reacquire cannot be used to resume printing.
380
	 *
381
	 * The callback can be called from any context (including NMI).
382
	 * Therefore it must avoid usage of any locking and instead rely
383
	 * on the console ownership for synchronization.
384
	 */
385
	void (*write_atomic)(struct console *con, struct nbcon_write_context *wctxt);
386
387
	/**
388
	 * @write_thread:
389
	 *
390
	 * NBCON callback to write out text in task context. (Optional)
391
	 *
392
	 * This callback is called with the console already acquired. Any
393
	 * additional driver synchronization should have been performed by
394
	 * device_lock().
395
	 *
396
	 * This callback is always called from task context but with migration
397
	 * disabled.
398
	 *
399
	 * The same criteria for console ownership verification and unsafe
400
	 * sections applies as with write_atomic(). The difference between
401
	 * this callback and write_atomic() is that this callback is used
402
	 * during normal operation and is always called from task context.
403
	 * This provides drivers with a relatively relaxed locking context
404
	 * for synchronizing output to the hardware.
405
	 */
406
	void (*write_thread)(struct console *con, struct nbcon_write_context *wctxt);
407
408
	/**
409
	 * @device_lock:
410
	 *
411
	 * NBCON callback to begin synchronization with driver code.
412
	 *
413
	 * Console drivers typically must deal with access to the hardware
414
	 * via user input/output (such as an interactive login shell) and
415
	 * output of kernel messages via printk() calls. This callback is
416
	 * called by the printk-subsystem whenever it needs to synchronize
417
	 * with hardware access by the driver. It should be implemented to
418
	 * use whatever synchronization mechanism the driver is using for
419
	 * itself (for example, the port lock for uart serial consoles).
420
	 *
421
	 * The callback is always called from task context. It may use any
422
	 * synchronization method required by the driver.
423
	 *
424
	 * IMPORTANT: The callback MUST disable migration. The console driver
425
	 *	may be using a synchronization mechanism that already takes
426
	 *	care of this (such as spinlocks). Otherwise this function must
427
	 *	explicitly call migrate_disable().
428
	 *
429
	 * The flags argument is provided as a convenience to the driver. It
430
	 * will be passed again to device_unlock(). It can be ignored if the
431
	 * driver does not need it.
432
	 */
433
	void (*device_lock)(struct console *con, unsigned long *flags);
434
435
	/**
436
	 * @device_unlock:
437
	 *
438
	 * NBCON callback to finish synchronization with driver code.
439
	 *
440
	 * It is the counterpart to device_lock().
441
	 *
442
	 * This callback is always called from task context. It must
443
	 * appropriately re-enable migration (depending on how device_lock()
444
	 * disabled migration).
445
	 *
446
	 * The flags argument is the value of the same variable that was
447
	 * passed to device_lock().
448
	 */
449
	void (*device_unlock)(struct console *con, unsigned long flags);
450
350
	atomic_t		__private nbcon_state;
451
	atomic_t		__private nbcon_state;
351
	atomic_long_t		__private nbcon_seq;
452
	atomic_long_t		__private nbcon_seq;
453
	struct nbcon_context	__private nbcon_driver_ctxt;
454
	atomic_long_t           __private nbcon_prev_seq;
455
352
	struct printk_buffers	*pbufs;
456
	struct printk_buffers	*pbufs;
457
	struct task_struct	*kthread;
458
	struct rcuwait		rcuwait;
459
	struct irq_work		irq_work;
353
};
460
};
354
461
355
#ifdef CONFIG_LOCKDEP
462
#ifdef CONFIG_LOCKDEP
Lines 378-405 extern void console_list_unlock(void) __releases(console_mutex); Link Here
378
extern struct hlist_head console_list;
485
extern struct hlist_head console_list;
379
486
380
/**
487
/**
381
 * console_srcu_read_flags - Locklessly read the console flags
488
 * console_srcu_read_flags - Locklessly read flags of a possibly registered
489
 *				console
382
 * @con:	struct console pointer of console to read flags from
490
 * @con:	struct console pointer of console to read flags from
383
 *
491
 *
384
 * This function provides the necessary READ_ONCE() and data_race()
492
 * Locklessly reading @con->flags provides a consistent read value because
385
 * notation for locklessly reading the console flags. The READ_ONCE()
493
 * there is at most one CPU modifying @con->flags and that CPU is using only
386
 * in this function matches the WRITE_ONCE() when @flags are modified
494
 * read-modify-write operations to do so.
387
 * for registered consoles with console_srcu_write_flags().
388
 *
495
 *
389
 * Only use this function to read console flags when locklessly
496
 * Requires console_srcu_read_lock to be held, which implies that @con might
390
 * iterating the console list via srcu.
497
 * be a registered console. The purpose of holding console_srcu_read_lock is
498
 * to guarantee that the console state is valid (CON_SUSPENDED/CON_ENABLED)
499
 * and that no exit/cleanup routines will run if the console is currently
500
 * undergoing unregistration.
501
 *
502
 * If the caller is holding the console_list_lock or it is _certain_ that
503
 * @con is not and will not become registered, the caller may read
504
 * @con->flags directly instead.
391
 *
505
 *
392
 * Context: Any context.
506
 * Context: Any context.
507
 * Return: The current value of the @con->flags field.
393
 */
508
 */
394
static inline short console_srcu_read_flags(const struct console *con)
509
static inline short console_srcu_read_flags(const struct console *con)
395
{
510
{
396
	WARN_ON_ONCE(!console_srcu_read_lock_is_held());
511
	WARN_ON_ONCE(!console_srcu_read_lock_is_held());
397
512
398
	/*
513
	/*
399
	 * Locklessly reading console->flags provides a consistent
514
	 * The READ_ONCE() matches the WRITE_ONCE() when @flags are modified
400
	 * read value because there is at most one CPU modifying
515
	 * for registered consoles with console_srcu_write_flags().
401
	 * console->flags and that CPU is using only read-modify-write
402
	 * operations to do so.
403
	 */
516
	 */
404
	return data_race(READ_ONCE(con->flags));
517
	return data_race(READ_ONCE(con->flags));
405
}
518
}
Lines 477-489 static inline bool console_is_registered(const struct console *con) Link Here
477
	hlist_for_each_entry(con, &console_list, node)
590
	hlist_for_each_entry(con, &console_list, node)
478
591
479
#ifdef CONFIG_PRINTK
592
#ifdef CONFIG_PRINTK
593
extern void nbcon_cpu_emergency_enter(void);
594
extern void nbcon_cpu_emergency_exit(void);
595
extern void nbcon_cpu_emergency_flush(void);
480
extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt);
596
extern bool nbcon_can_proceed(struct nbcon_write_context *wctxt);
481
extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt);
597
extern bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt);
482
extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt);
598
extern bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt);
599
extern void nbcon_reacquire(struct nbcon_write_context *wctxt);
483
#else
600
#else
601
static inline void nbcon_cpu_emergency_enter(void) { }
602
static inline void nbcon_cpu_emergency_exit(void) { }
603
static inline void nbcon_cpu_emergency_flush(void) { }
484
static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; }
604
static inline bool nbcon_can_proceed(struct nbcon_write_context *wctxt) { return false; }
485
static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; }
605
static inline bool nbcon_enter_unsafe(struct nbcon_write_context *wctxt) { return false; }
486
static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; }
606
static inline bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) { return false; }
607
static inline void nbcon_reacquire(struct nbcon_write_context *wctxt) { }
487
#endif
608
#endif
488
609
489
extern int console_set_on_cmdline;
610
extern int console_set_on_cmdline;
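The kernel-doc added above spells out the contract for NBCON consoles: hardware access goes inside nbcon_enter_unsafe()/nbcon_exit_unsafe(), write_thread() runs in task context under device_lock(), and device_lock()/device_unlock() must keep migration disabled. A minimal sketch of a console following that contract, for a hypothetical driver (all foo_* names and the lock are assumptions, not part of this patch):

#include <linux/console.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(foo_hw_lock);	/* hypothetical hardware lock */

static void foo_write_atomic(struct console *con, struct nbcon_write_context *wctxt)
{
	if (!nbcon_enter_unsafe(wctxt))
		return;				/* ownership lost, back out */
	foo_hw_emit(wctxt->outbuf, wctxt->len);	/* hypothetical register writes */
	nbcon_exit_unsafe(wctxt);
}

static void foo_write_thread(struct console *con, struct nbcon_write_context *wctxt)
{
	/* Task context; device_lock() has already serialized against users. */
	foo_write_atomic(con, wctxt);
}

static void foo_device_lock(struct console *con, unsigned long *flags)
{
	spin_lock_irqsave(&foo_hw_lock, *flags);	/* also disables migration */
}

static void foo_device_unlock(struct console *con, unsigned long flags)
{
	spin_unlock_irqrestore(&foo_hw_lock, flags);
}

static struct console foo_console = {
	.name		= "foocon",
	.write_atomic	= foo_write_atomic,
	.write_thread	= foo_write_thread,
	.device_lock	= foo_device_lock,
	.device_unlock	= foo_device_unlock,
	.flags		= CON_PRINTBUFFER | CON_NBCON,
	.index		= -1,
};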
(-)a/include/linux/entry-common.h (-1 / +1 lines)
Lines 65-71 Link Here
65
#define EXIT_TO_USER_MODE_WORK						\
65
#define EXIT_TO_USER_MODE_WORK						\
66
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
66
	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
67
	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
67
	 _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |	\
68
	 ARCH_EXIT_TO_USER_MODE_WORK)
68
	 _TIF_NEED_RESCHED_LAZY | ARCH_EXIT_TO_USER_MODE_WORK)
69
69
70
/**
70
/**
71
 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
71
 * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs
(-)a/include/linux/entry-kvm.h (-1 / +1 lines)
Lines 18-24 Link Here
18
18
19
#define XFER_TO_GUEST_MODE_WORK						\
19
#define XFER_TO_GUEST_MODE_WORK						\
20
	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL |	\
20
	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL |	\
21
	 _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
21
	 _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED_LAZY | ARCH_XFER_TO_GUEST_MODE_WORK)
22
22
23
struct kvm_vcpu;
23
struct kvm_vcpu;
24
24
(-)a/include/linux/interrupt.h (+29 lines)
Lines 612-617 extern void __raise_softirq_irqoff(unsigned int nr); Link Here
612
extern void raise_softirq_irqoff(unsigned int nr);
612
extern void raise_softirq_irqoff(unsigned int nr);
613
extern void raise_softirq(unsigned int nr);
613
extern void raise_softirq(unsigned int nr);
614
614
615
#ifdef CONFIG_PREEMPT_RT
616
DECLARE_PER_CPU(struct task_struct *, timersd);
617
DECLARE_PER_CPU(unsigned long, pending_timer_softirq);
618
619
extern void raise_timer_softirq(void);
620
extern void raise_hrtimer_softirq(void);
621
622
static inline unsigned int local_pending_timers(void)
623
{
624
        return __this_cpu_read(pending_timer_softirq);
625
}
626
627
#else
628
static inline void raise_timer_softirq(void)
629
{
630
	raise_softirq(TIMER_SOFTIRQ);
631
}
632
633
static inline void raise_hrtimer_softirq(void)
634
{
635
	raise_softirq_irqoff(HRTIMER_SOFTIRQ);
636
}
637
638
static inline unsigned int local_pending_timers(void)
639
{
640
        return local_softirq_pending();
641
}
642
#endif
643
615
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
644
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
616
645
617
static inline struct task_struct *this_cpu_ksoftirqd(void)
646
static inline struct task_struct *this_cpu_ksoftirqd(void)
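The raise_timer_softirq()/raise_hrtimer_softirq() wrappers added above let timer code stay agnostic about the preemption model: without PREEMPT_RT they raise the softirq as before, with PREEMPT_RT the pending work is tracked in the per-CPU pending_timer_softirq word and handled by the timersd thread. Caller-side sketch (the tick hook shown is illustrative only):

static void timer_tick_sketch(bool hrtimers_expired)
{
	/* Typically called from the tick path with interrupts disabled. */
	if (hrtimers_expired)
		raise_hrtimer_softirq();
	else
		raise_timer_softirq();
}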
(-)a/include/linux/netdevice.h (+1 lines)
Lines 3287-3292 static inline void dev_xmit_recursion_dec(void) Link Here
3287
	__this_cpu_dec(softnet_data.xmit.recursion);
3287
	__this_cpu_dec(softnet_data.xmit.recursion);
3288
}
3288
}
3289
3289
3290
void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
3290
void __netif_schedule(struct Qdisc *q);
3291
void __netif_schedule(struct Qdisc *q);
3291
void netif_schedule_queue(struct netdev_queue *txq);
3292
void netif_schedule_queue(struct netdev_queue *txq);
3292
3293
(-)a/include/linux/perf_event.h (-2 / +2 lines)
Lines 781-789 struct perf_event { Link Here
781
	unsigned int			pending_wakeup;
781
	unsigned int			pending_wakeup;
782
	unsigned int			pending_kill;
782
	unsigned int			pending_kill;
783
	unsigned int			pending_disable;
783
	unsigned int			pending_disable;
784
	unsigned int			pending_sigtrap;
785
	unsigned long			pending_addr;	/* SIGTRAP */
784
	unsigned long			pending_addr;	/* SIGTRAP */
786
	struct irq_work			pending_irq;
785
	struct irq_work			pending_irq;
786
	struct irq_work			pending_disable_irq;
787
	struct callback_head		pending_task;
787
	struct callback_head		pending_task;
788
	unsigned int			pending_work;
788
	unsigned int			pending_work;
789
789
Lines 959-965 struct perf_event_context { Link Here
959
	struct rcu_head			rcu_head;
959
	struct rcu_head			rcu_head;
960
960
961
	/*
961
	/*
962
	 * Sum (event->pending_sigtrap + event->pending_work)
962
	 * Sum (event->pending_work + event->pending_work)
963
	 *
963
	 *
964
	 * The SIGTRAP is targeted at ctx->task, as such it won't do changing
964
	 * The SIGTRAP is targeted at ctx->task, as such it won't do changing
965
	 * that until the signal is delivered.
965
	 * that until the signal is delivered.
(-)a/include/linux/printk.h (-4 / +29 lines)
Lines 9-14 Link Here
9
#include <linux/ratelimit_types.h>
9
#include <linux/ratelimit_types.h>
10
#include <linux/once_lite.h>
10
#include <linux/once_lite.h>
11
11
12
struct console;
13
12
extern const char linux_banner[];
14
extern const char linux_banner[];
13
extern const char linux_proc_banner[];
15
extern const char linux_proc_banner[];
14
16
Lines 157-171 int _printk(const char *fmt, ...); Link Here
157
 */
159
 */
158
__printf(1, 2) __cold int _printk_deferred(const char *fmt, ...);
160
__printf(1, 2) __cold int _printk_deferred(const char *fmt, ...);
159
161
160
extern void __printk_safe_enter(void);
162
extern void __printk_deferred_enter(void);
161
extern void __printk_safe_exit(void);
163
extern void __printk_deferred_exit(void);
164
162
/*
165
/*
163
 * The printk_deferred_enter/exit macros are available only as a hack for
166
 * The printk_deferred_enter/exit macros are available only as a hack for
164
 * some code paths that need to defer all printk console printing. Interrupts
167
 * some code paths that need to defer all printk console printing. Interrupts
165
 * must be disabled for the deferred duration.
168
 * must be disabled for the deferred duration.
166
 */
169
 */
167
#define printk_deferred_enter __printk_safe_enter
170
#define printk_deferred_enter() __printk_deferred_enter()
168
#define printk_deferred_exit __printk_safe_exit
171
#define printk_deferred_exit() __printk_deferred_exit()
169
172
170
/*
173
/*
171
 * Please don't use printk_ratelimit(), because it shares ratelimiting state
174
 * Please don't use printk_ratelimit(), because it shares ratelimiting state
Lines 192-197 void show_regs_print_info(const char *log_lvl); Link Here
192
extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
195
extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
193
extern asmlinkage void dump_stack(void) __cold;
196
extern asmlinkage void dump_stack(void) __cold;
194
void printk_trigger_flush(void);
197
void printk_trigger_flush(void);
198
void printk_legacy_allow_panic_sync(void);
199
extern bool nbcon_driver_try_acquire(struct console *con);
200
extern void nbcon_driver_release(struct console *con);
201
void nbcon_atomic_flush_unsafe(void);
195
#else
202
#else
196
static inline __printf(1, 0)
203
static inline __printf(1, 0)
197
int vprintk(const char *s, va_list args)
204
int vprintk(const char *s, va_list args)
Lines 271-276 static inline void dump_stack(void) Link Here
271
static inline void printk_trigger_flush(void)
278
static inline void printk_trigger_flush(void)
272
{
279
{
273
}
280
}
281
282
static inline void printk_legacy_allow_panic_sync(void)
283
{
284
}
285
286
static inline bool nbcon_driver_try_acquire(struct console *con)
287
{
288
	return false;
289
}
290
291
static inline void nbcon_driver_release(struct console *con)
292
{
293
}
294
295
static inline void nbcon_atomic_flush_unsafe(void)
296
{
297
}
298
274
#endif
299
#endif
275
300
276
bool this_cpu_in_panic(void);
301
bool this_cpu_in_panic(void);
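printk_deferred_enter()/printk_deferred_exit() now map to dedicated __printk_deferred_*() helpers instead of the printk-safe pair, but the usage contract from the comment above is unchanged: interrupts must stay disabled for the whole deferred region. A usage sketch:

static void deferred_printk_usage_sketch(void)
{
	unsigned long flags;

	local_irq_save(flags);
	printk_deferred_enter();

	/* Messages are stored now and printed to the consoles later. */
	printk("example deferred message\n");

	printk_deferred_exit();
	local_irq_restore(flags);
}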
(-)a/include/linux/sched.h (-5 / +8 lines)
Lines 1795-1800 static inline int dl_task_check_affinity(struct task_struct *p, const struct cpu Link Here
1795
}
1795
}
1796
#endif
1796
#endif
1797
1797
1798
extern bool task_is_pi_boosted(const struct task_struct *p);
1798
extern int yield_to(struct task_struct *p, bool preempt);
1799
extern int yield_to(struct task_struct *p, bool preempt);
1799
extern void set_user_nice(struct task_struct *p, long nice);
1800
extern void set_user_nice(struct task_struct *p, long nice);
1800
extern int task_prio(const struct task_struct *p);
1801
extern int task_prio(const struct task_struct *p);
Lines 1937-1953 static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, Link Here
1937
	update_ti_thread_flag(task_thread_info(tsk), flag, value);
1938
	update_ti_thread_flag(task_thread_info(tsk), flag, value);
1938
}
1939
}
1939
1940
1940
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
1941
static inline bool test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
1941
{
1942
{
1942
	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
1943
	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
1943
}
1944
}
1944
1945
1945
static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
1946
static inline bool test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
1946
{
1947
{
1947
	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
1948
	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
1948
}
1949
}
1949
1950
1950
static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
1951
static inline bool test_tsk_thread_flag(struct task_struct *tsk, int flag)
1951
{
1952
{
1952
	return test_ti_thread_flag(task_thread_info(tsk), flag);
1953
	return test_ti_thread_flag(task_thread_info(tsk), flag);
1953
}
1954
}
Lines 1960-1968 static inline void set_tsk_need_resched(struct task_struct *tsk) Link Here
1960
static inline void clear_tsk_need_resched(struct task_struct *tsk)
1961
static inline void clear_tsk_need_resched(struct task_struct *tsk)
1961
{
1962
{
1962
	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1963
	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1964
	if (IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO))
1965
		clear_tsk_thread_flag(tsk, TIF_NEED_RESCHED_LAZY);
1963
}
1966
}
1964
1967
1965
static inline int test_tsk_need_resched(struct task_struct *tsk)
1968
static inline bool test_tsk_need_resched(struct task_struct *tsk)
1966
{
1969
{
1967
	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
1970
	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
1968
}
1971
}
Lines 2103-2109 static inline bool preempt_model_preemptible(void) Link Here
2103
2106
2104
static __always_inline bool need_resched(void)
2107
static __always_inline bool need_resched(void)
2105
{
2108
{
2106
	return unlikely(tif_need_resched());
2109
	return unlikely(tif_need_resched_lazy() || tif_need_resched());
2107
}
2110
}
2108
2111
2109
/*
2112
/*
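Since need_resched() above now folds in the lazy bit, a polling loop keyed on it backs off for both kinds of preemption request. A hedged sketch; demo_process_one() and demo_drain_queue() are hypothetical:

/* Hedged sketch: under CONFIG_PREEMPT_BUILD_AUTO this loop yields for both
 * TIF_NEED_RESCHED and TIF_NEED_RESCHED_LAZY, because need_resched() now
 * tests both bits. */
#include <linux/sched.h>

static bool demo_process_one(void);	/* hypothetical work-item helper */

static void demo_drain_queue(void)
{
	while (!need_resched()) {
		if (!demo_process_one())
			return;
	}
	cond_resched();	/* let the scheduler act on the pending request */
}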
(-)a/include/linux/sched/idle.h (-4 / +4 lines)
Lines 63-69 static __always_inline bool __must_check current_set_polling_and_test(void) Link Here
63
	 */
63
	 */
64
	smp_mb__after_atomic();
64
	smp_mb__after_atomic();
65
65
66
	return unlikely(tif_need_resched());
66
	return unlikely(need_resched());
67
}
67
}
68
68
69
static __always_inline bool __must_check current_clr_polling_and_test(void)
69
static __always_inline bool __must_check current_clr_polling_and_test(void)
Lines 76-82 static __always_inline bool __must_check current_clr_polling_and_test(void) Link Here
76
	 */
76
	 */
77
	smp_mb__after_atomic();
77
	smp_mb__after_atomic();
78
78
79
	return unlikely(tif_need_resched());
79
	return unlikely(need_resched());
80
}
80
}
81
81
82
#else
82
#else
Lines 85-95 static inline void __current_clr_polling(void) { } Link Here
85
85
86
static inline bool __must_check current_set_polling_and_test(void)
86
static inline bool __must_check current_set_polling_and_test(void)
87
{
87
{
88
	return unlikely(tif_need_resched());
88
	return unlikely(need_resched());
89
}
89
}
90
static inline bool __must_check current_clr_polling_and_test(void)
90
static inline bool __must_check current_clr_polling_and_test(void)
91
{
91
{
92
	return unlikely(tif_need_resched());
92
	return unlikely(need_resched());
93
}
93
}
94
#endif
94
#endif
95
95
(-)a/include/linux/serial_8250.h (+6 lines)
Lines 153-158 struct uart_8250_port { Link Here
153
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
153
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
154
	unsigned char		msr_saved_flags;
154
	unsigned char		msr_saved_flags;
155
155
156
	bool			console_newline_needed;
157
156
	struct uart_8250_dma	*dma;
158
	struct uart_8250_dma	*dma;
157
	const struct uart_8250_ops *ops;
159
	const struct uart_8250_ops *ops;
158
160
Lines 204-209 void serial8250_init_port(struct uart_8250_port *up); Link Here
204
void serial8250_set_defaults(struct uart_8250_port *up);
206
void serial8250_set_defaults(struct uart_8250_port *up);
205
void serial8250_console_write(struct uart_8250_port *up, const char *s,
207
void serial8250_console_write(struct uart_8250_port *up, const char *s,
206
			      unsigned int count);
208
			      unsigned int count);
209
void serial8250_console_write_atomic(struct uart_8250_port *up,
210
				     struct nbcon_write_context *wctxt);
211
void serial8250_console_write_thread(struct uart_8250_port *up,
212
				     struct nbcon_write_context *wctxt);
207
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
213
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
208
int serial8250_console_exit(struct uart_port *port);
214
int serial8250_console_exit(struct uart_port *port);
209
215
(-)a/include/linux/serial_core.h (-2 / +115 lines)
Lines 12-17 Link Here
12
#include <linux/console.h>
12
#include <linux/console.h>
13
#include <linux/interrupt.h>
13
#include <linux/interrupt.h>
14
#include <linux/circ_buf.h>
14
#include <linux/circ_buf.h>
15
#include <linux/lockdep.h>
16
#include <linux/printk.h>
15
#include <linux/spinlock.h>
17
#include <linux/spinlock.h>
16
#include <linux/sched.h>
18
#include <linux/sched.h>
17
#include <linux/tty.h>
19
#include <linux/tty.h>
Lines 591-596 struct uart_port { Link Here
591
	void			*private_data;		/* generic platform data pointer */
593
	void			*private_data;		/* generic platform data pointer */
592
};
594
};
593
595
596
/*
597
 * Only for console->device_lock()/_unlock() callbacks and internal
598
 * port lock wrapper synchronization.
599
 */
600
static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
601
{
602
	spin_lock_irqsave(&up->lock, *flags);
603
}
604
605
/*
606
 * Only for console->device_lock()/_unlock() callbacks and internal
607
 * port lock wrapper synchronization.
608
 */
609
static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
610
{
611
	spin_unlock_irqrestore(&up->lock, flags);
612
}
613
614
/**
615
 * uart_port_set_cons - Safely set the @cons field for a uart
616
 * @up:		The uart port to set
617
 * @con:	The new console to set to
618
 *
619
 * This function must be used to set @up->cons. It uses the port lock to
620
 * synchronize with the port lock wrappers in order to ensure that the console
621
 * cannot change or disappear while another context is holding the port lock.
622
 */
623
static inline void uart_port_set_cons(struct uart_port *up, struct console *con)
624
{
625
	unsigned long flags;
626
627
	__uart_port_lock_irqsave(up, &flags);
628
	up->cons = con;
629
	__uart_port_unlock_irqrestore(up, flags);
630
}
631
632
/* Only for internal port lock wrapper usage. */
633
static inline bool __uart_port_using_nbcon(struct uart_port *up)
634
{
635
	lockdep_assert_held_once(&up->lock);
636
637
	if (likely(!uart_console(up)))
638
		return false;
639
640
	/*
641
	 * @up->cons is only modified under the port lock. Therefore it is
642
	 * certain that it cannot disappear here.
643
	 *
644
	 * @up->cons->node is added/removed from the console list under the
645
	 * port lock. Therefore it is certain that the registration status
646
	 * cannot change here, thus @up->cons->flags can be read directly.
647
	 */
648
	if (hlist_unhashed_lockless(&up->cons->node) ||
649
	    !(up->cons->flags & CON_NBCON) ||
650
	    !up->cons->write_atomic) {
651
		return false;
652
	}
653
654
	return true;
655
}
656
657
/* Only for internal port lock wrapper usage. */
658
static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up)
659
{
660
	if (!__uart_port_using_nbcon(up))
661
		return true;
662
663
	return nbcon_driver_try_acquire(up->cons);
664
}
665
666
/* Only for internal port lock wrapper usage. */
667
static inline void __uart_port_nbcon_acquire(struct uart_port *up)
668
{
669
	if (!__uart_port_using_nbcon(up))
670
		return;
671
672
	while (!nbcon_driver_try_acquire(up->cons))
673
		cpu_relax();
674
}
675
676
/* Only for internal port lock wrapper usage. */
677
static inline void __uart_port_nbcon_release(struct uart_port *up)
678
{
679
	if (!__uart_port_using_nbcon(up))
680
		return;
681
682
	nbcon_driver_release(up->cons);
683
}
684
594
/**
685
/**
595
 * uart_port_lock - Lock the UART port
686
 * uart_port_lock - Lock the UART port
596
 * @up:		Pointer to UART port structure
687
 * @up:		Pointer to UART port structure
Lines 598-603 struct uart_port { Link Here
598
static inline void uart_port_lock(struct uart_port *up)
689
static inline void uart_port_lock(struct uart_port *up)
599
{
690
{
600
	spin_lock(&up->lock);
691
	spin_lock(&up->lock);
692
	__uart_port_nbcon_acquire(up);
601
}
693
}
602
694
603
/**
695
/**
Lines 607-612 static inline void uart_port_lock(struct uart_port *up) Link Here
607
static inline void uart_port_lock_irq(struct uart_port *up)
699
static inline void uart_port_lock_irq(struct uart_port *up)
608
{
700
{
609
	spin_lock_irq(&up->lock);
701
	spin_lock_irq(&up->lock);
702
	__uart_port_nbcon_acquire(up);
610
}
703
}
611
704
612
/**
705
/**
Lines 617-622 static inline void uart_port_lock_irq(struct uart_port *up) Link Here
617
static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
710
static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags)
618
{
711
{
619
	spin_lock_irqsave(&up->lock, *flags);
712
	spin_lock_irqsave(&up->lock, *flags);
713
	__uart_port_nbcon_acquire(up);
620
}
714
}
621
715
622
/**
716
/**
Lines 627-633 static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *f Link Here
627
 */
721
 */
628
static inline bool uart_port_trylock(struct uart_port *up)
722
static inline bool uart_port_trylock(struct uart_port *up)
629
{
723
{
630
	return spin_trylock(&up->lock);
724
	if (!spin_trylock(&up->lock))
725
		return false;
726
727
	if (!__uart_port_nbcon_try_acquire(up)) {
728
		spin_unlock(&up->lock);
729
		return false;
730
	}
731
732
	return true;
631
}
733
}
632
734
633
/**
735
/**
Lines 639-645 static inline bool uart_port_trylock(struct uart_port *up) Link Here
639
 */
741
 */
640
static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags)
742
static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags)
641
{
743
{
642
	return spin_trylock_irqsave(&up->lock, *flags);
744
	if (!spin_trylock_irqsave(&up->lock, *flags))
745
		return false;
746
747
	if (!__uart_port_nbcon_try_acquire(up)) {
748
		spin_unlock_irqrestore(&up->lock, *flags);
749
		return false;
750
	}
751
752
	return true;
643
}
753
}
644
754
645
/**
755
/**
Lines 648-653 static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long Link Here
648
 */
758
 */
649
static inline void uart_port_unlock(struct uart_port *up)
759
static inline void uart_port_unlock(struct uart_port *up)
650
{
760
{
761
	__uart_port_nbcon_release(up);
651
	spin_unlock(&up->lock);
762
	spin_unlock(&up->lock);
652
}
763
}
653
764
Lines 657-662 static inline void uart_port_unlock(struct uart_port *up) Link Here
657
 */
768
 */
658
static inline void uart_port_unlock_irq(struct uart_port *up)
769
static inline void uart_port_unlock_irq(struct uart_port *up)
659
{
770
{
771
	__uart_port_nbcon_release(up);
660
	spin_unlock_irq(&up->lock);
772
	spin_unlock_irq(&up->lock);
661
}
773
}
662
774
Lines 667-672 static inline void uart_port_unlock_irq(struct uart_port *up) Link Here
667
 */
779
 */
668
static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
780
static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags)
669
{
781
{
782
	__uart_port_nbcon_release(up);
670
	spin_unlock_irqrestore(&up->lock, flags);
783
	spin_unlock_irqrestore(&up->lock, flags);
671
}
784
}
672
785
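The port-lock wrappers above now also acquire and release nbcon console ownership, so a driver keeps using them exactly as before. A hedged sketch of an interrupt handler built on them; demo_rx()/demo_tx() are hypothetical helpers:

/* Hedged sketch: uart_port_lock_irqsave() transparently takes nbcon console
 * ownership when the port backs an nbcon console; the handler body needs no
 * other change. */
#include <linux/interrupt.h>
#include <linux/serial_core.h>

static void demo_rx(struct uart_port *port);	/* hypothetical */
static void demo_tx(struct uart_port *port);	/* hypothetical */

static irqreturn_t demo_uart_irq(int irq, void *dev_id)
{
	struct uart_port *port = dev_id;
	unsigned long flags;

	uart_port_lock_irqsave(port, &flags);
	demo_rx(port);
	demo_tx(port);
	uart_port_unlock_irqrestore(port, flags);

	return IRQ_HANDLED;
}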
(-)a/include/linux/thread_info.h (+24 lines)
Lines 59-64 enum syscall_work_bit { Link Here
59
59
60
#include <asm/thread_info.h>
60
#include <asm/thread_info.h>
61
61
62
#ifdef CONFIG_PREEMPT_BUILD_AUTO
63
# define TIF_NEED_RESCHED_LAZY		TIF_ARCH_RESCHED_LAZY
64
# define _TIF_NEED_RESCHED_LAZY		_TIF_ARCH_RESCHED_LAZY
65
# define TIF_NEED_RESCHED_LAZY_OFFSET	(TIF_NEED_RESCHED_LAZY - TIF_NEED_RESCHED)
66
#else
67
# define TIF_NEED_RESCHED_LAZY		TIF_NEED_RESCHED
68
# define _TIF_NEED_RESCHED_LAZY		_TIF_NEED_RESCHED
69
# define TIF_NEED_RESCHED_LAZY_OFFSET	0
70
#endif
71
62
#ifdef __KERNEL__
72
#ifdef __KERNEL__
63
73
64
#ifndef arch_set_restart_data
74
#ifndef arch_set_restart_data
Lines 185-190 static __always_inline bool tif_need_resched(void) Link Here
185
			     (unsigned long *)(&current_thread_info()->flags));
195
			     (unsigned long *)(&current_thread_info()->flags));
186
}
196
}
187
197
198
static __always_inline bool tif_need_resched_lazy(void)
199
{
200
	return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) &&
201
		arch_test_bit(TIF_NEED_RESCHED_LAZY,
202
			      (unsigned long *)(&current_thread_info()->flags));
203
}
204
188
#else
205
#else
189
206
190
static __always_inline bool tif_need_resched(void)
207
static __always_inline bool tif_need_resched(void)
Lines 193-198 static __always_inline bool tif_need_resched(void) Link Here
193
			(unsigned long *)(&current_thread_info()->flags));
210
			(unsigned long *)(&current_thread_info()->flags));
194
}
211
}
195
212
213
static __always_inline bool tif_need_resched_lazy(void)
214
{
215
	return IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) &&
216
		test_bit(TIF_NEED_RESCHED_LAZY,
217
			 (unsigned long *)(&current_thread_info()->flags));
218
}
219
196
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
220
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
197
221
198
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
222
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
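Because the block above aliases TIF_NEED_RESCHED_LAZY to TIF_NEED_RESCHED when CONFIG_PREEMPT_BUILD_AUTO is off, generic exit-work code can test both masks unconditionally, as the kernel/entry hunks further down do. A hedged sketch; demo_resched_requested() is hypothetical:

/* Hedged sketch: on !CONFIG_PREEMPT_BUILD_AUTO kernels both masks name the
 * same bit, so this compiles down to the old single-flag test. */
#include <linux/thread_info.h>
#include <linux/types.h>

static bool demo_resched_requested(unsigned long ti_work)
{
	return ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY);
}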
(-)a/include/linux/trace_events.h (-4 / +4 lines)
Lines 184-191 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); Link Here
184
184
185
enum trace_flag_type {
185
enum trace_flag_type {
186
	TRACE_FLAG_IRQS_OFF		= 0x01,
186
	TRACE_FLAG_IRQS_OFF		= 0x01,
187
	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
187
	TRACE_FLAG_NEED_RESCHED		= 0x02,
188
	TRACE_FLAG_NEED_RESCHED		= 0x04,
188
	TRACE_FLAG_NEED_RESCHED_LAZY	= 0x04,
189
	TRACE_FLAG_HARDIRQ		= 0x08,
189
	TRACE_FLAG_HARDIRQ		= 0x08,
190
	TRACE_FLAG_SOFTIRQ		= 0x10,
190
	TRACE_FLAG_SOFTIRQ		= 0x10,
191
	TRACE_FLAG_PREEMPT_RESCHED	= 0x20,
191
	TRACE_FLAG_PREEMPT_RESCHED	= 0x20,
Lines 211-221 static inline unsigned int tracing_gen_ctx(void) Link Here
211
211
212
static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
212
static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
213
{
213
{
214
	return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
214
	return tracing_gen_ctx_irq_test(0);
215
}
215
}
216
static inline unsigned int tracing_gen_ctx(void)
216
static inline unsigned int tracing_gen_ctx(void)
217
{
217
{
218
	return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
218
	return tracing_gen_ctx_irq_test(0);
219
}
219
}
220
#endif
220
#endif
221
221
(-)a/kernel/Kconfig.preempt (-1 / +16 lines)
Lines 11-16 config PREEMPT_BUILD Link Here
11
	select PREEMPTION
11
	select PREEMPTION
12
	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
12
	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
13
13
14
config PREEMPT_BUILD_AUTO
15
	bool
16
	select PREEMPT_BUILD
17
18
config HAVE_PREEMPT_AUTO
19
	bool
20
14
choice
21
choice
15
	prompt "Preemption Model"
22
	prompt "Preemption Model"
16
	default PREEMPT_NONE
23
	default PREEMPT_NONE
Lines 67-75 config PREEMPT Link Here
67
	  embedded system with latency requirements in the milliseconds
74
	  embedded system with latency requirements in the milliseconds
68
	  range.
75
	  range.
69
76
77
config PREEMPT_AUTO
78
	bool "Automagic preemption mode with runtime tweaking support"
79
	depends on HAVE_PREEMPT_AUTO
80
	select PREEMPT_BUILD_AUTO
81
	help
82
	  Select the automatic preemption model: the scheduler requests
	  lazy preemption via TIF_NEED_RESCHED_LAZY, which is honoured at
	  the next exit to user space or voluntary scheduling point, while
	  TIF_NEED_RESCHED still forces immediate preemption. This aims to
	  combine the throughput of a non-preemptive build with timely
	  preemption where latency matters.
83
70
config PREEMPT_RT
84
config PREEMPT_RT
71
	bool "Fully Preemptible Kernel (Real-Time)"
85
	bool "Fully Preemptible Kernel (Real-Time)"
72
	depends on EXPERT && ARCH_SUPPORTS_RT
86
	depends on EXPERT && ARCH_SUPPORTS_RT
87
	select PREEMPT_BUILD_AUTO if HAVE_PREEMPT_AUTO
73
	select PREEMPTION
88
	select PREEMPTION
74
	help
89
	help
75
	  This option turns the kernel into a real-time kernel by replacing
90
	  This option turns the kernel into a real-time kernel by replacing
Lines 95-101 config PREEMPTION Link Here
95
110
96
config PREEMPT_DYNAMIC
111
config PREEMPT_DYNAMIC
97
	bool "Preemption behaviour defined on boot"
112
	bool "Preemption behaviour defined on boot"
98
	depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
113
	depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT && !PREEMPT_AUTO
99
	select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
114
	select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY
100
	select PREEMPT_BUILD
115
	select PREEMPT_BUILD
101
	default y if HAVE_PREEMPT_DYNAMIC_CALL
116
	default y if HAVE_PREEMPT_DYNAMIC_CALL
(-)a/kernel/entry/common.c (-2 / +2 lines)
Lines 98-104 __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs, Link Here
98
98
99
		local_irq_enable_exit_to_user(ti_work);
99
		local_irq_enable_exit_to_user(ti_work);
100
100
101
		if (ti_work & _TIF_NEED_RESCHED)
101
		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
102
			schedule();
102
			schedule();
103
103
104
		if (ti_work & _TIF_UPROBE)
104
		if (ti_work & _TIF_UPROBE)
Lines 307-313 void raw_irqentry_exit_cond_resched(void) Link Here
307
		rcu_irq_exit_check_preempt();
307
		rcu_irq_exit_check_preempt();
308
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
308
		if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
309
			WARN_ON_ONCE(!on_thread_stack());
309
			WARN_ON_ONCE(!on_thread_stack());
310
		if (need_resched())
310
		if (test_tsk_need_resched(current))
311
			preempt_schedule_irq();
311
			preempt_schedule_irq();
312
	}
312
	}
313
}
313
}
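The raw_irqentry_exit_cond_resched() change above is the counterpart of the earlier need_resched() change: interrupt return preempts only on the immediate TIF_NEED_RESCHED bit and leaves the lazy bit for the next ordinary scheduling point. A hedged illustration; demo_irqentry_preempt() is hypothetical:

/* Hedged sketch of the two predicates after this series:
 *   need_resched()            - TIF_NEED_RESCHED_LAZY or TIF_NEED_RESCHED
 *   test_tsk_need_resched(p)  - TIF_NEED_RESCHED only
 */
#include <linux/preempt.h>
#include <linux/sched.h>

static void demo_irqentry_preempt(void)
{
	if (test_tsk_need_resched(current))	/* immediate request only */
		preempt_schedule_irq();		/* lazy bit is handled later */
}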
(-)a/kernel/entry/kvm.c (-1 / +1 lines)
Lines 13-19 static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) Link Here
13
			return -EINTR;
13
			return -EINTR;
14
		}
14
		}
15
15
16
		if (ti_work & _TIF_NEED_RESCHED)
16
		if (ti_work & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
17
			schedule();
17
			schedule();
18
18
19
		if (ti_work & _TIF_NOTIFY_RESUME)
19
		if (ti_work & _TIF_NOTIFY_RESUME)
(-)a/kernel/events/core.c (-44 / +54 lines)
Lines 2283-2303 event_sched_out(struct perf_event *event, struct perf_event_context *ctx) Link Here
2283
		state = PERF_EVENT_STATE_OFF;
2283
		state = PERF_EVENT_STATE_OFF;
2284
	}
2284
	}
2285
2285
2286
	if (event->pending_sigtrap) {
2287
		bool dec = true;
2288
2289
		event->pending_sigtrap = 0;
2290
		if (state != PERF_EVENT_STATE_OFF &&
2291
		    !event->pending_work) {
2292
			event->pending_work = 1;
2293
			dec = false;
2294
			WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
2295
			task_work_add(current, &event->pending_task, TWA_RESUME);
2296
		}
2297
		if (dec)
2298
			local_dec(&event->ctx->nr_pending);
2299
	}
2300
2301
	perf_event_set_state(event, state);
2286
	perf_event_set_state(event, state);
2302
2287
2303
	if (!is_software_event(event))
2288
	if (!is_software_event(event))
Lines 2464-2470 static void __perf_event_disable(struct perf_event *event, Link Here
2464
 * hold the top-level event's child_mutex, so any descendant that
2449
 * hold the top-level event's child_mutex, so any descendant that
2465
 * goes to exit will block in perf_event_exit_event().
2450
 * goes to exit will block in perf_event_exit_event().
2466
 *
2451
 *
2467
 * When called from perf_pending_irq it's OK because event->ctx
2452
 * When called from perf_pending_disable it's OK because event->ctx
2468
 * is the current context on this CPU and preemption is disabled,
2453
 * is the current context on this CPU and preemption is disabled,
2469
 * hence we can't get into perf_event_task_sched_out for this context.
2454
 * hence we can't get into perf_event_task_sched_out for this context.
2470
 */
2455
 */
Lines 2504-2510 EXPORT_SYMBOL_GPL(perf_event_disable); Link Here
2504
void perf_event_disable_inatomic(struct perf_event *event)
2489
void perf_event_disable_inatomic(struct perf_event *event)
2505
{
2490
{
2506
	event->pending_disable = 1;
2491
	event->pending_disable = 1;
2507
	irq_work_queue(&event->pending_irq);
2492
	irq_work_queue(&event->pending_disable_irq);
2508
}
2493
}
2509
2494
2510
#define MAX_INTERRUPTS (~0ULL)
2495
#define MAX_INTERRUPTS (~0ULL)
Lines 5190-5195 static void perf_addr_filters_splice(struct perf_event *event, Link Here
5190
static void _free_event(struct perf_event *event)
5175
static void _free_event(struct perf_event *event)
5191
{
5176
{
5192
	irq_work_sync(&event->pending_irq);
5177
	irq_work_sync(&event->pending_irq);
5178
	irq_work_sync(&event->pending_disable_irq);
5193
5179
5194
	unaccount_event(event);
5180
	unaccount_event(event);
5195
5181
Lines 6726-6732 static void perf_sigtrap(struct perf_event *event) Link Here
6726
/*
6712
/*
6727
 * Deliver the pending work in-event-context or follow the context.
6713
 * Deliver the pending work in-event-context or follow the context.
6728
 */
6714
 */
6729
static void __perf_pending_irq(struct perf_event *event)
6715
static void __perf_pending_disable(struct perf_event *event)
6730
{
6716
{
6731
	int cpu = READ_ONCE(event->oncpu);
6717
	int cpu = READ_ONCE(event->oncpu);
6732
6718
Lines 6741-6751 static void __perf_pending_irq(struct perf_event *event) Link Here
6741
	 * Yay, we hit home and are in the context of the event.
6727
	 * Yay, we hit home and are in the context of the event.
6742
	 */
6728
	 */
6743
	if (cpu == smp_processor_id()) {
6729
	if (cpu == smp_processor_id()) {
6744
		if (event->pending_sigtrap) {
6745
			event->pending_sigtrap = 0;
6746
			perf_sigtrap(event);
6747
			local_dec(&event->ctx->nr_pending);
6748
		}
6749
		if (event->pending_disable) {
6730
		if (event->pending_disable) {
6750
			event->pending_disable = 0;
6731
			event->pending_disable = 0;
6751
			perf_event_disable_local(event);
6732
			perf_event_disable_local(event);
Lines 6769-6779 static void __perf_pending_irq(struct perf_event *event) Link Here
6769
	 *				  irq_work_queue(); // FAILS
6750
	 *				  irq_work_queue(); // FAILS
6770
	 *
6751
	 *
6771
	 *  irq_work_run()
6752
	 *  irq_work_run()
6772
	 *    perf_pending_irq()
6753
	 *    perf_pending_disable()
6773
	 *
6754
	 *
6774
	 * But the event runs on CPU-B and wants disabling there.
6755
	 * But the event runs on CPU-B and wants disabling there.
6775
	 */
6756
	 */
6776
	irq_work_queue_on(&event->pending_irq, cpu);
6757
	irq_work_queue_on(&event->pending_disable_irq, cpu);
6758
}
6759
6760
static void perf_pending_disable(struct irq_work *entry)
6761
{
6762
	struct perf_event *event = container_of(entry, struct perf_event, pending_disable_irq);
6763
	int rctx;
6764
6765
	/*
6766
	 * If we 'fail' here, that's OK, it means recursion is already disabled
6767
	 * and we won't recurse 'further'.
6768
	 */
6769
	rctx = perf_swevent_get_recursion_context();
6770
	__perf_pending_disable(event);
6771
	if (rctx >= 0)
6772
		perf_swevent_put_recursion_context(rctx);
6777
}
6773
}
6778
6774
6779
static void perf_pending_irq(struct irq_work *entry)
6775
static void perf_pending_irq(struct irq_work *entry)
Lines 6796-6803 static void perf_pending_irq(struct irq_work *entry) Link Here
6796
		perf_event_wakeup(event);
6792
		perf_event_wakeup(event);
6797
	}
6793
	}
6798
6794
6799
	__perf_pending_irq(event);
6800
6801
	if (rctx >= 0)
6795
	if (rctx >= 0)
6802
		perf_swevent_put_recursion_context(rctx);
6796
		perf_swevent_put_recursion_context(rctx);
6803
}
6797
}
Lines 6805-6818 static void perf_pending_irq(struct irq_work *entry) Link Here
6805
static void perf_pending_task(struct callback_head *head)
6799
static void perf_pending_task(struct callback_head *head)
6806
{
6800
{
6807
	struct perf_event *event = container_of(head, struct perf_event, pending_task);
6801
	struct perf_event *event = container_of(head, struct perf_event, pending_task);
6808
	int rctx;
6809
6810
	/*
6811
	 * If we 'fail' here, that's OK, it means recursion is already disabled
6812
	 * and we won't recurse 'further'.
6813
	 */
6814
	preempt_disable_notrace();
6815
	rctx = perf_swevent_get_recursion_context();
6816
6802
6817
	if (event->pending_work) {
6803
	if (event->pending_work) {
6818
		event->pending_work = 0;
6804
		event->pending_work = 0;
Lines 6820-6829 static void perf_pending_task(struct callback_head *head) Link Here
6820
		local_dec(&event->ctx->nr_pending);
6806
		local_dec(&event->ctx->nr_pending);
6821
	}
6807
	}
6822
6808
6823
	if (rctx >= 0)
6824
		perf_swevent_put_recursion_context(rctx);
6825
	preempt_enable_notrace();
6826
6827
	put_event(event);
6809
	put_event(event);
6828
}
6810
}
6829
6811
Lines 9588-9600 static int __perf_event_overflow(struct perf_event *event, Link Here
9588
9570
9589
		if (regs)
9571
		if (regs)
9590
			pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
9572
			pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
9591
		if (!event->pending_sigtrap) {
9573
		if (!event->pending_work) {
9592
			event->pending_sigtrap = pending_id;
9574
			event->pending_work = pending_id;
9593
			local_inc(&event->ctx->nr_pending);
9575
			local_inc(&event->ctx->nr_pending);
9576
			WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
9577
			task_work_add(current, &event->pending_task, TWA_RESUME);
9578
			/*
9579
			 * The NMI path returns directly to userland. The
9580
			 * irq_work is raised as a dummy interrupt to ensure
9581
			 * regular return path to user is taken and task_work
9582
			 * is processed.
9583
			 */
9584
			if (in_nmi())
9585
				irq_work_queue(&event->pending_disable_irq);
9594
		} else if (event->attr.exclude_kernel && valid_sample) {
9586
		} else if (event->attr.exclude_kernel && valid_sample) {
9595
			/*
9587
			/*
9596
			 * Should not be able to return to user space without
9588
			 * Should not be able to return to user space without
9597
			 * consuming pending_sigtrap; with exceptions:
9589
			 * consuming pending_work; with exceptions:
9598
			 *
9590
			 *
9599
			 *  1. Where !exclude_kernel, events can overflow again
9591
			 *  1. Where !exclude_kernel, events can overflow again
9600
			 *     in the kernel without returning to user space.
9592
			 *     in the kernel without returning to user space.
Lines 9604-9616 static int __perf_event_overflow(struct perf_event *event, Link Here
9604
			 *     To approximate progress (with false negatives),
9596
			 *     To approximate progress (with false negatives),
9605
			 *     check 32-bit hash of the current IP.
9597
			 *     check 32-bit hash of the current IP.
9606
			 */
9598
			 */
9607
			WARN_ON_ONCE(event->pending_sigtrap != pending_id);
9599
			WARN_ON_ONCE(event->pending_work != pending_id);
9608
		}
9600
		}
9609
9601
9610
		event->pending_addr = 0;
9602
		event->pending_addr = 0;
9611
		if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
9603
		if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
9612
			event->pending_addr = data->addr;
9604
			event->pending_addr = data->addr;
9613
		irq_work_queue(&event->pending_irq);
9614
	}
9605
	}
9615
9606
9616
	READ_ONCE(event->overflow_handler)(event, data, regs);
9607
	READ_ONCE(event->overflow_handler)(event, data, regs);
Lines 11931-11936 perf_event_alloc(struct perf_event_attr *attr, int cpu, Link Here
11931
11922
11932
	init_waitqueue_head(&event->waitq);
11923
	init_waitqueue_head(&event->waitq);
11933
	init_irq_work(&event->pending_irq, perf_pending_irq);
11924
	init_irq_work(&event->pending_irq, perf_pending_irq);
11925
	event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
11934
	init_task_work(&event->pending_task, perf_pending_task);
11926
	init_task_work(&event->pending_task, perf_pending_task);
11935
11927
11936
	mutex_init(&event->mmap_mutex);
11928
	mutex_init(&event->mmap_mutex);
Lines 13045-13050 static void sync_child_event(struct perf_event *child_event) Link Here
13045
		     &parent_event->child_total_time_running);
13037
		     &parent_event->child_total_time_running);
13046
}
13038
}
13047
13039
13040
static bool task_work_cb_match(struct callback_head *cb, void *data)
13041
{
13042
	struct perf_event *event = container_of(cb, struct perf_event, pending_task);
13043
13044
	return event == data;
13045
}
13046
13048
static void
13047
static void
13049
perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
13048
perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
13050
{
13049
{
Lines 13084-13089 perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx) Link Here
13084
		 * Kick perf_poll() for is_event_hup();
13083
		 * Kick perf_poll() for is_event_hup();
13085
		 */
13084
		 */
13086
		perf_event_wakeup(parent_event);
13085
		perf_event_wakeup(parent_event);
13086
		/*
13087
		 * Cancel pending task_work and update counters if it has not
13088
		 * yet been delivered to userland. free_event() expects the
13089
		 * reference counter at 1 and keeping the event around until the
13090
		 * task return to userland will be a unexpected.
13091
		 */
13092
		if (event->pending_work &&
13093
		    task_work_cancel_match(current, task_work_cb_match, event)) {
13094
			put_event(event);
13095
			local_dec(&event->ctx->nr_pending);
13096
		}
13087
		free_event(event);
13097
		free_event(event);
13088
		put_event(parent_event);
13098
		put_event(parent_event);
13089
		return;
13099
		return;
(-)a/kernel/ksysfs.c (+12 lines)
Lines 181-186 KERNEL_ATTR_RO(crash_elfcorehdr_size); Link Here
181
181
182
#endif /* CONFIG_VMCORE_INFO */
182
#endif /* CONFIG_VMCORE_INFO */
183
183
184
#if defined(CONFIG_PREEMPT_RT)
185
static ssize_t realtime_show(struct kobject *kobj,
186
			     struct kobj_attribute *attr, char *buf)
187
{
188
	return sprintf(buf, "%d\n", 1);
189
}
190
KERNEL_ATTR_RO(realtime);
191
#endif
192
184
/* whether file capabilities are enabled */
193
/* whether file capabilities are enabled */
185
static ssize_t fscaps_show(struct kobject *kobj,
194
static ssize_t fscaps_show(struct kobject *kobj,
186
				  struct kobj_attribute *attr, char *buf)
195
				  struct kobj_attribute *attr, char *buf)
Lines 278-283 static struct attribute * kernel_attrs[] = { Link Here
278
#ifndef CONFIG_TINY_RCU
287
#ifndef CONFIG_TINY_RCU
279
	&rcu_expedited_attr.attr,
288
	&rcu_expedited_attr.attr,
280
	&rcu_normal_attr.attr,
289
	&rcu_normal_attr.attr,
290
#endif
291
#ifdef CONFIG_PREEMPT_RT
292
	&realtime_attr.attr,
281
#endif
293
#endif
282
	NULL
294
	NULL
283
};
295
};
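The new attribute above reports a constant "1" on CONFIG_PREEMPT_RT kernels and is simply absent otherwise. A hedged user-space sketch for probing it (the path comes from the hunk above; everything else is hypothetical):

/* Hedged user-space sketch: treat a missing /sys/kernel/realtime as "not RT",
 * since the file only exists on PREEMPT_RT kernels carrying this change. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/realtime", "r");
	int rt = 0;

	if (f) {
		if (fscanf(f, "%d", &rt) != 1)
			rt = 0;
		fclose(f);
	}
	printf("PREEMPT_RT kernel: %s\n", rt ? "yes" : "no");
	return 0;
}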
(-)a/kernel/locking/lockdep.c (-2 / +82 lines)
Lines 56-61 Link Here
56
#include <linux/kprobes.h>
56
#include <linux/kprobes.h>
57
#include <linux/lockdep.h>
57
#include <linux/lockdep.h>
58
#include <linux/context_tracking.h>
58
#include <linux/context_tracking.h>
59
#include <linux/console.h>
59
60
60
#include <asm/sections.h>
61
#include <asm/sections.h>
61
62
Lines 574-581 static struct lock_trace *save_trace(void) Link Here
574
		if (!debug_locks_off_graph_unlock())
575
		if (!debug_locks_off_graph_unlock())
575
			return NULL;
576
			return NULL;
576
577
578
		nbcon_cpu_emergency_enter();
577
		print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
579
		print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
578
		dump_stack();
580
		dump_stack();
581
		nbcon_cpu_emergency_exit();
579
582
580
		return NULL;
583
		return NULL;
581
	}
584
	}
Lines 888-898 look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) Link Here
888
	if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
891
	if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
889
		instrumentation_begin();
892
		instrumentation_begin();
890
		debug_locks_off();
893
		debug_locks_off();
894
		nbcon_cpu_emergency_enter();
891
		printk(KERN_ERR
895
		printk(KERN_ERR
892
			"BUG: looking up invalid subclass: %u\n", subclass);
896
			"BUG: looking up invalid subclass: %u\n", subclass);
893
		printk(KERN_ERR
897
		printk(KERN_ERR
894
			"turning off the locking correctness validator.\n");
898
			"turning off the locking correctness validator.\n");
895
		dump_stack();
899
		dump_stack();
900
		nbcon_cpu_emergency_exit();
896
		instrumentation_end();
901
		instrumentation_end();
897
		return NULL;
902
		return NULL;
898
	}
903
	}
Lines 969-979 static bool assign_lock_key(struct lockdep_map *lock) Link Here
969
	else {
974
	else {
970
		/* Debug-check: all keys must be persistent! */
975
		/* Debug-check: all keys must be persistent! */
971
		debug_locks_off();
976
		debug_locks_off();
977
		nbcon_cpu_emergency_enter();
972
		pr_err("INFO: trying to register non-static key.\n");
978
		pr_err("INFO: trying to register non-static key.\n");
973
		pr_err("The code is fine but needs lockdep annotation, or maybe\n");
979
		pr_err("The code is fine but needs lockdep annotation, or maybe\n");
974
		pr_err("you didn't initialize this object before use?\n");
980
		pr_err("you didn't initialize this object before use?\n");
975
		pr_err("turning off the locking correctness validator.\n");
981
		pr_err("turning off the locking correctness validator.\n");
976
		dump_stack();
982
		dump_stack();
983
		nbcon_cpu_emergency_exit();
977
		return false;
984
		return false;
978
	}
985
	}
979
986
Lines 1317-1324 register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) Link Here
1317
			return NULL;
1324
			return NULL;
1318
		}
1325
		}
1319
1326
1327
		nbcon_cpu_emergency_enter();
1320
		print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
1328
		print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
1321
		dump_stack();
1329
		dump_stack();
1330
		nbcon_cpu_emergency_exit();
1322
		return NULL;
1331
		return NULL;
1323
	}
1332
	}
1324
	nr_lock_classes++;
1333
	nr_lock_classes++;
Lines 1350-1360 register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) Link Here
1350
	if (verbose(class)) {
1359
	if (verbose(class)) {
1351
		graph_unlock();
1360
		graph_unlock();
1352
1361
1362
		nbcon_cpu_emergency_enter();
1353
		printk("\nnew class %px: %s", class->key, class->name);
1363
		printk("\nnew class %px: %s", class->key, class->name);
1354
		if (class->name_version > 1)
1364
		if (class->name_version > 1)
1355
			printk(KERN_CONT "#%d", class->name_version);
1365
			printk(KERN_CONT "#%d", class->name_version);
1356
		printk(KERN_CONT "\n");
1366
		printk(KERN_CONT "\n");
1357
		dump_stack();
1367
		dump_stack();
1368
		nbcon_cpu_emergency_exit();
1358
1369
1359
		if (!graph_lock()) {
1370
		if (!graph_lock()) {
1360
			return NULL;
1371
			return NULL;
Lines 1393-1400 static struct lock_list *alloc_list_entry(void) Link Here
1393
		if (!debug_locks_off_graph_unlock())
1404
		if (!debug_locks_off_graph_unlock())
1394
			return NULL;
1405
			return NULL;
1395
1406
1407
		nbcon_cpu_emergency_enter();
1396
		print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!");
1408
		print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!");
1397
		dump_stack();
1409
		dump_stack();
1410
		nbcon_cpu_emergency_exit();
1398
		return NULL;
1411
		return NULL;
1399
	}
1412
	}
1400
	nr_list_entries++;
1413
	nr_list_entries++;
Lines 2040-2045 static noinline void print_circular_bug(struct lock_list *this, Link Here
2040
2053
2041
	depth = get_lock_depth(target);
2054
	depth = get_lock_depth(target);
2042
2055
2056
	nbcon_cpu_emergency_enter();
2057
2043
	print_circular_bug_header(target, depth, check_src, check_tgt);
2058
	print_circular_bug_header(target, depth, check_src, check_tgt);
2044
2059
2045
	parent = get_lock_parent(target);
2060
	parent = get_lock_parent(target);
Lines 2058-2063 static noinline void print_circular_bug(struct lock_list *this, Link Here
2058
2073
2059
	printk("\nstack backtrace:\n");
2074
	printk("\nstack backtrace:\n");
2060
	dump_stack();
2075
	dump_stack();
2076
2077
	nbcon_cpu_emergency_exit();
2061
}
2078
}
2062
2079
2063
static noinline void print_bfs_bug(int ret)
2080
static noinline void print_bfs_bug(int ret)
Lines 2570-2575 print_bad_irq_dependency(struct task_struct *curr, Link Here
2570
	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2587
	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2571
		return;
2588
		return;
2572
2589
2590
	nbcon_cpu_emergency_enter();
2591
2573
	pr_warn("\n");
2592
	pr_warn("\n");
2574
	pr_warn("=====================================================\n");
2593
	pr_warn("=====================================================\n");
2575
	pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n",
2594
	pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n",
Lines 2619-2629 print_bad_irq_dependency(struct task_struct *curr, Link Here
2619
	pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
2638
	pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
2620
	next_root->trace = save_trace();
2639
	next_root->trace = save_trace();
2621
	if (!next_root->trace)
2640
	if (!next_root->trace)
2622
		return;
2641
		goto out;
2623
	print_shortest_lock_dependencies(forwards_entry, next_root);
2642
	print_shortest_lock_dependencies(forwards_entry, next_root);
2624
2643
2625
	pr_warn("\nstack backtrace:\n");
2644
	pr_warn("\nstack backtrace:\n");
2626
	dump_stack();
2645
	dump_stack();
2646
out:
2647
	nbcon_cpu_emergency_exit();
2627
}
2648
}
2628
2649
2629
static const char *state_names[] = {
2650
static const char *state_names[] = {
Lines 2988-2993 print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, Link Here
2988
	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
3009
	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2989
		return;
3010
		return;
2990
3011
3012
	nbcon_cpu_emergency_enter();
3013
2991
	pr_warn("\n");
3014
	pr_warn("\n");
2992
	pr_warn("============================================\n");
3015
	pr_warn("============================================\n");
2993
	pr_warn("WARNING: possible recursive locking detected\n");
3016
	pr_warn("WARNING: possible recursive locking detected\n");
Lines 3010-3015 print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, Link Here
3010
3033
3011
	pr_warn("\nstack backtrace:\n");
3034
	pr_warn("\nstack backtrace:\n");
3012
	dump_stack();
3035
	dump_stack();
3036
3037
	nbcon_cpu_emergency_exit();
3013
}
3038
}
3014
3039
3015
/*
3040
/*
Lines 3607-3612 static void print_collision(struct task_struct *curr, Link Here
3607
			struct held_lock *hlock_next,
3632
			struct held_lock *hlock_next,
3608
			struct lock_chain *chain)
3633
			struct lock_chain *chain)
3609
{
3634
{
3635
	nbcon_cpu_emergency_enter();
3636
3610
	pr_warn("\n");
3637
	pr_warn("\n");
3611
	pr_warn("============================\n");
3638
	pr_warn("============================\n");
3612
	pr_warn("WARNING: chain_key collision\n");
3639
	pr_warn("WARNING: chain_key collision\n");
Lines 3623-3628 static void print_collision(struct task_struct *curr, Link Here
3623
3650
3624
	pr_warn("\nstack backtrace:\n");
3651
	pr_warn("\nstack backtrace:\n");
3625
	dump_stack();
3652
	dump_stack();
3653
3654
	nbcon_cpu_emergency_exit();
3626
}
3655
}
3627
#endif
3656
#endif
3628
3657
Lines 3713-3720 static inline int add_chain_cache(struct task_struct *curr, Link Here
3713
		if (!debug_locks_off_graph_unlock())
3742
		if (!debug_locks_off_graph_unlock())
3714
			return 0;
3743
			return 0;
3715
3744
3745
		nbcon_cpu_emergency_enter();
3716
		print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
3746
		print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
3717
		dump_stack();
3747
		dump_stack();
3748
		nbcon_cpu_emergency_exit();
3718
		return 0;
3749
		return 0;
3719
	}
3750
	}
3720
	chain->chain_key = chain_key;
3751
	chain->chain_key = chain_key;
Lines 3731-3738 static inline int add_chain_cache(struct task_struct *curr, Link Here
3731
		if (!debug_locks_off_graph_unlock())
3762
		if (!debug_locks_off_graph_unlock())
3732
			return 0;
3763
			return 0;
3733
3764
3765
		nbcon_cpu_emergency_enter();
3734
		print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
3766
		print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
3735
		dump_stack();
3767
		dump_stack();
3768
		nbcon_cpu_emergency_exit();
3736
		return 0;
3769
		return 0;
3737
	}
3770
	}
3738
3771
Lines 3971-3976 print_usage_bug(struct task_struct *curr, struct held_lock *this, Link Here
3971
	if (!debug_locks_off() || debug_locks_silent)
4004
	if (!debug_locks_off() || debug_locks_silent)
3972
		return;
4005
		return;
3973
4006
4007
	nbcon_cpu_emergency_enter();
4008
3974
	pr_warn("\n");
4009
	pr_warn("\n");
3975
	pr_warn("================================\n");
4010
	pr_warn("================================\n");
3976
	pr_warn("WARNING: inconsistent lock state\n");
4011
	pr_warn("WARNING: inconsistent lock state\n");
Lines 3999-4004 print_usage_bug(struct task_struct *curr, struct held_lock *this, Link Here
3999
4034
4000
	pr_warn("\nstack backtrace:\n");
4035
	pr_warn("\nstack backtrace:\n");
4001
	dump_stack();
4036
	dump_stack();
4037
4038
	nbcon_cpu_emergency_exit();
4002
}
4039
}
4003
4040
4004
/*
4041
/*
Lines 4033-4038 print_irq_inversion_bug(struct task_struct *curr, Link Here
4033
	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
4070
	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
4034
		return;
4071
		return;
4035
4072
4073
	nbcon_cpu_emergency_enter();
4074
4036
	pr_warn("\n");
4075
	pr_warn("\n");
4037
	pr_warn("========================================================\n");
4076
	pr_warn("========================================================\n");
4038
	pr_warn("WARNING: possible irq lock inversion dependency detected\n");
4077
	pr_warn("WARNING: possible irq lock inversion dependency detected\n");
Lines 4073-4083 print_irq_inversion_bug(struct task_struct *curr, Link Here
4073
	pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
4112
	pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
4074
	root->trace = save_trace();
4113
	root->trace = save_trace();
4075
	if (!root->trace)
4114
	if (!root->trace)
4076
		return;
4115
		goto out;
4077
	print_shortest_lock_dependencies(other, root);
4116
	print_shortest_lock_dependencies(other, root);
4078
4117
4079
	pr_warn("\nstack backtrace:\n");
4118
	pr_warn("\nstack backtrace:\n");
4080
	dump_stack();
4119
	dump_stack();
4120
out:
4121
	nbcon_cpu_emergency_exit();
4081
}
4122
}
4082
4123
4083
/*
4124
/*
Lines 4154-4159 void print_irqtrace_events(struct task_struct *curr) Link Here
4154
{
4195
{
4155
	const struct irqtrace_events *trace = &curr->irqtrace;
4196
	const struct irqtrace_events *trace = &curr->irqtrace;
4156
4197
4198
	nbcon_cpu_emergency_enter();
4199
4157
	printk("irq event stamp: %u\n", trace->irq_events);
4200
	printk("irq event stamp: %u\n", trace->irq_events);
4158
	printk("hardirqs last  enabled at (%u): [<%px>] %pS\n",
4201
	printk("hardirqs last  enabled at (%u): [<%px>] %pS\n",
4159
		trace->hardirq_enable_event, (void *)trace->hardirq_enable_ip,
4202
		trace->hardirq_enable_event, (void *)trace->hardirq_enable_ip,
Lines 4167-4172 void print_irqtrace_events(struct task_struct *curr) Link Here
4167
	printk("softirqs last disabled at (%u): [<%px>] %pS\n",
4210
	printk("softirqs last disabled at (%u): [<%px>] %pS\n",
4168
		trace->softirq_disable_event, (void *)trace->softirq_disable_ip,
4211
		trace->softirq_disable_event, (void *)trace->softirq_disable_ip,
4169
		(void *)trace->softirq_disable_ip);
4212
		(void *)trace->softirq_disable_ip);
4213
4214
	nbcon_cpu_emergency_exit();
4170
}
4215
}
4171
4216
4172
static int HARDIRQ_verbose(struct lock_class *class)
4217
static int HARDIRQ_verbose(struct lock_class *class)
Lines 4687-4696 static int mark_lock(struct task_struct *curr, struct held_lock *this, Link Here
4687
	 * We must printk outside of the graph_lock:
4732
	 * We must printk outside of the graph_lock:
4688
	 */
4733
	 */
4689
	if (ret == 2) {
4734
	if (ret == 2) {
4735
		nbcon_cpu_emergency_enter();
4690
		printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
4736
		printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
4691
		print_lock(this);
4737
		print_lock(this);
4692
		print_irqtrace_events(curr);
4738
		print_irqtrace_events(curr);
4693
		dump_stack();
4739
		dump_stack();
4740
		nbcon_cpu_emergency_exit();
4694
	}
4741
	}
4695
4742
4696
	return ret;
4743
	return ret;
Lines 4731-4736 print_lock_invalid_wait_context(struct task_struct *curr, Link Here
4731
	if (debug_locks_silent)
4778
	if (debug_locks_silent)
4732
		return 0;
4779
		return 0;
4733
4780
4781
	nbcon_cpu_emergency_enter();
4782
4734
	pr_warn("\n");
4783
	pr_warn("\n");
4735
	pr_warn("=============================\n");
4784
	pr_warn("=============================\n");
4736
	pr_warn("[ BUG: Invalid wait context ]\n");
4785
	pr_warn("[ BUG: Invalid wait context ]\n");
Lines 4750-4755 print_lock_invalid_wait_context(struct task_struct *curr, Link Here
4750
	pr_warn("stack backtrace:\n");
4799
	pr_warn("stack backtrace:\n");
4751
	dump_stack();
4800
	dump_stack();
4752
4801
4802
	nbcon_cpu_emergency_exit();
4803
4753
	return 0;
4804
	return 0;
4754
}
4805
}
4755
4806
Lines 4954-4959 print_lock_nested_lock_not_held(struct task_struct *curr, Link Here
4954
	if (debug_locks_silent)
5005
	if (debug_locks_silent)
4955
		return;
5006
		return;
4956
5007
5008
	nbcon_cpu_emergency_enter();
5009
4957
	pr_warn("\n");
5010
	pr_warn("\n");
4958
	pr_warn("==================================\n");
5011
	pr_warn("==================================\n");
4959
	pr_warn("WARNING: Nested lock was not taken\n");
5012
	pr_warn("WARNING: Nested lock was not taken\n");
Lines 4974-4979 print_lock_nested_lock_not_held(struct task_struct *curr, Link Here
4974
5027
4975
	pr_warn("\nstack backtrace:\n");
5028
	pr_warn("\nstack backtrace:\n");
4976
	dump_stack();
5029
	dump_stack();
5030
5031
	nbcon_cpu_emergency_exit();
4977
}
5032
}
4978
5033
4979
static int __lock_is_held(const struct lockdep_map *lock, int read);
5034
static int __lock_is_held(const struct lockdep_map *lock, int read);
Lines 5019-5029 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, Link Here
5019
	debug_class_ops_inc(class);
5074
	debug_class_ops_inc(class);
5020
5075
5021
	if (very_verbose(class)) {
5076
	if (very_verbose(class)) {
5077
		nbcon_cpu_emergency_enter();
5022
		printk("\nacquire class [%px] %s", class->key, class->name);
5078
		printk("\nacquire class [%px] %s", class->key, class->name);
5023
		if (class->name_version > 1)
5079
		if (class->name_version > 1)
5024
			printk(KERN_CONT "#%d", class->name_version);
5080
			printk(KERN_CONT "#%d", class->name_version);
5025
		printk(KERN_CONT "\n");
5081
		printk(KERN_CONT "\n");
5026
		dump_stack();
5082
		dump_stack();
5083
		nbcon_cpu_emergency_exit();
5027
	}
5084
	}
5028
5085
5029
	/*
5086
	/*
Lines 5150-5155 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, Link Here
5150
#endif
5207
#endif
5151
	if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
5208
	if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
5152
		debug_locks_off();
5209
		debug_locks_off();
5210
		nbcon_cpu_emergency_enter();
5153
		print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!");
5211
		print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!");
5154
		printk(KERN_DEBUG "depth: %i  max: %lu!\n",
5212
		printk(KERN_DEBUG "depth: %i  max: %lu!\n",
5155
		       curr->lockdep_depth, MAX_LOCK_DEPTH);
5213
		       curr->lockdep_depth, MAX_LOCK_DEPTH);
Lines 5157-5162 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, Link Here
5157
		lockdep_print_held_locks(current);
5215
		lockdep_print_held_locks(current);
5158
		debug_show_all_locks();
5216
		debug_show_all_locks();
5159
		dump_stack();
5217
		dump_stack();
5218
		nbcon_cpu_emergency_exit();
5160
5219
5161
		return 0;
5220
		return 0;
5162
	}
5221
	}
Lines 5176-5181 static void print_unlock_imbalance_bug(struct task_struct *curr, Link Here
5176
	if (debug_locks_silent)
5235
	if (debug_locks_silent)
5177
		return;
5236
		return;
5178
5237
5238
	nbcon_cpu_emergency_enter();
5239
5179
	pr_warn("\n");
5240
	pr_warn("\n");
5180
	pr_warn("=====================================\n");
5241
	pr_warn("=====================================\n");
5181
	pr_warn("WARNING: bad unlock balance detected!\n");
5242
	pr_warn("WARNING: bad unlock balance detected!\n");
Lines 5192-5197 static void print_unlock_imbalance_bug(struct task_struct *curr, Link Here
5192
5253
5193
	pr_warn("\nstack backtrace:\n");
5254
	pr_warn("\nstack backtrace:\n");
5194
	dump_stack();
5255
	dump_stack();
5256
5257
	nbcon_cpu_emergency_exit();
5195
}
5258
}
5196
5259
5197
static noinstr int match_held_lock(const struct held_lock *hlock,
5260
static noinstr int match_held_lock(const struct held_lock *hlock,
Lines 5895-5900 static void print_lock_contention_bug(struct task_struct *curr, Link Here
5895
	if (debug_locks_silent)
5958
	if (debug_locks_silent)
5896
		return;
5959
		return;
5897
5960
5961
	nbcon_cpu_emergency_enter();
5962
5898
	pr_warn("\n");
5963
	pr_warn("\n");
5899
	pr_warn("=================================\n");
5964
	pr_warn("=================================\n");
5900
	pr_warn("WARNING: bad contention detected!\n");
5965
	pr_warn("WARNING: bad contention detected!\n");
Lines 5911-5916 static void print_lock_contention_bug(struct task_struct *curr, Link Here
5911
5976
5912
	pr_warn("\nstack backtrace:\n");
5977
	pr_warn("\nstack backtrace:\n");
5913
	dump_stack();
5978
	dump_stack();
5979
5980
	nbcon_cpu_emergency_exit();
5914
}
5981
}
5915
5982
5916
static void
5983
static void
Lines 6524-6529 print_freed_lock_bug(struct task_struct *curr, const void *mem_from, Link Here
6524
	if (debug_locks_silent)
6591
	if (debug_locks_silent)
6525
		return;
6592
		return;
6526
6593
6594
	nbcon_cpu_emergency_enter();
6595
6527
	pr_warn("\n");
6596
	pr_warn("\n");
6528
	pr_warn("=========================\n");
6597
	pr_warn("=========================\n");
6529
	pr_warn("WARNING: held lock freed!\n");
6598
	pr_warn("WARNING: held lock freed!\n");
Lines 6536-6541 print_freed_lock_bug(struct task_struct *curr, const void *mem_from, Link Here
6536
6605
6537
	pr_warn("\nstack backtrace:\n");
6606
	pr_warn("\nstack backtrace:\n");
6538
	dump_stack();
6607
	dump_stack();
6608
6609
	nbcon_cpu_emergency_exit();
6539
}
6610
}
6540
6611
6541
static inline int not_in_range(const void* mem_from, unsigned long mem_len,
6612
static inline int not_in_range(const void* mem_from, unsigned long mem_len,
Lines 6582-6587 static void print_held_locks_bug(void) Link Here
6582
	if (debug_locks_silent)
6653
	if (debug_locks_silent)
6583
		return;
6654
		return;
6584
6655
6656
	nbcon_cpu_emergency_enter();
6657
6585
	pr_warn("\n");
6658
	pr_warn("\n");
6586
	pr_warn("====================================\n");
6659
	pr_warn("====================================\n");
6587
	pr_warn("WARNING: %s/%d still has locks held!\n",
6660
	pr_warn("WARNING: %s/%d still has locks held!\n",
Lines 6591-6596 static void print_held_locks_bug(void) Link Here
6591
	lockdep_print_held_locks(current);
6664
	lockdep_print_held_locks(current);
6592
	pr_warn("\nstack backtrace:\n");
6665
	pr_warn("\nstack backtrace:\n");
6593
	dump_stack();
6666
	dump_stack();
6667
6668
	nbcon_cpu_emergency_exit();
6594
}
6669
}
6595
6670
6596
void debug_check_no_locks_held(void)
6671
void debug_check_no_locks_held(void)
Lines 6616-6621 void debug_show_all_locks(void) Link Here
6616
		if (!p->lockdep_depth)
6691
		if (!p->lockdep_depth)
6617
			continue;
6692
			continue;
6618
		lockdep_print_held_locks(p);
6693
		lockdep_print_held_locks(p);
6694
		nbcon_cpu_emergency_flush();
6619
		touch_nmi_watchdog();
6695
		touch_nmi_watchdog();
6620
		touch_all_softlockup_watchdogs();
6696
		touch_all_softlockup_watchdogs();
6621
	}
6697
	}
Lines 6648-6653 asmlinkage __visible void lockdep_sys_exit(void) Link Here
6648
	if (unlikely(curr->lockdep_depth)) {
6724
	if (unlikely(curr->lockdep_depth)) {
6649
		if (!debug_locks_off())
6725
		if (!debug_locks_off())
6650
			return;
6726
			return;
6727
		nbcon_cpu_emergency_enter();
6651
		pr_warn("\n");
6728
		pr_warn("\n");
6652
		pr_warn("================================================\n");
6729
		pr_warn("================================================\n");
6653
		pr_warn("WARNING: lock held when returning to user space!\n");
6730
		pr_warn("WARNING: lock held when returning to user space!\n");
Lines 6656-6661 asmlinkage __visible void lockdep_sys_exit(void) Link Here
6656
		pr_warn("%s/%d is leaving the kernel with locks still held!\n",
6733
		pr_warn("%s/%d is leaving the kernel with locks still held!\n",
6657
				curr->comm, curr->pid);
6734
				curr->comm, curr->pid);
6658
		lockdep_print_held_locks(curr);
6735
		lockdep_print_held_locks(curr);
6736
		nbcon_cpu_emergency_exit();
6659
	}
6737
	}
6660
6738
6661
	/*
6739
	/*
Lines 6672-6677 void lockdep_rcu_suspicious(const char *file, const int line, const char *s) Link Here
6672
	bool rcu = warn_rcu_enter();
6750
	bool rcu = warn_rcu_enter();
6673
6751
6674
	/* Note: the following can be executed concurrently, so be careful. */
6752
	/* Note: the following can be executed concurrently, so be careful. */
6753
	nbcon_cpu_emergency_enter();
6675
	pr_warn("\n");
6754
	pr_warn("\n");
6676
	pr_warn("=============================\n");
6755
	pr_warn("=============================\n");
6677
	pr_warn("WARNING: suspicious RCU usage\n");
6756
	pr_warn("WARNING: suspicious RCU usage\n");
Lines 6710-6715 void lockdep_rcu_suspicious(const char *file, const int line, const char *s) Link Here
6710
	lockdep_print_held_locks(curr);
6789
	lockdep_print_held_locks(curr);
6711
	pr_warn("\nstack backtrace:\n");
6790
	pr_warn("\nstack backtrace:\n");
6712
	dump_stack();
6791
	dump_stack();
6792
	nbcon_cpu_emergency_exit();
6713
	warn_rcu_exit(rcu);
6793
	warn_rcu_exit(rcu);
6714
}
6794
}
6715
EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
6795
EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
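Every lockdep report above gains the same bracketing: the diagnostic lines between enter and exit are emitted as one emergency section so nbcon consoles can flush them with priority. A hedged sketch of the pattern; demo_report() is hypothetical:

/* Hedged sketch of the bracketing added throughout lockdep.c: the multi-line
 * report between enter/exit is treated as a single emergency block. */
#include <linux/console.h>
#include <linux/printk.h>

static void demo_report(void)
{
	nbcon_cpu_emergency_enter();
	pr_warn("demo: something looks wrong\n");
	dump_stack();
	nbcon_cpu_emergency_exit();
}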
(-)a/kernel/panic.c (+9 lines)
Lines 368-373 void panic(const char *fmt, ...) Link Here
368
368
369
	panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
369
	panic_other_cpus_shutdown(_crash_kexec_post_notifiers);
370
370
371
	printk_legacy_allow_panic_sync();
372
371
	/*
373
	/*
372
	 * Run any panic handlers, including those that might need to
374
	 * Run any panic handlers, including those that might need to
373
	 * add information to the kmsg dump output.
375
	 * add information to the kmsg dump output.
Lines 457-462 void panic(const char *fmt, ...) Link Here
457
	 * Explicitly flush the kernel log buffer one last time.
459
	 * Explicitly flush the kernel log buffer one last time.
458
	 */
460
	 */
459
	console_flush_on_panic(CONSOLE_FLUSH_PENDING);
461
	console_flush_on_panic(CONSOLE_FLUSH_PENDING);
462
	nbcon_atomic_flush_unsafe();
460
463
461
	local_irq_enable();
464
	local_irq_enable();
462
	for (i = 0; ; i += PANIC_TIMER_STEP) {
465
	for (i = 0; ; i += PANIC_TIMER_STEP) {
Lines 635-640 bool oops_may_print(void) Link Here
635
 */
638
 */
636
void oops_enter(void)
639
void oops_enter(void)
637
{
640
{
641
	nbcon_cpu_emergency_enter();
638
	tracing_off();
642
	tracing_off();
639
	/* can't trust the integrity of the kernel anymore: */
643
	/* can't trust the integrity of the kernel anymore: */
640
	debug_locks_off();
644
	debug_locks_off();
Lines 657-662 void oops_exit(void) Link Here
657
{
661
{
658
	do_oops_enter_exit();
662
	do_oops_enter_exit();
659
	print_oops_end_marker();
663
	print_oops_end_marker();
664
	nbcon_cpu_emergency_exit();
660
	kmsg_dump(KMSG_DUMP_OOPS);
665
	kmsg_dump(KMSG_DUMP_OOPS);
661
}
666
}
662
667
Lines 668-673 struct warn_args { Link Here
668
void __warn(const char *file, int line, void *caller, unsigned taint,
673
void __warn(const char *file, int line, void *caller, unsigned taint,
669
	    struct pt_regs *regs, struct warn_args *args)
674
	    struct pt_regs *regs, struct warn_args *args)
670
{
675
{
676
	nbcon_cpu_emergency_enter();
677
671
	disable_trace_on_warning();
678
	disable_trace_on_warning();
672
679
673
	if (file)
680
	if (file)
Lines 703-708 void __warn(const char *file, int line, void *caller, unsigned taint, Link Here
703
710
704
	/* Just a warning, don't kill lockdep. */
711
	/* Just a warning, don't kill lockdep. */
705
	add_taint(taint, LOCKDEP_STILL_OK);
712
	add_taint(taint, LOCKDEP_STILL_OK);
713
714
	nbcon_cpu_emergency_exit();
706
}
715
}
707
716
708
#ifdef CONFIG_BUG
717
#ifdef CONFIG_BUG
(-)a/kernel/printk/internal.h (-4 / +110 lines)
Lines 2-12 Link Here
2
/*
2
/*
3
 * internal.h - printk internal definitions
3
 * internal.h - printk internal definitions
4
 */
4
 */
5
#include <linux/percpu.h>
6
#include <linux/console.h>
5
#include <linux/console.h>
7
#include "printk_ringbuffer.h"
6
#include <linux/jump_label.h>
7
#include <linux/percpu.h>
8
#include <linux/types.h>
8
9
9
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
10
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
11
struct ctl_table;
10
void __init printk_sysctl_init(void);
12
void __init printk_sysctl_init(void);
11
int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
13
int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
12
			      void *buffer, size_t *lenp, loff_t *ppos);
14
			      void *buffer, size_t *lenp, loff_t *ppos);
Lines 20-25 int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, Link Here
20
		(con->flags & CON_BOOT) ? "boot" : "",		\
22
		(con->flags & CON_BOOT) ? "boot" : "",		\
21
		con->name, con->index, ##__VA_ARGS__)
23
		con->name, con->index, ##__VA_ARGS__)
22
24
25
#ifdef CONFIG_PREEMPT_RT
26
# define force_printkthreads()		(true)
27
#else
28
DECLARE_STATIC_KEY_FALSE(force_printkthreads_key);
29
# define force_printkthreads()		(static_branch_unlikely(&force_printkthreads_key))
30
#endif
31
23
#ifdef CONFIG_PRINTK
32
#ifdef CONFIG_PRINTK
24
33
25
#ifdef CONFIG_PRINTK_CALLER
34
#ifdef CONFIG_PRINTK_CALLER
Lines 43-49 enum printk_info_flags { Link Here
43
	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
52
	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
44
};
53
};
45
54
55
struct printk_ringbuffer;
56
struct dev_printk_info;
57
46
extern struct printk_ringbuffer *prb;
58
extern struct printk_ringbuffer *prb;
59
extern bool printk_threads_enabled;
47
60
48
__printf(4, 0)
61
__printf(4, 0)
49
int vprintk_store(int facility, int level,
62
int vprintk_store(int facility, int level,
Lines 53-58 int vprintk_store(int facility, int level, Link Here
53
__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
66
__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
54
__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
67
__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
55
68
69
void __printk_safe_enter(void);
70
void __printk_safe_exit(void);
71
56
bool printk_percpu_data_ready(void);
72
bool printk_percpu_data_ready(void);
57
73
58
#define printk_safe_enter_irqsave(flags)	\
74
#define printk_safe_enter_irqsave(flags)	\
Lines 71-82 void defer_console_output(void); Link Here
71
87
72
u16 printk_parse_prefix(const char *text, int *level,
88
u16 printk_parse_prefix(const char *text, int *level,
73
			enum printk_info_flags *flags);
89
			enum printk_info_flags *flags);
90
void console_lock_spinning_enable(void);
91
int console_lock_spinning_disable_and_check(int cookie);
74
92
75
u64 nbcon_seq_read(struct console *con);
93
u64 nbcon_seq_read(struct console *con);
76
void nbcon_seq_force(struct console *con, u64 seq);
94
void nbcon_seq_force(struct console *con, u64 seq);
77
bool nbcon_alloc(struct console *con);
95
bool nbcon_alloc(struct console *con);
78
void nbcon_init(struct console *con);
96
void nbcon_init(struct console *con, u64 init_seq);
79
void nbcon_free(struct console *con);
97
void nbcon_free(struct console *con);
98
enum nbcon_prio nbcon_get_default_prio(void);
99
void nbcon_atomic_flush_pending(void);
100
bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
101
				   int cookie, bool use_atomic);
102
void nbcon_kthread_create(struct console *con);
103
void nbcon_wake_threads(void);
104
void nbcon_legacy_kthread_create(void);
105
106
/*
107
 * Check if the given console is currently capable and allowed to print
108
 * records. Note that this function does not consider the current context,
109
 * which can also play a role in deciding if @con can be used to print
110
 * records.
111
 */
112
static inline bool console_is_usable(struct console *con, short flags, bool use_atomic)
113
{
114
	if (!(flags & CON_ENABLED))
115
		return false;
116
117
	if ((flags & CON_SUSPENDED))
118
		return false;
119
120
	if (flags & CON_NBCON) {
121
		if (use_atomic) {
122
			if (!con->write_atomic)
123
				return false;
124
		} else {
125
			if (!con->write_thread)
126
				return false;
127
		}
128
	} else {
129
		if (!con->write)
130
			return false;
131
	}
132
133
	/*
134
	 * Console drivers may assume that per-cpu resources have been
135
	 * allocated. So unless they're explicitly marked as being able to
136
	 * cope (CON_ANYTIME) don't call them until this CPU is officially up.
137
	 */
138
	if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
139
		return false;
140
141
	return true;
142
}
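
A minimal usage sketch (not taken from the patch itself), mirroring the shape used by nbcon_kthread_should_wakeup() and __nbcon_atomic_flush_pending() later in this patch: the flags are sampled under the console SRCU read lock and the caller states which write callback it intends to use:

	struct console *con;
	int cookie;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		short flags = console_srcu_read_flags(con);

		/* false: this caller would print via con->write_thread */
		if (!console_is_usable(con, flags, false))
			continue;

		/* ... safe to attempt printing on @con here ... */
	}
	console_srcu_read_unlock(cookie);
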
143
144
/**
145
 * nbcon_kthread_wake - Wake up a printk thread
146
 * @con:        Console to operate on
147
 */
148
static inline void nbcon_kthread_wake(struct console *con)
149
{
150
	/*
151
	 * Guarantee any new records can be seen by tasks preparing to wait
152
	 * before this context checks if the rcuwait is empty.
153
	 *
154
	 * The full memory barrier in rcuwait_wake_up() pairs with the full
155
	 * memory barrier within set_current_state() of
156
	 * ___rcuwait_wait_event(), which is called after prepare_to_rcuwait()
157
	 * adds the waiter but before it has checked the wait condition.
158
	 *
159
	 * This pairs with nbcon_kthread_func:A.
160
	 */
161
	rcuwait_wake_up(&con->rcuwait); /* LMM(nbcon_kthread_wake:A) */
162
}
80
163
81
#else
164
#else
82
165
Lines 84-89 void nbcon_free(struct console *con); Link Here
84
#define PRINTK_MESSAGE_MAX	0
167
#define PRINTK_MESSAGE_MAX	0
85
#define PRINTKRB_RECORD_MAX	0
168
#define PRINTKRB_RECORD_MAX	0
86
169
170
static inline void nbcon_kthread_wake(struct console *con) { }
171
static inline void nbcon_kthread_create(struct console *con) { }
172
#define printk_threads_enabled (false)
173
87
/*
174
/*
88
 * In !PRINTK builds we still export console_sem
175
 * In !PRINTK builds we still export console_sem
89
 * semaphore and some of console functions (console_unlock()/etc.), so
176
 * semaphore and some of console functions (console_unlock()/etc.), so
Lines 96-106 static inline bool printk_percpu_data_ready(void) { return false; } Link Here
96
static inline u64 nbcon_seq_read(struct console *con) { return 0; }
183
static inline u64 nbcon_seq_read(struct console *con) { return 0; }
97
static inline void nbcon_seq_force(struct console *con, u64 seq) { }
184
static inline void nbcon_seq_force(struct console *con, u64 seq) { }
98
static inline bool nbcon_alloc(struct console *con) { return false; }
185
static inline bool nbcon_alloc(struct console *con) { return false; }
99
static inline void nbcon_init(struct console *con) { }
186
static inline void nbcon_init(struct console *con, u64 init_seq) { }
100
static inline void nbcon_free(struct console *con) { }
187
static inline void nbcon_free(struct console *con) { }
188
static inline enum nbcon_prio nbcon_get_default_prio(void) { return NBCON_PRIO_NONE; }
189
static inline void nbcon_atomic_flush_pending(void) { }
190
static inline bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
191
						 int cookie, bool use_atomic) { return false; }
192
193
static inline bool console_is_usable(struct console *con, short flags,
194
				     bool use_atomic) { return false; }
101
195
102
#endif /* CONFIG_PRINTK */
196
#endif /* CONFIG_PRINTK */
103
197
198
extern bool have_boot_console;
199
extern bool have_legacy_console;
200
201
/*
202
 * Specifies if the console lock/unlock dance is needed for console
203
 * printing. If @have_boot_console is true, the nbcon consoles will
204
 * be printed serially along with the legacy consoles because nbcon
205
 * consoles cannot print simultaneously with boot consoles.
206
 */
207
#define printing_via_unlock (have_legacy_console || have_boot_console)
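
As a usage sketch, this macro gates the legacy flush path; the same shape appears in nbcon_cpu_emergency_flush() and printk_legacy_allow_panic_sync() later in this patch:

	/* Only do the console lock/unlock dance when a legacy or boot console needs it. */
	if (printing_via_unlock && !in_nmi()) {
		if (console_trylock())
			console_unlock();
	}
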
208
104
extern struct printk_buffers printk_shared_pbufs;
209
extern struct printk_buffers printk_shared_pbufs;
105
210
106
/**
211
/**
Lines 135-138 bool printk_get_next_message(struct printk_message *pmsg, u64 seq, Link Here
135
240
136
#ifdef CONFIG_PRINTK
241
#ifdef CONFIG_PRINTK
137
void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped);
242
void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped);
243
void console_prepend_replay(struct printk_message *pmsg);
138
#endif
244
#endif
(-)a/kernel/printk/nbcon.c (-18 / +834 lines)
Lines 2-12 Link Here
2
// Copyright (C) 2022 Linutronix GmbH, John Ogness
2
// Copyright (C) 2022 Linutronix GmbH, John Ogness
3
// Copyright (C) 2022 Intel, Thomas Gleixner
3
// Copyright (C) 2022 Intel, Thomas Gleixner
4
4
5
#include <linux/kernel.h>
5
#include <linux/atomic.h>
6
#include <linux/bug.h>
6
#include <linux/console.h>
7
#include <linux/console.h>
7
#include <linux/delay.h>
8
#include <linux/delay.h>
9
#include <linux/errno.h>
10
#include <linux/export.h>
11
#include <linux/init.h>
12
#include <linux/irqflags.h>
13
#include <linux/kthread.h>
14
#include <linux/minmax.h>
15
#include <linux/percpu.h>
16
#include <linux/preempt.h>
8
#include <linux/slab.h>
17
#include <linux/slab.h>
18
#include <linux/smp.h>
19
#include <linux/stddef.h>
20
#include <linux/string.h>
21
#include <linux/syscore_ops.h>
22
#include <linux/types.h>
9
#include "internal.h"
23
#include "internal.h"
24
#include "printk_ringbuffer.h"
10
/*
25
/*
11
 * Printk console printing implementation for consoles which does not depend
26
 * Printk console printing implementation for consoles which does not depend
12
 * on the legacy style console_lock mechanism.
27
 * on the legacy style console_lock mechanism.
Lines 172-180 void nbcon_seq_force(struct console *con, u64 seq) Link Here
172
	u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));
187
	u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb));
173
188
174
	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq));
189
	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq));
175
176
	/* Clear con->seq since nbcon consoles use con->nbcon_seq instead. */
177
	con->seq = 0;
178
}
190
}
179
191
180
/**
192
/**
Lines 201-206 static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) Link Here
201
	}
213
	}
202
}
214
}
203
215
216
bool printk_threads_enabled __ro_after_init;
217
204
/**
218
/**
205
 * nbcon_context_try_acquire_direct - Try to acquire directly
219
 * nbcon_context_try_acquire_direct - Try to acquire directly
206
 * @ctxt:	The context of the caller
220
 * @ctxt:	The context of the caller
Lines 531-536 static struct printk_buffers panic_nbcon_pbufs; Link Here
531
 * nbcon_context_try_acquire - Try to acquire nbcon console
545
 * nbcon_context_try_acquire - Try to acquire nbcon console
532
 * @ctxt:	The context of the caller
546
 * @ctxt:	The context of the caller
533
 *
547
 *
548
 * Context:	Under @ctxt->con->device_lock() or local_irq_save().
534
 * Return:	True if the console was acquired. False otherwise.
549
 * Return:	True if the console was acquired. False otherwise.
535
 *
550
 *
536
 * If the caller allowed an unsafe hostile takeover, on success the
551
 * If the caller allowed an unsafe hostile takeover, on success the
Lines 538-544 static struct printk_buffers panic_nbcon_pbufs; Link Here
538
 * in an unsafe state. Otherwise, on success the caller may assume
553
 * in an unsafe state. Otherwise, on success the caller may assume
539
 * the console is not in an unsafe state.
554
 * the console is not in an unsafe state.
540
 */
555
 */
541
__maybe_unused
542
static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
556
static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
543
{
557
{
544
	unsigned int cpu = smp_processor_id();
558
	unsigned int cpu = smp_processor_id();
Lines 824-832 bool nbcon_exit_unsafe(struct nbcon_write_context *wctxt) Link Here
824
}
838
}
825
EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
839
EXPORT_SYMBOL_GPL(nbcon_exit_unsafe);
826
840
841
/**
842
 * nbcon_reacquire - Reacquire a console after losing ownership
843
 * @wctxt:	The write context that was handed to the write function
844
 *
845
 * Since ownership can be lost at any time due to handover or takeover, a
846
 * printing context _should_ be prepared to back out immediately and
847
 * carefully. However, there are many scenarios where the context _must_
848
 * reacquire ownership in order to finalize or revert hardware changes.
849
 *
850
 * This function allows a context to reacquire ownership using the same
851
 * priority as its previous ownership.
852
 *
853
 * Note that for printing contexts, after a successful reacquire the
854
 * context will have no output buffer because that has been lost. This
855
 * function cannot be used to resume printing.
856
 */
857
void nbcon_reacquire(struct nbcon_write_context *wctxt)
858
{
859
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
860
	struct console *con = ctxt->console;
861
	struct nbcon_state cur;
862
863
	while (!nbcon_context_try_acquire(ctxt))
864
		cpu_relax();
865
866
	wctxt->outbuf = NULL;
867
	wctxt->len = 0;
868
	nbcon_state_read(con, &cur);
869
	wctxt->unsafe_takeover = cur.unsafe_takeover;
870
}
871
EXPORT_SYMBOL_GPL(nbcon_reacquire);
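
A rough driver-side sketch of when reacquiring matters (my_write_thread() is a hypothetical callback, and it assumes the nbcon_enter_unsafe()/nbcon_exit_unsafe() pair exported elsewhere in this file):

static void my_write_thread(struct console *con, struct nbcon_write_context *wctxt)
{
	/* Hypothetical nbcon write callback, for illustration only. */
	if (!nbcon_enter_unsafe(wctxt)) {
		/* Ownership already lost; get it back before returning. */
		nbcon_reacquire(wctxt);
		return;
	}

	/* ... mask device interrupts and emit wctxt->outbuf ... */

	if (!nbcon_exit_unsafe(wctxt)) {
		/*
		 * Ownership was lost mid-output. Reacquire it only to revert
		 * the hardware changes; the output buffer is gone, so
		 * printing cannot be resumed here.
		 */
		nbcon_reacquire(wctxt);
		/* ... restore the device interrupt mask ... */
	}
}

After the callback returns, nbcon_emit_next_record() below notices wctxt->outbuf == NULL and releases the context on the driver's behalf.
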
872
827
/**
873
/**
828
 * nbcon_emit_next_record - Emit a record in the acquired context
874
 * nbcon_emit_next_record - Emit a record in the acquired context
829
 * @wctxt:	The write context that will be handed to the write function
875
 * @wctxt:	The write context that will be handed to the write function
876
 * @use_atomic:	True if the write_atomic callback is to be used
830
 *
877
 *
831
 * Return:	True if this context still owns the console. False if
878
 * Return:	True if this context still owns the console. False if
832
 *		ownership was handed over or taken.
879
 *		ownership was handed over or taken.
Lines 840-847 EXPORT_SYMBOL_GPL(nbcon_exit_unsafe); Link Here
840
 * When true is returned, @wctxt->ctxt.backlog indicates whether there are
887
 * When true is returned, @wctxt->ctxt.backlog indicates whether there are
841
 * still records pending in the ringbuffer,
888
 * still records pending in the ringbuffer,
842
 */
889
 */
843
__maybe_unused
890
static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt, bool use_atomic)
844
static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt)
845
{
891
{
846
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
892
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
847
	struct console *con = ctxt->console;
893
	struct console *con = ctxt->console;
Lines 852-858 static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) Link Here
852
	unsigned long con_dropped;
898
	unsigned long con_dropped;
853
	struct nbcon_state cur;
899
	struct nbcon_state cur;
854
	unsigned long dropped;
900
	unsigned long dropped;
855
	bool done;
901
	unsigned long ulseq;
856
902
857
	/*
903
	/*
858
	 * The printk buffers are filled within an unsafe section. This
904
	 * The printk buffers are filled within an unsafe section. This
Lines 878-883 static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) Link Here
878
	if (dropped && !is_extended)
924
	if (dropped && !is_extended)
879
		console_prepend_dropped(&pmsg, dropped);
925
		console_prepend_dropped(&pmsg, dropped);
880
926
927
	/*
928
	 * If the previous owner was assigned the same record, this context
929
	 * has taken over ownership and is replaying the record. Prepend a
930
	 * message to let the user know the record is replayed.
931
	 */
932
	ulseq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_prev_seq));
933
	if (__ulseq_to_u64seq(prb, ulseq) == pmsg.seq) {
934
		console_prepend_replay(&pmsg);
935
	} else {
936
		/*
937
		 * Ensure this context is still the owner before trying to
938
		 * update @nbcon_prev_seq. Otherwise the value in @ulseq may
939
		 * not be from the previous owner.
940
		 */
941
		nbcon_state_read(con, &cur);
942
		if (!nbcon_context_can_proceed(ctxt, &cur))
943
			return false;
944
945
		atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_prev_seq), &ulseq,
946
					__u64seq_to_ulseq(pmsg.seq));
947
	}
948
881
	if (!nbcon_context_exit_unsafe(ctxt))
949
	if (!nbcon_context_exit_unsafe(ctxt))
882
		return false;
950
		return false;
883
951
Lines 891-907 static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) Link Here
891
	nbcon_state_read(con, &cur);
959
	nbcon_state_read(con, &cur);
892
	wctxt->unsafe_takeover = cur.unsafe_takeover;
960
	wctxt->unsafe_takeover = cur.unsafe_takeover;
893
961
894
	if (con->write_atomic) {
962
	if (use_atomic &&
895
		done = con->write_atomic(con, wctxt);
963
	    con->write_atomic) {
964
		con->write_atomic(con, wctxt);
965
966
	} else if (!use_atomic &&
967
		   con->write_thread) {
968
		con->write_thread(con, wctxt);
969
896
	} else {
970
	} else {
897
		nbcon_context_release(ctxt);
971
		/*
972
		 * This function should never be called for legacy consoles.
973
		 * Handle it as if ownership was lost and try to continue.
974
		 */
898
		WARN_ON_ONCE(1);
975
		WARN_ON_ONCE(1);
899
		done = false;
976
		nbcon_context_release(ctxt);
977
		return false;
900
	}
978
	}
901
979
902
	/* If not done, the emit was aborted. */
980
	if (!wctxt->outbuf) {
903
	if (!done)
981
		/*
982
		 * Ownership was lost and reacquired by the driver.
983
		 * Handle it as if ownership was lost and try to continue.
984
		 */
985
		nbcon_context_release(ctxt);
904
		return false;
986
		return false;
987
	}
905
988
906
	/*
989
	/*
907
	 * Since any dropped message was successfully output, reset the
990
	 * Since any dropped message was successfully output, reset the
Lines 928-933 static bool nbcon_emit_next_record(struct nbcon_write_context *wctxt) Link Here
928
	return nbcon_context_exit_unsafe(ctxt);
1011
	return nbcon_context_exit_unsafe(ctxt);
929
}
1012
}
930
1013
1014
/**
1015
 * nbcon_kthread_should_wakeup - Check whether a printer thread should wakeup
1016
 * @con:	Console to operate on
1017
 * @ctxt:	The acquire context that contains the state
1018
 *		at console_acquire()
1019
 *
1020
 * Return:	True if the thread should shutdown or if the console is
1021
 *		allowed to print and a record is available. False otherwise.
1022
 *
1023
 * After the thread wakes up, it must first check if it should shutdown before
1024
 * attempting any printing.
1025
 */
1026
static bool nbcon_kthread_should_wakeup(struct console *con, struct nbcon_context *ctxt)
1027
{
1028
	bool ret = false;
1029
	short flags;
1030
	int cookie;
1031
1032
	if (kthread_should_stop())
1033
		return true;
1034
1035
	cookie = console_srcu_read_lock();
1036
1037
	flags = console_srcu_read_flags(con);
1038
	if (console_is_usable(con, flags, false)) {
1039
		/* Bring the sequence in @ctxt up to date */
1040
		ctxt->seq = nbcon_seq_read(con);
1041
1042
		ret = prb_read_valid(prb, ctxt->seq, NULL);
1043
	}
1044
1045
	console_srcu_read_unlock(cookie);
1046
	return ret;
1047
}
1048
1049
/**
1050
 * nbcon_kthread_func - The printer thread function
1051
 * @__console:	Console to operate on
1052
 */
1053
static int nbcon_kthread_func(void *__console)
1054
{
1055
	struct console *con = __console;
1056
	struct nbcon_write_context wctxt = {
1057
		.ctxt.console	= con,
1058
		.ctxt.prio	= NBCON_PRIO_NORMAL,
1059
	};
1060
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1061
	unsigned long flags;
1062
	short con_flags;
1063
	bool backlog;
1064
	int cookie;
1065
	int ret;
1066
1067
wait_for_event:
1068
	/*
1069
	 * Guarantee this task is visible on the rcuwait before
1070
	 * checking the wake condition.
1071
	 *
1072
	 * The full memory barrier within set_current_state() of
1073
	 * ___rcuwait_wait_event() pairs with the full memory
1074
	 * barrier within rcuwait_has_sleeper().
1075
	 *
1076
	 * This pairs with rcuwait_has_sleeper:A and nbcon_kthread_wake:A.
1077
	 */
1078
	ret = rcuwait_wait_event(&con->rcuwait,
1079
				 nbcon_kthread_should_wakeup(con, ctxt),
1080
				 TASK_INTERRUPTIBLE); /* LMM(nbcon_kthread_func:A) */
1081
1082
	if (kthread_should_stop())
1083
		return 0;
1084
1085
	/* Wait was interrupted by a spurious signal, go back to sleep. */
1086
	if (ret)
1087
		goto wait_for_event;
1088
1089
	do {
1090
		backlog = false;
1091
1092
		cookie = console_srcu_read_lock();
1093
1094
		con_flags = console_srcu_read_flags(con);
1095
1096
		if (console_is_usable(con, con_flags, false)) {
1097
			con->device_lock(con, &flags);
1098
1099
			/*
1100
			 * Ensure this stays on the CPU to make handover and
1101
			 * takeover possible.
1102
			 */
1103
			cant_migrate();
1104
1105
			if (nbcon_context_try_acquire(ctxt)) {
1106
				/*
1107
				 * If the emit fails, this context is no
1108
				 * longer the owner.
1109
				 */
1110
				if (nbcon_emit_next_record(&wctxt, false)) {
1111
					nbcon_context_release(ctxt);
1112
					backlog = ctxt->backlog;
1113
				}
1114
			}
1115
1116
			con->device_unlock(con, flags);
1117
		}
1118
1119
		console_srcu_read_unlock(cookie);
1120
1121
	} while (backlog);
1122
1123
	goto wait_for_event;
1124
}
1125
1126
/**
1127
 * nbcon_irq_work - irq work to wake printk thread
1128
 * @irq_work:	The irq work to operate on
1129
 */
1130
static void nbcon_irq_work(struct irq_work *irq_work)
1131
{
1132
	struct console *con = container_of(irq_work, struct console, irq_work);
1133
1134
	nbcon_kthread_wake(con);
1135
}
1136
1137
static inline bool rcuwait_has_sleeper(struct rcuwait *w)
1138
{
1139
	bool has_sleeper;
1140
1141
	rcu_read_lock();
1142
	/*
1143
	 * Guarantee any new records can be seen by tasks preparing to wait
1144
	 * before this context checks if the rcuwait is empty.
1145
	 *
1146
	 * This full memory barrier pairs with the full memory barrier within
1147
	 * set_current_state() of ___rcuwait_wait_event(), which is called
1148
	 * after prepare_to_rcuwait() adds the waiter but before it has
1149
	 * checked the wait condition.
1150
	 *
1151
	 * This pairs with nbcon_kthread_func:A.
1152
	 */
1153
	smp_mb(); /* LMM(rcuwait_has_sleeper:A) */
1154
	has_sleeper = !!rcu_dereference(w->task);
1155
	rcu_read_unlock();
1156
1157
	return has_sleeper;
1158
}
1159
1160
/**
1161
 * nbcon_wake_threads - Wake up printing threads using irq_work
1162
 */
1163
void nbcon_wake_threads(void)
1164
{
1165
	struct console *con;
1166
	int cookie;
1167
1168
	cookie = console_srcu_read_lock();
1169
	for_each_console_srcu(con) {
1170
		/*
1171
		 * Only schedule irq_work if the printing thread is
1172
		 * actively waiting. If not waiting, the thread will
1173
		 * notice by itself that it has work to do.
1174
		 */
1175
		if (con->kthread && rcuwait_has_sleeper(&con->rcuwait))
1176
			irq_work_queue(&con->irq_work);
1177
	}
1178
	console_srcu_read_unlock(cookie);
1179
}
1180
1181
/* Track the nbcon emergency nesting per CPU. */
1182
static DEFINE_PER_CPU(unsigned int, nbcon_pcpu_emergency_nesting);
1183
static unsigned int early_nbcon_pcpu_emergency_nesting __initdata;
1184
1185
/**
1186
 * nbcon_get_cpu_emergency_nesting - Get the per CPU emergency nesting pointer
1187
 *
1188
 * Return:	Either a pointer to the per CPU emergency nesting counter of
1189
 *		the current CPU or to the init data during early boot.
1190
 */
1191
static __ref unsigned int *nbcon_get_cpu_emergency_nesting(void)
1192
{
1193
	/*
1194
	 * The value of __printk_percpu_data_ready gets set in normal
1195
	 * context and before SMP initialization. As a result it could
1196
	 * never change while inside an nbcon emergency section.
1197
	 */
1198
	if (!printk_percpu_data_ready())
1199
		return &early_nbcon_pcpu_emergency_nesting;
1200
1201
	return this_cpu_ptr(&nbcon_pcpu_emergency_nesting);
1202
}
1203
1204
/**
1205
 * nbcon_get_default_prio - The appropriate nbcon priority to use for nbcon
1206
 *				printing on the current CPU
1207
 *
1208
 * Context:	Any context which could not be migrated to another CPU.
1209
 * Return:	The nbcon_prio to use for acquiring an nbcon console in this
1210
 *		context for printing.
1211
 */
1212
enum nbcon_prio nbcon_get_default_prio(void)
1213
{
1214
	unsigned int *cpu_emergency_nesting;
1215
1216
	if (this_cpu_in_panic())
1217
		return NBCON_PRIO_PANIC;
1218
1219
	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1220
	if (*cpu_emergency_nesting)
1221
		return NBCON_PRIO_EMERGENCY;
1222
1223
	return NBCON_PRIO_NORMAL;
1224
}
1225
1226
/*
1227
 * nbcon_emit_one - Print one record for an nbcon console using the
1228
 *			specified callback
1229
 * @wctxt:	An initialized write context struct to use for this context
1230
 * @use_atomic:	True if the write_atomic callback is to be used
1231
 *
1232
 * Return:	True, when a record has been printed and there are still
1233
 *		pending records. The caller might want to continue flushing.
1234
 *
1235
 *		False, when there is no pending record, or when the console
1236
 *		context cannot be acquired, or the ownership has been lost.
1237
 *		The caller should give up. Either the job is done, cannot be
1238
 *		done, or will be handled by the owning context.
1239
 *
1240
 * This is an internal helper to handle the locking of the console before
1241
 * calling nbcon_emit_next_record().
1242
 */
1243
static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic)
1244
{
1245
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
1246
1247
	if (!nbcon_context_try_acquire(ctxt))
1248
		return false;
1249
1250
	/*
1251
	 * nbcon_emit_next_record() returns false when the console was
1252
	 * handed over or taken over. In both cases the context is no
1253
	 * longer valid.
1254
	 *
1255
	 * The higher priority printing context takes over responsibility
1256
	 * to print the pending records.
1257
	 */
1258
	if (!nbcon_emit_next_record(wctxt, use_atomic))
1259
		return false;
1260
1261
	nbcon_context_release(ctxt);
1262
1263
	return ctxt->backlog;
1264
}
1265
1266
/**
1267
 * nbcon_legacy_emit_next_record - Print one record for an nbcon console
1268
 *					in legacy contexts
1269
 * @con:	The console to print on
1270
 * @handover:	Will be set to true if a printk waiter has taken over the
1271
 *		console_lock, in which case the caller is no longer holding
1272
 *		both the console_lock and the SRCU read lock. Otherwise it
1273
 *		is set to false.
1274
 * @cookie:	The cookie from the SRCU read lock.
1275
 * @use_atomic:	True if the write_atomic callback is to be used
1276
 *
1277
 * Context:	Any context except NMI.
1278
 * Return:	True, when a record has been printed and there are still
1279
 *		pending records. The caller might want to continue flushing.
1280
 *
1281
 *		False, when there is no pending record, or when the console
1282
 *		context cannot be acquired, or the ownership has been lost.
1283
 *		The caller should give up. Either the job is done, cannot be
1284
 *		done, or will be handled by the owning context.
1285
 *
1286
 * This function is meant to be called by console_flush_all() to print records
1287
 * on nbcon consoles from legacy context (printing via console unlocking).
1288
 * Essentially it is the nbcon version of console_emit_next_record().
1289
 */
1290
bool nbcon_legacy_emit_next_record(struct console *con, bool *handover,
1291
				   int cookie, bool use_atomic)
1292
{
1293
	struct nbcon_write_context wctxt = { };
1294
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1295
	unsigned long flags;
1296
	bool progress;
1297
1298
	ctxt->console = con;
1299
1300
	if (use_atomic) {
1301
		/* Use the same procedure as console_emit_next_record(). */
1302
		printk_safe_enter_irqsave(flags);
1303
		console_lock_spinning_enable();
1304
		stop_critical_timings();
1305
1306
		ctxt->prio = nbcon_get_default_prio();
1307
		progress = nbcon_emit_one(&wctxt, use_atomic);
1308
1309
		start_critical_timings();
1310
		*handover = console_lock_spinning_disable_and_check(cookie);
1311
		printk_safe_exit_irqrestore(flags);
1312
	} else {
1313
		*handover = false;
1314
1315
		con->device_lock(con, &flags);
1316
		cant_migrate();
1317
1318
		ctxt->prio = nbcon_get_default_prio();
1319
		progress = nbcon_emit_one(&wctxt, use_atomic);
1320
1321
		con->device_unlock(con, flags);
1322
	}
1323
1324
	return progress;
1325
}
1326
1327
/**
1328
 * __nbcon_atomic_flush_pending_con - Flush specified nbcon console using its
1329
 *					write_atomic() callback
1330
 * @con:			The nbcon console to flush
1331
 * @stop_seq:			Flush up until this record
1332
 * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1333
 *
1334
 * Return:	0 if @con was flushed up to @stop_seq Otherwise, error code on
1335
 *		failure.
1336
 *
1337
 * Errors:
1338
 *
1339
 *	-EPERM:		Unable to acquire console ownership.
1340
 *
1341
 *	-EAGAIN:	Another context took over ownership while printing.
1342
 *
1343
 *	-ENOENT:	A record before @stop_seq is not available.
1344
 *
1345
 * If flushing up to @stop_seq was not successful, it only makes sense for the
1346
 * caller to try again when -EAGAIN was returned. When -EPERM is returned,
1347
 * this context is not allowed to acquire the console. When -ENOENT is
1348
 * returned, it cannot be expected that the unfinalized record will become
1349
 * available.
1350
 */
1351
static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
1352
					    bool allow_unsafe_takeover)
1353
{
1354
	struct nbcon_write_context wctxt = { };
1355
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(&wctxt, ctxt);
1356
	int err = 0;
1357
1358
	ctxt->console			= con;
1359
	ctxt->spinwait_max_us		= 2000;
1360
	ctxt->prio			= nbcon_get_default_prio();
1361
	ctxt->allow_unsafe_takeover	= allow_unsafe_takeover;
1362
1363
	if (!nbcon_context_try_acquire(ctxt))
1364
		return -EPERM;
1365
1366
	while (nbcon_seq_read(con) < stop_seq) {
1367
		/*
1368
		 * nbcon_emit_next_record() returns false when the console was
1369
		 * handed over or taken over. In both cases the context is no
1370
		 * longer valid.
1371
		 */
1372
		if (!nbcon_emit_next_record(&wctxt, true))
1373
			return -EAGAIN;
1374
1375
		if (!ctxt->backlog) {
1376
			if (nbcon_seq_read(con) < stop_seq)
1377
				err = -ENOENT;
1378
			break;
1379
		}
1380
	}
1381
1382
	nbcon_context_release(ctxt);
1383
	return err;
1384
}
1385
1386
/**
1387
 * nbcon_atomic_flush_pending_con - Flush specified nbcon console using its
1388
 *					write_atomic() callback
1389
 * @con:			The nbcon console to flush
1390
 * @stop_seq:			Flush up until this record
1391
 * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1392
 *
1393
 * This will stop flushing before @stop_seq if another context has ownership.
1394
 * That context is then responsible for the flushing. Likewise, if new records
1395
 * are added while this context was flushing and there is no other context
1396
 * to handle the printing, this context must also flush those records.
1397
 */
1398
static void nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
1399
					   bool allow_unsafe_takeover)
1400
{
1401
	unsigned long flags;
1402
	int err;
1403
1404
again:
1405
	/*
1406
	 * Atomic flushing does not use console driver synchronization (i.e.
1407
	 * it does not hold the port lock for uart consoles). Therefore IRQs
1408
	 * must be disabled to avoid being interrupted and then calling into
1409
	 * a driver that will deadlock trying to acquire console ownership.
1410
	 */
1411
	local_irq_save(flags);
1412
1413
	err = __nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
1414
1415
	local_irq_restore(flags);
1416
1417
	/*
1418
	 * If flushing was successful but more records are available this
1419
	 * context must flush those remaining records if the printer thread
1420
	 * is not available to do it.
1421
	 */
1422
	if (!err && !con->kthread && prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
1423
		stop_seq = prb_next_reserve_seq(prb);
1424
		goto again;
1425
	}
1426
1427
	/*
1428
	 * If there was a new owner, that context is responsible for
1429
	 * completing the flush.
1430
	 */
1431
}
1432
1433
/**
1434
 * __nbcon_atomic_flush_pending - Flush all nbcon consoles using their
1435
 *					write_atomic() callback
1436
 * @stop_seq:			Flush up until this record
1437
 * @allow_unsafe_takeover:	True, to allow unsafe hostile takeovers
1438
 */
1439
static void __nbcon_atomic_flush_pending(u64 stop_seq, bool allow_unsafe_takeover)
1440
{
1441
	struct console *con;
1442
	int cookie;
1443
1444
	cookie = console_srcu_read_lock();
1445
	for_each_console_srcu(con) {
1446
		short flags = console_srcu_read_flags(con);
1447
1448
		if (!(flags & CON_NBCON))
1449
			continue;
1450
1451
		if (!console_is_usable(con, flags, true))
1452
			continue;
1453
1454
		if (nbcon_seq_read(con) >= stop_seq)
1455
			continue;
1456
1457
		nbcon_atomic_flush_pending_con(con, stop_seq, allow_unsafe_takeover);
1458
	}
1459
	console_srcu_read_unlock(cookie);
1460
}
1461
1462
/**
1463
 * nbcon_atomic_flush_pending - Flush all nbcon consoles using their
1464
 *				write_atomic() callback
1465
 *
1466
 * Flush the backlog up through the currently newest record. Any new
1467
 * records added while flushing will not be flushed. This is to avoid
1468
 * one CPU printing unbounded because other CPUs continue to add records.
1469
 */
1470
void nbcon_atomic_flush_pending(void)
1471
{
1472
	__nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), false);
1473
}
1474
1475
/**
1476
 * nbcon_atomic_flush_unsafe - Flush all nbcon consoles using their
1477
 *	write_atomic() callback and allowing unsafe hostile takeovers
1478
 *
1479
 * Flush the backlog up through the currently newest record. Unsafe hostile
1480
 * takeovers will be performed, if necessary.
1481
 */
1482
void nbcon_atomic_flush_unsafe(void)
1483
{
1484
	__nbcon_atomic_flush_pending(prb_next_reserve_seq(prb), true);
1485
}
1486
1487
/**
1488
 * nbcon_cpu_emergency_enter - Enter an emergency section where printk()
1489
 *				messages for that CPU are only stored
1490
 *
1491
 * Upon exiting the emergency section, all stored messages are flushed.
1492
 *
1493
 * Context:	Any context. Disables preemption.
1494
 *
1495
 * When within an emergency section, no printing occurs on that CPU. This
1496
 * is to allow all emergency messages to be dumped into the ringbuffer before
1497
 * flushing the ringbuffer. The actual printing occurs when exiting the
1498
 * outermost emergency section.
1499
 */
1500
void nbcon_cpu_emergency_enter(void)
1501
{
1502
	unsigned int *cpu_emergency_nesting;
1503
1504
	preempt_disable();
1505
1506
	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1507
	(*cpu_emergency_nesting)++;
1508
}
1509
1510
/**
1511
 * nbcon_cpu_emergency_exit - Exit an emergency section and flush the
1512
 *				stored messages
1513
 *
1514
 * Flushing only occurs when exiting all nesting for the CPU.
1515
 *
1516
 * Context:	Any context. Enables preemption.
1517
 */
1518
void nbcon_cpu_emergency_exit(void)
1519
{
1520
	unsigned int *cpu_emergency_nesting;
1521
	bool do_trigger_flush = false;
1522
1523
	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
1524
1525
	/*
1526
	 * Flush the messages before enabling preemption to see them ASAP.
1527
	 *
1528
	 * Reduce the risk of potential softlockup by using the
1529
	 * flush_pending() variant which ignores messages added later. It is
1530
	 * called before decrementing the counter so that the printing context
1531
	 * for the emergency messages is NBCON_PRIO_EMERGENCY.
1532
	 */
1533
	if (*cpu_emergency_nesting == 1) {
1534
		nbcon_atomic_flush_pending();
1535
		do_trigger_flush = true;
1536
	}
1537
1538
	(*cpu_emergency_nesting)--;
1539
1540
	if (WARN_ON_ONCE(*cpu_emergency_nesting < 0))
1541
		*cpu_emergency_nesting = 0;
1542
1543
	preempt_enable();
1544
1545
	if (do_trigger_flush)
1546
		printk_trigger_flush();
1547
}
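
The kernel/panic.c hunks earlier in this patch wrap oops/warn/panic output in exactly this pairing; a minimal sketch of the pattern:

	nbcon_cpu_emergency_enter();
	pr_warn("something suspicious happened\n");	/* only stored for now */
	dump_stack();					/* still only stored */
	nbcon_cpu_emergency_exit();			/* flushed here at emergency priority */
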
1548
1549
/**
1550
 * nbcon_cpu_emergency_flush - Explicitly flush consoles while
1551
 *				within emergency context
1552
 *
1553
 * Both nbcon and legacy consoles are flushed.
1554
 *
1555
 * It should be used only when there are too many messages printed
1556
 * in emergency context, for example, printing backtraces of all
1557
 * CPUs or processes. It is typically needed when the watchdogs
1558
 * need to be touched as well.
1559
 */
1560
void nbcon_cpu_emergency_flush(void)
1561
{
1562
	/* The explicit flush is needed only in the emergency context. */
1563
	if (*(nbcon_get_cpu_emergency_nesting()) == 0)
1564
		return;
1565
1566
	nbcon_atomic_flush_pending();
1567
1568
	if (printing_via_unlock && !in_nmi()) {
1569
		if (console_trylock())
1570
			console_unlock();
1571
	}
1572
}
1573
1574
/*
1575
 * nbcon_kthread_stop - Stop a printer thread
1576
 * @con:	Console to operate on
1577
 */
1578
static void nbcon_kthread_stop(struct console *con)
1579
{
1580
	lockdep_assert_console_list_lock_held();
1581
1582
	if (!con->kthread)
1583
		return;
1584
1585
	kthread_stop(con->kthread);
1586
	con->kthread = NULL;
1587
}
1588
1589
/**
1590
 * nbcon_kthread_create - Create a printer thread
1591
 * @con:	Console to operate on
1592
 *
1593
 * If it fails, let the console proceed. The atomic part might
1594
 * be usable and useful.
1595
 */
1596
void nbcon_kthread_create(struct console *con)
1597
{
1598
	struct task_struct *kt;
1599
1600
	lockdep_assert_console_list_lock_held();
1601
1602
	if (!(con->flags & CON_NBCON) || !con->write_thread)
1603
		return;
1604
1605
	if (!printk_threads_enabled || con->kthread)
1606
		return;
1607
1608
	/*
1609
	 * Printer threads cannot be started as long as any boot console is
1610
	 * registered because there is no way to synchronize the hardware
1611
	 * registers between boot console code and regular console code.
1612
	 */
1613
	if (have_boot_console)
1614
		return;
1615
1616
	kt = kthread_run(nbcon_kthread_func, con, "pr/%s%d", con->name, con->index);
1617
	if (IS_ERR(kt)) {
1618
		con_printk(KERN_ERR, con, "failed to start printing thread\n");
1619
		return;
1620
	}
1621
1622
	con->kthread = kt;
1623
1624
	/*
1625
	 * It is important that console printing threads are scheduled
1626
	 * shortly after a printk call and with generous runtime budgets.
1627
	 */
1628
	sched_set_normal(con->kthread, -20);
1629
}
1630
1631
static int __init printk_setup_threads(void)
1632
{
1633
	struct console *con;
1634
1635
	console_list_lock();
1636
	printk_threads_enabled = true;
1637
	for_each_console(con)
1638
		nbcon_kthread_create(con);
1639
	if (force_printkthreads() && printing_via_unlock)
1640
		nbcon_legacy_kthread_create();
1641
	console_list_unlock();
1642
	return 0;
1643
}
1644
early_initcall(printk_setup_threads);
1645
931
/**
1646
/**
932
 * nbcon_alloc - Allocate buffers needed by the nbcon console
1647
 * nbcon_alloc - Allocate buffers needed by the nbcon console
933
 * @con:	Console to allocate buffers for
1648
 * @con:	Console to allocate buffers for
Lines 961-981 bool nbcon_alloc(struct console *con) Link Here
961
/**
1676
/**
962
 * nbcon_init - Initialize the nbcon console specific data
1677
 * nbcon_init - Initialize the nbcon console specific data
963
 * @con:	Console to initialize
1678
 * @con:	Console to initialize
1679
 * @init_seq:	Sequence number of the first record to be emitted
964
 *
1680
 *
965
 * nbcon_alloc() *must* be called and succeed before this function
1681
 * nbcon_alloc() *must* be called and succeed before this function
966
 * is called.
1682
 * is called.
967
 *
968
 * This function expects that the legacy @con->seq has been set.
969
 */
1683
 */
970
void nbcon_init(struct console *con)
1684
void nbcon_init(struct console *con, u64 init_seq)
971
{
1685
{
972
	struct nbcon_state state = { };
1686
	struct nbcon_state state = { };
973
1687
974
	/* nbcon_alloc() must have been called and successful! */
1688
	/* nbcon_alloc() must have been called and successful! */
975
	BUG_ON(!con->pbufs);
1689
	BUG_ON(!con->pbufs);
976
1690
977
	nbcon_seq_force(con, con->seq);
1691
	rcuwait_init(&con->rcuwait);
1692
	init_irq_work(&con->irq_work, nbcon_irq_work);
1693
	nbcon_seq_force(con, init_seq);
1694
	atomic_long_set(&ACCESS_PRIVATE(con, nbcon_prev_seq), -1UL);
978
	nbcon_state_set(con, &state);
1695
	nbcon_state_set(con, &state);
1696
	nbcon_kthread_create(con);
979
}
1697
}
980
1698
981
/**
1699
/**
Lines 986-991 void nbcon_free(struct console *con) Link Here
986
{
1704
{
987
	struct nbcon_state state = { };
1705
	struct nbcon_state state = { };
988
1706
1707
	nbcon_kthread_stop(con);
989
	nbcon_state_set(con, &state);
1708
	nbcon_state_set(con, &state);
990
1709
991
	/* Boot consoles share global printk buffers. */
1710
	/* Boot consoles share global printk buffers. */
Lines 994-996 void nbcon_free(struct console *con) Link Here
994
1713
995
	con->pbufs = NULL;
1714
	con->pbufs = NULL;
996
}
1715
}
1716
1717
/**
1718
 * nbcon_driver_try_acquire - Try to acquire nbcon console and enter unsafe
1719
 *				section
1720
 * @con:	The nbcon console to acquire
1721
 *
1722
 * Context:	Under the locking mechanism implemented in
1723
 *		@con->device_lock() including disabling migration.
1724
 *
1725
 * Console drivers will usually use their own internal synchronization
1726
 * mechanism to synchronize between console printing and non-printing
1727
 * activities (such as setting baud rates). However, nbcon console drivers
1728
 * supporting atomic consoles may also want to mark unsafe sections when
1729
 * performing non-printing activities in order to synchronize against their
1730
 * write_atomic() callback.
1731
 *
1732
 * This function acquires the nbcon console using priority NBCON_PRIO_NORMAL
1733
 * and marks it unsafe for handover/takeover.
1734
 */
1735
bool nbcon_driver_try_acquire(struct console *con)
1736
{
1737
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_driver_ctxt);
1738
1739
	cant_migrate();
1740
1741
	memset(ctxt, 0, sizeof(*ctxt));
1742
	ctxt->console	= con;
1743
	ctxt->prio	= NBCON_PRIO_NORMAL;
1744
1745
	if (!nbcon_context_try_acquire(ctxt))
1746
		return false;
1747
1748
	if (!nbcon_context_enter_unsafe(ctxt))
1749
		return false;
1750
1751
	return true;
1752
}
1753
EXPORT_SYMBOL_GPL(nbcon_driver_try_acquire);
1754
1755
/**
1756
 * nbcon_driver_release - Exit unsafe section and release the nbcon console
1757
 * @con:	The nbcon console acquired in nbcon_driver_try_acquire()
1758
 */
1759
void nbcon_driver_release(struct console *con)
1760
{
1761
	struct nbcon_context *ctxt = &ACCESS_PRIVATE(con, nbcon_driver_ctxt);
1762
	int cookie;
1763
1764
	if (!nbcon_context_exit_unsafe(ctxt))
1765
		return;
1766
1767
	nbcon_context_release(ctxt);
1768
1769
	/*
1770
	 * This context must flush any new records added while the console
1771
	 * was locked. The console_srcu_read_lock must be taken to ensure
1772
	 * the console is usable throughout flushing.
1773
	 */
1774
	cookie = console_srcu_read_lock();
1775
	if (console_is_usable(con, console_srcu_read_flags(con), true) &&
1776
	    !con->kthread &&
1777
	    prb_read_valid(prb, nbcon_seq_read(con), NULL)) {
1778
		__nbcon_atomic_flush_pending_con(con, prb_next_reserve_seq(prb), false);
1779
	}
1780
	console_srcu_read_unlock(cookie);
1781
}
1782
EXPORT_SYMBOL_GPL(nbcon_driver_release);
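
A minimal sketch of the intended driver usage (my_console_set_baud() is a hypothetical helper; device_lock()/device_unlock() are the console callbacks named in the Context note above):

static void my_console_set_baud(struct console *con, unsigned int divisor)
{
	unsigned long flags;

	con->device_lock(con, &flags);		/* also disables migration */
	if (nbcon_driver_try_acquire(con)) {
		/* ... hypothetical non-printing activity: reprogram the divisor ... */
		nbcon_driver_release(con);
	}
	/* If ownership could not be taken, the reconfiguration is skipped here
	 * for simplicity; a real driver would retry or synchronize differently. */
	con->device_unlock(con, flags);
}
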
1783
1784
/**
1785
 * printk_kthread_shutdown - shutdown all threaded printers
1786
 *
1787
 * On system shutdown all threaded printers are stopped. This allows printk
1788
 * to transition back to atomic printing, thus providing a robust mechanism
1789
 * for the final shutdown/reboot messages to be output.
1790
 */
1791
static void printk_kthread_shutdown(void)
1792
{
1793
	struct console *con;
1794
1795
	console_list_lock();
1796
	for_each_console(con) {
1797
		if (con->flags & CON_NBCON)
1798
			nbcon_kthread_stop(con);
1799
	}
1800
	console_list_unlock();
1801
}
1802
1803
static struct syscore_ops printk_syscore_ops = {
1804
	.shutdown = printk_kthread_shutdown,
1805
};
1806
1807
static int __init printk_init_ops(void)
1808
{
1809
	register_syscore_ops(&printk_syscore_ops);
1810
	return 0;
1811
}
1812
device_initcall(printk_init_ops);
(-)a/kernel/printk/printk.c (-121 / +572 lines)
Lines 195-200 static int __init control_devkmsg(char *str) Link Here
195
}
195
}
196
__setup("printk.devkmsg=", control_devkmsg);
196
__setup("printk.devkmsg=", control_devkmsg);
197
197
198
#if !defined(CONFIG_PREEMPT_RT)
199
DEFINE_STATIC_KEY_FALSE(force_printkthreads_key);
200
201
static int __init setup_forced_printkthreads(char *arg)
202
{
203
	static_branch_enable(&force_printkthreads_key);
204
	return 0;
205
}
206
early_param("threadprintk", setup_forced_printkthreads);
207
#endif
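
For reference, this is the hook behind a new "threadprintk" kernel command-line option on non-PREEMPT_RT builds; forcing threaded printing of the legacy consoles would then look roughly like:

	GRUB_CMDLINE_LINUX="... threadprintk"

On PREEMPT_RT the internal.h hunk above hard-wires force_printkthreads() to true, so no parameter is needed.
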
208
198
char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
209
char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
199
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
210
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
200
int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
211
int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
Lines 282-287 EXPORT_SYMBOL(console_list_unlock); Link Here
282
 * Return: A cookie to pass to console_srcu_read_unlock().
293
 * Return: A cookie to pass to console_srcu_read_unlock().
283
 */
294
 */
284
int console_srcu_read_lock(void)
295
int console_srcu_read_lock(void)
296
	__acquires(&console_srcu)
285
{
297
{
286
	return srcu_read_lock_nmisafe(&console_srcu);
298
	return srcu_read_lock_nmisafe(&console_srcu);
287
}
299
}
Lines 295-300 EXPORT_SYMBOL(console_srcu_read_lock); Link Here
295
 * Counterpart to console_srcu_read_lock()
307
 * Counterpart to console_srcu_read_lock()
296
 */
308
 */
297
void console_srcu_read_unlock(int cookie)
309
void console_srcu_read_unlock(int cookie)
310
	__releases(&console_srcu)
298
{
311
{
299
	srcu_read_unlock_nmisafe(&console_srcu, cookie);
312
	srcu_read_unlock_nmisafe(&console_srcu, cookie);
300
}
313
}
Lines 461-468 static int console_msg_format = MSG_FORMAT_DEFAULT; Link Here
461
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
474
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
462
static DEFINE_MUTEX(syslog_lock);
475
static DEFINE_MUTEX(syslog_lock);
463
476
477
/*
478
 * Specifies if a legacy console is registered. If legacy consoles are
479
 * present, it is necessary to perform the console lock/unlock dance
480
 * whenever console flushing should occur.
481
 */
482
bool have_legacy_console;
483
484
/*
485
 * Specifies if an nbcon console is registered. If nbcon consoles are present,
486
 * synchronous printing of legacy consoles will not occur during panic until
487
 * the backtrace has been stored to the ringbuffer.
488
 */
489
static bool have_nbcon_console;
490
491
/*
492
 * Specifies if a boot console is registered. If boot consoles are present,
493
 * nbcon consoles cannot print simultaneously and must be synchronized by
494
 * the console lock. This is because boot consoles and nbcon consoles may
495
 * have mapped the same hardware.
496
 */
497
bool have_boot_console;
498
464
#ifdef CONFIG_PRINTK
499
#ifdef CONFIG_PRINTK
465
DECLARE_WAIT_QUEUE_HEAD(log_wait);
500
DECLARE_WAIT_QUEUE_HEAD(log_wait);
501
502
static DECLARE_WAIT_QUEUE_HEAD(legacy_wait);
503
466
/* All 3 protected by @syslog_lock. */
504
/* All 3 protected by @syslog_lock. */
467
/* the next printk record to read by syslog(READ) or /proc/kmsg */
505
/* the next printk record to read by syslog(READ) or /proc/kmsg */
468
static u64 syslog_seq;
506
static u64 syslog_seq;
Lines 1850-1856 static bool console_waiter; Link Here
1850
 * there may be a waiter spinning (like a spinlock). Also it must be
1888
 * there may be a waiter spinning (like a spinlock). Also it must be
1851
 * ready to hand over the lock at the end of the section.
1889
 * ready to hand over the lock at the end of the section.
1852
 */
1890
 */
1853
static void console_lock_spinning_enable(void)
1891
void console_lock_spinning_enable(void)
1854
{
1892
{
1855
	/*
1893
	/*
1856
	 * Do not use spinning in panic(). The panic CPU wants to keep the lock.
1894
	 * Do not use spinning in panic(). The panic CPU wants to keep the lock.
Lines 1889-1895 static void console_lock_spinning_enable(void) Link Here
1889
 *
1927
 *
1890
 * Return: 1 if the lock rights were passed, 0 otherwise.
1928
 * Return: 1 if the lock rights were passed, 0 otherwise.
1891
 */
1929
 */
1892
static int console_lock_spinning_disable_and_check(int cookie)
1930
int console_lock_spinning_disable_and_check(int cookie)
1893
{
1931
{
1894
	int waiter;
1932
	int waiter;
1895
1933
Lines 2300-2311 int vprintk_store(int facility, int level, Link Here
2300
	return ret;
2338
	return ret;
2301
}
2339
}
2302
2340
2341
static bool legacy_allow_panic_sync;
2342
2343
/*
2344
 * This acts as a one-way switch to allow legacy consoles to print from
2345
 * the printk() caller context on a panic CPU. It also attempts to flush
2346
 * the legacy consoles in this context.
2347
 */
2348
void printk_legacy_allow_panic_sync(void)
2349
{
2350
	legacy_allow_panic_sync = true;
2351
2352
	if (printing_via_unlock && !in_nmi()) {
2353
		if (console_trylock())
2354
			console_unlock();
2355
	}
2356
}
2357
2303
asmlinkage int vprintk_emit(int facility, int level,
2358
asmlinkage int vprintk_emit(int facility, int level,
2304
			    const struct dev_printk_info *dev_info,
2359
			    const struct dev_printk_info *dev_info,
2305
			    const char *fmt, va_list args)
2360
			    const char *fmt, va_list args)
2306
{
2361
{
2362
	bool do_trylock_unlock = printing_via_unlock &&
2363
				 !force_printkthreads();
2307
	int printed_len;
2364
	int printed_len;
2308
	bool in_sched = false;
2309
2365
2310
	/* Suppress unimportant messages after panic happens */
2366
	/* Suppress unimportant messages after panic happens */
2311
	if (unlikely(suppress_printk))
2367
	if (unlikely(suppress_printk))
Lines 2321-2358 asmlinkage int vprintk_emit(int facility, int level, Link Here
2321
2377
2322
	if (level == LOGLEVEL_SCHED) {
2378
	if (level == LOGLEVEL_SCHED) {
2323
		level = LOGLEVEL_DEFAULT;
2379
		level = LOGLEVEL_DEFAULT;
2324
		in_sched = true;
2380
		/* If called from the scheduler, we can not call up(). */
2381
		do_trylock_unlock = false;
2325
	}
2382
	}
2326
2383
2327
	printk_delay(level);
2384
	printk_delay(level);
2328
2385
2329
	printed_len = vprintk_store(facility, level, dev_info, fmt, args);
2386
	printed_len = vprintk_store(facility, level, dev_info, fmt, args);
2330
2387
2331
	/* If called from the scheduler, we can not call up(). */
2388
	if (have_nbcon_console && !have_boot_console) {
2332
	if (!in_sched) {
2389
		bool is_panic_context = this_cpu_in_panic();
2390
2391
		/*
2392
		 * In panic, the legacy consoles are not allowed to print from
2393
		 * the printk calling context unless explicitly allowed. This
2394
		 * gives the safe nbcon consoles a chance to print out all the
2395
		 * panic messages first. This restriction only applies if
2396
		 * there are nbcon consoles registered.
2397
		 */
2398
		if (is_panic_context)
2399
			do_trylock_unlock &= legacy_allow_panic_sync;
2400
2401
		/*
2402
		 * There are situations where nbcon atomic printing should
2403
		 * happen in the printk() caller context:
2404
		 *
2405
		 * - When this CPU is in panic.
2406
		 *
2407
		 * - When booting, before the printing threads have been
2408
		 *   started.
2409
		 *
2410
		 * - During shutdown, since the printing threads may not get
2411
		 *   a chance to print the final messages.
2412
		 *
2413
		 * Note that if boot consoles are registered, the console
2414
		 * lock/unlock dance must be relied upon instead because nbcon
2415
		 * consoles cannot print simultaneously with boot consoles.
2416
		 */
2417
		if (is_panic_context ||
2418
		    !printk_threads_enabled ||
2419
		    (system_state > SYSTEM_RUNNING)) {
2420
			nbcon_atomic_flush_pending();
2421
		}
2422
	}
2423
2424
	nbcon_wake_threads();
2425
2426
	if (do_trylock_unlock) {
2333
		/*
2427
		/*
2334
		 * The caller may be holding system-critical or
2428
		 * The caller may be holding system-critical or
2335
		 * timing-sensitive locks. Disable preemption during
2429
		 * timing-sensitive locks. Disable preemption during
2336
		 * printing of all remaining records to all consoles so that
2430
		 * printing of all remaining records to all consoles so that
2337
		 * this context can return as soon as possible. Hopefully
2431
		 * this context can return as soon as possible. Hopefully
2338
		 * another printk() caller will take over the printing.
2432
		 * another printk() caller will take over the printing.
2433
		 *
2434
		 * Also, nbcon_get_default_prio() requires migration disabled.
2339
		 */
2435
		 */
2340
		preempt_disable();
2436
		preempt_disable();
2437
2341
		/*
2438
		/*
2342
		 * Try to acquire and then immediately release the console
2439
		 * Try to acquire and then immediately release the console
2343
		 * semaphore. The release will print out buffers. With the
2440
		 * semaphore. The release will print out buffers. With the
2344
		 * spinning variant, this context tries to take over the
2441
		 * spinning variant, this context tries to take over the
2345
		 * printing from another printing context.
2442
		 * printing from another printing context.
2443
		 *
2444
		 * Skip it in EMERGENCY priority. The console will be
2445
		 * explicitly flushed when exiting the emergency section.
2346
		 */
2446
		 */
2347
		if (console_trylock_spinning())
2447
		if (nbcon_get_default_prio() != NBCON_PRIO_EMERGENCY) {
2348
			console_unlock();
2448
			if (console_trylock_spinning())
2449
				console_unlock();
2450
		}
2451
2349
		preempt_enable();
2452
		preempt_enable();
2350
	}
2453
	}
2351
2454
2352
	if (in_sched)
2455
	if (do_trylock_unlock)
2353
		defer_console_output();
2354
	else
2355
		wake_up_klogd();
2456
		wake_up_klogd();
2457
	else
2458
		defer_console_output();
2356
2459
2357
	return printed_len;
2460
	return printed_len;
2358
}
2461
}
Lines 2380-2385 EXPORT_SYMBOL(_printk); Link Here
2380
static bool pr_flush(int timeout_ms, bool reset_on_progress);
2483
static bool pr_flush(int timeout_ms, bool reset_on_progress);
2381
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
2484
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
2382
2485
2486
static struct task_struct *nbcon_legacy_kthread;
2487
2488
static inline void wake_up_legacy_kthread(void)
2489
{
2490
	if (nbcon_legacy_kthread)
2491
		wake_up_interruptible(&legacy_wait);
2492
}
2493
2383
#else /* CONFIG_PRINTK */
2494
#else /* CONFIG_PRINTK */
2384
2495
2385
#define printk_time		false
2496
#define printk_time		false
Lines 2393-2398 static u64 syslog_seq; Link Here
2393
static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; }
2504
static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; }
2394
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
2505
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
2395
2506
2507
static inline void nbcon_legacy_kthread_create(void) { }
2508
static inline void wake_up_legacy_kthread(void) { }
2396
#endif /* CONFIG_PRINTK */
2509
#endif /* CONFIG_PRINTK */
2397
2510
2398
#ifdef CONFIG_EARLY_PRINTK
2511
#ifdef CONFIG_EARLY_PRINTK
Lines 2608-2613 void suspend_console(void) Link Here
2608
void resume_console(void)
2721
void resume_console(void)
2609
{
2722
{
2610
	struct console *con;
2723
	struct console *con;
2724
	short flags;
2725
	int cookie;
2611
2726
2612
	if (!console_suspend_enabled)
2727
	if (!console_suspend_enabled)
2613
		return;
2728
		return;
Lines 2624-2629 void resume_console(void) Link Here
2624
	 */
2739
	 */
2625
	synchronize_srcu(&console_srcu);
2740
	synchronize_srcu(&console_srcu);
2626
2741
2742
	/*
2743
	 * Since this runs in task context, wake the threaded printers
2744
	 * directly rather than scheduling irq_work to do it.
2745
	 */
2746
	cookie = console_srcu_read_lock();
2747
	for_each_console_srcu(con) {
2748
		flags = console_srcu_read_flags(con);
2749
		if (flags & CON_NBCON)
2750
			nbcon_kthread_wake(con);
2751
	}
2752
	console_srcu_read_unlock(cookie);
2753
2754
	wake_up_legacy_kthread();
2755
2627
	pr_flush(1000, true);
2756
	pr_flush(1000, true);
2628
}
2757
}
2629
2758
Lines 2638-2644 void resume_console(void) Link Here
2638
 */
2767
 */
2639
static int console_cpu_notify(unsigned int cpu)
2768
static int console_cpu_notify(unsigned int cpu)
2640
{
2769
{
2641
	if (!cpuhp_tasks_frozen) {
2770
	if (!cpuhp_tasks_frozen && printing_via_unlock &&
2771
	    !force_printkthreads()) {
2642
		/* If trylock fails, someone else is doing the printing */
2772
		/* If trylock fails, someone else is doing the printing */
2643
		if (console_trylock())
2773
		if (console_trylock())
2644
			console_unlock();
2774
			console_unlock();
Lines 2695-2730 int is_console_locked(void) Link Here
2695
}
2825
}
2696
EXPORT_SYMBOL(is_console_locked);
2826
EXPORT_SYMBOL(is_console_locked);
2697
2827
2698
/*
2699
 * Check if the given console is currently capable and allowed to print
2700
 * records.
2701
 *
2702
 * Requires the console_srcu_read_lock.
2703
 */
2704
static inline bool console_is_usable(struct console *con)
2705
{
2706
	short flags = console_srcu_read_flags(con);
2707
2708
	if (!(flags & CON_ENABLED))
2709
		return false;
2710
2711
	if ((flags & CON_SUSPENDED))
2712
		return false;
2713
2714
	if (!con->write)
2715
		return false;
2716
2717
	/*
2718
	 * Console drivers may assume that per-cpu resources have been
2719
	 * allocated. So unless they're explicitly marked as being able to
2720
	 * cope (CON_ANYTIME) don't call them until this CPU is officially up.
2721
	 */
2722
	if (!cpu_online(raw_smp_processor_id()) && !(flags & CON_ANYTIME))
2723
		return false;
2724
2725
	return true;
2726
}
2727
2728
static void __console_unlock(void)
2828
static void __console_unlock(void)
2729
{
2829
{
2730
	console_locked = 0;
2830
	console_locked = 0;
Lines 2734-2763 static void __console_unlock(void) Link Here
2734
#ifdef CONFIG_PRINTK
2834
#ifdef CONFIG_PRINTK
2735
2835
2736
/*
2836
/*
2737
 * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message". This
2837
 * Prepend the message in @pmsg->pbufs->outbuf with the message in
2738
 * is achieved by shifting the existing message over and inserting the dropped
2838
 * @pmsg->pbufs->scratchbuf. This is achieved by shifting the existing message
2739
 * message.
2839
 * over and inserting the scratchbuf message.
2740
 *
2840
 *
2741
 * @pmsg is the printk message to prepend.
2841
 * @pmsg is the printk message to prepend.
2742
 *
2842
 *
2743
 * @dropped is the dropped count to report in the dropped message.
2843
 * @len is the length of the message in @pmsg->pbufs->scratchbuf.
2744
 *
2844
 *
2745
 * If the message text in @pmsg->pbufs->outbuf does not have enough space for
2845
 * If the message text in @pmsg->pbufs->outbuf does not have enough space for
2746
 * the dropped message, the message text will be sufficiently truncated.
2846
 * the scratchbuf message, the message text will be sufficiently truncated.
2747
 *
2847
 *
2748
 * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated.
2848
 * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated.
2749
 */
2849
 */
2750
void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
2850
static void __console_prepend_scratch(struct printk_message *pmsg, size_t len)
2751
{
2851
{
2752
	struct printk_buffers *pbufs = pmsg->pbufs;
2852
	struct printk_buffers *pbufs = pmsg->pbufs;
2753
	const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
2754
	const size_t outbuf_sz = sizeof(pbufs->outbuf);
2853
	const size_t outbuf_sz = sizeof(pbufs->outbuf);
2755
	char *scratchbuf = &pbufs->scratchbuf[0];
2854
	char *scratchbuf = &pbufs->scratchbuf[0];
2756
	char *outbuf = &pbufs->outbuf[0];
2855
	char *outbuf = &pbufs->outbuf[0];
2757
	size_t len;
2758
2759
	len = scnprintf(scratchbuf, scratchbuf_sz,
2760
		       "** %lu printk messages dropped **\n", dropped);
2761
2856
2762
	/*
2857
	/*
2763
	 * Make sure outbuf is sufficiently large before prepending.
2858
	 * Make sure outbuf is sufficiently large before prepending.
Lines 2779-2784 void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped) Link Here
2779
	pmsg->outbuf_len += len;
2874
	pmsg->outbuf_len += len;
2780
}
2875
}
2781
2876
2877
/*
2878
 * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message".
2879
 * @pmsg->outbuf_len is updated appropriately.
2880
 *
2881
 * @pmsg is the printk message to prepend.
2882
 *
2883
 * @dropped is the dropped count to report in the dropped message.
2884
 */
2885
void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
2886
{
2887
	struct printk_buffers *pbufs = pmsg->pbufs;
2888
	const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
2889
	char *scratchbuf = &pbufs->scratchbuf[0];
2890
	size_t len;
2891
2892
	len = scnprintf(scratchbuf, scratchbuf_sz,
2893
		       "** %lu printk messages dropped **\n", dropped);
2894
2895
	__console_prepend_scratch(pmsg, len);
2896
}
2897
2898
/*
2899
 * Prepend the message in @pmsg->pbufs->outbuf with a "replay message".
2900
 * @pmsg->outbuf_len is updated appropriately.
2901
 *
2902
 * @pmsg is the printk message to prepend.
2903
 */
2904
void console_prepend_replay(struct printk_message *pmsg)
2905
{
2906
	struct printk_buffers *pbufs = pmsg->pbufs;
2907
	const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
2908
	char *scratchbuf = &pbufs->scratchbuf[0];
2909
	size_t len;
2910
2911
	len = scnprintf(scratchbuf, scratchbuf_sz,
2912
			"** replaying previous printk message **\n");
2913
2914
	__console_prepend_scratch(pmsg, len);
2915
}
2916
2782
/*
2917
/*
2783
 * Read and format the specified record (or a later record if the specified
2918
 * Read and format the specified record (or a later record if the specified
2784
 * record is not available).
2919
 * record is not available).
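The __console_prepend_scratch() helper introduced above factors the prepend logic out of console_prepend_dropped() so that console_prepend_replay() can reuse it: the prefix is rendered into the scratch buffer, the existing output is shifted right (truncating its tail if the output buffer is too small), and the prefix is copied into the freed space. A minimal userspace sketch of that shift-and-copy step follows; prepend_scratch(), the buffer sizes and the sample text are invented for illustration and are not the kernel API.

#include <stdio.h>
#include <string.h>

/* Illustrative only: mirrors the shift-and-copy idea, not the kernel code. */
static size_t prepend_scratch(char *outbuf, size_t outbuf_sz, size_t out_len,
                              const char *scratch, size_t len)
{
        if (len + out_len >= outbuf_sz)
                out_len = outbuf_sz - len - 1;  /* truncate the old tail */

        memmove(outbuf + len, outbuf, out_len); /* shift the message right */
        memcpy(outbuf, scratch, len);           /* place the prefix in front */
        outbuf[len + out_len] = '\0';
        return len + out_len;
}

int main(void)
{
        char outbuf[64] = "real console output\n";
        char scratch[64];
        int n = snprintf(scratch, sizeof(scratch),
                         "** %lu printk messages dropped **\n", 3UL);

        size_t total = prepend_scratch(outbuf, sizeof(outbuf), strlen(outbuf),
                                       scratch, (size_t)n);
        fwrite(outbuf, 1, total, stdout);
        return 0;
}

Run standalone, it prints the dropped-message banner first and the original text after it, which is the ordering the hunk above preserves for both the dropped and the replay prefix.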
Lines 2844-2849 bool printk_get_next_message(struct printk_message *pmsg, u64 seq, Link Here
2844
	return true;
2979
	return true;
2845
}
2980
}
2846
2981
2982
/*
2983
 * Legacy console printing from printk() caller context does not respect
2984
 * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a
2985
 * false positive. For PREEMPT_RT the false positive condition does not
2986
 * occur.
2987
 *
2988
 * This map is used to establish LD_WAIT_SLEEP context for the console write
2989
 * callbacks when legacy printing to avoid false positive lockdep complaints,
2990
 * thus allowing lockdep to continue to function for real issues.
2991
 */
2992
#ifdef CONFIG_PREEMPT_RT
2993
static inline void printk_legacy_lock_map_acquire_try(void) { }
2994
static inline void printk_legacy_lock_map_release(void) { }
2995
#else
2996
static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP);
2997
2998
static inline void printk_legacy_lock_map_acquire_try(void)
2999
{
3000
	lock_map_acquire_try(&printk_legacy_map);
3001
}
3002
3003
static inline void printk_legacy_lock_map_release(void)
3004
{
3005
	lock_map_release(&printk_legacy_map);
3006
}
3007
#endif /* CONFIG_PREEMPT_RT */
3008
2847
/*
3009
/*
2848
 * Used as the printk buffers for non-panic, serialized console printing.
3010
 * Used as the printk buffers for non-panic, serialized console printing.
2849
 * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles.
3011
 * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles.
Lines 2893-2923 static bool console_emit_next_record(struct console *con, bool *handover, int cookie) Link Here
2893
		con->dropped = 0;
3055
		con->dropped = 0;
2894
	}
3056
	}
2895
3057
2896
	/*
2897
	 * While actively printing out messages, if another printk()
2898
	 * were to occur on another CPU, it may wait for this one to
2899
	 * finish. This task can not be preempted if there is a
2900
	 * waiter waiting to take over.
2901
	 *
2902
	 * Interrupts are disabled because the hand over to a waiter
2903
	 * must not be interrupted until the hand over is completed
2904
	 * (@console_waiter is cleared).
2905
	 */
2906
	printk_safe_enter_irqsave(flags);
2907
	console_lock_spinning_enable();
2908
2909
	/* Do not trace print latency. */
2910
	stop_critical_timings();
2911
2912
	/* Write everything out to the hardware. */
3058
	/* Write everything out to the hardware. */
2913
	con->write(con, outbuf, pmsg.outbuf_len);
2914
3059
2915
	start_critical_timings();
3060
	if (force_printkthreads()) {
3061
		/*
3062
		 * With forced threading this function is either in a thread
3063
		 * or panic context. So there is no need for concern about
3064
		 * printk reentrance, handovers, or lockdep complaints.
3065
		 */
2916
3066
2917
	con->seq = pmsg.seq + 1;
3067
		con->write(con, outbuf, pmsg.outbuf_len);
3068
		con->seq = pmsg.seq + 1;
3069
	} else {
3070
		/*
3071
		 * While actively printing out messages, if another printk()
3072
		 * were to occur on another CPU, it may wait for this one to
3073
		 * finish. This task can not be preempted if there is a
3074
		 * waiter waiting to take over.
3075
		 *
3076
		 * Interrupts are disabled because the hand over to a waiter
3077
		 * must not be interrupted until the hand over is completed
3078
		 * (@console_waiter is cleared).
3079
		 */
3080
		printk_safe_enter_irqsave(flags);
3081
		console_lock_spinning_enable();
2918
3082
2919
	*handover = console_lock_spinning_disable_and_check(cookie);
3083
		/* Do not trace print latency. */
2920
	printk_safe_exit_irqrestore(flags);
3084
		stop_critical_timings();
3085
3086
		printk_legacy_lock_map_acquire_try();
3087
		con->write(con, outbuf, pmsg.outbuf_len);
3088
		printk_legacy_lock_map_release();
3089
3090
		start_critical_timings();
3091
3092
		con->seq = pmsg.seq + 1;
3093
3094
		*handover = console_lock_spinning_disable_and_check(cookie);
3095
		printk_safe_exit_irqrestore(flags);
3096
	}
2921
skip:
3097
skip:
2922
	return true;
3098
	return true;
2923
}
3099
}
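Both branches of the rewritten console_emit_next_record() above finish with con->seq = pmsg.seq + 1: the per-console cursor is moved past the record that was actually formatted rather than simply incremented, so sequence numbers that were dropped from the ring buffer are skipped by construction. A rough userspace sketch of that cursor behaviour, with a plain array standing in for the ring buffer and every name invented for the example:

#include <stdio.h>

struct record { unsigned long long seq; const char *text; };

/* Records with gaps, as if some sequence numbers had been dropped. */
static const struct record ring[] = {
        { 10, "first\n" }, { 11, "second\n" }, { 14, "after a gap\n" },
};

/* Return the first record with seq >= want, or NULL when caught up. */
static const struct record *read_at_or_after(unsigned long long want)
{
        for (size_t i = 0; i < sizeof(ring) / sizeof(ring[0]); i++) {
                if (ring[i].seq >= want)
                        return &ring[i];
        }
        return NULL;
}

int main(void)
{
        unsigned long long cursor = 0;  /* the per-console position */
        const struct record *r;

        while ((r = read_at_or_after(cursor)) != NULL) {
                fputs(r->text, stdout); /* "write everything out" */
                cursor = r->seq + 1;    /* advance past what was emitted */
        }
        return 0;
}

The gap between 11 and 14 is crossed without special casing because the cursor always lands one past whatever record was really read.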
Lines 2970-2982 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) Link Here
2970
3146
2971
		cookie = console_srcu_read_lock();
3147
		cookie = console_srcu_read_lock();
2972
		for_each_console_srcu(con) {
3148
		for_each_console_srcu(con) {
3149
			short flags = console_srcu_read_flags(con);
3150
			u64 printk_seq;
2973
			bool progress;
3151
			bool progress;
2974
3152
2975
			if (!console_is_usable(con))
3153
			/*
3154
			 * console_flush_all() is only for legacy consoles,
3155
			 * unless the nbcon console has no kthread printer.
3156
			 */
3157
			if ((flags & CON_NBCON) && con->kthread)
3158
				continue;
3159
3160
			if (!console_is_usable(con, flags, !do_cond_resched))
2976
				continue;
3161
				continue;
2977
			any_usable = true;
3162
			any_usable = true;
2978
3163
2979
			progress = console_emit_next_record(con, handover, cookie);
3164
			if (flags & CON_NBCON) {
3165
				progress = nbcon_legacy_emit_next_record(con, handover, cookie,
3166
									 !do_cond_resched);
3167
				printk_seq = nbcon_seq_read(con);
3168
			} else {
3169
				progress = console_emit_next_record(con, handover, cookie);
3170
				printk_seq = con->seq;
3171
			}
2980
3172
2981
			/*
3173
			/*
2982
			 * If a handover has occurred, the SRCU read lock
3174
			 * If a handover has occurred, the SRCU read lock
Lines 2986-2993 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) Link Here
2986
				return false;
3178
				return false;
2987
3179
2988
			/* Track the next of the highest seq flushed. */
3180
			/* Track the next of the highest seq flushed. */
2989
			if (con->seq > *next_seq)
3181
			if (printk_seq > *next_seq)
2990
				*next_seq = con->seq;
3182
				*next_seq = printk_seq;
2991
3183
2992
			if (!progress)
3184
			if (!progress)
2993
				continue;
3185
				continue;
Lines 3010-3028 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) Link Here
3010
	return false;
3202
	return false;
3011
}
3203
}
3012
3204
3013
/**
3205
static void console_flush_and_unlock(void)
3014
 * console_unlock - unblock the console subsystem from printing
3015
 *
3016
 * Releases the console_lock which the caller holds to block printing of
3017
 * the console subsystem.
3018
 *
3019
 * While the console_lock was held, console output may have been buffered
3020
 * by printk().  If this is the case, console_unlock(); emits
3021
 * the output prior to releasing the lock.
3022
 *
3023
 * console_unlock(); may be called from any context.
3024
 */
3025
void console_unlock(void)
3026
{
3206
{
3027
	bool do_cond_resched;
3207
	bool do_cond_resched;
3028
	bool handover;
3208
	bool handover;
Lines 3066-3071 void console_unlock(void) Link Here
3066
		 */
3246
		 */
3067
	} while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
3247
	} while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
3068
}
3248
}
3249
3250
/**
3251
 * console_unlock - unblock the console subsystem from printing
3252
 *
3253
 * Releases the console_lock which the caller holds to block printing of
3254
 * the console subsystem.
3255
 *
3256
 * While the console_lock was held, console output may have been buffered
3257
 * by printk().  If this is the case, console_unlock(); emits
3258
 * the output prior to releasing the lock.
3259
 *
3260
 * console_unlock(); may be called from any context.
3261
 */
3262
void console_unlock(void)
3263
{
3264
	/*
3265
	 * Forced threading relies on kthread and atomic consoles for
3266
	 * printing. It never attempts to print from console_unlock().
3267
	 */
3268
	if (force_printkthreads()) {
3269
		__console_unlock();
3270
		return;
3271
	}
3272
3273
	console_flush_and_unlock();
3274
}
3069
EXPORT_SYMBOL(console_unlock);
3275
EXPORT_SYMBOL(console_unlock);
3070
3276
3071
/**
3277
/**
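console_flush_and_unlock() above keeps the familiar console_unlock() behaviour: flush, drop the lock, and loop again only while prb_read_valid() reports more records and console_trylock() succeeds, so a printer that lost the trylock race can rely on the current owner to pick up its records. A hedged userspace sketch of that loop shape, using a pthread mutex and a counter in place of the console lock and the ring buffer (all names here are illustrative only):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t con_lock = PTHREAD_MUTEX_INITIALIZER;
static int pending = 5;                 /* records still to print */

static void flush_batch(void)
{
        while (pending > 0) {
                printf("emit record (%d left)\n", pending);
                pending--;
        }
}

int main(void)
{
        pthread_mutex_lock(&con_lock);
        do {
                flush_batch();
                pthread_mutex_unlock(&con_lock);
                /*
                 * Another "printk" could have queued more records and lost
                 * its own trylock while this context held the lock, so only
                 * keep going if there is work left and the lock is free.
                 */
        } while (pending > 0 && pthread_mutex_trylock(&con_lock) == 0);
        return 0;
}

If the trylock fails, whoever now owns the lock is responsible for the remaining records, which is why the loop can exit without draining them.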
Lines 3199-3205 void console_flush_on_panic(enum con_flush_mode mode) Link Here
3199
		console_srcu_read_unlock(cookie);
3405
		console_srcu_read_unlock(cookie);
3200
	}
3406
	}
3201
3407
3202
	console_flush_all(false, &next_seq, &handover);
3408
	nbcon_atomic_flush_pending();
3409
3410
	if (printing_via_unlock)
3411
		console_flush_all(false, &next_seq, &handover);
3203
}
3412
}
3204
3413
3205
/*
3414
/*
Lines 3256-3268 EXPORT_SYMBOL(console_stop); Link Here
3256
3465
3257
void console_start(struct console *console)
3466
void console_start(struct console *console)
3258
{
3467
{
3468
	short flags;
3469
3259
	console_list_lock();
3470
	console_list_lock();
3260
	console_srcu_write_flags(console, console->flags | CON_ENABLED);
3471
	console_srcu_write_flags(console, console->flags | CON_ENABLED);
3472
	flags = console->flags;
3261
	console_list_unlock();
3473
	console_list_unlock();
3474
3475
	/*
3476
	 * Ensure that all SRCU list walks have completed. The related
3477
	 * printing context must be able to see it is enabled so that
3478
	 * it is guaranteed to wake up and resume printing.
3479
	 */
3480
	synchronize_srcu(&console_srcu);
3481
3482
	if (flags & CON_NBCON)
3483
		nbcon_kthread_wake(console);
3484
	else
3485
		wake_up_legacy_kthread();
3486
3262
	__pr_flush(console, 1000, true);
3487
	__pr_flush(console, 1000, true);
3263
}
3488
}
3264
EXPORT_SYMBOL(console_start);
3489
EXPORT_SYMBOL(console_start);
3265
3490
3491
#ifdef CONFIG_PRINTK
3492
static bool printer_should_wake(void)
3493
{
3494
	bool available = false;
3495
	struct console *con;
3496
	int cookie;
3497
3498
	if (kthread_should_stop())
3499
		return true;
3500
3501
	cookie = console_srcu_read_lock();
3502
	for_each_console_srcu(con) {
3503
		short flags = console_srcu_read_flags(con);
3504
		u64 printk_seq;
3505
3506
		/*
3507
		 * The legacy printer thread is only for legacy consoles,
3508
		 * unless the nbcon console has no kthread printer.
3509
		 */
3510
		if ((flags & CON_NBCON) && con->kthread)
3511
			continue;
3512
3513
		if (!console_is_usable(con, flags, true))
3514
			continue;
3515
3516
		if (flags & CON_NBCON) {
3517
			printk_seq = nbcon_seq_read(con);
3518
		} else {
3519
			/*
3520
			 * It is safe to read @seq because only this
3521
			 * thread context updates @seq.
3522
			 */
3523
			printk_seq = con->seq;
3524
		}
3525
3526
		if (prb_read_valid(prb, printk_seq, NULL)) {
3527
			available = true;
3528
			break;
3529
		}
3530
	}
3531
	console_srcu_read_unlock(cookie);
3532
3533
	return available;
3534
}
3535
3536
static int nbcon_legacy_kthread_func(void *unused)
3537
{
3538
	int error;
3539
3540
	for (;;) {
3541
		error = wait_event_interruptible(legacy_wait, printer_should_wake());
3542
3543
		if (kthread_should_stop())
3544
			break;
3545
3546
		if (error)
3547
			continue;
3548
3549
		console_lock();
3550
		console_flush_and_unlock();
3551
	}
3552
3553
	return 0;
3554
}
3555
3556
void nbcon_legacy_kthread_create(void)
3557
{
3558
	struct task_struct *kt;
3559
3560
	lockdep_assert_held(&console_mutex);
3561
3562
	if (!force_printkthreads())
3563
		return;
3564
3565
	if (!printk_threads_enabled || nbcon_legacy_kthread)
3566
		return;
3567
3568
	kt = kthread_run(nbcon_legacy_kthread_func, NULL, "pr/legacy");
3569
	if (IS_ERR(kt)) {
3570
		pr_err("unable to start legacy printing thread\n");
3571
		return;
3572
	}
3573
3574
	nbcon_legacy_kthread = kt;
3575
3576
	/*
3577
	 * It is important that console printing threads are scheduled
3578
	 * shortly after a printk call and with generous runtime budgets.
3579
	 */
3580
	sched_set_normal(nbcon_legacy_kthread, -20);
3581
}
3582
#endif /* CONFIG_PRINTK */
3583
3266
static int __read_mostly keep_bootcon;
3584
static int __read_mostly keep_bootcon;
3267
3585
3268
static int __init keep_bootcon_setup(char *str)
3586
static int __init keep_bootcon_setup(char *str)
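The legacy printer thread added above is a classic wait-then-drain worker: printer_should_wake() decides whether any usable console still lags the ring buffer, wait_event_interruptible() sleeps until that is true or the thread is asked to stop, and the body takes the console lock and flushes. The sketch below models that structure in userspace with a condition variable; the names and the simplified stop handling are assumptions for the example, not the kernel interfaces.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int backlog;
static bool stop;

static void *printer(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        for (;;) {
                while (backlog == 0 && !stop)   /* printer_should_wake()-ish */
                        pthread_cond_wait(&cond, &lock);
                while (backlog > 0) {           /* flush everything pending */
                        printf("printed one record\n");
                        backlog--;
                }
                if (stop)
                        break;
        }
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, printer, NULL);

        pthread_mutex_lock(&lock);
        backlog += 3;                           /* "printk" some records */
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);

        pthread_mutex_lock(&lock);
        stop = true;                            /* ask the worker to exit */
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        return 0;
}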
Lines 3361-3379 static void try_enable_default_console(struct console *newcon) Link Here
3361
		newcon->flags |= CON_CONSDEV;
3679
		newcon->flags |= CON_CONSDEV;
3362
}
3680
}
3363
3681
3364
static void console_init_seq(struct console *newcon, bool bootcon_registered)
3682
/* Return the starting sequence number for a newly registered console. */
3683
static u64 get_init_console_seq(struct console *newcon, bool bootcon_registered)
3365
{
3684
{
3366
	struct console *con;
3685
	struct console *con;
3367
	bool handover;
3686
	bool handover;
3687
	u64 init_seq;
3368
3688
3369
	if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) {
3689
	if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) {
3370
		/* Get a consistent copy of @syslog_seq. */
3690
		/* Get a consistent copy of @syslog_seq. */
3371
		mutex_lock(&syslog_lock);
3691
		mutex_lock(&syslog_lock);
3372
		newcon->seq = syslog_seq;
3692
		init_seq = syslog_seq;
3373
		mutex_unlock(&syslog_lock);
3693
		mutex_unlock(&syslog_lock);
3374
	} else {
3694
	} else {
3375
		/* Begin with next message added to ringbuffer. */
3695
		/* Begin with next message added to ringbuffer. */
3376
		newcon->seq = prb_next_seq(prb);
3696
		init_seq = prb_next_seq(prb);
3377
3697
3378
		/*
3698
		/*
3379
		 * If any enabled boot consoles are due to be unregistered
3699
		 * If any enabled boot consoles are due to be unregistered
Lines 3394-3400 static void console_init_seq(struct console *newcon, bool bootcon_registered) Link Here
3394
			 * Flush all consoles and set the console to start at
3714
			 * Flush all consoles and set the console to start at
3395
			 * the next unprinted sequence number.
3715
			 * the next unprinted sequence number.
3396
			 */
3716
			 */
3397
			if (!console_flush_all(true, &newcon->seq, &handover)) {
3717
			if (!console_flush_all(true, &init_seq, &handover)) {
3398
				/*
3718
				/*
3399
				 * Flushing failed. Just choose the lowest
3719
				 * Flushing failed. Just choose the lowest
3400
				 * sequence of the enabled boot consoles.
3720
				 * sequence of the enabled boot consoles.
Lines 3407-3425 static void console_init_seq(struct console *newcon, bool bootcon_registered) Link Here
3407
				if (handover)
3727
				if (handover)
3408
					console_lock();
3728
					console_lock();
3409
3729
3410
				newcon->seq = prb_next_seq(prb);
3730
				init_seq = prb_next_seq(prb);
3411
				for_each_console(con) {
3731
				for_each_console(con) {
3412
					if ((con->flags & CON_BOOT) &&
3732
					u64 seq;
3413
					    (con->flags & CON_ENABLED) &&
3733
3414
					    con->seq < newcon->seq) {
3734
					if (!(con->flags & CON_BOOT) ||
3415
						newcon->seq = con->seq;
3735
					    !(con->flags & CON_ENABLED)) {
3736
						continue;
3416
					}
3737
					}
3738
3739
					if (con->flags & CON_NBCON)
3740
						seq = nbcon_seq_read(con);
3741
					else
3742
						seq = con->seq;
3743
3744
					if (seq < init_seq)
3745
						init_seq = seq;
3417
				}
3746
				}
3418
			}
3747
			}
3419
3748
3420
			console_unlock();
3749
			console_unlock();
3421
		}
3750
		}
3422
	}
3751
	}
3752
3753
	return init_seq;
3423
}
3754
}
3424
3755
3425
#define console_first()				\
3756
#define console_first()				\
Lines 3451-3456 void register_console(struct console *newcon) Link Here
3451
	struct console *con;
3782
	struct console *con;
3452
	bool bootcon_registered = false;
3783
	bool bootcon_registered = false;
3453
	bool realcon_registered = false;
3784
	bool realcon_registered = false;
3785
	unsigned long flags;
3786
	u64 init_seq;
3454
	int err;
3787
	int err;
3455
3788
3456
	console_list_lock();
3789
	console_list_lock();
Lines 3528-3537 void register_console(struct console *newcon) Link Here
3528
	}
3861
	}
3529
3862
3530
	newcon->dropped = 0;
3863
	newcon->dropped = 0;
3531
	console_init_seq(newcon, bootcon_registered);
3864
	init_seq = get_init_console_seq(newcon, bootcon_registered);
3532
3865
3533
	if (newcon->flags & CON_NBCON)
3866
	if (newcon->flags & CON_NBCON) {
3534
		nbcon_init(newcon);
3867
		have_nbcon_console = true;
3868
		nbcon_init(newcon, init_seq);
3869
	} else {
3870
		have_legacy_console = true;
3871
		newcon->seq = init_seq;
3872
		nbcon_legacy_kthread_create();
3873
	}
3874
3875
	if (newcon->flags & CON_BOOT)
3876
		have_boot_console = true;
3877
3878
	/*
3879
	 * If another context is actively using the hardware of this new
3880
	 * console, it will not be aware of the nbcon synchronization. This
3881
	 * is a risk that two contexts could access the hardware
3882
	 * simultaneously if this new console is used for atomic printing
3883
	 * and the other context is still using the hardware.
3884
	 *
3885
	 * Use the driver synchronization to ensure that the hardware is not
3886
	 * in use while this new console transitions to being registered.
3887
	 */
3888
	if ((newcon->flags & CON_NBCON) && newcon->write_atomic)
3889
		newcon->device_lock(newcon, &flags);
3535
3890
3536
	/*
3891
	/*
3537
	 * Put this console in the list - keep the
3892
	 * Put this console in the list - keep the
Lines 3557-3562 void register_console(struct console *newcon) Link Here
3557
	 * register_console() completes.
3912
	 * register_console() completes.
3558
	 */
3913
	 */
3559
3914
3915
	/* This new console is now registered. */
3916
	if ((newcon->flags & CON_NBCON) && newcon->write_atomic)
3917
		newcon->device_unlock(newcon, flags);
3918
3560
	console_sysfs_notify();
3919
	console_sysfs_notify();
3561
3920
3562
	/*
3921
	/*
Lines 3585-3590 EXPORT_SYMBOL(register_console); Link Here
3585
/* Must be called under console_list_lock(). */
3944
/* Must be called under console_list_lock(). */
3586
static int unregister_console_locked(struct console *console)
3945
static int unregister_console_locked(struct console *console)
3587
{
3946
{
3947
	bool is_boot_con = (console->flags & CON_BOOT);
3948
	bool found_legacy_con = false;
3949
	bool found_nbcon_con = false;
3950
	bool found_boot_con = false;
3951
	unsigned long flags;
3952
	struct console *c;
3588
	int res;
3953
	int res;
3589
3954
3590
	lockdep_assert_console_list_lock_held();
3955
	lockdep_assert_console_list_lock_held();
Lines 3603-3610 static int unregister_console_locked(struct console *console) Link Here
3603
	if (!console_is_registered_locked(console))
3968
	if (!console_is_registered_locked(console))
3604
		return -ENODEV;
3969
		return -ENODEV;
3605
3970
3971
	/*
3972
	 * Use the driver synchronization to ensure that the hardware is not
3973
	 * in use while this console transitions to being unregistered.
3974
	 */
3975
	if ((console->flags & CON_NBCON) && console->write_atomic)
3976
		console->device_lock(console, &flags);
3977
3606
	hlist_del_init_rcu(&console->node);
3978
	hlist_del_init_rcu(&console->node);
3607
3979
3980
	if ((console->flags & CON_NBCON) && console->write_atomic)
3981
		console->device_unlock(console, flags);
3982
3608
	/*
3983
	/*
3609
	 * <HISTORICAL>
3984
	 * <HISTORICAL>
3610
	 * If this isn't the last console and it has CON_CONSDEV set, we
3985
	 * If this isn't the last console and it has CON_CONSDEV set, we
Lines 3632-3637 static int unregister_console_locked(struct console *console) Link Here
3632
	if (console->exit)
4007
	if (console->exit)
3633
		res = console->exit(console);
4008
		res = console->exit(console);
3634
4009
4010
	/*
4011
	 * With this console gone, the global flags tracking registered
4012
	 * console types may have changed. Update them.
4013
	 */
4014
	for_each_console(c) {
4015
		if (c->flags & CON_BOOT)
4016
			found_boot_con = true;
4017
4018
		if (c->flags & CON_NBCON)
4019
			found_nbcon_con = true;
4020
		else
4021
			found_legacy_con = true;
4022
	}
4023
	if (!found_boot_con)
4024
		have_boot_console = found_boot_con;
4025
	if (!found_legacy_con)
4026
		have_legacy_console = found_legacy_con;
4027
	if (!found_nbcon_con)
4028
		have_nbcon_console = found_nbcon_con;
4029
4030
	/*
4031
	 * When the last boot console unregisters, start up the
4032
	 * printing threads.
4033
	 */
4034
	if (is_boot_con && !have_boot_console) {
4035
		for_each_console(c)
4036
			nbcon_kthread_create(c);
4037
	}
4038
4039
#ifdef CONFIG_PRINTK
4040
	if (!printing_via_unlock && nbcon_legacy_kthread) {
4041
		kthread_stop(nbcon_legacy_kthread);
4042
		nbcon_legacy_kthread = NULL;
4043
	}
4044
#endif
4045
3635
	return res;
4046
	return res;
3636
}
4047
}
3637
4048
Lines 3790-3812 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) Link Here
3790
4201
3791
	seq = prb_next_reserve_seq(prb);
4202
	seq = prb_next_reserve_seq(prb);
3792
4203
3793
	/* Flush the consoles so that records up to @seq are printed. */
4204
	/*
3794
	console_lock();
4205
	 * Flush the consoles so that records up to @seq are printed.
3795
	console_unlock();
4206
	 * Otherwise this function will just wait for the threaded printers
4207
	 * to print up to @seq.
4208
	 */
4209
	if (printing_via_unlock && !force_printkthreads()) {
4210
		console_lock();
4211
		console_unlock();
4212
	}
3796
4213
3797
	for (;;) {
4214
	for (;;) {
3798
		unsigned long begin_jiffies;
4215
		unsigned long begin_jiffies;
3799
		unsigned long slept_jiffies;
4216
		unsigned long slept_jiffies;
4217
		bool use_console_lock = printing_via_unlock;
4218
4219
		/*
4220
		 * Ensure the compiler does not optimize @use_console_lock to
4221
		 * be @printing_via_unlock since the latter can change at any
4222
		 * time.
4223
		 */
4224
		barrier();
3800
4225
3801
		diff = 0;
4226
		diff = 0;
3802
4227
3803
		/*
4228
		if (use_console_lock) {
3804
		 * Hold the console_lock to guarantee safe access to
4229
			/*
3805
		 * console->seq. Releasing console_lock flushes more
4230
			 * Hold the console_lock to guarantee safe access to
3806
		 * records in case @seq is still not printed on all
4231
			 * console->seq. Releasing console_lock flushes more
3807
		 * usable consoles.
4232
			 * records in case @seq is still not printed on all
3808
		 */
4233
			 * usable consoles.
3809
		console_lock();
4234
			 */
4235
			console_lock();
4236
		}
3810
4237
3811
		cookie = console_srcu_read_lock();
4238
		cookie = console_srcu_read_lock();
3812
		for_each_console_srcu(c) {
4239
		for_each_console_srcu(c) {
Lines 3820-3831 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) Link Here
3820
			 * that they make forward progress, so only increment
4247
			 * that they make forward progress, so only increment
3821
			 * @diff for usable consoles.
4248
			 * @diff for usable consoles.
3822
			 */
4249
			 */
3823
			if (!console_is_usable(c))
4250
			if (!console_is_usable(c, flags, true) &&
4251
			    !console_is_usable(c, flags, false)) {
3824
				continue;
4252
				continue;
4253
			}
3825
4254
3826
			if (flags & CON_NBCON) {
4255
			if (flags & CON_NBCON) {
3827
				printk_seq = nbcon_seq_read(c);
4256
				printk_seq = nbcon_seq_read(c);
3828
			} else {
4257
			} else {
4258
				WARN_ON_ONCE(!use_console_lock);
3829
				printk_seq = c->seq;
4259
				printk_seq = c->seq;
3830
			}
4260
			}
3831
4261
Lines 3837-3843 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) Link Here
3837
		if (diff != last_diff && reset_on_progress)
4267
		if (diff != last_diff && reset_on_progress)
3838
			remaining_jiffies = timeout_jiffies;
4268
			remaining_jiffies = timeout_jiffies;
3839
4269
3840
		console_unlock();
4270
		if (use_console_lock)
4271
			console_unlock();
3841
4272
3842
		/* Note: @diff is 0 if there are no usable consoles. */
4273
		/* Note: @diff is 0 if there are no usable consoles. */
3843
		if (diff == 0 || remaining_jiffies == 0)
4274
		if (diff == 0 || remaining_jiffies == 0)
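The __pr_flush() loop above charges the time slept against remaining_jiffies but re-arms the full budget whenever @diff changes and @reset_on_progress is set, so a console that keeps making progress is never timed out while a genuinely stalled one still is. The same control flow in a tiny standalone form; the tick counter and the workload below are stand-ins, not kernel calls.

#include <stdio.h>

static int work_left = 5;

static int wait_one_tick(void)          /* pretend each tick prints a record */
{
        if (work_left > 0)
                work_left--;
        return work_left;
}

int main(void)
{
        const int timeout_ticks = 3;
        int remaining = timeout_ticks;
        int last_diff = -1;

        while (remaining > 0) {
                int diff = wait_one_tick();

                if (diff == 0) {
                        puts("flushed everything");
                        return 0;
                }
                remaining--;                    /* time spent this iteration */
                if (diff != last_diff)          /* progress: re-arm the budget */
                        remaining = timeout_ticks;
                last_diff = diff;
        }
        puts("timed out while the consoles were stalled");
        return 1;
}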
Lines 3889-3897 static void wake_up_klogd_work_func(struct irq_work *irq_work) Link Here
3889
	int pending = this_cpu_xchg(printk_pending, 0);
4320
	int pending = this_cpu_xchg(printk_pending, 0);
3890
4321
3891
	if (pending & PRINTK_PENDING_OUTPUT) {
4322
	if (pending & PRINTK_PENDING_OUTPUT) {
3892
		/* If trylock fails, someone else is doing the printing */
4323
		if (force_printkthreads()) {
3893
		if (console_trylock())
4324
			wake_up_legacy_kthread();
3894
			console_unlock();
4325
		} else {
4326
			/*
4327
			 * If trylock fails, some other context
4328
			 * will do the printing.
4329
			 */
4330
			if (console_trylock())
4331
				console_unlock();
4332
		}
3895
	}
4333
	}
3896
4334
3897
	if (pending & PRINTK_PENDING_WAKEUP)
4335
	if (pending & PRINTK_PENDING_WAKEUP)
Lines 3907-3912 static void __wake_up_klogd(int val) Link Here
3907
		return;
4345
		return;
3908
4346
3909
	preempt_disable();
4347
	preempt_disable();
4348
3910
	/*
4349
	/*
3911
	 * Guarantee any new records can be seen by tasks preparing to wait
4350
	 * Guarantee any new records can be seen by tasks preparing to wait
3912
	 * before this context checks if the wait queue is empty.
4351
	 * before this context checks if the wait queue is empty.
Lines 3918-3928 static void __wake_up_klogd(int val) Link Here
3918
	 *
4357
	 *
3919
	 * This pairs with devkmsg_read:A and syslog_print:A.
4358
	 * This pairs with devkmsg_read:A and syslog_print:A.
3920
	 */
4359
	 */
3921
	if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */
4360
	if (!wq_has_sleeper(&log_wait)) /* LMM(__wake_up_klogd:A) */
3922
	    (val & PRINTK_PENDING_OUTPUT)) {
4361
		val &= ~PRINTK_PENDING_WAKEUP;
4362
4363
	/*
4364
	 * Simple read is safe. register_console() would flush a newly
4365
	 * registered legacy console when writing the message about it
4366
	 * being enabled.
4367
	 */
4368
	if (!printing_via_unlock)
4369
		val &= ~PRINTK_PENDING_OUTPUT;
4370
4371
	if (val) {
3923
		this_cpu_or(printk_pending, val);
4372
		this_cpu_or(printk_pending, val);
3924
		irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
4373
		irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
3925
	}
4374
	}
4375
3926
	preempt_enable();
4376
	preempt_enable();
3927
}
4377
}
3928
4378
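The reworked __wake_up_klogd() above drops PRINTK_PENDING_WAKEUP when nothing sleeps on the log wait queue, drops PRINTK_PENDING_OUTPUT when no console needs the console_unlock() path, and only queues the irq_work if some bit survives. The bit-masking logic on its own, as a hedged userspace sketch with made-up bit values and helper names:

#include <stdio.h>

#define PENDING_WAKEUP  0x01    /* illustrative values, not the kernel's */
#define PENDING_OUTPUT  0x02

static unsigned int pending;    /* stands in for the per-CPU printk_pending */

static void wake_klogd(unsigned int val, int have_sleeper, int via_unlock)
{
        if (!have_sleeper)              /* nobody is waiting on the log queue */
                val &= ~PENDING_WAKEUP;
        if (!via_unlock)                /* no legacy console uses this path */
                val &= ~PENDING_OUTPUT;

        if (val) {                      /* only queue work that has a consumer */
                pending |= val;
                printf("irq_work queued, pending=0x%x\n", pending);
        }
}

int main(void)
{
        wake_klogd(PENDING_WAKEUP | PENDING_OUTPUT, 0, 0);      /* no-op */
        wake_klogd(PENDING_WAKEUP | PENDING_OUTPUT, 1, 0);      /* wakeup only */
        return 0;
}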
Lines 3964-3969 void defer_console_output(void) Link Here
3964
4414
3965
void printk_trigger_flush(void)
4415
void printk_trigger_flush(void)
3966
{
4416
{
4417
	nbcon_wake_threads();
3967
	defer_console_output();
4418
	defer_console_output();
3968
}
4419
}
3969
4420
(-)a/kernel/printk/printk_ringbuffer.h (+2 lines)
Lines 5-10 Link Here
5
5
6
#include <linux/atomic.h>
6
#include <linux/atomic.h>
7
#include <linux/dev_printk.h>
7
#include <linux/dev_printk.h>
8
#include <linux/stddef.h>
9
#include <linux/types.h>
8
10
9
/*
11
/*
10
 * Meta information about each stored message.
12
 * Meta information about each stored message.
(-)a/kernel/printk/printk_safe.c (+12 lines)
Lines 26-31 void __printk_safe_exit(void) Link Here
26
	this_cpu_dec(printk_context);
26
	this_cpu_dec(printk_context);
27
}
27
}
28
28
29
void __printk_deferred_enter(void)
30
{
31
	cant_migrate();
32
	__printk_safe_enter();
33
}
34
35
void __printk_deferred_exit(void)
36
{
37
	cant_migrate();
38
	__printk_safe_exit();
39
}
40
29
asmlinkage int vprintk(const char *fmt, va_list args)
41
asmlinkage int vprintk(const char *fmt, va_list args)
30
{
42
{
31
#ifdef CONFIG_KGDB_KDB
43
#ifdef CONFIG_KGDB_KDB
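The new __printk_deferred_enter()/__printk_deferred_exit() pair above combines a migration check with the same per-CPU counter used by __printk_safe_enter()/__printk_safe_exit(). The counter part is plain balanced nesting, which the thread-local sketch below illustrates; cant_migrate() has no direct userspace analogue and is omitted here, and all names are invented for the example.

#include <assert.h>
#include <stdio.h>

/* Thread-local depth counter standing in for the per-CPU printk_context. */
static _Thread_local int deferred_depth;

static void deferred_enter(void)
{
        deferred_depth++;               /* like __printk_safe_enter() */
}

static void deferred_exit(void)
{
        assert(deferred_depth > 0);     /* an unbalanced exit is a bug */
        deferred_depth--;               /* like __printk_safe_exit() */
}

int main(void)
{
        deferred_enter();
        deferred_enter();               /* nesting is allowed */
        printf("depth while deferred: %d\n", deferred_depth);
        deferred_exit();
        deferred_exit();
        printf("depth afterwards: %d\n", deferred_depth);
        return 0;
}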
(-)a/kernel/rcu/rcutorture.c (+6 lines)
Lines 2413-2418 static int rcutorture_booster_init(unsigned int cpu) Link Here
2413
		WARN_ON_ONCE(!t);
2413
		WARN_ON_ONCE(!t);
2414
		sp.sched_priority = 2;
2414
		sp.sched_priority = 2;
2415
		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
2415
		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
2416
#ifdef CONFIG_PREEMPT_RT
2417
		t = per_cpu(timersd, cpu);
2418
		WARN_ON_ONCE(!t);
2419
		sp.sched_priority = 2;
2420
		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
2421
#endif
2416
	}
2422
	}
2417
2423
2418
	/* Don't allow time recalculation while creating a new task. */
2424
	/* Don't allow time recalculation while creating a new task. */
(-)a/kernel/rcu/tree_exp.h (+9 lines)
Lines 7-12 Link Here
7
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
7
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
8
 */
8
 */
9
9
10
#include <linux/console.h>
10
#include <linux/lockdep.h>
11
#include <linux/lockdep.h>
11
12
12
static void rcu_exp_handler(void *unused);
13
static void rcu_exp_handler(void *unused);
Lines 571-576 static void synchronize_rcu_expedited_wait(void) Link Here
571
			return;
572
			return;
572
		if (rcu_stall_is_suppressed())
573
		if (rcu_stall_is_suppressed())
573
			continue;
574
			continue;
575
576
		nbcon_cpu_emergency_enter();
577
574
		j = jiffies;
578
		j = jiffies;
575
		rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_EXP, (void *)(j - jiffies_start));
579
		rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_EXP, (void *)(j - jiffies_start));
576
		trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
580
		trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
Lines 612-617 static void synchronize_rcu_expedited_wait(void) Link Here
612
			}
616
			}
613
			pr_cont("\n");
617
			pr_cont("\n");
614
		}
618
		}
619
		nbcon_cpu_emergency_flush();
615
		rcu_for_each_leaf_node(rnp) {
620
		rcu_for_each_leaf_node(rnp) {
616
			for_each_leaf_node_possible_cpu(rnp, cpu) {
621
			for_each_leaf_node_possible_cpu(rnp, cpu) {
617
				mask = leaf_node_cpu_bit(rnp, cpu);
622
				mask = leaf_node_cpu_bit(rnp, cpu);
Lines 624-629 static void synchronize_rcu_expedited_wait(void) Link Here
624
			rcu_exp_print_detail_task_stall_rnp(rnp);
629
			rcu_exp_print_detail_task_stall_rnp(rnp);
625
		}
630
		}
626
		jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
631
		jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
632
633
		nbcon_cpu_emergency_exit();
634
627
		panic_on_rcu_stall();
635
		panic_on_rcu_stall();
628
	}
636
	}
629
}
637
}
Lines 792-797 static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp) Link Here
792
		 */
800
		 */
793
		touch_nmi_watchdog();
801
		touch_nmi_watchdog();
794
		sched_show_task(t);
802
		sched_show_task(t);
803
		nbcon_cpu_emergency_flush();
795
	}
804
	}
796
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
805
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
797
}
806
}
(-)a/kernel/rcu/tree_stall.h (+11 lines)
Lines 7-12 Link Here
7
 * Author: Paul E. McKenney <paulmck@linux.ibm.com>
7
 * Author: Paul E. McKenney <paulmck@linux.ibm.com>
8
 */
8
 */
9
9
10
#include <linux/console.h>
10
#include <linux/kvm_para.h>
11
#include <linux/kvm_para.h>
11
#include <linux/rcu_notifier.h>
12
#include <linux/rcu_notifier.h>
12
13
Lines 260-265 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) Link Here
260
		 */
261
		 */
261
		touch_nmi_watchdog();
262
		touch_nmi_watchdog();
262
		sched_show_task(t);
263
		sched_show_task(t);
264
		nbcon_cpu_emergency_flush();
263
	}
265
	}
264
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
266
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
265
}
267
}
Lines 522-527 static void print_cpu_stall_info(int cpu) Link Here
522
	       falsepositive ? " (false positive?)" : "");
524
	       falsepositive ? " (false positive?)" : "");
523
525
524
	print_cpu_stat_info(cpu);
526
	print_cpu_stat_info(cpu);
527
	nbcon_cpu_emergency_flush();
525
}
528
}
526
529
527
/* Complain about starvation of grace-period kthread.  */
530
/* Complain about starvation of grace-period kthread.  */
Lines 604-609 static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) Link Here
604
	if (rcu_stall_is_suppressed())
607
	if (rcu_stall_is_suppressed())
605
		return;
608
		return;
606
609
610
	nbcon_cpu_emergency_enter();
611
607
	/*
612
	/*
608
	 * OK, time to rat on our buddy...
613
	 * OK, time to rat on our buddy...
609
	 * See Documentation/RCU/stallwarn.rst for info on how to debug
614
	 * See Documentation/RCU/stallwarn.rst for info on how to debug
Lines 655-660 static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps) Link Here
655
	rcu_check_gp_kthread_expired_fqs_timer();
660
	rcu_check_gp_kthread_expired_fqs_timer();
656
	rcu_check_gp_kthread_starvation();
661
	rcu_check_gp_kthread_starvation();
657
662
663
	nbcon_cpu_emergency_exit();
664
658
	panic_on_rcu_stall();
665
	panic_on_rcu_stall();
659
666
660
	rcu_force_quiescent_state();  /* Kick them all. */
667
	rcu_force_quiescent_state();  /* Kick them all. */
Lines 675-680 static void print_cpu_stall(unsigned long gps) Link Here
675
	if (rcu_stall_is_suppressed())
682
	if (rcu_stall_is_suppressed())
676
		return;
683
		return;
677
684
685
	nbcon_cpu_emergency_enter();
686
678
	/*
687
	/*
679
	 * OK, time to rat on ourselves...
688
	 * OK, time to rat on ourselves...
680
	 * See Documentation/RCU/stallwarn.rst for info on how to debug
689
	 * See Documentation/RCU/stallwarn.rst for info on how to debug
Lines 703-708 static void print_cpu_stall(unsigned long gps) Link Here
703
			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
712
			   jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
704
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
713
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
705
714
715
	nbcon_cpu_emergency_exit();
716
706
	panic_on_rcu_stall();
717
	panic_on_rcu_stall();
707
718
708
	/*
719
	/*
(-)a/kernel/sched/core.c (-15 / +50 lines)
Lines 899-912 static inline void hrtick_rq_init(struct rq *rq) Link Here
899
899
900
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
900
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
901
/*
901
/*
902
 * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
902
 * Atomically set TIF_NEED_RESCHED[_LAZY] and test for TIF_POLLING_NRFLAG,
903
 * this avoids any races wrt polling state changes and thereby avoids
903
 * this avoids any races wrt polling state changes and thereby avoids
904
 * spurious IPIs.
904
 * spurious IPIs.
905
 */
905
 */
906
static inline bool set_nr_and_not_polling(struct task_struct *p)
906
static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit)
907
{
907
{
908
	struct thread_info *ti = task_thread_info(p);
908
	struct thread_info *ti = task_thread_info(p);
909
	return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
909
910
	return !(fetch_or(&ti->flags, 1 << tif_bit) & _TIF_POLLING_NRFLAG);
910
}
911
}
911
912
912
/*
913
/*
Lines 923-929 static bool set_nr_if_polling(struct task_struct *p) Link Here
923
	do {
924
	do {
924
		if (!(val & _TIF_POLLING_NRFLAG))
925
		if (!(val & _TIF_POLLING_NRFLAG))
925
			return false;
926
			return false;
926
		if (val & _TIF_NEED_RESCHED)
927
		if (val & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
927
			return true;
928
			return true;
928
	} while (!try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED));
929
	} while (!try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED));
929
930
Lines 931-939 static bool set_nr_if_polling(struct task_struct *p) Link Here
931
}
932
}
932
933
933
#else
934
#else
934
static inline bool set_nr_and_not_polling(struct task_struct *p)
935
static inline bool set_nr_and_not_polling(struct task_struct *p, int tif_bit)
935
{
936
{
936
	set_tsk_need_resched(p);
937
	set_tsk_thread_flag(p, tif_bit);
937
	return true;
938
	return true;
938
}
939
}
939
940
Lines 1038-1065 void wake_up_q(struct wake_q_head *head) Link Here
1038
 * might also involve a cross-CPU call to trigger the scheduler on
1039
 * might also involve a cross-CPU call to trigger the scheduler on
1039
 * the target CPU.
1040
 * the target CPU.
1040
 */
1041
 */
1041
void resched_curr(struct rq *rq)
1042
static void __resched_curr(struct rq *rq, int lazy)
1042
{
1043
{
1044
	int cpu, tif_bit = TIF_NEED_RESCHED + lazy;
1043
	struct task_struct *curr = rq->curr;
1045
	struct task_struct *curr = rq->curr;
1044
	int cpu;
1045
1046
1046
	lockdep_assert_rq_held(rq);
1047
	lockdep_assert_rq_held(rq);
1047
1048
1048
	if (test_tsk_need_resched(curr))
1049
	if (unlikely(test_tsk_thread_flag(curr, tif_bit)))
1049
		return;
1050
		return;
1050
1051
1051
	cpu = cpu_of(rq);
1052
	cpu = cpu_of(rq);
1052
1053
1053
	if (cpu == smp_processor_id()) {
1054
	if (cpu == smp_processor_id()) {
1054
		set_tsk_need_resched(curr);
1055
		set_tsk_thread_flag(curr, tif_bit);
1055
		set_preempt_need_resched();
1056
		if (!lazy)
1057
			set_preempt_need_resched();
1056
		return;
1058
		return;
1057
	}
1059
	}
1058
1060
1059
	if (set_nr_and_not_polling(curr))
1061
	if (set_nr_and_not_polling(curr, tif_bit)) {
1060
		smp_send_reschedule(cpu);
1062
		if (!lazy)
1061
	else
1063
			smp_send_reschedule(cpu);
1064
	} else {
1062
		trace_sched_wake_idle_without_ipi(cpu);
1065
		trace_sched_wake_idle_without_ipi(cpu);
1066
	}
1067
}
1068
1069
void resched_curr(struct rq *rq)
1070
{
1071
	__resched_curr(rq, 0);
1072
}
1073
1074
void resched_curr_lazy(struct rq *rq)
1075
{
1076
	int lazy = IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) && !sched_feat(FORCE_NEED_RESCHED) ?
1077
		TIF_NEED_RESCHED_LAZY_OFFSET : 0;
1078
1079
	if (lazy && unlikely(test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED)))
1080
		return;
1081
1082
	__resched_curr(rq, lazy);
1063
}
1083
}
1064
1084
1065
void resched_cpu(int cpu)
1085
void resched_cpu(int cpu)
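The __resched_curr()/resched_curr_lazy() split above boils down to: pick either TIF_NEED_RESCHED or TIF_NEED_RESCHED_LAZY, set it atomically while testing the polling bit, and only send an IPI for a non-lazy request to a CPU that is not already polling its flags. A hedged C11-atomics sketch of that decision follows; the bit layout and the printed actions are invented for illustration.

#include <stdatomic.h>
#include <stdio.h>

#define FLAG_NEED_RESCHED       (1u << 0)       /* illustrative bit layout */
#define FLAG_NEED_RESCHED_LAZY  (1u << 1)
#define FLAG_POLLING            (1u << 2)

static atomic_uint thread_flags;

/* Set the requested bit and report whether an IPI would be needed, i.e.
 * whether the target was not already polling its flags. */
static int set_flag_and_not_polling(unsigned int bit)
{
        unsigned int old = atomic_fetch_or(&thread_flags, bit);

        return !(old & FLAG_POLLING);
}

static void resched(int lazy)
{
        unsigned int bit = lazy ? FLAG_NEED_RESCHED_LAZY : FLAG_NEED_RESCHED;

        if (set_flag_and_not_polling(bit)) {
                if (!lazy)
                        printf("send IPI\n");
                else
                        printf("flag set, no IPI (lazy request)\n");
        } else {
                printf("target is polling, no IPI needed\n");
        }
}

int main(void)
{
        resched(1);                     /* lazy: flag only */
        resched(0);                     /* immediate: flag plus IPI */
        atomic_fetch_or(&thread_flags, FLAG_POLLING);
        resched(0);                     /* polling target: no IPI */
        return 0;
}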
Lines 1154-1160 static void wake_up_idle_cpu(int cpu) Link Here
1154
	 * and testing of the above solutions didn't appear to report
1174
	 * and testing of the above solutions didn't appear to report
1155
	 * much benefits.
1175
	 * much benefits.
1156
	 */
1176
	 */
1157
	if (set_nr_and_not_polling(rq->idle))
1177
	if (set_nr_and_not_polling(rq->idle, TIF_NEED_RESCHED))
1158
		smp_send_reschedule(cpu);
1178
		smp_send_reschedule(cpu);
1159
	else
1179
	else
1160
		trace_sched_wake_idle_without_ipi(cpu);
1180
		trace_sched_wake_idle_without_ipi(cpu);
Lines 8911-8916 static inline void preempt_dynamic_init(void) { } Link Here
8911
8931
8912
#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
8932
#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */
8913
8933
8934
/*
8935
 * task_is_pi_boosted - Check if task has been PI boosted.
8936
 * @p:	Task to check.
8937
 *
8938
 * Return true if task is subject to priority inheritance.
8939
 */
8940
bool task_is_pi_boosted(const struct task_struct *p)
8941
{
8942
	int prio = p->prio;
8943
8944
	if (!rt_prio(prio))
8945
		return false;
8946
	return prio != p->normal_prio;
8947
}
8948
8914
/**
8949
/**
8915
 * yield - yield the current processor to other threads.
8950
 * yield - yield the current processor to other threads.
8916
 *
8951
 *
(-)a/kernel/sched/debug.c (+19 lines)
Lines 333-338 static const struct file_operations sched_debug_fops = { Link Here
333
	.release	= seq_release,
333
	.release	= seq_release,
334
};
334
};
335
335
336
static ssize_t sched_hog_write(struct file *filp, const char __user *ubuf,
337
			       size_t cnt, loff_t *ppos)
338
{
339
	unsigned long end = jiffies + 60 * HZ;
340
341
	for (; time_before(jiffies, end) && !signal_pending(current);)
342
		cpu_relax();
343
344
	return cnt;
345
}
346
347
static const struct file_operations sched_hog_fops = {
348
	.write		= sched_hog_write,
349
	.open		= simple_open,
350
	.llseek		= default_llseek,
351
};
352
336
static struct dentry *debugfs_sched;
353
static struct dentry *debugfs_sched;
337
354
338
static __init int sched_init_debug(void)
355
static __init int sched_init_debug(void)
Lines 374-379 static __init int sched_init_debug(void) Link Here
374
391
375
	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
392
	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);
376
393
394
	debugfs_create_file("hog", 0200, debugfs_sched, NULL, &sched_hog_fops);
395
377
	return 0;
396
	return 0;
378
}
397
}
379
late_initcall(sched_init_debug);
398
late_initcall(sched_init_debug);
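The new debugfs "hog" file above installs a write handler that simply burns CPU for up to a minute, bailing out early if a signal is pending. A userspace approximation of that loop, with SIGINT standing in for signal_pending(current) and a much shorter deadline:

#include <signal.h>
#include <stdio.h>
#include <time.h>

static volatile sig_atomic_t interrupted;

static void on_sigint(int sig)
{
        (void)sig;
        interrupted = 1;
}

int main(void)
{
        time_t end = time(NULL) + 5;    /* 5 s here instead of the patch's 60 s */

        signal(SIGINT, on_sigint);
        while (time(NULL) < end && !interrupted)
                ;                       /* burn CPU, like the cpu_relax() loop */

        puts(interrupted ? "interrupted" : "deadline reached");
        return 0;
}

With the patch applied, writing anything to the hog file (presumably under the sched debugfs directory created above) should run the in-kernel 60-second variant on the writing CPU, which looks intended as a simple way to generate background load for testing.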
(-)a/kernel/sched/fair.c (-15 / +31 lines)
Lines 981-988 static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se); Link Here
981
 * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
981
 * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
982
 * this is probably good enough.
982
 * this is probably good enough.
983
 */
983
 */
984
static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
984
static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se, bool tick)
985
{
985
{
986
	struct rq *rq = rq_of(cfs_rq);
987
986
	if ((s64)(se->vruntime - se->deadline) < 0)
988
	if ((s64)(se->vruntime - se->deadline) < 0)
987
		return;
989
		return;
988
990
Lines 1001-1010 static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se) Link Here
1001
	/*
1003
	/*
1002
	 * The task has consumed its request, reschedule.
1004
	 * The task has consumed its request, reschedule.
1003
	 */
1005
	 */
1004
	if (cfs_rq->nr_running > 1) {
1006
	if (cfs_rq->nr_running < 2)
1005
		resched_curr(rq_of(cfs_rq));
1007
		return;
1006
		clear_buddies(cfs_rq, se);
1008
1009
	if (!IS_ENABLED(CONFIG_PREEMPT_BUILD_AUTO) || sched_feat(FORCE_NEED_RESCHED)) {
1010
		resched_curr(rq);
1011
	} else {
1012
		/* Did the task ignore the lazy reschedule request? */
1013
		if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
1014
			resched_curr(rq);
1015
		else
1016
			resched_curr_lazy(rq);
1007
	}
1017
	}
1018
	clear_buddies(cfs_rq, se);
1008
}
1019
}
1009
1020
1010
#include "pelt.h"
1021
#include "pelt.h"
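The early return kept in update_deadline() above, (s64)(se->vruntime - se->deadline) < 0, is the usual wrap-safe ordering test for free-running u64 counters: subtract and look at the sign instead of comparing the raw values. A short demonstration of why the signed difference is the right check:

#include <stdint.h>
#include <stdio.h>

/* Nonzero when a is "before" b even if the u64 counter has wrapped: the
 * same trick as the (s64)(se->vruntime - se->deadline) < 0 test. */
static int before(uint64_t a, uint64_t b)
{
        return (int64_t)(a - b) < 0;
}

int main(void)
{
        uint64_t near_wrap = UINT64_MAX - 5;    /* counter just before wrapping */
        uint64_t wrapped = 10;                  /* the same counter after wrapping */

        printf("naive compare:     %d\n", near_wrap < wrapped);        /* 0: wrong */
        printf("signed-diff check: %d\n", before(near_wrap, wrapped)); /* 1: right */
        return 0;
}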
Lines 1159-1165 s64 update_curr_common(struct rq *rq) Link Here
1159
/*
1170
/*
1160
 * Update the current task's runtime statistics.
1171
 * Update the current task's runtime statistics.
1161
 */
1172
 */
1162
static void update_curr(struct cfs_rq *cfs_rq)
1173
static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
1163
{
1174
{
1164
	struct sched_entity *curr = cfs_rq->curr;
1175
	struct sched_entity *curr = cfs_rq->curr;
1165
	s64 delta_exec;
1176
	s64 delta_exec;
Lines 1362-1368 Link Here
1362
#else // !CONFIG_SCHED_BORE
1362
#else // !CONFIG_SCHED_BORE
1363
	curr->vruntime += calc_delta_fair(delta_exec, curr);
1363
	curr->vruntime += calc_delta_fair(delta_exec, curr);
1364
#endif // CONFIG_SCHED_BORE
1364
#endif // CONFIG_SCHED_BORE
1365
	update_deadline(cfs_rq, curr);
1365
	update_deadline(cfs_rq, curr, tick);
1366
	update_min_vruntime(cfs_rq);
1366
	update_min_vruntime(cfs_rq);
1367
1367
1368
	if (entity_is_task(curr))
1368
	if (entity_is_task(curr))
Lines 1181-1186 static void update_curr(struct cfs_rq *cfs_rq) Link Here
1181
	account_cfs_rq_runtime(cfs_rq, delta_exec);
1192
	account_cfs_rq_runtime(cfs_rq, delta_exec);
1182
}
1193
}
1183
1194
1195
static inline void update_curr(struct cfs_rq *cfs_rq)
1196
{
1197
	__update_curr(cfs_rq, false);
1198
}
1199
1184
static void update_curr_fair(struct rq *rq)
1200
static void update_curr_fair(struct rq *rq)
1185
{
1201
{
1186
	update_curr(cfs_rq_of(&rq->curr->se));
1202
	update_curr(cfs_rq_of(&rq->curr->se));
Lines 5505-5511 entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) Link Here
5505
	/*
5521
	/*
5506
	 * Update run-time statistics of the 'current'.
5522
	 * Update run-time statistics of the 'current'.
5507
	 */
5523
	 */
5508
	update_curr(cfs_rq);
5524
	__update_curr(cfs_rq, true);
5509
5525
5510
	/*
5526
	/*
5511
	 * Ensure that runnable average is periodically updated.
5527
	 * Ensure that runnable average is periodically updated.
Lines 5519-5525 entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) Link Here
5519
	 * validating it and just reschedule.
5535
	 * validating it and just reschedule.
5520
	 */
5536
	 */
5521
	if (queued) {
5537
	if (queued) {
5522
		resched_curr(rq_of(cfs_rq));
5538
		resched_curr_lazy(rq_of(cfs_rq));
5523
		return;
5539
		return;
5524
	}
5540
	}
5525
	/*
5541
	/*
Lines 5665-5671 static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) Link Here
5665
	 * hierarchy can be throttled
5681
	 * hierarchy can be throttled
5666
	 */
5682
	 */
5667
	if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
5683
	if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
5668
		resched_curr(rq_of(cfs_rq));
5684
		resched_curr_lazy(rq_of(cfs_rq));
5669
}
5685
}
5670
5686
5671
static __always_inline
5687
static __always_inline
Lines 5925-5931 void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) Link Here
5925
5941
5926
	/* Determine whether we need to wake up potentially idle CPU: */
5942
	/* Determine whether we need to wake up potentially idle CPU: */
5927
	if (rq->curr == rq->idle && rq->cfs.nr_running)
5943
	if (rq->curr == rq->idle && rq->cfs.nr_running)
5928
		resched_curr(rq);
5944
		resched_curr_lazy(rq);
5929
}
5945
}
5930
5946
5931
#ifdef CONFIG_SMP
5947
#ifdef CONFIG_SMP
Lines 6640-6646 static void hrtick_start_fair(struct rq *rq, struct task_struct *p) Link Here
6640
6656
6641
		if (delta < 0) {
6657
		if (delta < 0) {
6642
			if (task_current(rq, p))
6658
			if (task_current(rq, p))
6643
				resched_curr(rq);
6659
				resched_curr_lazy(rq);
6644
			return;
6660
			return;
6645
		}
6661
		}
6646
		hrtick_start(rq, delta);
6662
		hrtick_start(rq, delta);
Lines 8316-8322 static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int Link Here
8316
	 * prevents us from potentially nominating it as a false LAST_BUDDY
8332
	 * prevents us from potentially nominating it as a false LAST_BUDDY
8317
	 * below.
8333
	 * below.
8318
	 */
8334
	 */
8319
	if (test_tsk_need_resched(curr))
8335
	if (need_resched())
8320
		return;
8336
		return;
8321
8337
8322
	/* Idle tasks are by definition preempted by non-idle tasks. */
8338
	/* Idle tasks are by definition preempted by non-idle tasks. */
Lines 8358-8364 static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int Link Here
8358
	return;
8374
	return;
8359
8375
8360
preempt:
8376
preempt:
8361
	resched_curr(rq);
8377
	resched_curr_lazy(rq);
8362
}
8378
}
8363
8379
8364
#ifdef CONFIG_SMP
8380
#ifdef CONFIG_SMP
Lines 12504-12510 static inline void task_tick_core(struct rq *rq, struct task_struct *curr) Link Here
12504
	 */
12520
	 */
12505
	if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 &&
12521
	if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 &&
12506
	    __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
12522
	    __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
12507
		resched_curr(rq);
12523
		resched_curr_lazy(rq);
12508
}
12524
}
12509
12525
12510
/*
12526
/*
Lines 12669-12675 prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) Link Here
12669
	 */
12685
	 */
12670
	if (task_current(rq, p)) {
12686
	if (task_current(rq, p)) {
12671
		if (p->prio > oldprio)
12687
		if (p->prio > oldprio)
12672
			resched_curr(rq);
12688
			resched_curr_lazy(rq);
12673
	} else
12689
	} else
12674
		wakeup_preempt(rq, p, 0);
12690
		wakeup_preempt(rq, p, 0);
12675
}
12691
}
(-)a/kernel/sched/features.h (+2 lines)
Lines 87-89 SCHED_FEAT(UTIL_EST, true) Link Here
87
SCHED_FEAT(LATENCY_WARN, false)
87
SCHED_FEAT(LATENCY_WARN, false)
88
88
89
SCHED_FEAT(HZ_BW, true)
89
SCHED_FEAT(HZ_BW, true)
90
91
SCHED_FEAT(FORCE_NEED_RESCHED, false)
(-)a/kernel/sched/idle.c (-2 / +1 lines)
Lines 57-64 static noinline int __cpuidle cpu_idle_poll(void) Link Here
57
	ct_cpuidle_enter();
57
	ct_cpuidle_enter();
58
58
59
	raw_local_irq_enable();
59
	raw_local_irq_enable();
60
	while (!tif_need_resched() &&
60
	while (!need_resched() && (cpu_idle_force_poll || tick_check_broadcast_expired()))
61
	       (cpu_idle_force_poll || tick_check_broadcast_expired()))
62
		cpu_relax();
61
		cpu_relax();
63
	raw_local_irq_disable();
62
	raw_local_irq_disable();
64
63
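The cpu_idle_poll() change above folds the condition onto one line and polls need_resched() while spinning with cpu_relax(). The general shape of such a polling idle loop, spin on a flag with a relax hint until another CPU requests a reschedule, looks like the userspace sketch below; the flag, the pause intrinsic and the thread standing in for another CPU are illustrative assumptions.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int need_resched_flag;

static void *other_cpu(void *arg)
{
        (void)arg;
        atomic_store(&need_resched_flag, 1);    /* "set a resched flag" */
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, other_cpu, NULL);

        while (!atomic_load(&need_resched_flag)) {
#if defined(__x86_64__) || defined(__i386__)
                __builtin_ia32_pause();         /* cpu_relax() stand-in */
#endif
        }
        puts("leaving the poll loop to schedule");

        pthread_join(t, NULL);
        return 0;
}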
(-)a/kernel/sched/rt.c (-1 / +4 lines)
Lines 2194-2201 static int rto_next_cpu(struct root_domain *rd) Link Here
2194
2194
2195
		rd->rto_cpu = cpu;
2195
		rd->rto_cpu = cpu;
2196
2196
2197
		if (cpu < nr_cpu_ids)
2197
		if (cpu < nr_cpu_ids) {
2198
			if (!has_pushable_tasks(cpu_rq(cpu)))
2199
				continue;
2198
			return cpu;
2200
			return cpu;
2201
		}
2199
2202
2200
		rd->rto_cpu = -1;
2203
		rd->rto_cpu = -1;
2201
2204
(-)a/kernel/sched/sched.h (+1 lines)
Lines 2465-2470 extern void init_sched_fair_class(void); Link Here
2465
extern void reweight_task(struct task_struct *p, int prio);
2465
extern void reweight_task(struct task_struct *p, int prio);
2466
2466
2467
extern void resched_curr(struct rq *rq);
2467
extern void resched_curr(struct rq *rq);
2468
extern void resched_curr_lazy(struct rq *rq);
2468
extern void resched_cpu(int cpu);
2469
extern void resched_cpu(int cpu);
2469
2470
2470
extern struct rt_bandwidth def_rt_bandwidth;
2471
extern struct rt_bandwidth def_rt_bandwidth;
(-)a/kernel/softirq.c (-1 / +94 lines)
Lines 248-253 void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) Link Here
248
}
248
}
249
EXPORT_SYMBOL(__local_bh_enable_ip);
249
EXPORT_SYMBOL(__local_bh_enable_ip);
250
250
251
void softirq_preempt(void)
252
{
253
	if (WARN_ON_ONCE(!preemptible()))
254
		return;
255
256
	if (WARN_ON_ONCE(__this_cpu_read(softirq_ctrl.cnt) != SOFTIRQ_OFFSET))
257
		return;
258
259
	__local_bh_enable(SOFTIRQ_OFFSET, true);
260
	/* preemption point */
261
	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
262
}
263
251
/*
264
/*
252
 * Invoked from ksoftirqd_run() outside of the interrupt disabled section
265
 * Invoked from ksoftirqd_run() outside of the interrupt disabled section
253
 * to acquire the per CPU local lock for reentrancy protection.
266
 * to acquire the per CPU local lock for reentrancy protection.
Lines 624-629 static inline void tick_irq_exit(void) Link Here
624
#endif
637
#endif
625
}
638
}
626
639
640
#ifdef CONFIG_PREEMPT_RT
641
DEFINE_PER_CPU(struct task_struct *, timersd);
642
DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
643
644
static void wake_timersd(void)
645
{
646
        struct task_struct *tsk = __this_cpu_read(timersd);
647
648
        if (tsk)
649
                wake_up_process(tsk);
650
}
651
652
#else
653
654
static inline void wake_timersd(void) { }
655
656
#endif
657
627
static inline void __irq_exit_rcu(void)
658
static inline void __irq_exit_rcu(void)
628
{
659
{
629
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
660
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
Lines 636-641 static inline void __irq_exit_rcu(void) Link Here
636
	if (!in_interrupt() && local_softirq_pending())
667
	if (!in_interrupt() && local_softirq_pending())
637
		invoke_softirq();
668
		invoke_softirq();
638
669
670
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() &&
671
	    !(in_nmi() | in_hardirq()))
672
		wake_timersd();
673
639
	tick_irq_exit();
674
	tick_irq_exit();
640
}
675
}
641
676
Lines 972-983 static struct smp_hotplug_thread softirq_threads = { Link Here
972
	.thread_comm		= "ksoftirqd/%u",
1007
	.thread_comm		= "ksoftirqd/%u",
973
};
1008
};
974
1009
1010
#ifdef CONFIG_PREEMPT_RT
1011
static void timersd_setup(unsigned int cpu)
1012
{
1013
        sched_set_fifo_low(current);
1014
}
1015
1016
static int timersd_should_run(unsigned int cpu)
1017
{
1018
        return local_pending_timers();
1019
}
1020
1021
static void run_timersd(unsigned int cpu)
1022
{
1023
	unsigned int timer_si;
1024
1025
	ksoftirqd_run_begin();
1026
1027
	timer_si = local_pending_timers();
1028
	__this_cpu_write(pending_timer_softirq, 0);
1029
	or_softirq_pending(timer_si);
1030
1031
	__do_softirq();
1032
1033
	ksoftirqd_run_end();
1034
}
1035
1036
static void raise_ktimers_thread(unsigned int nr)
1037
{
1038
	trace_softirq_raise(nr);
1039
	__this_cpu_or(pending_timer_softirq, 1 << nr);
1040
}
1041
1042
void raise_hrtimer_softirq(void)
1043
{
1044
	raise_ktimers_thread(HRTIMER_SOFTIRQ);
1045
}
1046
1047
void raise_timer_softirq(void)
1048
{
1049
	unsigned long flags;
1050
1051
	local_irq_save(flags);
1052
	raise_ktimers_thread(TIMER_SOFTIRQ);
1053
	wake_timersd();
1054
	local_irq_restore(flags);
1055
}
1056
1057
static struct smp_hotplug_thread timer_threads = {
1058
        .store                  = &timersd,
1059
        .setup                  = timersd_setup,
1060
        .thread_should_run      = timersd_should_run,
1061
        .thread_fn              = run_timersd,
1062
        .thread_comm            = "ktimers/%u",
1063
};
1064
#endif
1065
975
static __init int spawn_ksoftirqd(void)
1066
static __init int spawn_ksoftirqd(void)
976
{
1067
{
977
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
1068
	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
978
				  takeover_tasklets);
1069
				  takeover_tasklets);
979
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
1070
	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
980
1071
#ifdef CONFIG_PREEMPT_RT
1072
	BUG_ON(smpboot_register_percpu_thread(&timer_threads));
1073
#endif
981
	return 0;
1074
	return 0;
982
}
1075
}
983
early_initcall(spawn_ksoftirqd);
1076
early_initcall(spawn_ksoftirqd);
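The new ktimers/%u thread above works on a private per-CPU pending word: run_timersd() snapshots local_pending_timers(), clears it, folds the bits back into the regular softirq pending mask and runs __do_softirq(). The snapshot-and-clear handoff is the interesting part; the sketch below collapses it into a single atomic exchange for brevity and uses invented bit names, so it is a model of the idea rather than the patch itself.

#include <stdatomic.h>
#include <stdio.h>

#define HRTIMER_BIT     (1u << 0)       /* illustrative bit numbers */
#define TIMER_BIT       (1u << 1)

static atomic_uint pending_timer_bits;  /* the per-CPU word in the patch */

static void raise_bit(unsigned int bit) /* raise_ktimers_thread()-ish */
{
        atomic_fetch_or(&pending_timer_bits, bit);
}

static void run_timersd_once(void)
{
        /* Snapshot and clear in one step, then handle what was pending. */
        unsigned int bits = atomic_exchange(&pending_timer_bits, 0);

        if (bits & HRTIMER_BIT)
                puts("run hrtimer softirq work");
        if (bits & TIMER_BIT)
                puts("run timer softirq work");
}

int main(void)
{
        raise_bit(TIMER_BIT);
        raise_bit(HRTIMER_BIT);
        run_timersd_once();
        run_timersd_once();             /* nothing left: prints nothing */
        return 0;
}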
(-)a/kernel/time/hrtimer.c (-2 / +2 lines)
Lines 1812-1818 void hrtimer_interrupt(struct clock_event_device *dev) Link Here
1812
	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1812
	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1813
		cpu_base->softirq_expires_next = KTIME_MAX;
1813
		cpu_base->softirq_expires_next = KTIME_MAX;
1814
		cpu_base->softirq_activated = 1;
1814
		cpu_base->softirq_activated = 1;
1815
		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1815
		raise_hrtimer_softirq();
1816
	}
1816
	}
1817
1817
1818
	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1818
	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
Lines 1925-1931 void hrtimer_run_queues(void) Link Here
1925
	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1925
	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1926
		cpu_base->softirq_expires_next = KTIME_MAX;
1926
		cpu_base->softirq_expires_next = KTIME_MAX;
1927
		cpu_base->softirq_activated = 1;
1927
		cpu_base->softirq_activated = 1;
1928
		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1928
		raise_hrtimer_softirq();
1929
	}
1929
	}
1930
1930
1931
	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1931
	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
(-)a/kernel/time/tick-sched.c (-1 / +1 lines)
Lines 859-865 static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) Link Here
859
859
860
static inline bool local_timer_softirq_pending(void)
860
static inline bool local_timer_softirq_pending(void)
861
{
861
{
862
	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
862
	return local_pending_timers() & BIT(TIMER_SOFTIRQ);
863
}
863
}
864
864
865
/*
865
/*
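
The hrtimer.c and tick-sched.c hunks (and the timer.c hunk just below) call local_pending_timers(), raise_hrtimer_softirq() and raise_timer_softirq() unconditionally, so the series must also provide !PREEMPT_RT fallbacks that keep non-RT builds behaving exactly as before. Those definitions are not part of this excerpt; the following is a hedged sketch of what they would have to look like (presumably in include/linux/interrupt.h), inferred from the lines being replaced here.

#ifndef CONFIG_PREEMPT_RT
/* Assumed fallbacks, not shown in this patch excerpt. */
static inline unsigned int local_pending_timers(void)
{
	return local_softirq_pending();
}

static inline void raise_hrtimer_softirq(void)
{
	raise_softirq_irqoff(HRTIMER_SOFTIRQ);
}

static inline void raise_timer_softirq(void)
{
	raise_softirq(TIMER_SOFTIRQ);
}
#endif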
(-)a/kernel/time/timer.c (-2 / +9 lines)
Lines 1563-1571 static inline void timer_base_unlock_expiry(struct timer_base *base) Link Here
1563
 */
1563
 */
1564
static void timer_sync_wait_running(struct timer_base *base)
1564
static void timer_sync_wait_running(struct timer_base *base)
1565
{
1565
{
1566
	if (atomic_read(&base->timer_waiters)) {
1566
	bool need_preempt;
1567
1568
	need_preempt = task_is_pi_boosted(current);
1569
	if (need_preempt || atomic_read(&base->timer_waiters)) {
1567
		raw_spin_unlock_irq(&base->lock);
1570
		raw_spin_unlock_irq(&base->lock);
1568
		spin_unlock(&base->expiry_lock);
1571
		spin_unlock(&base->expiry_lock);
1572
1573
		if (need_preempt)
1574
			softirq_preempt();
1575
1569
		spin_lock(&base->expiry_lock);
1576
		spin_lock(&base->expiry_lock);
1570
		raw_spin_lock_irq(&base->lock);
1577
		raw_spin_lock_irq(&base->lock);
1571
	}
1578
	}
Lines 2466-2472 static void run_local_timers(void) Link Here
2466
		/* Raise the softirq only if required. */
2473
		/* Raise the softirq only if required. */
2467
		if (time_after_eq(jiffies, base->next_expiry) ||
2474
		if (time_after_eq(jiffies, base->next_expiry) ||
2468
		    (i == BASE_DEF && tmigr_requires_handle_remote())) {
2475
		    (i == BASE_DEF && tmigr_requires_handle_remote())) {
2469
			raise_softirq(TIMER_SOFTIRQ);
2476
			raise_timer_softirq();
2470
			return;
2477
			return;
2471
		}
2478
		}
2472
	}
2479
	}
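
In timer_sync_wait_running(), the locks are now also dropped, and softirq_preempt() is called, when the current task has been priority-boosted, rather than only when other CPUs are waiting in timer_waiters. Both task_is_pi_boosted() and softirq_preempt() come from other parts of this RT series and are not shown in this excerpt; the sketch below is only a guess at the shape of the check, offered for orientation.

/* Assumed helper from elsewhere in the series; the real definition may
 * differ. It would return true when rt_mutex priority inheritance has
 * raised the task above its normal priority. */
static inline bool task_is_pi_boosted(const struct task_struct *p)
{
	int prio = p->prio;

	if (!rt_prio(prio))
		return false;
	return prio != p->normal_prio;
}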
(-)a/kernel/trace/trace.c (+2 lines)
Lines 2513-2518 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) Link Here
2513
2513
2514
	if (tif_need_resched())
2514
	if (tif_need_resched())
2515
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2515
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2516
	if (tif_need_resched_lazy())
2517
		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2516
	if (test_preempt_need_resched())
2518
	if (test_preempt_need_resched())
2517
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2519
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2518
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2520
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
(-)a/kernel/trace/trace_output.c (-2 / +14 lines)
Lines 460-476 int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) Link Here
460
		(entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
460
		(entry->flags & TRACE_FLAG_IRQS_OFF && bh_off) ? 'D' :
461
		(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
461
		(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
462
		bh_off ? 'b' :
462
		bh_off ? 'b' :
463
		(entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
463
		!IS_ENABLED(CONFIG_TRACE_IRQFLAGS_SUPPORT) ? 'X' :
464
		'.';
464
		'.';
465
465
466
	switch (entry->flags & (TRACE_FLAG_NEED_RESCHED |
466
	switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY |
467
				TRACE_FLAG_PREEMPT_RESCHED)) {
467
				TRACE_FLAG_PREEMPT_RESCHED)) {
468
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
469
		need_resched = 'B';
470
		break;
468
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
471
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED:
469
		need_resched = 'N';
472
		need_resched = 'N';
470
		break;
473
		break;
474
	case TRACE_FLAG_NEED_RESCHED_LAZY | TRACE_FLAG_PREEMPT_RESCHED:
475
		need_resched = 'L';
476
		break;
477
	case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_NEED_RESCHED_LAZY:
478
		need_resched = 'b';
479
		break;
471
	case TRACE_FLAG_NEED_RESCHED:
480
	case TRACE_FLAG_NEED_RESCHED:
472
		need_resched = 'n';
481
		need_resched = 'n';
473
		break;
482
		break;
483
	case TRACE_FLAG_NEED_RESCHED_LAZY:
484
		need_resched = 'l';
485
		break;
474
	case TRACE_FLAG_PREEMPT_RESCHED:
486
	case TRACE_FLAG_PREEMPT_RESCHED:
475
		need_resched = 'p';
487
		need_resched = 'p';
476
		break;
488
		break;
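
For quick reference, the resched character printed in the latency format maps as follows after this hunk (TRACE_FLAG_ prefixes dropped; the '.' case is the pre-existing default and is not touched here):

  'B' = NEED_RESCHED | NEED_RESCHED_LAZY | PREEMPT_RESCHED
  'N' = NEED_RESCHED | PREEMPT_RESCHED
  'L' = NEED_RESCHED_LAZY | PREEMPT_RESCHED
  'b' = NEED_RESCHED | NEED_RESCHED_LAZY
  'n' = NEED_RESCHED
  'l' = NEED_RESCHED_LAZY
  'p' = PREEMPT_RESCHED
  '.' = none set (existing default case)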
(-)a/localversion-rt (+1 lines)
Line 0 Link Here
1
-rt5
(-)a/net/core/dev.c (-71 / +156 lines)
Lines 78-83 Link Here
78
#include <linux/slab.h>
78
#include <linux/slab.h>
79
#include <linux/sched.h>
79
#include <linux/sched.h>
80
#include <linux/sched/mm.h>
80
#include <linux/sched/mm.h>
81
#include <linux/smpboot.h>
81
#include <linux/mutex.h>
82
#include <linux/mutex.h>
82
#include <linux/rwsem.h>
83
#include <linux/rwsem.h>
83
#include <linux/string.h>
84
#include <linux/string.h>
Lines 197-231 static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) Link Here
197
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
198
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
198
}
199
}
199
200
200
static inline void rps_lock_irqsave(struct softnet_data *sd,
201
#ifndef CONFIG_PREEMPT_RT
201
				    unsigned long *flags)
202
203
static DEFINE_STATIC_KEY_FALSE(use_backlog_threads_key);
204
205
static int __init setup_backlog_napi_threads(char *arg)
202
{
206
{
203
	if (IS_ENABLED(CONFIG_RPS))
207
	static_branch_enable(&use_backlog_threads_key);
208
	return 0;
209
}
210
early_param("thread_backlog_napi", setup_backlog_napi_threads);
211
212
static bool use_backlog_threads(void)
213
{
214
	return static_branch_unlikely(&use_backlog_threads_key);
215
}
216
217
#else
218
219
static bool use_backlog_threads(void)
220
{
221
	return true;
222
}
223
224
#endif
225
226
static inline void backlog_lock_irq_save(struct softnet_data *sd,
227
					 unsigned long *flags)
228
{
229
	if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads())
204
		spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
230
		spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
205
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
231
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
206
		local_irq_save(*flags);
232
		local_irq_save(*flags);
207
}
233
}
208
234
209
static inline void rps_lock_irq_disable(struct softnet_data *sd)
235
static inline void backlog_lock_irq_disable(struct softnet_data *sd)
210
{
236
{
211
	if (IS_ENABLED(CONFIG_RPS))
237
	if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads())
212
		spin_lock_irq(&sd->input_pkt_queue.lock);
238
		spin_lock_irq(&sd->input_pkt_queue.lock);
213
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
239
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
214
		local_irq_disable();
240
		local_irq_disable();
215
}
241
}
216
242
217
static inline void rps_unlock_irq_restore(struct softnet_data *sd,
243
static inline void backlog_unlock_irq_restore(struct softnet_data *sd,
218
					  unsigned long *flags)
244
					      unsigned long *flags)
219
{
245
{
220
	if (IS_ENABLED(CONFIG_RPS))
246
	if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads())
221
		spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
247
		spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
222
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
248
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
223
		local_irq_restore(*flags);
249
		local_irq_restore(*flags);
224
}
250
}
225
251
226
static inline void rps_unlock_irq_enable(struct softnet_data *sd)
252
static inline void backlog_unlock_irq_enable(struct softnet_data *sd)
227
{
253
{
228
	if (IS_ENABLED(CONFIG_RPS))
254
	if (IS_ENABLED(CONFIG_RPS) || use_backlog_threads())
229
		spin_unlock_irq(&sd->input_pkt_queue.lock);
255
		spin_unlock_irq(&sd->input_pkt_queue.lock);
230
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
256
	else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
231
		local_irq_enable();
257
		local_irq_enable();
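
The helpers above gate the new behaviour behind a static key: on PREEMPT_RT use_backlog_threads() is hard-wired to true, while on other configurations it stays off unless "thread_backlog_napi" is passed on the kernel command line. Below is a minimal, hedged sketch of that static-key/early_param pattern in isolation; all names are invented for illustration.

#include <linux/init.h>
#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(demo_feature_key);

/* "demo_feature" on the kernel command line flips the key before SMP boot. */
static int __init setup_demo_feature(char *arg)
{
	static_branch_enable(&demo_feature_key);
	return 0;
}
early_param("demo_feature", setup_demo_feature);

static bool demo_feature_enabled(void)
{
	/* Patched out to a plain branch; near-zero cost when disabled. */
	return static_branch_unlikely(&demo_feature_key);
}

Booting with thread_backlog_napi flips use_backlog_threads_key the same way, which makes the backlog_lock_*() helpers take the input_pkt_queue lock even when RPS is disabled and routes backlog work to the per-CPU "backlog_napi/%u" threads registered later in this patch.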
Lines 4410-4415 EXPORT_SYMBOL(__dev_direct_xmit); Link Here
4410
/*************************************************************************
4436
/*************************************************************************
4411
 *			Receiver routines
4437
 *			Receiver routines
4412
 *************************************************************************/
4438
 *************************************************************************/
4439
static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
4413
4440
4414
unsigned int sysctl_skb_defer_max __read_mostly = 64;
4441
unsigned int sysctl_skb_defer_max __read_mostly = 64;
4415
int weight_p __read_mostly = 64;           /* old backlog weight */
4442
int weight_p __read_mostly = 64;           /* old backlog weight */
Lines 4433-4450 static inline void ____napi_schedule(struct softnet_data *sd, Link Here
4433
		 */
4460
		 */
4434
		thread = READ_ONCE(napi->thread);
4461
		thread = READ_ONCE(napi->thread);
4435
		if (thread) {
4462
		if (thread) {
4436
			/* Avoid doing set_bit() if the thread is in
4463
			if (use_backlog_threads() && thread == raw_cpu_read(backlog_napi))
4437
			 * INTERRUPTIBLE state, cause napi_thread_wait()
4464
				goto use_local_napi;
4438
			 * makes sure to proceed with napi polling
4465
4439
			 * if the thread is explicitly woken from here.
4466
			set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
4440
			 */
4441
			if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE)
4442
				set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
4443
			wake_up_process(thread);
4467
			wake_up_process(thread);
4444
			return;
4468
			return;
4445
		}
4469
		}
4446
	}
4470
	}
4447
4471
4472
use_local_napi:
4448
	list_add_tail(&napi->poll_list, &sd->poll_list);
4473
	list_add_tail(&napi->poll_list, &sd->poll_list);
4449
	WRITE_ONCE(napi->list_owner, smp_processor_id());
4474
	WRITE_ONCE(napi->list_owner, smp_processor_id());
4450
	/* If not called from net_rx_action()
4475
	/* If not called from net_rx_action()
Lines 4684-4689 static void napi_schedule_rps(struct softnet_data *sd) Link Here
4684
4709
4685
#ifdef CONFIG_RPS
4710
#ifdef CONFIG_RPS
4686
	if (sd != mysd) {
4711
	if (sd != mysd) {
4712
		if (use_backlog_threads()) {
4713
			__napi_schedule_irqoff(&sd->backlog);
4714
			return;
4715
		}
4716
4687
		sd->rps_ipi_next = mysd->rps_ipi_list;
4717
		sd->rps_ipi_next = mysd->rps_ipi_list;
4688
		mysd->rps_ipi_list = sd;
4718
		mysd->rps_ipi_list = sd;
4689
4719
Lines 4698-4703 static void napi_schedule_rps(struct softnet_data *sd) Link Here
4698
	__napi_schedule_irqoff(&mysd->backlog);
4728
	__napi_schedule_irqoff(&mysd->backlog);
4699
}
4729
}
4700
4730
4731
void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu)
4732
{
4733
	unsigned long flags;
4734
4735
	if (use_backlog_threads()) {
4736
		backlog_lock_irq_save(sd, &flags);
4737
4738
		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
4739
			__napi_schedule_irqoff(&sd->backlog);
4740
4741
		backlog_unlock_irq_restore(sd, &flags);
4742
4743
	} else if (!cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) {
4744
		smp_call_function_single_async(cpu, &sd->defer_csd);
4745
	}
4746
}
4747
4701
#ifdef CONFIG_NET_FLOW_LIMIT
4748
#ifdef CONFIG_NET_FLOW_LIMIT
4702
int netdev_flow_limit_table_len __read_mostly = (1 << 12);
4749
int netdev_flow_limit_table_len __read_mostly = (1 << 12);
4703
#endif
4750
#endif
Lines 4753-4759 static int enqueue_to_backlog(struct sk_buff *skb, int cpu, Link Here
4753
	reason = SKB_DROP_REASON_NOT_SPECIFIED;
4800
	reason = SKB_DROP_REASON_NOT_SPECIFIED;
4754
	sd = &per_cpu(softnet_data, cpu);
4801
	sd = &per_cpu(softnet_data, cpu);
4755
4802
4756
	rps_lock_irqsave(sd, &flags);
4803
	backlog_lock_irq_save(sd, &flags);
4757
	if (!netif_running(skb->dev))
4804
	if (!netif_running(skb->dev))
4758
		goto drop;
4805
		goto drop;
4759
	qlen = skb_queue_len(&sd->input_pkt_queue);
4806
	qlen = skb_queue_len(&sd->input_pkt_queue);
Lines 4763-4769 static int enqueue_to_backlog(struct sk_buff *skb, int cpu, Link Here
4763
enqueue:
4810
enqueue:
4764
			__skb_queue_tail(&sd->input_pkt_queue, skb);
4811
			__skb_queue_tail(&sd->input_pkt_queue, skb);
4765
			input_queue_tail_incr_save(sd, qtail);
4812
			input_queue_tail_incr_save(sd, qtail);
4766
			rps_unlock_irq_restore(sd, &flags);
4813
			backlog_unlock_irq_restore(sd, &flags);
4767
			return NET_RX_SUCCESS;
4814
			return NET_RX_SUCCESS;
4768
		}
4815
		}
4769
4816
Lines 4778-4784 static int enqueue_to_backlog(struct sk_buff *skb, int cpu, Link Here
4778
4825
4779
drop:
4826
drop:
4780
	sd->dropped++;
4827
	sd->dropped++;
4781
	rps_unlock_irq_restore(sd, &flags);
4828
	backlog_unlock_irq_restore(sd, &flags);
4782
4829
4783
	dev_core_stats_rx_dropped_inc(skb->dev);
4830
	dev_core_stats_rx_dropped_inc(skb->dev);
4784
	kfree_skb_reason(skb, reason);
4831
	kfree_skb_reason(skb, reason);
Lines 5844-5850 static void flush_backlog(struct work_struct *work) Link Here
5844
	local_bh_disable();
5891
	local_bh_disable();
5845
	sd = this_cpu_ptr(&softnet_data);
5892
	sd = this_cpu_ptr(&softnet_data);
5846
5893
5847
	rps_lock_irq_disable(sd);
5894
	backlog_lock_irq_disable(sd);
5848
	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
5895
	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
5849
		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5896
		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5850
			__skb_unlink(skb, &sd->input_pkt_queue);
5897
			__skb_unlink(skb, &sd->input_pkt_queue);
Lines 5852-5858 static void flush_backlog(struct work_struct *work) Link Here
5852
			input_queue_head_incr(sd);
5899
			input_queue_head_incr(sd);
5853
		}
5900
		}
5854
	}
5901
	}
5855
	rps_unlock_irq_enable(sd);
5902
	backlog_unlock_irq_enable(sd);
5856
5903
5857
	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
5904
	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
5858
		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5905
		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
Lines 5870-5883 static bool flush_required(int cpu) Link Here
5870
	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
5917
	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
5871
	bool do_flush;
5918
	bool do_flush;
5872
5919
5873
	rps_lock_irq_disable(sd);
5920
	backlog_lock_irq_disable(sd);
5874
5921
5875
	/* as insertion into process_queue happens with the rps lock held,
5922
	/* as insertion into process_queue happens with the rps lock held,
5876
	 * process_queue access may race only with dequeue
5923
	 * process_queue access may race only with dequeue
5877
	 */
5924
	 */
5878
	do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
5925
	do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
5879
		   !skb_queue_empty_lockless(&sd->process_queue);
5926
		   !skb_queue_empty_lockless(&sd->process_queue);
5880
	rps_unlock_irq_enable(sd);
5927
	backlog_unlock_irq_enable(sd);
5881
5928
5882
	return do_flush;
5929
	return do_flush;
5883
#endif
5930
#endif
Lines 5943-5949 static void net_rps_action_and_irq_enable(struct softnet_data *sd) Link Here
5943
#ifdef CONFIG_RPS
5990
#ifdef CONFIG_RPS
5944
	struct softnet_data *remsd = sd->rps_ipi_list;
5991
	struct softnet_data *remsd = sd->rps_ipi_list;
5945
5992
5946
	if (remsd) {
5993
	if (!use_backlog_threads() && remsd) {
5947
		sd->rps_ipi_list = NULL;
5994
		sd->rps_ipi_list = NULL;
5948
5995
5949
		local_irq_enable();
5996
		local_irq_enable();
Lines 5958-5964 static void net_rps_action_and_irq_enable(struct softnet_data *sd) Link Here
5958
static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
6005
static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
5959
{
6006
{
5960
#ifdef CONFIG_RPS
6007
#ifdef CONFIG_RPS
5961
	return sd->rps_ipi_list != NULL;
6008
	return !use_backlog_threads() && sd->rps_ipi_list;
5962
#else
6009
#else
5963
	return false;
6010
	return false;
5964
#endif
6011
#endif
Lines 5992-5998 static int process_backlog(struct napi_struct *napi, int quota) Link Here
5992
6039
5993
		}
6040
		}
5994
6041
5995
		rps_lock_irq_disable(sd);
6042
		backlog_lock_irq_disable(sd);
5996
		if (skb_queue_empty(&sd->input_pkt_queue)) {
6043
		if (skb_queue_empty(&sd->input_pkt_queue)) {
5997
			/*
6044
			/*
5998
			 * Inline a custom version of __napi_complete().
6045
			 * Inline a custom version of __napi_complete().
Lines 6002-6014 static int process_backlog(struct napi_struct *napi, int quota) Link Here
6002
			 * We can use a plain write instead of clear_bit(),
6049
			 * We can use a plain write instead of clear_bit(),
6003
			 * and we dont need an smp_mb() memory barrier.
6050
			 * and we dont need an smp_mb() memory barrier.
6004
			 */
6051
			 */
6005
			napi->state = 0;
6052
			napi->state &= NAPIF_STATE_THREADED;
6006
			again = false;
6053
			again = false;
6007
		} else {
6054
		} else {
6008
			skb_queue_splice_tail_init(&sd->input_pkt_queue,
6055
			skb_queue_splice_tail_init(&sd->input_pkt_queue,
6009
						   &sd->process_queue);
6056
						   &sd->process_queue);
6010
		}
6057
		}
6011
		rps_unlock_irq_enable(sd);
6058
		backlog_unlock_irq_enable(sd);
6012
	}
6059
	}
6013
6060
6014
	return work;
6061
	return work;
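
One detail worth flagging in process_backlog() (and repeated in the dev_cpu_dead() hunk further down): completing the backlog NAPI used to zero napi->state outright, which would now also wipe the NAPIF_STATE_THREADED bit that backlog_napi_setup() sets. The masked form keeps that single bit alive:

	/* old behaviour: clears every bit, detaching the backlog NAPI from
	 * its kthread when backlog threads are in use */
	napi->state = 0;

	/* new behaviour: SCHED, SCHED_THREADED and the rest are still
	 * cleared, but NAPIF_STATE_THREADED survives the completion */
	napi->state &= NAPIF_STATE_THREADED;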
Lines 6716-6723 static int napi_poll(struct napi_struct *n, struct list_head *repoll) Link Here
6716
6763
6717
static int napi_thread_wait(struct napi_struct *napi)
6764
static int napi_thread_wait(struct napi_struct *napi)
6718
{
6765
{
6719
	bool woken = false;
6720
6721
	set_current_state(TASK_INTERRUPTIBLE);
6766
	set_current_state(TASK_INTERRUPTIBLE);
6722
6767
6723
	while (!kthread_should_stop()) {
6768
	while (!kthread_should_stop()) {
Lines 6726-6740 static int napi_thread_wait(struct napi_struct *napi) Link Here
6726
		 * Testing SCHED bit is not enough because SCHED bit might be
6771
		 * Testing SCHED bit is not enough because SCHED bit might be
6727
		 * set by some other busy poll thread or by napi_disable().
6772
		 * set by some other busy poll thread or by napi_disable().
6728
		 */
6773
		 */
6729
		if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
6774
		if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state)) {
6730
			WARN_ON(!list_empty(&napi->poll_list));
6775
			WARN_ON(!list_empty(&napi->poll_list));
6731
			__set_current_state(TASK_RUNNING);
6776
			__set_current_state(TASK_RUNNING);
6732
			return 0;
6777
			return 0;
6733
		}
6778
		}
6734
6779
6735
		schedule();
6780
		schedule();
6736
		/* woken being true indicates this thread owns this napi. */
6737
		woken = true;
6738
		set_current_state(TASK_INTERRUPTIBLE);
6781
		set_current_state(TASK_INTERRUPTIBLE);
6739
	}
6782
	}
6740
	__set_current_state(TASK_RUNNING);
6783
	__set_current_state(TASK_RUNNING);
Lines 6742-6784 static int napi_thread_wait(struct napi_struct *napi) Link Here
6742
	return -1;
6785
	return -1;
6743
}
6786
}
6744
6787
6788
static void napi_threaded_poll_loop(struct napi_struct *napi)
6789
{
6790
	struct softnet_data *sd;
6791
	unsigned long last_qs = jiffies;
6792
6793
	for (;;) {
6794
		bool repoll = false;
6795
		void *have;
6796
6797
		local_bh_disable();
6798
		sd = this_cpu_ptr(&softnet_data);
6799
		sd->in_napi_threaded_poll = true;
6800
6801
		have = netpoll_poll_lock(napi);
6802
		__napi_poll(napi, &repoll);
6803
		netpoll_poll_unlock(have);
6804
6805
		sd->in_napi_threaded_poll = false;
6806
		barrier();
6807
6808
		if (sd_has_rps_ipi_waiting(sd)) {
6809
			local_irq_disable();
6810
			net_rps_action_and_irq_enable(sd);
6811
		}
6812
		skb_defer_free_flush(sd);
6813
		local_bh_enable();
6814
6815
		if (!repoll)
6816
			break;
6817
6818
		rcu_softirq_qs_periodic(last_qs);
6819
		cond_resched();
6820
	}
6821
}
6822
6745
static int napi_threaded_poll(void *data)
6823
static int napi_threaded_poll(void *data)
6746
{
6824
{
6747
	struct napi_struct *napi = data;
6825
	struct napi_struct *napi = data;
6748
	struct softnet_data *sd;
6749
	void *have;
6750
6826
6751
	while (!napi_thread_wait(napi)) {
6827
	while (!napi_thread_wait(napi))
6752
		unsigned long last_qs = jiffies;
6828
		napi_threaded_poll_loop(napi);
6753
6829
6754
		for (;;) {
6755
			bool repoll = false;
6756
6757
			local_bh_disable();
6758
			sd = this_cpu_ptr(&softnet_data);
6759
			sd->in_napi_threaded_poll = true;
6760
6761
			have = netpoll_poll_lock(napi);
6762
			__napi_poll(napi, &repoll);
6763
			netpoll_poll_unlock(have);
6764
6765
			sd->in_napi_threaded_poll = false;
6766
			barrier();
6767
6768
			if (sd_has_rps_ipi_waiting(sd)) {
6769
				local_irq_disable();
6770
				net_rps_action_and_irq_enable(sd);
6771
			}
6772
			skb_defer_free_flush(sd);
6773
			local_bh_enable();
6774
6775
			if (!repoll)
6776
				break;
6777
6778
			rcu_softirq_qs_periodic(last_qs);
6779
			cond_resched();
6780
		}
6781
	}
6782
	return 0;
6830
	return 0;
6783
}
6831
}
6784
6832
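
The napi_thread_wait()/napi_threaded_poll() changes are a refactor plus one simplification: the polling body moves into napi_threaded_poll_loop() so the per-CPU backlog threads added later in this patch can share it, and the old "woken" bookkeeping goes away because ____napi_schedule() now sets NAPI_STATE_SCHED_THREADED unconditionally before waking the thread. Condensed from the hunk above, the thread function reduces to:

static int napi_threaded_poll(void *data)
{
	struct napi_struct *napi = data;

	/* sleep until NAPI_STATE_SCHED_THREADED is set, then poll until
	 * __napi_poll() no longer asks for a repoll */
	while (!napi_thread_wait(napi))
		napi_threaded_poll_loop(napi);

	return 0;
}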
Lines 11379-11385 static int dev_cpu_dead(unsigned int oldcpu) Link Here
11379
11427
11380
		list_del_init(&napi->poll_list);
11428
		list_del_init(&napi->poll_list);
11381
		if (napi->poll == process_backlog)
11429
		if (napi->poll == process_backlog)
11382
			napi->state = 0;
11430
			napi->state &= NAPIF_STATE_THREADED;
11383
		else
11431
		else
11384
			____napi_schedule(sd, napi);
11432
			____napi_schedule(sd, napi);
11385
	}
11433
	}
Lines 11387-11398 static int dev_cpu_dead(unsigned int oldcpu) Link Here
11387
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
11435
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
11388
	local_irq_enable();
11436
	local_irq_enable();
11389
11437
11438
	if (!use_backlog_threads()) {
11390
#ifdef CONFIG_RPS
11439
#ifdef CONFIG_RPS
11391
	remsd = oldsd->rps_ipi_list;
11440
		remsd = oldsd->rps_ipi_list;
11392
	oldsd->rps_ipi_list = NULL;
11441
		oldsd->rps_ipi_list = NULL;
11393
#endif
11442
#endif
11394
	/* send out pending IPI's on offline CPU */
11443
		/* send out pending IPI's on offline CPU */
11395
	net_rps_send_ipi(remsd);
11444
		net_rps_send_ipi(remsd);
11445
	}
11396
11446
11397
	/* Process offline CPU's input_pkt_queue */
11447
	/* Process offline CPU's input_pkt_queue */
11398
	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
11448
	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
Lines 11731-11736 static int net_page_pool_create(int cpuid) Link Here
11731
	return 0;
11781
	return 0;
11732
}
11782
}
11733
11783
11784
static int backlog_napi_should_run(unsigned int cpu)
11785
{
11786
	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
11787
	struct napi_struct *napi = &sd->backlog;
11788
11789
	return test_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
11790
}
11791
11792
static void run_backlog_napi(unsigned int cpu)
11793
{
11794
	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
11795
11796
	napi_threaded_poll_loop(&sd->backlog);
11797
}
11798
11799
static void backlog_napi_setup(unsigned int cpu)
11800
{
11801
	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
11802
	struct napi_struct *napi = &sd->backlog;
11803
11804
	napi->thread = this_cpu_read(backlog_napi);
11805
	set_bit(NAPI_STATE_THREADED, &napi->state);
11806
}
11807
11808
static struct smp_hotplug_thread backlog_threads = {
11809
	.store			= &backlog_napi,
11810
	.thread_should_run	= backlog_napi_should_run,
11811
	.thread_fn		= run_backlog_napi,
11812
	.thread_comm		= "backlog_napi/%u",
11813
	.setup			= backlog_napi_setup,
11814
};
11815
11734
/*
11816
/*
11735
 *       This is called single threaded during boot, so no need
11817
 *       This is called single threaded during boot, so no need
11736
 *       to take the rtnl semaphore.
11818
 *       to take the rtnl semaphore.
Lines 11782-11791 static int __init net_dev_init(void) Link Here
11782
		init_gro_hash(&sd->backlog);
11864
		init_gro_hash(&sd->backlog);
11783
		sd->backlog.poll = process_backlog;
11865
		sd->backlog.poll = process_backlog;
11784
		sd->backlog.weight = weight_p;
11866
		sd->backlog.weight = weight_p;
11867
		INIT_LIST_HEAD(&sd->backlog.poll_list);
11785
11868
11786
		if (net_page_pool_create(i))
11869
		if (net_page_pool_create(i))
11787
			goto out;
11870
			goto out;
11788
	}
11871
	}
11872
	if (use_backlog_threads())
11873
		smpboot_register_percpu_thread(&backlog_threads);
11789
11874
11790
	dev_boot_phase = 0;
11875
	dev_boot_phase = 0;
11791
11876
(-)a/net/core/skbuff.c (-2 / +2 lines)
Lines 7050-7057 nodefer: __kfree_skb(skb); Link Here
7050
	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
7050
	/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
7051
	 * if we are unlucky enough (this seems very unlikely).
7051
	 * if we are unlucky enough (this seems very unlikely).
7052
	 */
7052
	 */
7053
	if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
7053
	if (unlikely(kick))
7054
		smp_call_function_single_async(cpu, &sd->defer_csd);
7054
		kick_defer_list_purge(sd, cpu);
7055
}
7055
}
7056
7056
7057
static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
7057
static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
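
Tying the skbuff.c hunk back to dev.c: the deferred-free kick no longer issues the cross-CPU IPI itself but goes through kick_defer_list_purge(), which picks between scheduling the remote CPU's backlog NAPI thread and the original defer_csd IPI. Condensed from the two hunks (the enclosing caller is skb_attempt_defer_free() in mainline, assumed here):

	/* net/core/skbuff.c, tail of the deferred-free path */
	if (unlikely(kick))
		kick_defer_list_purge(sd, cpu);

	/* net/core/dev.c: schedule the remote CPU's backlog NAPI thread when
	 * backlog threads are in use, otherwise fall back to the IPI */
	if (use_backlog_threads()) {
		backlog_lock_irq_save(sd, &flags);
		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
			__napi_schedule_irqoff(&sd->backlog);
		backlog_unlock_irq_restore(sd, &flags);
	} else if (!cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) {
		smp_call_function_single_async(cpu, &sd->defer_csd);
	}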
