Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 692204
Collapse All | Expand All

(-) (+2156 lines)
Added Link Here
1
#include <linux/btrfs.h>
2
#include <linux/capability.h>
3
#include <linux/cred.h>
4
#include <linux/mount.h>
5
#include <linux/fdtable.h>
6
#include <linux/file.h>
7
#include <linux/fs.h>
8
#include <linux/namei.h>
9
#include <linux/module.h>
10
#include <linux/kernel.h>
11
#include <linux/magic.h>
12
#include <linux/parser.h>
13
#include <linux/security.h>
14
#include <linux/seq_file.h>
15
#include <linux/statfs.h>
16
#include <linux/slab.h>
17
#include <linux/user_namespace.h>
18
#include <linux/uidgid.h>
19
#include <linux/xattr.h>
20
#include <linux/posix_acl.h>
21
#include <linux/posix_acl_xattr.h>
22
#include <linux/uio.h>
23
24
struct shiftfs_super_info {
25
	struct vfsmount *mnt;
26
	struct user_namespace *userns;
27
	/* creds of process who created the super block */
28
	const struct cred *creator_cred;
29
	bool mark;
30
	unsigned int passthrough;
31
	unsigned int passthrough_mark;
32
};
33
34
static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
35
			       umode_t mode, dev_t dev, struct dentry *dentry);
36
37
/* Bit flags accepted by the "passthrough=" mount option. */
#define SHIFTFS_PASSTHROUGH_NONE 0
#define SHIFTFS_PASSTHROUGH_STAT (1 << 0)
#define SHIFTFS_PASSTHROUGH_IOCTL (1 << 1)
#define SHIFTFS_PASSTHROUGH_ALL \
	(SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL)
42
43
static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info)
44
{
45
	if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL))
46
		return false;
47
48
	return true;
49
}
50
51
static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info)
52
{
53
	if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT))
54
		return false;
55
56
	return true;
57
}
58
59
/* Mount option token ids; OPT_LAST marks the end of the token table. */
enum {
	OPT_MARK = 0,
	OPT_PASSTHROUGH,
	OPT_LAST,
};
64
65
/* global filesystem options */
66
static const match_table_t tokens = {
67
	{ OPT_MARK, "mark" },
68
	{ OPT_PASSTHROUGH, "passthrough=%u" },
69
	{ OPT_LAST, NULL }
70
};
71
72
static const struct cred *shiftfs_override_creds(const struct super_block *sb)
73
{
74
	struct shiftfs_super_info *sbinfo = sb->s_fs_info;
75
76
	return override_creds(sbinfo->creator_cred);
77
}
78
79
/*
 * Undo shiftfs_override_object_creds(): switch back to @oldcred and drop
 * the reference on the temporary @newcred.
 */
static inline void shiftfs_revert_object_creds(const struct cred *oldcred,
					       struct cred *newcred)
{
	revert_creds(oldcred);
	put_cred(newcred);
}
85
86
/*
 * Translate @kuid by mapping it to a plain uid in @from and then building
 * a kuid for that same number in @to.
 */
static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to,
			 kuid_t kuid)
{
	return make_kuid(to, from_kuid(from, kuid));
}
92
93
/*
 * Translate @kgid by mapping it to a plain gid in @from and then building
 * a kgid for that same number in @to.
 */
static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to,
			 kgid_t kgid)
{
	return make_kgid(to, from_kgid(from, kgid));
}
99
100
static int shiftfs_override_object_creds(const struct super_block *sb,
101
					 const struct cred **oldcred,
102
					 struct cred **newcred,
103
					 struct dentry *dentry, umode_t mode,
104
					 bool hardlink)
105
{
106
	struct shiftfs_super_info *sbinfo = sb->s_fs_info;
107
	kuid_t fsuid = current_fsuid();
108
	kgid_t fsgid = current_fsgid();
109
110
	*oldcred = shiftfs_override_creds(sb);
111
112
	*newcred = prepare_creds();
113
	if (!*newcred) {
114
		revert_creds(*oldcred);
115
		return -ENOMEM;
116
	}
117
118
	(*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
119
	(*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
120
121
	if (!hardlink) {
122
		int err = security_dentry_create_files_as(dentry, mode,
123
							  &dentry->d_name,
124
							  *oldcred, *newcred);
125
		if (err) {
126
			shiftfs_revert_object_creds(*oldcred, *newcred);
127
			return err;
128
		}
129
	}
130
131
	put_cred(override_creds(*newcred));
132
	return 0;
133
}
134
135
static void shiftfs_copyattr(struct inode *from, struct inode *to)
136
{
137
	struct user_namespace *from_ns = from->i_sb->s_user_ns;
138
	struct user_namespace *to_ns = to->i_sb->s_user_ns;
139
140
	to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid);
141
	to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid);
142
	to->i_mode = from->i_mode;
143
	to->i_atime = from->i_atime;
144
	to->i_mtime = from->i_mtime;
145
	to->i_ctime = from->i_ctime;
146
	i_size_write(to, i_size_read(from));
147
}
148
149
static void shiftfs_copyflags(struct inode *from, struct inode *to)
150
{
151
	unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
152
153
	inode_set_flags(to, from->i_flags & mask, mask);
154
}
155
156
static void shiftfs_file_accessed(struct file *file)
157
{
158
	struct inode *upperi, *loweri;
159
160
	if (file->f_flags & O_NOATIME)
161
		return;
162
163
	upperi = file_inode(file);
164
	loweri = upperi->i_private;
165
166
	if (!loweri)
167
		return;
168
169
	upperi->i_mtime = loweri->i_mtime;
170
	upperi->i_ctime = loweri->i_ctime;
171
172
	touch_atime(&file->f_path);
173
}
174
175
static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo,
176
				       char *options)
177
{
178
	char *p;
179
	substring_t args[MAX_OPT_ARGS];
180
181
	sbinfo->mark = false;
182
	sbinfo->passthrough = 0;
183
184
	while ((p = strsep(&options, ",")) != NULL) {
185
		int err, intarg, token;
186
187
		if (!*p)
188
			continue;
189
190
		token = match_token(p, tokens, args);
191
		switch (token) {
192
		case OPT_MARK:
193
			sbinfo->mark = true;
194
			break;
195
		case OPT_PASSTHROUGH:
196
			err = match_int(&args[0], &intarg);
197
			if (err)
198
				return err;
199
200
			if (intarg & ~SHIFTFS_PASSTHROUGH_ALL)
201
				return -EINVAL;
202
203
			sbinfo->passthrough = intarg;
204
			break;
205
		default:
206
			return -EINVAL;
207
		}
208
	}
209
210
	return 0;
211
}
212
213
static void shiftfs_d_release(struct dentry *dentry)
214
{
215
	struct dentry *lowerd = dentry->d_fsdata;
216
217
	if (lowerd)
218
		dput(lowerd);
219
}
220
221
static struct dentry *shiftfs_d_real(struct dentry *dentry,
222
				     const struct inode *inode)
223
{
224
	struct dentry *lowerd = dentry->d_fsdata;
225
226
	if (inode && d_inode(dentry) == inode)
227
		return dentry;
228
229
	lowerd = d_real(lowerd, inode);
230
	if (lowerd && (!inode || inode == d_inode(lowerd)))
231
		return lowerd;
232
233
	WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
234
	     inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
235
	return dentry;
236
}
237
238
static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
239
{
240
	int err = 1;
241
	struct dentry *lowerd = dentry->d_fsdata;
242
243
	if (d_is_negative(lowerd) != d_is_negative(dentry))
244
		return 0;
245
246
	if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE))
247
		err = lowerd->d_op->d_weak_revalidate(lowerd, flags);
248
249
	if (d_really_is_positive(dentry)) {
250
		struct inode *inode = d_inode(dentry);
251
		struct inode *loweri = d_inode(lowerd);
252
253
		shiftfs_copyattr(loweri, inode);
254
	}
255
256
	return err;
257
}
258
259
static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
260
{
261
	int err = 1;
262
	struct dentry *lowerd = dentry->d_fsdata;
263
264
	if (d_unhashed(lowerd) ||
265
	    ((d_is_negative(lowerd) != d_is_negative(dentry))))
266
		return 0;
267
268
	if (flags & LOOKUP_RCU)
269
		return -ECHILD;
270
271
	if ((lowerd->d_flags & DCACHE_OP_REVALIDATE))
272
		err = lowerd->d_op->d_revalidate(lowerd, flags);
273
274
	if (d_really_is_positive(dentry)) {
275
		struct inode *inode = d_inode(dentry);
276
		struct inode *loweri = d_inode(lowerd);
277
278
		shiftfs_copyattr(loweri, inode);
279
	}
280
281
	return err;
282
}
283
284
static const struct dentry_operations shiftfs_dentry_ops = {
285
	.d_release	   = shiftfs_d_release,
286
	.d_real		   = shiftfs_d_real,
287
	.d_revalidate	   = shiftfs_d_revalidate,
288
	.d_weak_revalidate = shiftfs_d_weak_revalidate,
289
};
290
291
static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
292
				    struct delayed_call *done)
293
{
294
	const char *p;
295
	const struct cred *oldcred;
296
	struct dentry *lowerd;
297
298
	/* RCU lookup not supported */
299
	if (!dentry)
300
		return ERR_PTR(-ECHILD);
301
302
	lowerd = dentry->d_fsdata;
303
	oldcred = shiftfs_override_creds(dentry->d_sb);
304
	p = vfs_get_link(lowerd, done);
305
	revert_creds(oldcred);
306
307
	return p;
308
}
309
310
static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
311
			    const char *name, const void *value,
312
			    size_t size, int flags)
313
{
314
	struct dentry *lowerd = dentry->d_fsdata;
315
	int err;
316
	const struct cred *oldcred;
317
318
	oldcred = shiftfs_override_creds(dentry->d_sb);
319
	err = vfs_setxattr(lowerd, name, value, size, flags);
320
	revert_creds(oldcred);
321
322
	shiftfs_copyattr(lowerd->d_inode, inode);
323
324
	return err;
325
}
326
327
/*
 * xattr handler ->get: read xattr @name from the lower dentry with the
 * creator creds. Returns the result of vfs_getxattr().
 */
static int shiftfs_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *value, size_t size)
{
	const struct cred *saved;
	int ret;

	saved = shiftfs_override_creds(dentry->d_sb);
	ret = vfs_getxattr(dentry->d_fsdata, name, value, size);
	revert_creds(saved);

	return ret;
}
341
342
static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
343
				 size_t size)
344
{
345
	struct dentry *lowerd = dentry->d_fsdata;
346
	int err;
347
	const struct cred *oldcred;
348
349
	oldcred = shiftfs_override_creds(dentry->d_sb);
350
	err = vfs_listxattr(lowerd, list, size);
351
	revert_creds(oldcred);
352
353
	return err;
354
}
355
356
static int shiftfs_removexattr(struct dentry *dentry, const char *name)
357
{
358
	struct dentry *lowerd = dentry->d_fsdata;
359
	int err;
360
	const struct cred *oldcred;
361
362
	oldcred = shiftfs_override_creds(dentry->d_sb);
363
	err = vfs_removexattr(lowerd, name);
364
	revert_creds(oldcred);
365
366
	/* update c/mtime */
367
	shiftfs_copyattr(lowerd->d_inode, d_inode(dentry));
368
369
	return err;
370
}
371
372
/*
 * xattr handler ->set: a NULL @value removes the attribute, anything else
 * sets it on the lower dentry.
 */
static int shiftfs_xattr_set(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value, size_t size,
			     int flags)
{
	if (value)
		return shiftfs_setxattr(dentry, inode, name, value, size,
					flags);

	return shiftfs_removexattr(dentry, name);
}
381
382
static int shiftfs_inode_test(struct inode *inode, void *data)
383
{
384
	return inode->i_private == data;
385
}
386
387
static int shiftfs_inode_set(struct inode *inode, void *data)
388
{
389
	inode->i_private = data;
390
	return 0;
391
}
392
393
static int shiftfs_create_object(struct inode *diri, struct dentry *dentry,
394
				 umode_t mode, const char *symlink,
395
				 struct dentry *hardlink, bool excl)
396
{
397
	int err;
398
	const struct cred *oldcred;
399
	struct cred *newcred;
400
	void *loweri_iop_ptr = NULL;
401
	umode_t modei = mode;
402
	struct super_block *dir_sb = diri->i_sb;
403
	struct dentry *lowerd_new = dentry->d_fsdata;
404
	struct inode *inode = NULL, *loweri_dir = diri->i_private;
405
	const struct inode_operations *loweri_dir_iop = loweri_dir->i_op;
406
	struct dentry *lowerd_link = NULL;
407
408
	if (hardlink) {
409
		loweri_iop_ptr = loweri_dir_iop->link;
410
	} else {
411
		switch (mode & S_IFMT) {
412
		case S_IFDIR:
413
			loweri_iop_ptr = loweri_dir_iop->mkdir;
414
			break;
415
		case S_IFREG:
416
			loweri_iop_ptr = loweri_dir_iop->create;
417
			break;
418
		case S_IFLNK:
419
			loweri_iop_ptr = loweri_dir_iop->symlink;
420
			break;
421
		case S_IFSOCK:
422
			/* fall through */
423
		case S_IFIFO:
424
			loweri_iop_ptr = loweri_dir_iop->mknod;
425
			break;
426
		}
427
	}
428
	if (!loweri_iop_ptr) {
429
		err = -EINVAL;
430
		goto out_iput;
431
	}
432
433
	inode_lock_nested(loweri_dir, I_MUTEX_PARENT);
434
435
	if (!hardlink) {
436
		inode = new_inode(dir_sb);
437
		if (!inode) {
438
			err = -ENOMEM;
439
			goto out_iput;
440
		}
441
442
		/*
443
		 * new_inode() will have added the new inode to the super
444
		 * block's list of inodes. Further below we will call
445
		 * inode_insert5() Which would perform the same operation again
446
		 * thereby corrupting the list. To avoid this raise I_CREATING
447
		 * in i_state which will cause inode_insert5() to skip this
448
		 * step. I_CREATING will be cleared by d_instantiate_new()
449
		 * below.
450
		 */
451
		spin_lock(&inode->i_lock);
452
		inode->i_state |= I_CREATING;
453
		spin_unlock(&inode->i_lock);
454
455
		inode_init_owner(inode, diri, mode);
456
		modei = inode->i_mode;
457
	}
458
459
	err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred,
460
					    dentry, modei, hardlink != NULL);
461
	if (err)
462
		goto out_iput;
463
464
	if (hardlink) {
465
		lowerd_link = hardlink->d_fsdata;
466
		err = vfs_link(lowerd_link, loweri_dir, lowerd_new, NULL);
467
	} else {
468
		switch (modei & S_IFMT) {
469
		case S_IFDIR:
470
			err = vfs_mkdir(loweri_dir, lowerd_new, modei);
471
			break;
472
		case S_IFREG:
473
			err = vfs_create(loweri_dir, lowerd_new, modei, excl);
474
			break;
475
		case S_IFLNK:
476
			err = vfs_symlink(loweri_dir, lowerd_new, symlink);
477
			break;
478
		case S_IFSOCK:
479
			/* fall through */
480
		case S_IFIFO:
481
			err = vfs_mknod(loweri_dir, lowerd_new, modei, 0);
482
			break;
483
		default:
484
			err = -EINVAL;
485
			break;
486
		}
487
	}
488
489
	shiftfs_revert_object_creds(oldcred, newcred);
490
491
	if (!err && WARN_ON(!lowerd_new->d_inode))
492
		err = -EIO;
493
	if (err)
494
		goto out_iput;
495
496
	if (hardlink) {
497
		inode = d_inode(hardlink);
498
		ihold(inode);
499
500
		/* copy up times from lower inode */
501
		shiftfs_copyattr(d_inode(lowerd_link), inode);
502
		set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink);
503
		d_instantiate(dentry, inode);
504
	} else {
505
		struct inode *inode_tmp;
506
		struct inode *loweri_new = d_inode(lowerd_new);
507
508
		inode_tmp = inode_insert5(inode, (unsigned long)loweri_new,
509
					  shiftfs_inode_test, shiftfs_inode_set,
510
					  loweri_new);
511
		if (unlikely(inode_tmp != inode)) {
512
			pr_err_ratelimited("shiftfs: newly created inode found in cache\n");
513
			iput(inode_tmp);
514
			err = -EINVAL;
515
			goto out_iput;
516
		}
517
518
		ihold(loweri_new);
519
		shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode,
520
				   0, lowerd_new);
521
		d_instantiate_new(dentry, inode);
522
	}
523
524
	shiftfs_copyattr(loweri_dir, diri);
525
	if (loweri_iop_ptr == loweri_dir_iop->mkdir)
526
		set_nlink(diri, loweri_dir->i_nlink);
527
528
	inode = NULL;
529
530
out_iput:
531
	iput(inode);
532
	inode_unlock(loweri_dir);
533
534
	return err;
535
}
536
537
static int shiftfs_create(struct inode *dir, struct dentry *dentry,
538
			  umode_t mode,  bool excl)
539
{
540
	mode |= S_IFREG;
541
542
	return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl);
543
}
544
545
static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
546
			 umode_t mode)
547
{
548
	mode |= S_IFDIR;
549
550
	return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
551
}
552
553
static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
554
			struct dentry *dentry)
555
{
556
	return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false);
557
}
558
559
static int shiftfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
560
			 dev_t rdev)
561
{
562
	if (!S_ISFIFO(mode) && !S_ISSOCK(mode))
563
		return -EPERM;
564
565
	return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false);
566
}
567
568
static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
569
			   const char *symlink)
570
{
571
	return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false);
572
}
573
574
/*
 * Shared body of unlink and rmdir.
 *
 * Removes the lower dentry from the lower directory with the creator
 * creds. On success the shiftfs dentry is dropped and the inode's link
 * count is cleared (rmdir) or decremented (unlink). @dir's attributes are
 * refreshed from the lower directory in either case.
 *
 * Returns the result of vfs_rmdir() or vfs_unlink().
 */
static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
{
	struct dentry *lowerd = dentry->d_fsdata;
	struct inode *lower_dir = dir->i_private;
	struct inode *victim = d_inode(dentry);
	const struct cred *saved;
	int ret;

	/* keep the lower dentry alive across the removal */
	dget(lowerd);
	saved = shiftfs_override_creds(dentry->d_sb);
	inode_lock_nested(lower_dir, I_MUTEX_PARENT);
	ret = rmdir ? vfs_rmdir(lower_dir, lowerd)
		    : vfs_unlink(lower_dir, lowerd, NULL);
	revert_creds(saved);

	if (ret == 0) {
		d_drop(dentry);

		if (rmdir)
			clear_nlink(victim);
		else
			drop_nlink(victim);
	}
	inode_unlock(lower_dir);

	shiftfs_copyattr(lower_dir, dir);
	dput(lowerd);

	return ret;
}
606
607
static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
608
{
609
	return shiftfs_rm(dir, dentry, false);
610
}
611
612
static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
613
{
614
	return shiftfs_rm(dir, dentry, true);
615
}
616
617
static int shiftfs_rename(struct inode *olddir, struct dentry *old,
618
			  struct inode *newdir, struct dentry *new,
619
			  unsigned int flags)
620
{
621
	struct dentry *lowerd_dir_old = old->d_parent->d_fsdata,
622
		      *lowerd_dir_new = new->d_parent->d_fsdata,
623
		      *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata,
624
		      *trapd;
625
	struct inode *loweri_dir_old = lowerd_dir_old->d_inode,
626
		     *loweri_dir_new = lowerd_dir_new->d_inode;
627
	int err = -EINVAL;
628
	const struct cred *oldcred;
629
630
	trapd = lock_rename(lowerd_dir_new, lowerd_dir_old);
631
632
	if (trapd == lowerd_old || trapd == lowerd_new)
633
		goto out_unlock;
634
635
	oldcred = shiftfs_override_creds(old->d_sb);
636
	err = vfs_rename(loweri_dir_old, lowerd_old, loweri_dir_new, lowerd_new,
637
			 NULL, flags);
638
	revert_creds(oldcred);
639
640
	shiftfs_copyattr(loweri_dir_old, olddir);
641
	shiftfs_copyattr(loweri_dir_new, newdir);
642
643
out_unlock:
644
	unlock_rename(lowerd_dir_new, lowerd_dir_old);
645
646
	return err;
647
}
648
649
static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
650
				     unsigned int flags)
651
{
652
	struct dentry *new;
653
	struct inode *newi;
654
	const struct cred *oldcred;
655
	struct dentry *lowerd = dentry->d_parent->d_fsdata;
656
	struct inode *inode = NULL, *loweri = lowerd->d_inode;
657
658
	inode_lock(loweri);
659
	oldcred = shiftfs_override_creds(dentry->d_sb);
660
	new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len);
661
	revert_creds(oldcred);
662
	inode_unlock(loweri);
663
664
	if (IS_ERR(new))
665
		return new;
666
667
	dentry->d_fsdata = new;
668
669
	newi = new->d_inode;
670
	if (!newi)
671
		goto out;
672
673
	inode = iget5_locked(dentry->d_sb, (unsigned long)newi,
674
			     shiftfs_inode_test, shiftfs_inode_set, newi);
675
	if (!inode) {
676
		dput(new);
677
		return ERR_PTR(-ENOMEM);
678
	}
679
	if (inode->i_state & I_NEW) {
680
		/*
681
		 * inode->i_private set by shiftfs_inode_set(), but we still
682
		 * need to take a reference
683
		*/
684
		ihold(newi);
685
		shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new);
686
		unlock_new_inode(inode);
687
	}
688
689
out:
690
	return d_splice_alias(inode, dentry);
691
}
692
693
static int shiftfs_permission(struct inode *inode, int mask)
694
{
695
	int err;
696
	const struct cred *oldcred;
697
	struct inode *loweri = inode->i_private;
698
699
	if (!loweri) {
700
		WARN_ON(!(mask & MAY_NOT_BLOCK));
701
		return -ECHILD;
702
	}
703
704
	err = generic_permission(inode, mask);
705
	if (err)
706
		return err;
707
708
	oldcred = shiftfs_override_creds(inode->i_sb);
709
	err = inode_permission(loweri, mask);
710
	revert_creds(oldcred);
711
712
	return err;
713
}
714
715
/*
 * ->fiemap: forward to the lower inode's fiemap with the creator creds.
 * With FIEMAP_FLAG_SYNC the lower mapping is written back first.
 * Returns -EOPNOTSUPP when the lower inode has no fiemap operation.
 */
static int shiftfs_fiemap(struct inode *inode,
			  struct fiemap_extent_info *fieinfo, u64 start,
			  u64 len)
{
	struct inode *lower = inode->i_private;
	const struct cred *saved;
	int ret;

	if (!lower->i_op->fiemap)
		return -EOPNOTSUPP;

	saved = shiftfs_override_creds(inode->i_sb);
	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
		filemap_write_and_wait(lower->i_mapping);
	ret = lower->i_op->fiemap(lower, fieinfo, start, len);
	revert_creds(saved);

	return ret;
}
734
735
static int shiftfs_tmpfile(struct inode *dir, struct dentry *dentry,
736
			   umode_t mode)
737
{
738
	int err;
739
	const struct cred *oldcred;
740
	struct dentry *lowerd = dentry->d_fsdata;
741
	struct inode *loweri = dir->i_private;
742
743
	if (!loweri->i_op->tmpfile)
744
		return -EOPNOTSUPP;
745
746
	oldcred = shiftfs_override_creds(dir->i_sb);
747
	err = loweri->i_op->tmpfile(loweri, lowerd, mode);
748
	revert_creds(oldcred);
749
750
	return err;
751
}
752
753
static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
754
{
755
	struct dentry *lowerd = dentry->d_fsdata;
756
	struct inode *loweri = lowerd->d_inode;
757
	struct iattr newattr;
758
	const struct cred *oldcred;
759
	struct super_block *sb = dentry->d_sb;
760
	struct shiftfs_super_info *sbinfo = sb->s_fs_info;
761
	int err;
762
763
	err = setattr_prepare(dentry, attr);
764
	if (err)
765
		return err;
766
767
	newattr = *attr;
768
	newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid);
769
	newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid);
770
771
	/*
772
	 * mode change is for clearing setuid/setgid bits. Allow lower fs
773
	 * to interpret this in its own way.
774
	 */
775
	if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
776
		newattr.ia_valid &= ~ATTR_MODE;
777
778
	inode_lock(loweri);
779
	oldcred = shiftfs_override_creds(dentry->d_sb);
780
	err = notify_change(lowerd, &newattr, NULL);
781
	revert_creds(oldcred);
782
	inode_unlock(loweri);
783
784
	shiftfs_copyattr(loweri, d_inode(dentry));
785
786
	return err;
787
}
788
789
static int shiftfs_getattr(const struct path *path, struct kstat *stat,
790
			   u32 request_mask, unsigned int query_flags)
791
{
792
	struct inode *inode = path->dentry->d_inode;
793
	struct dentry *lowerd = path->dentry->d_fsdata;
794
	struct inode *loweri = lowerd->d_inode;
795
	struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info;
796
	struct path newpath = { .mnt = info->mnt, .dentry = lowerd };
797
	struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
798
	struct user_namespace *to_ns = inode->i_sb->s_user_ns;
799
	const struct cred *oldcred;
800
	int err;
801
802
	oldcred = shiftfs_override_creds(inode->i_sb);
803
	err = vfs_getattr(&newpath, stat, request_mask, query_flags);
804
	revert_creds(oldcred);
805
806
	if (err)
807
		return err;
808
809
	/* transform the underlying id */
810
	stat->uid = shift_kuid(from_ns, to_ns, stat->uid);
811
	stat->gid = shift_kgid(from_ns, to_ns, stat->gid);
812
	return 0;
813
}
814
815
#ifdef CONFIG_SHIFT_FS_POSIX_ACL
816
817
static int
818
shift_acl_ids(struct user_namespace *from, struct user_namespace *to,
819
	      struct posix_acl *acl)
820
{
821
	int i;
822
823
	for (i = 0; i < acl->a_count; i++) {
824
		struct posix_acl_entry *e = &acl->a_entries[i];
825
		switch(e->e_tag) {
826
		case ACL_USER:
827
			e->e_uid = shift_kuid(from, to, e->e_uid);
828
			if (!uid_valid(e->e_uid))
829
				return -EOVERFLOW;
830
			break;
831
		case ACL_GROUP:
832
			e->e_gid = shift_kgid(from, to, e->e_gid);
833
			if (!gid_valid(e->e_gid))
834
				return -EOVERFLOW;
835
			break;
836
		}
837
	}
838
	return 0;
839
}
840
841
static void
842
shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to,
843
		    void *value, size_t size)
844
{
845
	struct posix_acl_xattr_header *header = value;
846
	struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
847
	int count;
848
	kuid_t kuid;
849
	kgid_t kgid;
850
851
	if (!value)
852
		return;
853
	if (size < sizeof(struct posix_acl_xattr_header))
854
		return;
855
	if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
856
		return;
857
858
	count = posix_acl_xattr_count(size);
859
	if (count < 0)
860
		return;
861
	if (count == 0)
862
		return;
863
864
	for (end = entry + count; entry != end; entry++) {
865
		switch(le16_to_cpu(entry->e_tag)) {
866
		case ACL_USER:
867
			kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id));
868
			kuid = shift_kuid(from, to, kuid);
869
			entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid));
870
			break;
871
		case ACL_GROUP:
872
			kgid = make_kgid(&init_user_ns, le32_to_cpu(entry->e_id));
873
			kgid = shift_kgid(from, to, kgid);
874
			entry->e_id = cpu_to_le32(from_kgid(&init_user_ns, kgid));
875
			break;
876
		default:
877
			break;
878
		}
879
	}
880
}
881
882
static struct posix_acl *shiftfs_get_acl(struct inode *inode, int type)
883
{
884
	struct inode *loweri = inode->i_private;
885
	const struct cred *oldcred;
886
	struct posix_acl *lower_acl, *acl = NULL;
887
	struct user_namespace *from_ns = loweri->i_sb->s_user_ns;
888
	struct user_namespace *to_ns = inode->i_sb->s_user_ns;
889
	int size;
890
	int err;
891
892
	if (!IS_POSIXACL(loweri))
893
		return NULL;
894
895
	oldcred = shiftfs_override_creds(inode->i_sb);
896
	lower_acl = get_acl(loweri, type);
897
	revert_creds(oldcred);
898
899
	if (lower_acl && !IS_ERR(lower_acl)) {
900
		/* XXX: export posix_acl_clone? */
901
		size = sizeof(struct posix_acl) +
902
		       lower_acl->a_count * sizeof(struct posix_acl_entry);
903
		acl = kmemdup(lower_acl, size, GFP_KERNEL);
904
		posix_acl_release(lower_acl);
905
906
		if (!acl)
907
			return ERR_PTR(-ENOMEM);
908
909
		refcount_set(&acl->a_refcount, 1);
910
911
		err = shift_acl_ids(from_ns, to_ns, acl);
912
		if (err) {
913
			kfree(acl);
914
			return ERR_PTR(err);
915
		}
916
	}
917
918
	return acl;
919
}
920
921
static int
922
shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler,
923
			   struct dentry *dentry, struct inode *inode,
924
			   const char *name, void *buffer, size_t size)
925
{
926
	struct inode *loweri = inode->i_private;
927
	int ret;
928
929
	ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name,
930
				buffer, size);
931
	if (ret < 0)
932
		return ret;
933
934
	inode_lock(loweri);
935
	shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns,
936
			    buffer, size);
937
	inode_unlock(loweri);
938
	return ret;
939
}
940
941
static int
942
shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler,
943
			    struct dentry *dentry, struct inode *inode,
944
			    const char *name, const void *value,
945
			    size_t size, int flags)
946
{
947
	struct inode *loweri = inode->i_private;
948
	int err;
949
950
	if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl)
951
		return -EOPNOTSUPP;
952
	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
953
		return value ? -EACCES : 0;
954
	if (!inode_owner_or_capable(inode))
955
		return -EPERM;
956
957
	if (value) {
958
		shift_acl_xattr_ids(inode->i_sb->s_user_ns,
959
				    loweri->i_sb->s_user_ns,
960
				    (void *)value, size);
961
		err = shiftfs_setxattr(dentry, inode, handler->name, value,
962
				       size, flags);
963
	} else {
964
		err = shiftfs_removexattr(dentry, handler->name);
965
	}
966
967
	if (!err)
968
		shiftfs_copyattr(loweri, inode);
969
970
	return err;
971
}
972
973
static const struct xattr_handler
974
shiftfs_posix_acl_access_xattr_handler = {
975
	.name = XATTR_NAME_POSIX_ACL_ACCESS,
976
	.flags = ACL_TYPE_ACCESS,
977
	.get = shiftfs_posix_acl_xattr_get,
978
	.set = shiftfs_posix_acl_xattr_set,
979
};
980
981
static const struct xattr_handler
982
shiftfs_posix_acl_default_xattr_handler = {
983
	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
984
	.flags = ACL_TYPE_DEFAULT,
985
	.get = shiftfs_posix_acl_xattr_get,
986
	.set = shiftfs_posix_acl_xattr_set,
987
};
988
989
#else /* !CONFIG_SHIFT_FS_POSIX_ACL */
990
991
#define shiftfs_get_acl NULL
992
993
#endif /* CONFIG_SHIFT_FS_POSIX_ACL */
994
995
static const struct inode_operations shiftfs_dir_inode_operations = {
996
	.lookup		= shiftfs_lookup,
997
	.mkdir		= shiftfs_mkdir,
998
	.symlink	= shiftfs_symlink,
999
	.unlink		= shiftfs_unlink,
1000
	.rmdir		= shiftfs_rmdir,
1001
	.rename		= shiftfs_rename,
1002
	.link		= shiftfs_link,
1003
	.setattr	= shiftfs_setattr,
1004
	.create		= shiftfs_create,
1005
	.mknod		= shiftfs_mknod,
1006
	.permission	= shiftfs_permission,
1007
	.getattr	= shiftfs_getattr,
1008
	.listxattr	= shiftfs_listxattr,
1009
	.get_acl	= shiftfs_get_acl,
1010
};
1011
1012
static const struct inode_operations shiftfs_file_inode_operations = {
1013
	.fiemap		= shiftfs_fiemap,
1014
	.getattr	= shiftfs_getattr,
1015
	.get_acl	= shiftfs_get_acl,
1016
	.listxattr	= shiftfs_listxattr,
1017
	.permission	= shiftfs_permission,
1018
	.setattr	= shiftfs_setattr,
1019
	.tmpfile	= shiftfs_tmpfile,
1020
};
1021
1022
static const struct inode_operations shiftfs_special_inode_operations = {
1023
	.getattr	= shiftfs_getattr,
1024
	.get_acl	= shiftfs_get_acl,
1025
	.listxattr	= shiftfs_listxattr,
1026
	.permission	= shiftfs_permission,
1027
	.setattr	= shiftfs_setattr,
1028
};
1029
1030
static const struct inode_operations shiftfs_symlink_inode_operations = {
1031
	.getattr	= shiftfs_getattr,
1032
	.get_link	= shiftfs_get_link,
1033
	.listxattr	= shiftfs_listxattr,
1034
	.setattr	= shiftfs_setattr,
1035
};
1036
1037
static struct file *shiftfs_open_realfile(const struct file *file,
1038
					  struct inode *realinode)
1039
{
1040
	struct file *realfile;
1041
	const struct cred *old_cred;
1042
	struct inode *inode = file_inode(file);
1043
	struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1044
	struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1045
	struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1046
1047
	old_cred = shiftfs_override_creds(inode->i_sb);
1048
	realfile = open_with_fake_path(&realpath, file->f_flags, realinode,
1049
				       info->creator_cred);
1050
	revert_creds(old_cred);
1051
1052
	return realfile;
1053
}
1054
1055
#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
1056
1057
static int shiftfs_change_flags(struct file *file, unsigned int flags)
1058
{
1059
	struct inode *inode = file_inode(file);
1060
	int err;
1061
1062
	/* if some flag changed that cannot be changed then something's amiss */
1063
	if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK))
1064
		return -EIO;
1065
1066
	flags &= SHIFTFS_SETFL_MASK;
1067
1068
	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
1069
		return -EPERM;
1070
1071
	if (flags & O_DIRECT) {
1072
		if (!file->f_mapping->a_ops ||
1073
		    !file->f_mapping->a_ops->direct_IO)
1074
			return -EINVAL;
1075
	}
1076
1077
	if (file->f_op->check_flags) {
1078
		err = file->f_op->check_flags(flags);
1079
		if (err)
1080
			return err;
1081
	}
1082
1083
	spin_lock(&file->f_lock);
1084
	file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags;
1085
	spin_unlock(&file->f_lock);
1086
1087
	return 0;
1088
}
1089
1090
static int shiftfs_open(struct inode *inode, struct file *file)
1091
{
1092
	struct file *realfile;
1093
1094
	realfile = shiftfs_open_realfile(file, inode->i_private);
1095
	if (IS_ERR(realfile))
1096
		return PTR_ERR(realfile);
1097
1098
	file->private_data = realfile;
1099
	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */
1100
	file->f_mapping = realfile->f_mapping;
1101
1102
	return 0;
1103
}
1104
1105
static int shiftfs_dir_open(struct inode *inode, struct file *file)
1106
{
1107
	struct file *realfile;
1108
	const struct cred *oldcred;
1109
	struct dentry *lowerd = file->f_path.dentry->d_fsdata;
1110
	struct shiftfs_super_info *info = inode->i_sb->s_fs_info;
1111
	struct path realpath = { .mnt = info->mnt, .dentry = lowerd };
1112
1113
	oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1114
	realfile = dentry_open(&realpath, file->f_flags | O_NOATIME,
1115
			       info->creator_cred);
1116
	revert_creds(oldcred);
1117
	if (IS_ERR(realfile))
1118
		return PTR_ERR(realfile);
1119
1120
	file->private_data = realfile;
1121
1122
	return 0;
1123
}
1124
1125
static int shiftfs_release(struct inode *inode, struct file *file)
1126
{
1127
	struct file *realfile = file->private_data;
1128
1129
	if (realfile)
1130
		fput(realfile);
1131
1132
	return 0;
1133
}
1134
1135
static int shiftfs_dir_release(struct inode *inode, struct file *file)
1136
{
1137
	return shiftfs_release(inode, file);
1138
}
1139
1140
/* ->llseek() for directories: seek on the lower directory file. */
static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence)
{
	return vfs_llseek(file->private_data, offset, whence);
}
1146
1147
/*
 * ->llseek() for regular files: generic seek on the shiftfs file itself,
 * bounded by the lower superblock's s_maxbytes and using the lower inode's
 * current size as end of file.
 */
static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *loweri = file_inode(file)->i_private;
	loff_t maxbytes = loweri->i_sb->s_maxbytes;
	loff_t eof = i_size_read(loweri);

	return generic_file_llseek_size(file, offset, whence, maxbytes, eof);
}
1155
1156
/* XXX: Need to figure out what to do about atime updates, maybe other
1157
 * timestamps too ... ref. ovl_file_accessed() */
1158
1159
static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb)
1160
{
1161
	int ifl = iocb->ki_flags;
1162
	rwf_t flags = 0;
1163
1164
	if (ifl & IOCB_NOWAIT)
1165
		flags |= RWF_NOWAIT;
1166
	if (ifl & IOCB_HIPRI)
1167
		flags |= RWF_HIPRI;
1168
	if (ifl & IOCB_DSYNC)
1169
		flags |= RWF_DSYNC;
1170
	if (ifl & IOCB_SYNC)
1171
		flags |= RWF_SYNC;
1172
1173
	return flags;
1174
}
1175
1176
static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd)
1177
{
1178
	struct file *realfile;
1179
1180
	if (file->f_op->open != shiftfs_open &&
1181
	    file->f_op->open != shiftfs_dir_open)
1182
		return -EINVAL;
1183
1184
	realfile = file->private_data;
1185
	lowerfd->flags = 0;
1186
	lowerfd->file = realfile;
1187
1188
	/* Did the flags change since open? */
1189
	if (unlikely(file->f_flags & ~lowerfd->file->f_flags))
1190
		return shiftfs_change_flags(lowerfd->file, file->f_flags);
1191
1192
	return 0;
1193
}
1194
1195
static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter)
1196
{
1197
	struct file *file = iocb->ki_filp;
1198
	struct fd lowerfd;
1199
	const struct cred *oldcred;
1200
	ssize_t ret;
1201
1202
	if (!iov_iter_count(iter))
1203
		return 0;
1204
1205
	ret = shiftfs_real_fdget(file, &lowerfd);
1206
	if (ret)
1207
		return ret;
1208
1209
	oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1210
	ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos,
1211
			    shiftfs_iocb_to_rwf(iocb));
1212
	revert_creds(oldcred);
1213
1214
	shiftfs_file_accessed(file);
1215
1216
	fdput(lowerfd);
1217
	return ret;
1218
}
1219
1220
static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter)
1221
{
1222
	struct file *file = iocb->ki_filp;
1223
	struct inode *inode = file_inode(file);
1224
	struct fd lowerfd;
1225
	const struct cred *oldcred;
1226
	ssize_t ret;
1227
1228
	if (!iov_iter_count(iter))
1229
		return 0;
1230
1231
	inode_lock(inode);
1232
	/* Update mode */
1233
	shiftfs_copyattr(inode->i_private, inode);
1234
	ret = file_remove_privs(file);
1235
	if (ret)
1236
		goto out_unlock;
1237
1238
	ret = shiftfs_real_fdget(file, &lowerfd);
1239
	if (ret)
1240
		goto out_unlock;
1241
1242
	oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1243
	file_start_write(lowerfd.file);
1244
	ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos,
1245
			     shiftfs_iocb_to_rwf(iocb));
1246
	file_end_write(lowerfd.file);
1247
	revert_creds(oldcred);
1248
1249
	/* Update size */
1250
	shiftfs_copyattr(inode->i_private, inode);
1251
1252
	fdput(lowerfd);
1253
1254
out_unlock:
1255
	inode_unlock(inode);
1256
	return ret;
1257
}
1258
1259
/*
 * ->fsync(): sync the byte range [@start, @end] of the lower file, with the
 * credentials from shiftfs_override_creds() in effect for the call.
 */
static int shiftfs_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	const struct cred *saved;
	struct fd lower;
	int err;

	err = shiftfs_real_fdget(file, &lower);
	if (err)
		return err;

	saved = shiftfs_override_creds(file->f_path.dentry->d_sb);
	err = vfs_fsync_range(lower.file, start, end, datasync);
	revert_creds(saved);

	fdput(lower);

	return err;
}
1277
1278
static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma)
1279
{
1280
	struct file *realfile = file->private_data;
1281
	const struct cred *oldcred;
1282
	int ret;
1283
1284
	if (!realfile->f_op->mmap)
1285
		return -ENODEV;
1286
1287
	if (WARN_ON(file != vma->vm_file))
1288
		return -EIO;
1289
1290
	oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1291
	vma->vm_file = get_file(realfile);
1292
	ret = call_mmap(vma->vm_file, vma);
1293
	revert_creds(oldcred);
1294
1295
	shiftfs_file_accessed(file);
1296
1297
	if (ret) {
1298
		/*
1299
		 * Drop refcount from new vm_file value and restore original
1300
		 * vm_file value
1301
		 */
1302
		vma->vm_file = file;
1303
		fput(realfile);
1304
	} else {
1305
		/* Drop refcount from previous vm_file value */
1306
		fput(file);
1307
	}
1308
1309
	return ret;
1310
}
1311
1312
/*
 * ->fallocate(): forward to the lower file, then copy the lower inode's
 * attributes back so the shiftfs inode reflects the new size.
 */
static long shiftfs_fallocate(struct file *file, int mode, loff_t offset,
			      loff_t len)
{
	struct inode *upperi = file_inode(file);
	struct inode *loweri = upperi->i_private;
	const struct cred *saved;
	struct fd lower;
	int err;

	err = shiftfs_real_fdget(file, &lower);
	if (err)
		return err;

	saved = shiftfs_override_creds(file->f_path.dentry->d_sb);
	err = vfs_fallocate(lower.file, mode, offset, len);
	revert_creds(saved);

	/* Update size */
	shiftfs_copyattr(loweri, upperi);

	fdput(lower);

	return err;
}
1335
1336
/*
 * ->fadvise(): pass the advice for [@offset, @offset + @len) on to the
 * lower file, with the credentials from shiftfs_override_creds() in effect.
 */
static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len,
			   int advice)
{
	const struct cred *saved;
	struct fd lower;
	int err;

	err = shiftfs_real_fdget(file, &lower);
	if (err)
		return err;

	saved = shiftfs_override_creds(file->f_path.dentry->d_sb);
	err = vfs_fadvise(lower.file, offset, len, advice);
	revert_creds(saved);

	fdput(lower);

	return err;
}
1354
1355
static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb,
1356
					const struct cred **oldcred,
1357
					struct cred **newcred)
1358
{
1359
	struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1360
	kuid_t fsuid = current_fsuid();
1361
	kgid_t fsgid = current_fsgid();
1362
1363
	*oldcred = shiftfs_override_creds(sb);
1364
1365
	*newcred = prepare_creds();
1366
	if (!*newcred) {
1367
		revert_creds(*oldcred);
1368
		return -ENOMEM;
1369
	}
1370
1371
	(*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid);
1372
	(*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid);
1373
1374
	/* clear all caps to prevent bypassing capable() checks */
1375
	cap_clear((*newcred)->cap_bset);
1376
	cap_clear((*newcred)->cap_effective);
1377
	cap_clear((*newcred)->cap_inheritable);
1378
	cap_clear((*newcred)->cap_permitted);
1379
1380
	if (cmd == BTRFS_IOC_SNAP_DESTROY) {
1381
		kuid_t kuid_root = make_kuid(sb->s_user_ns, 0);
1382
		/*
1383
		 * Allow the root user in the container to remove subvolumes
1384
		 * from other users.
1385
		 */
1386
		if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root))
1387
			cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE);
1388
	}
1389
1390
	put_cred(override_creds(*newcred));
1391
	return 0;
1392
}
1393
1394
/*
 * Undo shiftfs_override_ioctl_creds(); thin wrapper around
 * shiftfs_revert_object_creds().
 */
static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred,
					      struct cred *newcred)
{
	shiftfs_revert_object_creds(oldcred, newcred);
}
1399
1400
static inline bool is_btrfs_snap_ioctl(int cmd)
1401
{
1402
	if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2))
1403
		return true;
1404
1405
	return false;
1406
}
1407
1408
/*
 * Undo shiftfs_btrfs_ioctl_fd_replace() after a btrfs snapshot ioctl.
 *
 * Copies the saved argument structure (@v1 for BTRFS_IOC_SNAP_CREATE, @v2
 * for BTRFS_IOC_SNAP_CREATE_V2) back to user space at @arg, closes the
 * temporary descriptor @fd and frees both saved buffers.  For any other
 * @cmd this is a no-op.
 *
 * Returns 0 on success or -EFAULT when the copy to user space fails.
 * copy_to_user() reports the number of bytes it could NOT copy, which is
 * not an errno, so it is translated here instead of being returned raw.
 * The descriptor is closed and the buffers are freed in either case.
 */
static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg,
					  struct btrfs_ioctl_vol_args *v1,
					  struct btrfs_ioctl_vol_args_v2 *v2)
{
	unsigned long uncopied;

	if (!is_btrfs_snap_ioctl(cmd))
		return 0;

	if (cmd == BTRFS_IOC_SNAP_CREATE)
		uncopied = copy_to_user(arg, v1, sizeof(*v1));
	else
		uncopied = copy_to_user(arg, v2, sizeof(*v2));

	__close_fd(current->files, fd);
	kfree(v1);
	kfree(v2);

	return uncopied ? -EFAULT : 0;
}
1428
1429
/*
 * Prepare a btrfs snapshot ioctl for forwarding to the lower filesystem.
 *
 * The user's argument structure names a source descriptor that refers to a
 * shiftfs file.  This helper copies the structure in, installs a new
 * descriptor for the corresponding lower file, and rewrites the fd field of
 * the user-space structure to that new descriptor.  The kernel copy keeps
 * the original fd so shiftfs_btrfs_ioctl_fd_restore() can put it back.
 *
 * For any @cmd other than the two snapshot ioctls this is a no-op.
 *
 * On success returns 0, stores the kernel copy in *@b1 or *@b2 (whichever
 * matches @cmd) and the new descriptor in *@newfd; the caller must
 * eventually call shiftfs_btrfs_ioctl_fd_restore().
 *
 * On failure returns a negative errno and leaves nothing for the caller to
 * clean up: the kernel copy is freed, *@b1 / *@b2 are not touched, and a
 * descriptor that was already installed has been closed again.
 */
static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg,
					  struct btrfs_ioctl_vol_args **b1,
					  struct btrfs_ioctl_vol_args_v2 **b2,
					  int *newfd)
{
	int oldfd, ret;
	struct fd src;
	struct fd lfd = {};
	struct btrfs_ioctl_vol_args *v1 = NULL;
	struct btrfs_ioctl_vol_args_v2 *v2 = NULL;

	if (!is_btrfs_snap_ioctl(cmd))
		return 0;

	if (cmd == BTRFS_IOC_SNAP_CREATE) {
		v1 = memdup_user(arg, sizeof(*v1));
		if (IS_ERR(v1))
			return PTR_ERR(v1);
		oldfd = v1->fd;
	} else {
		v2 = memdup_user(arg, sizeof(*v2));
		if (IS_ERR(v2))
			return PTR_ERR(v2);
		oldfd = v2->fd;
	}

	src = fdget(oldfd);
	if (!src.file) {
		ret = -EINVAL;
		goto err_free;
	}

	ret = shiftfs_real_fdget(src.file, &lfd);
	if (ret) {
		fdput(src);
		goto err_free;
	}

	/*
	 * shiftfs_real_fdget() does not take a reference to lfd.file, so
	 * take a reference here to offset the one which will be put by
	 * __close_fd(), and make sure that reference is put on fdput(lfd).
	 */
	get_file(lfd.file);
	lfd.flags |= FDPUT_FPUT;
	fdput(src);

	*newfd = get_unused_fd_flags(lfd.file->f_flags);
	if (*newfd < 0) {
		ret = *newfd;
		fdput(lfd);
		goto err_free;
	}

	/* The descriptor table now owns the reference taken above. */
	fd_install(*newfd, lfd.file);

	/*
	 * Hand the lower descriptor to user space but keep the original fd
	 * in the kernel copy.  copy_to_user() returns the number of bytes
	 * left uncopied, so map any shortfall to -EFAULT.
	 */
	if (cmd == BTRFS_IOC_SNAP_CREATE) {
		v1->fd = *newfd;
		ret = copy_to_user(arg, v1, sizeof(*v1)) ? -EFAULT : 0;
		v1->fd = oldfd;
	} else {
		v2->fd = *newfd;
		ret = copy_to_user(arg, v2, sizeof(*v2)) ? -EFAULT : 0;
		v2->fd = oldfd;
	}

	if (ret) {
		/*
		 * The restore helper closes the new descriptor and frees
		 * v1/v2.  Its own result is irrelevant: the failure is
		 * already being reported as -EFAULT.
		 */
		shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2);
		*newfd = -EBADF;
		return ret;
	}

	/* Publish the saved arguments only once nothing can fail anymore. */
	if (cmd == BTRFS_IOC_SNAP_CREATE)
		*b1 = v1;
	else
		*b2 = v2;

	return 0;

err_free:
	/* Exactly one of the two is non-NULL; kfree(NULL) is a no-op. */
	kfree(v1);
	kfree(v2);
	return ret;
}
1499
1500
static long shiftfs_real_ioctl(struct file *file, unsigned int cmd,
1501
			       unsigned long arg)
1502
{
1503
	struct fd lowerfd;
1504
	struct cred *newcred;
1505
	const struct cred *oldcred;
1506
	int newfd = -EBADF;
1507
	long err = 0, ret = 0;
1508
	void __user *argp = (void __user *)arg;
1509
	struct super_block *sb = file->f_path.dentry->d_sb;
1510
	struct btrfs_ioctl_vol_args *btrfs_v1 = NULL;
1511
	struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL;
1512
1513
	ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2,
1514
					     &newfd);
1515
	if (ret < 0)
1516
		return ret;
1517
1518
	ret = shiftfs_real_fdget(file, &lowerfd);
1519
	if (ret)
1520
		goto out_restore;
1521
1522
	ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred);
1523
	if (ret)
1524
		goto out_fdput;
1525
1526
	ret = vfs_ioctl(lowerfd.file, cmd, arg);
1527
1528
	shiftfs_revert_ioctl_creds(oldcred, newcred);
1529
1530
	shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file));
1531
	shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file));
1532
1533
out_fdput:
1534
	fdput(lowerfd);
1535
1536
out_restore:
1537
	err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp,
1538
					     btrfs_v1, btrfs_v2);
1539
	if (!ret)
1540
		ret = err;
1541
1542
	return ret;
1543
}
1544
1545
static bool in_ioctl_whitelist(int flag, unsigned long arg)
1546
{
1547
	void __user *argp = (void __user *)arg;
1548
	u64 flags = 0;
1549
1550
	switch (flag) {
1551
	case BTRFS_IOC_FS_INFO:
1552
		return true;
1553
	case BTRFS_IOC_SNAP_CREATE:
1554
		return true;
1555
	case BTRFS_IOC_SNAP_CREATE_V2:
1556
		return true;
1557
	case BTRFS_IOC_SUBVOL_CREATE:
1558
		return true;
1559
	case BTRFS_IOC_SUBVOL_CREATE_V2:
1560
		return true;
1561
	case BTRFS_IOC_SUBVOL_GETFLAGS:
1562
		return true;
1563
	case BTRFS_IOC_SUBVOL_SETFLAGS:
1564
		if (copy_from_user(&flags, argp, sizeof(flags)))
1565
			return false;
1566
1567
		if (flags & ~BTRFS_SUBVOL_RDONLY)
1568
			return false;
1569
1570
		return true;
1571
	case BTRFS_IOC_SNAP_DESTROY:
1572
		return true;
1573
	}
1574
1575
	return false;
1576
}
1577
1578
static long shiftfs_ioctl(struct file *file, unsigned int cmd,
1579
			  unsigned long arg)
1580
{
1581
	switch (cmd) {
1582
	case FS_IOC_GETVERSION:
1583
		/* fall through */
1584
	case FS_IOC_GETFLAGS:
1585
		/* fall through */
1586
	case FS_IOC_SETFLAGS:
1587
		break;
1588
	default:
1589
		if (!in_ioctl_whitelist(cmd, arg) ||
1590
		    !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1591
			return -ENOTTY;
1592
	}
1593
1594
	return shiftfs_real_ioctl(file, cmd, arg);
1595
}
1596
1597
static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd,
1598
				 unsigned long arg)
1599
{
1600
	switch (cmd) {
1601
	case FS_IOC32_GETVERSION:
1602
		/* fall through */
1603
	case FS_IOC32_GETFLAGS:
1604
		/* fall through */
1605
	case FS_IOC32_SETFLAGS:
1606
		break;
1607
	default:
1608
		if (!in_ioctl_whitelist(cmd, arg) ||
1609
		    !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info))
1610
			return -ENOIOCTLCMD;
1611
	}
1612
1613
	return shiftfs_real_ioctl(file, cmd, arg);
1614
}
1615
1616
/* Operation selector for shiftfs_copyfile(). */
enum shiftfs_copyop {
	SHIFTFS_COPY,	/* vfs_copy_file_range() */
	SHIFTFS_CLONE,	/* vfs_clone_file_range() */
	SHIFTFS_DEDUPE,	/* vfs_dedupe_file_range_one() */
};
1621
1622
/*
 * Common implementation of copy, clone and dedupe between two shiftfs
 * files.
 *
 * Both files are resolved to their lower files (output first, then input),
 * the operation selected by @op is performed on the lower files with the
 * credentials from shiftfs_override_creds() in effect, and the output
 * inode's attributes are refreshed from its lower inode afterwards.
 *
 * Returns the result of the selected VFS helper, or a negative errno if a
 * lower file cannot be resolved.
 */
static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in,
				struct file *file_out, loff_t pos_out, u64 len,
				unsigned int flags, enum shiftfs_copyop op)
{
	struct inode *upper_out = file_inode(file_out);
	struct inode *lower_out = upper_out->i_private;
	const struct cred *saved;
	struct fd src, dst;
	ssize_t res;

	res = shiftfs_real_fdget(file_out, &dst);
	if (res)
		return res;

	res = shiftfs_real_fdget(file_in, &src);
	if (res) {
		fdput(dst);
		return res;
	}

	saved = shiftfs_override_creds(upper_out->i_sb);
	switch (op) {
	case SHIFTFS_COPY:
		res = vfs_copy_file_range(src.file, pos_in, dst.file,
					  pos_out, len, flags);
		break;

	case SHIFTFS_CLONE:
		res = vfs_clone_file_range(src.file, pos_in, dst.file,
					   pos_out, len, flags);
		break;

	case SHIFTFS_DEDUPE:
		res = vfs_dedupe_file_range_one(src.file, pos_in,
						dst.file, pos_out, len,
						flags);
		break;
	}
	revert_creds(saved);

	/* Update size */
	shiftfs_copyattr(lower_out, upper_out);

	fdput(src);
	fdput(dst);

	return res;
}
1670
1671
static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in,
1672
				       struct file *file_out, loff_t pos_out,
1673
				       size_t len, unsigned int flags)
1674
{
1675
	return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
1676
				SHIFTFS_COPY);
1677
}
1678
1679
/*
 * ->remap_file_range(): dedupe when REMAP_FILE_DEDUP is set, clone
 * otherwise.  Flags other than REMAP_FILE_DEDUP and REMAP_FILE_ADVISORY are
 * rejected with -EINVAL.
 */
static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in,
				       struct file *file_out, loff_t pos_out,
				       loff_t len, unsigned int remap_flags)
{
	const unsigned int supported = REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY;
	enum shiftfs_copyop op;

	if (remap_flags & ~supported)
		return -EINVAL;

	op = (remap_flags & REMAP_FILE_DEDUP) ? SHIFTFS_DEDUPE : SHIFTFS_CLONE;

	return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len,
				remap_flags, op);
}
1696
1697
static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx)
1698
{
1699
	const struct cred *oldcred;
1700
	int err = -ENOTDIR;
1701
	struct file *realfile = file->private_data;
1702
1703
	oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb);
1704
	err = iterate_dir(realfile, ctx);
1705
	revert_creds(oldcred);
1706
1707
	return err;
1708
}
1709
1710
const struct file_operations shiftfs_file_operations = {
1711
	.open			= shiftfs_open,
1712
	.release		= shiftfs_release,
1713
	.llseek			= shiftfs_file_llseek,
1714
	.read_iter		= shiftfs_read_iter,
1715
	.write_iter		= shiftfs_write_iter,
1716
	.fsync			= shiftfs_fsync,
1717
	.mmap			= shiftfs_mmap,
1718
	.fallocate		= shiftfs_fallocate,
1719
	.fadvise		= shiftfs_fadvise,
1720
	.unlocked_ioctl		= shiftfs_ioctl,
1721
	.compat_ioctl		= shiftfs_compat_ioctl,
1722
	.copy_file_range	= shiftfs_copy_file_range,
1723
	.remap_file_range	= shiftfs_remap_file_range,
1724
};
1725
1726
const struct file_operations shiftfs_dir_operations = {
1727
	.open			= shiftfs_dir_open,
1728
	.release		= shiftfs_dir_release,
1729
	.compat_ioctl		= shiftfs_compat_ioctl,
1730
	.fsync			= shiftfs_fsync,
1731
	.iterate_shared		= shiftfs_iterate_shared,
1732
	.llseek			= shiftfs_dir_llseek,
1733
	.read			= generic_read_dir,
1734
	.unlocked_ioctl		= shiftfs_ioctl,
1735
};
1736
1737
static const struct address_space_operations shiftfs_aops = {
1738
	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
1739
	.direct_IO	= noop_direct_IO,
1740
};
1741
1742
static void shiftfs_fill_inode(struct inode *inode, unsigned long ino,
1743
			       umode_t mode, dev_t dev, struct dentry *dentry)
1744
{
1745
	struct inode *loweri;
1746
1747
	inode->i_ino = ino;
1748
	inode->i_flags |= S_NOCMTIME;
1749
1750
	mode &= S_IFMT;
1751
	inode->i_mode = mode;
1752
	switch (mode & S_IFMT) {
1753
	case S_IFDIR:
1754
		inode->i_op = &shiftfs_dir_inode_operations;
1755
		inode->i_fop = &shiftfs_dir_operations;
1756
		break;
1757
	case S_IFLNK:
1758
		inode->i_op = &shiftfs_symlink_inode_operations;
1759
		break;
1760
	case S_IFREG:
1761
		inode->i_op = &shiftfs_file_inode_operations;
1762
		inode->i_fop = &shiftfs_file_operations;
1763
		inode->i_mapping->a_ops = &shiftfs_aops;
1764
		break;
1765
	default:
1766
		inode->i_op = &shiftfs_special_inode_operations;
1767
		init_special_inode(inode, mode, dev);
1768
		break;
1769
	}
1770
1771
	if (!dentry)
1772
		return;
1773
1774
	loweri = dentry->d_inode;
1775
	if (!loweri->i_op->get_link)
1776
		inode->i_opflags |= IOP_NOFOLLOW;
1777
1778
	shiftfs_copyattr(loweri, inode);
1779
	shiftfs_copyflags(loweri, inode);
1780
	set_nlink(inode, loweri->i_nlink);
1781
}
1782
1783
static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
1784
{
1785
	struct super_block *sb = dentry->d_sb;
1786
	struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1787
1788
	if (sbinfo->mark)
1789
		seq_show_option(m, "mark", NULL);
1790
1791
	if (sbinfo->passthrough)
1792
		seq_printf(m, ",passthrough=%u", sbinfo->passthrough);
1793
1794
	return 0;
1795
}
1796
1797
static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1798
{
1799
	struct super_block *sb = dentry->d_sb;
1800
	struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1801
	struct dentry *root = sb->s_root;
1802
	struct dentry *realroot = root->d_fsdata;
1803
	struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot };
1804
	int err;
1805
1806
	err = vfs_statfs(&realpath, buf);
1807
	if (err)
1808
		return err;
1809
1810
	if (!shiftfs_passthrough_statfs(sbinfo))
1811
		buf->f_type = sb->s_magic;
1812
1813
	return 0;
1814
}
1815
1816
static void shiftfs_evict_inode(struct inode *inode)
1817
{
1818
	struct inode *loweri = inode->i_private;
1819
1820
	clear_inode(inode);
1821
1822
	if (loweri)
1823
		iput(loweri);
1824
}
1825
1826
static void shiftfs_put_super(struct super_block *sb)
1827
{
1828
	struct shiftfs_super_info *sbinfo = sb->s_fs_info;
1829
1830
	if (sbinfo) {
1831
		mntput(sbinfo->mnt);
1832
		put_cred(sbinfo->creator_cred);
1833
		kfree(sbinfo);
1834
	}
1835
}
1836
1837
static const struct xattr_handler shiftfs_xattr_handler = {
1838
	.prefix = "",
1839
	.get    = shiftfs_xattr_get,
1840
	.set    = shiftfs_xattr_set,
1841
};
1842
1843
const struct xattr_handler *shiftfs_xattr_handlers[] = {
1844
#ifdef CONFIG_SHIFT_FS_POSIX_ACL
1845
	&shiftfs_posix_acl_access_xattr_handler,
1846
	&shiftfs_posix_acl_default_xattr_handler,
1847
#endif
1848
	&shiftfs_xattr_handler,
1849
	NULL
1850
};
1851
1852
/*
 * Return true when every bit set in @new_flags is also set in @old_flags,
 * i.e. the new passthrough options do not exceed the old ones.
 */
static inline bool passthrough_is_subset(int old_flags, int new_flags)
{
	return (new_flags & old_flags) == new_flags;
}
1859
1860
static int shiftfs_super_check_flags(unsigned long old_flags,
1861
				     unsigned long new_flags)
1862
{
1863
	if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY))
1864
		return -EPERM;
1865
1866
	if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID))
1867
		return -EPERM;
1868
1869
	if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV))
1870
		return -EPERM;
1871
1872
	if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC))
1873
		return -EPERM;
1874
1875
	if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME))
1876
		return -EPERM;
1877
1878
	if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME))
1879
		return -EPERM;
1880
1881
	if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL))
1882
		return -EPERM;
1883
1884
	return 0;
1885
}
1886
1887
static int shiftfs_remount(struct super_block *sb, int *flags, char *data)
1888
{
1889
	int err;
1890
	struct shiftfs_super_info new = {};
1891
	struct shiftfs_super_info *info = sb->s_fs_info;
1892
1893
	err = shiftfs_parse_mount_options(&new, data);
1894
	if (err)
1895
		return err;
1896
1897
	err = shiftfs_super_check_flags(sb->s_flags, *flags);
1898
	if (err)
1899
		return err;
1900
1901
	/* Mark mount option cannot be changed. */
1902
	if (info->mark || (info->mark != new.mark))
1903
		return -EPERM;
1904
1905
	if (info->passthrough != new.passthrough) {
1906
		/* Don't allow exceeding passthrough options of mark mount. */
1907
		if (!passthrough_is_subset(info->passthrough_mark,
1908
					   info->passthrough))
1909
			return -EPERM;
1910
1911
		info->passthrough = new.passthrough;
1912
	}
1913
1914
	return 0;
1915
}
1916
1917
static const struct super_operations shiftfs_super_ops = {
1918
	.put_super	= shiftfs_put_super,
1919
	.show_options	= shiftfs_show_options,
1920
	.statfs		= shiftfs_statfs,
1921
	.remount_fs	= shiftfs_remount,
1922
	.evict_inode	= shiftfs_evict_inode,
1923
};
1924
1925
/* Mount arguments handed from shiftfs_mount() to shiftfs_fill_super(). */
struct shiftfs_data {
	void *data;		/* raw mount option data */
	const char *path;	/* device name, used as the path to shift */
};
1929
1930
static void shiftfs_super_force_flags(struct super_block *sb,
1931
				      unsigned long lower_flags)
1932
{
1933
	sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV |
1934
				      SB_NOEXEC | SB_NOATIME | SB_NODIRATIME);
1935
1936
	if (!(lower_flags & SB_POSIXACL))
1937
		sb->s_flags &= ~SB_POSIXACL;
1938
}
1939
1940
static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
1941
			      int silent)
1942
{
1943
	int err;
1944
	struct path path = {};
1945
	struct shiftfs_super_info *sbinfo_mp;
1946
	char *name = NULL;
1947
	struct inode *inode = NULL;
1948
	struct dentry *dentry = NULL;
1949
	struct shiftfs_data *data = raw_data;
1950
	struct shiftfs_super_info *sbinfo = NULL;
1951
1952
	if (!data->path)
1953
		return -EINVAL;
1954
1955
	sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
1956
	if (!sb->s_fs_info)
1957
		return -ENOMEM;
1958
	sbinfo = sb->s_fs_info;
1959
1960
	err = shiftfs_parse_mount_options(sbinfo, data->data);
1961
	if (err)
1962
		return err;
1963
1964
	/* to mount a mark, must be userns admin */
1965
	if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
1966
		return -EPERM;
1967
1968
	name = kstrdup(data->path, GFP_KERNEL);
1969
	if (!name)
1970
		return -ENOMEM;
1971
1972
	err = kern_path(name, LOOKUP_FOLLOW, &path);
1973
	if (err)
1974
		goto out_free_name;
1975
1976
	if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
1977
		err = -ENOTDIR;
1978
		goto out_put_path;
1979
	}
1980
1981
	sb->s_flags |= SB_POSIXACL;
1982
1983
	if (sbinfo->mark) {
1984
		struct cred *cred_tmp;
1985
		struct super_block *lower_sb = path.mnt->mnt_sb;
1986
1987
		/* to mark a mount point, must root wrt lower s_user_ns */
1988
		if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) {
1989
			err = -EPERM;
1990
			goto out_put_path;
1991
		}
1992
1993
		/*
1994
		 * this part is visible unshifted, so make sure no
1995
		 * executables that could be used to give suid
1996
		 * privileges
1997
		 */
1998
		sb->s_iflags = SB_I_NOEXEC;
1999
2000
		shiftfs_super_force_flags(sb, lower_sb->s_flags);
2001
2002
		/*
2003
		 * Handle nesting of shiftfs mounts by referring this mark
2004
		 * mount back to the original mark mount. This is more
2005
		 * efficient and alleviates concerns about stack depth.
2006
		 */
2007
		if (lower_sb->s_magic == SHIFTFS_MAGIC) {
2008
			sbinfo_mp = lower_sb->s_fs_info;
2009
2010
			/* Doesn't make sense to mark a mark mount */
2011
			if (sbinfo_mp->mark) {
2012
				err = -EINVAL;
2013
				goto out_put_path;
2014
			}
2015
2016
			if (!passthrough_is_subset(sbinfo_mp->passthrough,
2017
						   sbinfo->passthrough)) {
2018
				err = -EPERM;
2019
				goto out_put_path;
2020
			}
2021
2022
			sbinfo->mnt = mntget(sbinfo_mp->mnt);
2023
			dentry = dget(path.dentry->d_fsdata);
2024
			/*
2025
			 * Copy up the passthrough mount options from the
2026
			 * parent mark mountpoint.
2027
			 */
2028
			sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark;
2029
			sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2030
		} else {
2031
			sbinfo->mnt = mntget(path.mnt);
2032
			dentry = dget(path.dentry);
2033
			/*
2034
			 * For a new mark passthrough_mark and passthrough
2035
			 * are identical.
2036
			 */
2037
			sbinfo->passthrough_mark = sbinfo->passthrough;
2038
2039
			cred_tmp = prepare_creds();
2040
			if (!cred_tmp) {
2041
				err = -ENOMEM;
2042
				goto out_put_path;
2043
			}
2044
			/* Don't override disk quota limits or use reserved space. */
2045
			cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE);
2046
			sbinfo->creator_cred = cred_tmp;
2047
		}
2048
	} else {
2049
		/*
2050
		 * This leg executes if we're admin capable in the namespace,
2051
		 * so be very careful.
2052
		 */
2053
		err = -EPERM;
2054
		if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
2055
			goto out_put_path;
2056
2057
		sbinfo_mp = path.dentry->d_sb->s_fs_info;
2058
		if (!sbinfo_mp->mark)
2059
			goto out_put_path;
2060
2061
		if (!passthrough_is_subset(sbinfo_mp->passthrough,
2062
					   sbinfo->passthrough))
2063
			goto out_put_path;
2064
2065
		sbinfo->mnt = mntget(sbinfo_mp->mnt);
2066
		sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred);
2067
		dentry = dget(path.dentry->d_fsdata);
2068
		/*
2069
		 * Copy up passthrough settings from mark mountpoint so we can
2070
		 * verify when the overlay wants to remount with different
2071
		 * passthrough settings.
2072
		 */
2073
		sbinfo->passthrough_mark = sbinfo_mp->passthrough;
2074
		shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags);
2075
	}
2076
2077
	sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1;
2078
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
2079
		printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
2080
		err = -EINVAL;
2081
		goto out_put_path;
2082
	}
2083
2084
	inode = new_inode(sb);
2085
	if (!inode) {
2086
		err = -ENOMEM;
2087
		goto out_put_path;
2088
	}
2089
	shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry);
2090
2091
	ihold(dentry->d_inode);
2092
	inode->i_private = dentry->d_inode;
2093
2094
	sb->s_magic = SHIFTFS_MAGIC;
2095
	sb->s_maxbytes = MAX_LFS_FILESIZE;
2096
	sb->s_op = &shiftfs_super_ops;
2097
	sb->s_xattr = shiftfs_xattr_handlers;
2098
	sb->s_d_op = &shiftfs_dentry_ops;
2099
	sb->s_root = d_make_root(inode);
2100
	if (!sb->s_root) {
2101
		err = -ENOMEM;
2102
		goto out_put_path;
2103
	}
2104
2105
	sb->s_root->d_fsdata = dentry;
2106
	sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns);
2107
	shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode);
2108
2109
	dentry = NULL;
2110
	err = 0;
2111
2112
out_put_path:
2113
	path_put(&path);
2114
2115
out_free_name:
2116
	kfree(name);
2117
2118
	dput(dentry);
2119
2120
	return err;
2121
}
2122
2123
static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
2124
				    int flags, const char *dev_name, void *data)
2125
{
2126
	struct shiftfs_data d = { data, dev_name };
2127
2128
	return mount_nodev(fs_type, flags, &d, shiftfs_fill_super);
2129
}
2130
2131
static struct file_system_type shiftfs_type = {
2132
	.owner		= THIS_MODULE,
2133
	.name		= "shiftfs",
2134
	.mount		= shiftfs_mount,
2135
	.kill_sb	= kill_anon_super,
2136
	.fs_flags	= FS_USERNS_MOUNT,
2137
};
2138
2139
/* Module init: register the shiftfs filesystem type. */
static int __init shiftfs_init(void)
{
	int err = register_filesystem(&shiftfs_type);

	return err;
}
2143
2144
/* Module exit: unregister the shiftfs filesystem type. */
static void __exit shiftfs_exit(void)
{
	(void)unregister_filesystem(&shiftfs_type);
}
2148
2149
MODULE_ALIAS_FS("shiftfs");
2150
MODULE_AUTHOR("James Bottomley");
2151
MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>");
2152
MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>");
2153
MODULE_DESCRIPTION("id shifting filesystem");
2154
MODULE_LICENSE("GPL v2");
2155
module_init(shiftfs_init)
2156
module_exit(shiftfs_exit)
(-)a/include/uapi/linux/magic.h (+2 lines)
Lines 96-99 Link Here
96
#define DEVMEM_MAGIC		0x454d444d	/* "DMEM" */
96
#define DEVMEM_MAGIC		0x454d444d	/* "DMEM" */
97
#define Z3FOLD_MAGIC		0x33
97
#define Z3FOLD_MAGIC		0x33
98
98
99
#define SHIFTFS_MAGIC         0x6a656a62
100
99
#endif /* __LINUX_MAGIC_H__ */
101
#endif /* __LINUX_MAGIC_H__ */
(-)a/fs/Makefile (+1 lines)
Lines 132-134 obj-$(CONFIG_CEPH_FS) += ceph/ Link Here
132
obj-$(CONFIG_PSTORE)		+= pstore/
132
obj-$(CONFIG_PSTORE)		+= pstore/
133
obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
133
obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
134
obj-$(CONFIG_EROFS_FS)		+= erofs/
134
obj-$(CONFIG_EROFS_FS)		+= erofs/
135
obj-$(CONFIG_SHIFT_FS)    += shiftfs.o
(-)a/fs/Kconfig (+18 lines)
Lines 122-127 source "fs/autofs/Kconfig" Link Here
122
source "fs/fuse/Kconfig"
122
source "fs/fuse/Kconfig"
123
source "fs/overlayfs/Kconfig"
123
source "fs/overlayfs/Kconfig"
124
124
125
config SHIFT_FS
126
  tristate "UID/GID shifting overlay filesystem for containers"
127
  help
128
    This filesystem can overlay any mounted filesystem and shift
129
    the uid/gid the files appear at.  The idea is that
130
    unprivileged containers can use this to mount root volumes
131
    using this technique.
132
133
config SHIFT_FS_POSIX_ACL
134
  bool "shiftfs POSIX Access Control Lists"
135
  depends on SHIFT_FS
136
  select FS_POSIX_ACL
137
  help
138
    POSIX Access Control Lists (ACLs) support permissions for users and
139
    groups beyond the owner/group/world scheme.
140
141
    If you don't know what Access Control Lists are, say N.
142
125
menu "Caches"
143
menu "Caches"
126
144
127
source "fs/fscache/Kconfig"
145
source "fs/fscache/Kconfig"

Return to bug 692204