diff options
author | Mike Pagano <mpagano@gentoo.org> | 2023-03-06 12:29:51 -0500 |
---|---|---|
committer | Mike Pagano <mpagano@gentoo.org> | 2023-03-06 12:29:51 -0500 |
commit | 66dd8dbc053422be2cf35862fe70e7996615c0e8 (patch) | |
tree | eaf05fbc5f5701a263133fed5adea36d5cc87c4a | |
parent | Remove shiftfs until I fix the patch (diff) | |
download | linux-patches-66dd8dbc053422be2cf35862fe70e7996615c0e8.tar.gz linux-patches-66dd8dbc053422be2cf35862fe70e7996615c0e8.tar.bz2 linux-patches-66dd8dbc053422be2cf35862fe70e7996615c0e8.zip |
Add shifts back
Signed-off-by: Mike Pagano <mpagano@gentoo.org>
-rw-r--r-- | 0000_README | 4 | ||||
-rw-r--r-- | 5000_shiftfs-6.1.patch | 2249 |
2 files changed, 2253 insertions, 0 deletions
diff --git a/0000_README b/0000_README index dca64867..6e5b60b1 100644 --- a/0000_README +++ b/0000_README @@ -147,6 +147,10 @@ Patch: 4567_distro-Gentoo-Kconfig.patch From: Tom Wijsman <TomWij@gentoo.org> Desc: Add Gentoo Linux support config settings and defaults. +Patch: 5000_shiftfs-6.1.patch +From: https://github.com/toby63/shiftfs-dkms +Desc: Kernel fs for Linux that provides easier uid/gid-shifting for containers + Patch: 5010_enable-cpu-optimizations-universal.patch From: https://github.com/graysky2/kernel_compiler_patch Desc: Kernel >= 5.15 patch enables gcc = v11.1+ optimizations for additional CPUs. diff --git a/5000_shiftfs-6.1.patch b/5000_shiftfs-6.1.patch new file mode 100644 index 00000000..f0c715e3 --- /dev/null +++ b/5000_shiftfs-6.1.patch @@ -0,0 +1,2249 @@ +--- /dev/null 2023-03-06 07:06:05.852282227 -0500 ++++ b/fs/shiftfs.c 2023-03-06 11:58:38.389647656 -0500 +@@ -0,0 +1,2203 @@ ++#include <linux/btrfs.h> ++#include <linux/capability.h> ++#include <linux/cred.h> ++#include <linux/mount.h> ++#include <linux/fdtable.h> ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/namei.h> ++#include <linux/module.h> ++#include <linux/kernel.h> ++#include <linux/magic.h> ++#include <linux/parser.h> ++#include <linux/security.h> ++#include <linux/seq_file.h> ++#include <linux/statfs.h> ++#include <linux/slab.h> ++#include <linux/user_namespace.h> ++#include <linux/uidgid.h> ++#include <linux/xattr.h> ++#include <linux/posix_acl.h> ++#include <linux/posix_acl_xattr.h> ++#include <linux/uio.h> ++#include <linux/fiemap.h> ++#include <linux/pagemap.h> ++ ++struct shiftfs_super_info { ++ struct vfsmount *mnt; ++ struct user_namespace *userns; ++ /* creds of process who created the super block */ ++ const struct cred *creator_cred; ++ bool mark; ++ unsigned int passthrough; ++ unsigned int passthrough_mark; ++}; ++ ++static void shiftfs_fill_inode(struct inode *inode, unsigned long ino, ++ umode_t mode, dev_t dev, struct dentry *dentry); ++ ++#define SHIFTFS_PASSTHROUGH_NONE 0 ++#define SHIFTFS_PASSTHROUGH_STAT 1 ++#define SHIFTFS_PASSTHROUGH_IOCTL 2 ++#define SHIFTFS_PASSTHROUGH_ALL \ ++ (SHIFTFS_PASSTHROUGH_STAT | SHIFTFS_PASSTHROUGH_IOCTL) ++ ++static inline bool shiftfs_passthrough_ioctls(struct shiftfs_super_info *info) ++{ ++ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_IOCTL)) ++ return false; ++ ++ return true; ++} ++ ++static inline bool shiftfs_passthrough_statfs(struct shiftfs_super_info *info) ++{ ++ if (!(info->passthrough & SHIFTFS_PASSTHROUGH_STAT)) ++ return false; ++ ++ return true; ++} ++ ++enum { ++ OPT_MARK, ++ OPT_PASSTHROUGH, ++ OPT_LAST, ++}; ++ ++/* global filesystem options */ ++static const match_table_t tokens = { ++ { OPT_MARK, "mark" }, ++ { OPT_PASSTHROUGH, "passthrough=%u" }, ++ { OPT_LAST, NULL } ++}; ++ ++static const struct cred *shiftfs_override_creds(const struct super_block *sb) ++{ ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info; ++ ++ return override_creds(sbinfo->creator_cred); ++} ++ ++static inline void shiftfs_revert_object_creds(const struct cred *oldcred, ++ struct cred *newcred) ++{ ++ revert_creds(oldcred); ++ put_cred(newcred); ++} ++ ++static kuid_t shift_kuid(struct user_namespace *from, struct user_namespace *to, ++ kuid_t kuid) ++{ ++ uid_t uid = from_kuid(from, kuid); ++ return make_kuid(to, uid); ++} ++ ++static kgid_t shift_kgid(struct user_namespace *from, struct user_namespace *to, ++ kgid_t kgid) ++{ ++ gid_t gid = from_kgid(from, kgid); ++ return make_kgid(to, gid); ++} ++ ++static int shiftfs_override_object_creds(const struct super_block *sb, ++ const struct cred **oldcred, ++ struct cred **newcred, ++ struct dentry *dentry, umode_t mode, ++ bool hardlink) ++{ ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info; ++ kuid_t fsuid = current_fsuid(); ++ kgid_t fsgid = current_fsgid(); ++ ++ *oldcred = shiftfs_override_creds(sb); ++ ++ *newcred = prepare_creds(); ++ if (!*newcred) { ++ revert_creds(*oldcred); ++ return -ENOMEM; ++ } ++ ++ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid); ++ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid); ++ ++ if (!hardlink) { ++ int err = security_dentry_create_files_as(dentry, mode, ++ &dentry->d_name, ++ *oldcred, *newcred); ++ if (err) { ++ shiftfs_revert_object_creds(*oldcred, *newcred); ++ return err; ++ } ++ } ++ ++ put_cred(override_creds(*newcred)); ++ return 0; ++} ++ ++static void shiftfs_copyattr(struct inode *from, struct inode *to) ++{ ++ struct user_namespace *from_ns = from->i_sb->s_user_ns; ++ struct user_namespace *to_ns = to->i_sb->s_user_ns; ++ ++ to->i_uid = shift_kuid(from_ns, to_ns, from->i_uid); ++ to->i_gid = shift_kgid(from_ns, to_ns, from->i_gid); ++ to->i_mode = from->i_mode; ++ to->i_atime = from->i_atime; ++ to->i_mtime = from->i_mtime; ++ to->i_ctime = from->i_ctime; ++ i_size_write(to, i_size_read(from)); ++} ++ ++static void shiftfs_copyflags(struct inode *from, struct inode *to) ++{ ++ unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME; ++ ++ inode_set_flags(to, from->i_flags & mask, mask); ++} ++ ++static void shiftfs_file_accessed(struct file *file) ++{ ++ struct inode *upperi, *loweri; ++ ++ if (file->f_flags & O_NOATIME) ++ return; ++ ++ upperi = file_inode(file); ++ loweri = upperi->i_private; ++ ++ if (!loweri) ++ return; ++ ++ upperi->i_mtime = loweri->i_mtime; ++ upperi->i_ctime = loweri->i_ctime; ++ ++ touch_atime(&file->f_path); ++} ++ ++static int shiftfs_parse_mount_options(struct shiftfs_super_info *sbinfo, ++ char *options) ++{ ++ char *p; ++ substring_t args[MAX_OPT_ARGS]; ++ ++ sbinfo->mark = false; ++ sbinfo->passthrough = 0; ++ ++ while ((p = strsep(&options, ",")) != NULL) { ++ int err, intarg, token; ++ ++ if (!*p) ++ continue; ++ ++ token = match_token(p, tokens, args); ++ switch (token) { ++ case OPT_MARK: ++ sbinfo->mark = true; ++ break; ++ case OPT_PASSTHROUGH: ++ err = match_int(&args[0], &intarg); ++ if (err) ++ return err; ++ ++ if (intarg & ~SHIFTFS_PASSTHROUGH_ALL) ++ return -EINVAL; ++ ++ sbinfo->passthrough = intarg; ++ break; ++ default: ++ return -EINVAL; ++ } ++ } ++ ++ return 0; ++} ++ ++static void shiftfs_d_release(struct dentry *dentry) ++{ ++ struct dentry *lowerd = dentry->d_fsdata; ++ ++ if (lowerd) ++ dput(lowerd); ++} ++ ++static struct dentry *shiftfs_d_real(struct dentry *dentry, ++ const struct inode *inode) ++{ ++ struct dentry *lowerd = dentry->d_fsdata; ++ ++ if (inode && d_inode(dentry) == inode) ++ return dentry; ++ ++ lowerd = d_real(lowerd, inode); ++ if (lowerd && (!inode || inode == d_inode(lowerd))) ++ return lowerd; ++ ++ WARN(1, "shiftfs_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, ++ inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); ++ return dentry; ++} ++ ++static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags) ++{ ++ int err = 1; ++ struct dentry *lowerd = dentry->d_fsdata; ++ ++ if (d_is_negative(lowerd) != d_is_negative(dentry)) ++ return 0; ++ ++ if ((lowerd->d_flags & DCACHE_OP_WEAK_REVALIDATE)) ++ err = lowerd->d_op->d_weak_revalidate(lowerd, flags); ++ ++ if (d_really_is_positive(dentry)) { ++ struct inode *inode = d_inode(dentry); ++ struct inode *loweri = d_inode(lowerd); ++ ++ shiftfs_copyattr(loweri, inode); ++ } ++ ++ return err; ++} ++ ++static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags) ++{ ++ int err = 1; ++ struct dentry *lowerd = dentry->d_fsdata; ++ ++ if (d_unhashed(lowerd) || ++ ((d_is_negative(lowerd) != d_is_negative(dentry)))) ++ return 0; ++ ++ if (flags & LOOKUP_RCU) ++ return -ECHILD; ++ ++ if ((lowerd->d_flags & DCACHE_OP_REVALIDATE)) ++ err = lowerd->d_op->d_revalidate(lowerd, flags); ++ ++ if (d_really_is_positive(dentry)) { ++ struct inode *inode = d_inode(dentry); ++ struct inode *loweri = d_inode(lowerd); ++ ++ shiftfs_copyattr(loweri, inode); ++ } ++ ++ return err; ++} ++ ++static const struct dentry_operations shiftfs_dentry_ops = { ++ .d_release = shiftfs_d_release, ++ .d_real = shiftfs_d_real, ++ .d_revalidate = shiftfs_d_revalidate, ++ .d_weak_revalidate = shiftfs_d_weak_revalidate, ++}; ++ ++static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode, ++ struct delayed_call *done) ++{ ++ const char *p; ++ const struct cred *oldcred; ++ struct dentry *lowerd; ++ ++ /* RCU lookup not supported */ ++ if (!dentry) ++ return ERR_PTR(-ECHILD); ++ ++ lowerd = dentry->d_fsdata; ++ oldcred = shiftfs_override_creds(dentry->d_sb); ++ p = vfs_get_link(lowerd, done); ++ revert_creds(oldcred); ++ ++ return p; ++} ++ ++static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode, ++ const char *name, const void *value, ++ size_t size, int flags) ++{ ++ struct dentry *lowerd = dentry->d_fsdata; ++ int err; ++ const struct cred *oldcred; ++ ++ oldcred = shiftfs_override_creds(dentry->d_sb); ++ err = vfs_setxattr(&init_user_ns, lowerd, name, value, size, flags); ++ revert_creds(oldcred); ++ ++ shiftfs_copyattr(lowerd->d_inode, inode); ++ ++ return err; ++} ++ ++static int shiftfs_xattr_get(const struct xattr_handler *handler, ++ struct dentry *dentry, struct inode *inode, ++ const char *name, void *value, size_t size) ++{ ++ struct dentry *lowerd = dentry->d_fsdata; ++ int err; ++ const struct cred *oldcred; ++ ++ oldcred = shiftfs_override_creds(dentry->d_sb); ++ err = vfs_getxattr(&init_user_ns, lowerd, name, value, size); ++ revert_creds(oldcred); ++ ++ return err; ++} ++ ++static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list, ++ size_t size) ++{ ++ struct dentry *lowerd = dentry->d_fsdata; ++ int err; ++ const struct cred *oldcred; ++ ++ oldcred = shiftfs_override_creds(dentry->d_sb); ++ err = vfs_listxattr(lowerd, list, size); ++ revert_creds(oldcred); ++ ++ return err; ++} ++ ++static int shiftfs_removexattr(struct user_namespace *ns, ++ struct dentry *dentry, const char *name) ++{ ++ struct dentry *lowerd = dentry->d_fsdata; ++ int err; ++ const struct cred *oldcred; ++ ++ oldcred = shiftfs_override_creds(dentry->d_sb); ++ err = vfs_removexattr(&init_user_ns, lowerd, name); ++ revert_creds(oldcred); ++ ++ /* update c/mtime */ ++ shiftfs_copyattr(lowerd->d_inode, d_inode(dentry)); ++ ++ return err; ++} ++ ++static int shiftfs_xattr_set(const struct xattr_handler *handler, ++ struct user_namespace *ns, ++ struct dentry *dentry, struct inode *inode, ++ const char *name, const void *value, size_t size, ++ int flags) ++{ ++ if (!value) ++ return shiftfs_removexattr(&init_user_ns, dentry, name); ++ return shiftfs_setxattr(dentry, inode, name, value, size, flags); ++} ++ ++static int shiftfs_inode_test(struct inode *inode, void *data) ++{ ++ return inode->i_private == data; ++} ++ ++static int shiftfs_inode_set(struct inode *inode, void *data) ++{ ++ inode->i_private = data; ++ return 0; ++} ++ ++static int shiftfs_create_object(struct inode *diri, struct dentry *dentry, ++ umode_t mode, const char *symlink, ++ struct dentry *hardlink, bool excl) ++{ ++ int err; ++ const struct cred *oldcred; ++ struct cred *newcred; ++ void *loweri_iop_ptr = NULL; ++ umode_t modei = mode; ++ struct super_block *dir_sb = diri->i_sb; ++ struct dentry *lowerd_new = dentry->d_fsdata; ++ struct inode *inode = NULL, *loweri_dir = diri->i_private; ++ const struct inode_operations *loweri_dir_iop = loweri_dir->i_op; ++ struct dentry *lowerd_link = NULL; ++ ++ if (hardlink) { ++ loweri_iop_ptr = loweri_dir_iop->link; ++ } else { ++ switch (mode & S_IFMT) { ++ case S_IFDIR: ++ loweri_iop_ptr = loweri_dir_iop->mkdir; ++ break; ++ case S_IFREG: ++ loweri_iop_ptr = loweri_dir_iop->create; ++ break; ++ case S_IFLNK: ++ loweri_iop_ptr = loweri_dir_iop->symlink; ++ break; ++ case S_IFSOCK: ++ /* fall through */ ++ case S_IFIFO: ++ loweri_iop_ptr = loweri_dir_iop->mknod; ++ break; ++ } ++ } ++ if (!loweri_iop_ptr) { ++ err = -EINVAL; ++ goto out_iput; ++ } ++ ++ inode_lock_nested(loweri_dir, I_MUTEX_PARENT); ++ ++ if (!hardlink) { ++ inode = new_inode(dir_sb); ++ if (!inode) { ++ err = -ENOMEM; ++ goto out_iput; ++ } ++ ++ /* ++ * new_inode() will have added the new inode to the super ++ * block's list of inodes. Further below we will call ++ * inode_insert5() Which would perform the same operation again ++ * thereby corrupting the list. To avoid this raise I_CREATING ++ * in i_state which will cause inode_insert5() to skip this ++ * step. I_CREATING will be cleared by d_instantiate_new() ++ * below. ++ */ ++ spin_lock(&inode->i_lock); ++ inode->i_state |= I_CREATING; ++ spin_unlock(&inode->i_lock); ++ ++ inode_init_owner(&init_user_ns, inode, diri, mode); ++ modei = inode->i_mode; ++ } ++ ++ err = shiftfs_override_object_creds(dentry->d_sb, &oldcred, &newcred, ++ dentry, modei, hardlink != NULL); ++ if (err) ++ goto out_iput; ++ ++ if (hardlink) { ++ lowerd_link = hardlink->d_fsdata; ++ err = vfs_link(lowerd_link, &init_user_ns, loweri_dir, lowerd_new, NULL); ++ } else { ++ switch (modei & S_IFMT) { ++ case S_IFDIR: ++ err = vfs_mkdir(&init_user_ns, loweri_dir, lowerd_new, modei); ++ break; ++ case S_IFREG: ++ err = vfs_create(&init_user_ns, loweri_dir, lowerd_new, modei, excl); ++ break; ++ case S_IFLNK: ++ err = vfs_symlink(&init_user_ns, loweri_dir, lowerd_new, symlink); ++ break; ++ case S_IFSOCK: ++ /* fall through */ ++ case S_IFIFO: ++ err = vfs_mknod(&init_user_ns, loweri_dir, lowerd_new, modei, 0); ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } ++ } ++ ++ shiftfs_revert_object_creds(oldcred, newcred); ++ ++ if (!err && WARN_ON(!lowerd_new->d_inode)) ++ err = -EIO; ++ if (err) ++ goto out_iput; ++ ++ if (hardlink) { ++ inode = d_inode(hardlink); ++ ihold(inode); ++ ++ /* copy up times from lower inode */ ++ shiftfs_copyattr(d_inode(lowerd_link), inode); ++ set_nlink(d_inode(hardlink), d_inode(lowerd_link)->i_nlink); ++ d_instantiate(dentry, inode); ++ } else { ++ struct inode *inode_tmp; ++ struct inode *loweri_new = d_inode(lowerd_new); ++ ++ inode_tmp = inode_insert5(inode, (unsigned long)loweri_new, ++ shiftfs_inode_test, shiftfs_inode_set, ++ loweri_new); ++ if (unlikely(inode_tmp != inode)) { ++ pr_err_ratelimited("shiftfs: newly created inode found in cache\n"); ++ iput(inode_tmp); ++ err = -EINVAL; ++ goto out_iput; ++ } ++ ++ ihold(loweri_new); ++ shiftfs_fill_inode(inode, loweri_new->i_ino, loweri_new->i_mode, ++ 0, lowerd_new); ++ d_instantiate_new(dentry, inode); ++ } ++ ++ shiftfs_copyattr(loweri_dir, diri); ++ if (loweri_iop_ptr == loweri_dir_iop->mkdir) ++ set_nlink(diri, loweri_dir->i_nlink); ++ ++ inode = NULL; ++ ++out_iput: ++ iput(inode); ++ inode_unlock(loweri_dir); ++ ++ return err; ++} ++ ++static int shiftfs_create(struct user_namespace *ns, ++ struct inode *dir, struct dentry *dentry, ++ umode_t mode, bool excl) ++{ ++ mode |= S_IFREG; ++ ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, excl); ++} ++ ++static int shiftfs_mkdir(struct user_namespace *ns, struct inode *dir, struct dentry *dentry, ++ umode_t mode) ++{ ++ mode |= S_IFDIR; ++ ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false); ++} ++ ++static int shiftfs_link(struct dentry *hardlink, struct inode *dir, ++ struct dentry *dentry) ++{ ++ return shiftfs_create_object(dir, dentry, 0, NULL, hardlink, false); ++} ++ ++static int shiftfs_mknod(struct user_namespace *ns, ++ struct inode *dir, struct dentry *dentry, umode_t mode, ++ dev_t rdev) ++{ ++ if (!S_ISFIFO(mode) && !S_ISSOCK(mode)) ++ return -EPERM; ++ ++ return shiftfs_create_object(dir, dentry, mode, NULL, NULL, false); ++} ++ ++static int shiftfs_symlink(struct user_namespace *ns, struct inode *dir, struct dentry *dentry, ++ const char *symlink) ++{ ++ return shiftfs_create_object(dir, dentry, S_IFLNK, symlink, NULL, false); ++} ++ ++static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir) ++{ ++ struct dentry *lowerd = dentry->d_fsdata; ++ struct inode *loweri = dir->i_private; ++ struct inode *inode = d_inode(dentry); ++ int err; ++ const struct cred *oldcred; ++ ++ dget(lowerd); ++ oldcred = shiftfs_override_creds(dentry->d_sb); ++ inode_lock_nested(loweri, I_MUTEX_PARENT); ++ if (rmdir) ++ err = vfs_rmdir(&init_user_ns, loweri, lowerd); ++ else ++ err = vfs_unlink(&init_user_ns, loweri, lowerd, NULL); ++ revert_creds(oldcred); ++ ++ if (!err) { ++ d_drop(dentry); ++ ++ if (rmdir) ++ clear_nlink(inode); ++ else ++ drop_nlink(inode); ++ } ++ inode_unlock(loweri); ++ ++ shiftfs_copyattr(loweri, dir); ++ dput(lowerd); ++ ++ return err; ++} ++ ++static int shiftfs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ return shiftfs_rm(dir, dentry, false); ++} ++ ++static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ return shiftfs_rm(dir, dentry, true); ++} ++ ++static int shiftfs_rename(struct user_namespace *ns, ++ struct inode *olddir, struct dentry *old, ++ struct inode *newdir, struct dentry *new, ++ unsigned int flags) ++{ ++ struct dentry *lowerd_dir_old = old->d_parent->d_fsdata, ++ *lowerd_dir_new = new->d_parent->d_fsdata, ++ *lowerd_old = old->d_fsdata, *lowerd_new = new->d_fsdata, ++ *trapd; ++ struct inode *loweri_dir_old = lowerd_dir_old->d_inode, ++ *loweri_dir_new = lowerd_dir_new->d_inode; ++ struct renamedata rd = { ++ .old_mnt_userns = &init_user_ns, ++ .old_dir = loweri_dir_old, ++ .old_dentry = lowerd_old, ++ .new_mnt_userns = &init_user_ns, ++ .new_dir = loweri_dir_new, ++ .new_dentry = lowerd_new, ++ }; ++ int err = -EINVAL; ++ const struct cred *oldcred; ++ ++ trapd = lock_rename(lowerd_dir_new, lowerd_dir_old); ++ ++ if (trapd == lowerd_old || trapd == lowerd_new) ++ goto out_unlock; ++ ++ oldcred = shiftfs_override_creds(old->d_sb); ++ err = vfs_rename(&rd); ++ revert_creds(oldcred); ++ ++ shiftfs_copyattr(loweri_dir_old, olddir); ++ shiftfs_copyattr(loweri_dir_new, newdir); ++ ++out_unlock: ++ unlock_rename(lowerd_dir_new, lowerd_dir_old); ++ ++ return err; ++} ++ ++static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry, ++ unsigned int flags) ++{ ++ struct dentry *new; ++ struct inode *newi; ++ const struct cred *oldcred; ++ struct dentry *lowerd = dentry->d_parent->d_fsdata; ++ struct inode *inode = NULL, *loweri = lowerd->d_inode; ++ ++ inode_lock(loweri); ++ oldcred = shiftfs_override_creds(dentry->d_sb); ++ new = lookup_one_len(dentry->d_name.name, lowerd, dentry->d_name.len); ++ revert_creds(oldcred); ++ inode_unlock(loweri); ++ ++ if (IS_ERR(new)) ++ return new; ++ ++ dentry->d_fsdata = new; ++ ++ newi = new->d_inode; ++ if (!newi) ++ goto out; ++ ++ inode = iget5_locked(dentry->d_sb, (unsigned long)newi, ++ shiftfs_inode_test, shiftfs_inode_set, newi); ++ if (!inode) { ++ dput(new); ++ return ERR_PTR(-ENOMEM); ++ } ++ if (inode->i_state & I_NEW) { ++ /* ++ * inode->i_private set by shiftfs_inode_set(), but we still ++ * need to take a reference ++ */ ++ ihold(newi); ++ shiftfs_fill_inode(inode, newi->i_ino, newi->i_mode, 0, new); ++ unlock_new_inode(inode); ++ } ++ ++out: ++ return d_splice_alias(inode, dentry); ++} ++ ++static int shiftfs_permission(struct user_namespace *ns, struct inode *inode, int mask) ++{ ++ int err; ++ const struct cred *oldcred; ++ struct inode *loweri = inode->i_private; ++ ++ if (!loweri) { ++ WARN_ON(!(mask & MAY_NOT_BLOCK)); ++ return -ECHILD; ++ } ++ ++ err = generic_permission(&init_user_ns, inode, mask); ++ if (err) ++ return err; ++ ++ oldcred = shiftfs_override_creds(inode->i_sb); ++ err = inode_permission(&init_user_ns, loweri, mask); ++ revert_creds(oldcred); ++ ++ return err; ++} ++ ++static int shiftfs_fiemap(struct inode *inode, ++ struct fiemap_extent_info *fieinfo, u64 start, ++ u64 len) ++{ ++ int err; ++ const struct cred *oldcred; ++ struct inode *loweri = inode->i_private; ++ ++ if (!loweri->i_op->fiemap) ++ return -EOPNOTSUPP; ++ ++ oldcred = shiftfs_override_creds(inode->i_sb); ++ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) ++ filemap_write_and_wait(loweri->i_mapping); ++ err = loweri->i_op->fiemap(loweri, fieinfo, start, len); ++ revert_creds(oldcred); ++ ++ return err; ++} ++ ++static int shiftfs_tmpfile(struct user_namespace *ns, ++ struct inode *dir, struct file *file, ++ umode_t mode) ++{ ++ int err; ++ const struct cred *oldcred; ++ struct inode *loweri = dir->i_private; ++ ++ if (!loweri->i_op->tmpfile) ++ return -EOPNOTSUPP; ++ ++ oldcred = shiftfs_override_creds(dir->i_sb); ++ err = loweri->i_op->tmpfile(&init_user_ns, loweri, file, mode); ++ revert_creds(oldcred); ++ ++ return err; ++} ++ ++static int shiftfs_setattr(struct user_namespace *ns, struct dentry *dentry, struct iattr *attr) ++{ ++ struct dentry *lowerd = dentry->d_fsdata; ++ struct inode *loweri = lowerd->d_inode; ++ struct iattr newattr; ++ const struct cred *oldcred; ++ struct super_block *sb = dentry->d_sb; ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info; ++ int err; ++ ++ err = setattr_prepare(&init_user_ns, dentry, attr); ++ if (err) ++ return err; ++ ++ newattr = *attr; ++ newattr.ia_uid = shift_kuid(sb->s_user_ns, sbinfo->userns, attr->ia_uid); ++ newattr.ia_gid = shift_kgid(sb->s_user_ns, sbinfo->userns, attr->ia_gid); ++ ++ /* ++ * mode change is for clearing setuid/setgid bits. Allow lower fs ++ * to interpret this in its own way. ++ */ ++ if (newattr.ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) ++ newattr.ia_valid &= ~ATTR_MODE; ++ ++ inode_lock(loweri); ++ oldcred = shiftfs_override_creds(dentry->d_sb); ++ err = notify_change(&init_user_ns, lowerd, &newattr, NULL); ++ revert_creds(oldcred); ++ inode_unlock(loweri); ++ ++ shiftfs_copyattr(loweri, d_inode(dentry)); ++ ++ return err; ++} ++ ++static int shiftfs_getattr(struct user_namespace *ns, ++ const struct path *path, struct kstat *stat, ++ u32 request_mask, unsigned int query_flags) ++{ ++ struct inode *inode = path->dentry->d_inode; ++ struct dentry *lowerd = path->dentry->d_fsdata; ++ struct inode *loweri = lowerd->d_inode; ++ struct shiftfs_super_info *info = path->dentry->d_sb->s_fs_info; ++ struct path newpath = { .mnt = info->mnt, .dentry = lowerd }; ++ struct user_namespace *from_ns = loweri->i_sb->s_user_ns; ++ struct user_namespace *to_ns = inode->i_sb->s_user_ns; ++ const struct cred *oldcred; ++ int err; ++ ++ oldcred = shiftfs_override_creds(inode->i_sb); ++ err = vfs_getattr(&newpath, stat, request_mask, query_flags); ++ revert_creds(oldcred); ++ ++ if (err) ++ return err; ++ ++ /* transform the underlying id */ ++ stat->uid = shift_kuid(from_ns, to_ns, stat->uid); ++ stat->gid = shift_kgid(from_ns, to_ns, stat->gid); ++ return 0; ++} ++ ++#ifdef CONFIG_SHIFT_FS_POSIX_ACL ++ ++static int ++shift_acl_ids(struct user_namespace *from, struct user_namespace *to, ++ struct posix_acl *acl) ++{ ++ int i; ++ ++ for (i = 0; i < acl->a_count; i++) { ++ struct posix_acl_entry *e = &acl->a_entries[i]; ++ switch(e->e_tag) { ++ case ACL_USER: ++ e->e_uid = shift_kuid(from, to, e->e_uid); ++ if (!uid_valid(e->e_uid)) ++ return -EOVERFLOW; ++ break; ++ case ACL_GROUP: ++ e->e_gid = shift_kgid(from, to, e->e_gid); ++ if (!gid_valid(e->e_gid)) ++ return -EOVERFLOW; ++ break; ++ } ++ } ++ return 0; ++} ++ ++static void ++shift_acl_xattr_ids(struct user_namespace *from, struct user_namespace *to, ++ void *value, size_t size) ++{ ++ struct posix_acl_xattr_header *header = value; ++ struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end; ++ int count; ++ kuid_t kuid; ++ kgid_t kgid; ++ ++ if (!value) ++ return; ++ if (size < sizeof(struct posix_acl_xattr_header)) ++ return; ++ if (header->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) ++ return; ++ ++ count = posix_acl_xattr_count(size); ++ if (count < 0) ++ return; ++ if (count == 0) ++ return; ++ ++ for (end = entry + count; entry != end; entry++) { ++ switch(le16_to_cpu(entry->e_tag)) { ++ case ACL_USER: ++ kuid = make_kuid(&init_user_ns, le32_to_cpu(entry->e_id)); ++ kuid = shift_kuid(from, to, kuid); ++ entry->e_id = cpu_to_le32(from_kuid(&init_user_ns, kuid)); ++ break; ++ case ACL_GROUP: ++ kgid = make_kgid(from, le32_to_cpu(entry->e_id)); ++ kgid = shift_kgid(from, to, kgid); ++ entry->e_id = cpu_to_le32(from_kgid(from, kgid)); ++ break; ++ default: ++ break; ++ } ++ } ++} ++ ++static struct posix_acl * ++shiftfs_get_acl(struct inode *inode, int type, bool rcu) ++{ ++ struct inode *loweri = inode->i_private; ++ const struct cred *oldcred; ++ struct posix_acl *lower_acl, *acl = NULL; ++ struct user_namespace *from_ns = loweri->i_sb->s_user_ns; ++ struct user_namespace *to_ns = inode->i_sb->s_user_ns; ++ int size; ++ int err; ++ ++ if (rcu) ++ return ERR_PTR(-ECHILD); ++ ++ if (!IS_POSIXACL(loweri)) ++ return NULL; ++ ++ oldcred = shiftfs_override_creds(inode->i_sb); ++ lower_acl = get_acl(loweri, type); ++ revert_creds(oldcred); ++ ++ if (lower_acl && !IS_ERR(lower_acl)) { ++ /* XXX: export posix_acl_clone? */ ++ size = sizeof(struct posix_acl) + ++ lower_acl->a_count * sizeof(struct posix_acl_entry); ++ acl = kmemdup(lower_acl, size, GFP_KERNEL); ++ posix_acl_release(lower_acl); ++ ++ if (!acl) ++ return ERR_PTR(-ENOMEM); ++ ++ refcount_set(&acl->a_refcount, 1); ++ ++ err = shift_acl_ids(from_ns, to_ns, acl); ++ if (err) { ++ kfree(acl); ++ return ERR_PTR(err); ++ } ++ } ++ ++ return acl; ++} ++ ++static int ++shiftfs_posix_acl_xattr_get(const struct xattr_handler *handler, ++ struct dentry *dentry, struct inode *inode, ++ const char *name, void *buffer, size_t size) ++{ ++ struct inode *loweri = inode->i_private; ++ int ret; ++ ++ ret = shiftfs_xattr_get(NULL, dentry, inode, handler->name, ++ buffer, size); ++ if (ret < 0) ++ return ret; ++ ++ inode_lock(loweri); ++ shift_acl_xattr_ids(loweri->i_sb->s_user_ns, inode->i_sb->s_user_ns, ++ buffer, size); ++ inode_unlock(loweri); ++ return ret; ++} ++ ++static int ++shiftfs_posix_acl_xattr_set(const struct xattr_handler *handler, ++ struct user_namespace *ns, ++ struct dentry *dentry, struct inode *inode, ++ const char *name, const void *value, ++ size_t size, int flags) ++{ ++ struct inode *loweri = inode->i_private; ++ int err; ++ ++ if (!IS_POSIXACL(loweri) || !loweri->i_op->set_acl) ++ return -EOPNOTSUPP; ++ if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) ++ return value ? -EACCES : 0; ++ if (!inode_owner_or_capable(&init_user_ns, inode)) ++ return -EPERM; ++ ++ if (value) { ++ shift_acl_xattr_ids(inode->i_sb->s_user_ns, ++ loweri->i_sb->s_user_ns, ++ (void *)value, size); ++ err = shiftfs_setxattr(dentry, inode, handler->name, value, ++ size, flags); ++ } else { ++ err = shiftfs_removexattr(&init_user_ns, dentry, handler->name); ++ } ++ ++ if (!err) ++ shiftfs_copyattr(loweri, inode); ++ ++ return err; ++} ++ ++static const struct xattr_handler ++shiftfs_posix_acl_access_xattr_handler = { ++ .name = XATTR_NAME_POSIX_ACL_ACCESS, ++ .flags = ACL_TYPE_ACCESS, ++ .get = shiftfs_posix_acl_xattr_get, ++ .set = shiftfs_posix_acl_xattr_set, ++}; ++ ++static const struct xattr_handler ++shiftfs_posix_acl_default_xattr_handler = { ++ .name = XATTR_NAME_POSIX_ACL_DEFAULT, ++ .flags = ACL_TYPE_DEFAULT, ++ .get = shiftfs_posix_acl_xattr_get, ++ .set = shiftfs_posix_acl_xattr_set, ++}; ++ ++#else /* !CONFIG_SHIFT_FS_POSIX_ACL */ ++ ++#define shiftfs_get_acl NULL ++ ++#endif /* CONFIG_SHIFT_FS_POSIX_ACL */ ++ ++static const struct inode_operations shiftfs_dir_inode_operations = { ++ .lookup = shiftfs_lookup, ++ .mkdir = shiftfs_mkdir, ++ .symlink = shiftfs_symlink, ++ .unlink = shiftfs_unlink, ++ .rmdir = shiftfs_rmdir, ++ .rename = shiftfs_rename, ++ .link = shiftfs_link, ++ .setattr = shiftfs_setattr, ++ .create = shiftfs_create, ++ .mknod = shiftfs_mknod, ++ .permission = shiftfs_permission, ++ .getattr = shiftfs_getattr, ++ .listxattr = shiftfs_listxattr, ++ .get_acl = shiftfs_get_acl, ++}; ++ ++static const struct inode_operations shiftfs_file_inode_operations = { ++ .fiemap = shiftfs_fiemap, ++ .getattr = shiftfs_getattr, ++ .get_acl = shiftfs_get_acl, ++ .listxattr = shiftfs_listxattr, ++ .permission = shiftfs_permission, ++ .setattr = shiftfs_setattr, ++ .tmpfile = shiftfs_tmpfile, ++}; ++ ++static const struct inode_operations shiftfs_special_inode_operations = { ++ .getattr = shiftfs_getattr, ++ .get_acl = shiftfs_get_acl, ++ .listxattr = shiftfs_listxattr, ++ .permission = shiftfs_permission, ++ .setattr = shiftfs_setattr, ++}; ++ ++static const struct inode_operations shiftfs_symlink_inode_operations = { ++ .getattr = shiftfs_getattr, ++ .get_link = shiftfs_get_link, ++ .listxattr = shiftfs_listxattr, ++ .setattr = shiftfs_setattr, ++}; ++ ++static struct file *shiftfs_open_realfile(const struct file *file, ++ struct inode *realinode) ++{ ++ struct file *realfile; ++ const struct cred *old_cred; ++ struct inode *inode = file_inode(file); ++ struct dentry *lowerd = file->f_path.dentry->d_fsdata; ++ struct shiftfs_super_info *info = inode->i_sb->s_fs_info; ++ struct path realpath = { .mnt = info->mnt, .dentry = lowerd }; ++ ++ old_cred = shiftfs_override_creds(inode->i_sb); ++ realfile = open_with_fake_path(&realpath, file->f_flags, realinode, ++ info->creator_cred); ++ revert_creds(old_cred); ++ ++ return realfile; ++} ++ ++#define SHIFTFS_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT) ++ ++static int shiftfs_change_flags(struct file *file, unsigned int flags) ++{ ++ struct inode *inode = file_inode(file); ++ int err; ++ ++ /* if some flag changed that cannot be changed then something's amiss */ ++ if (WARN_ON((file->f_flags ^ flags) & ~SHIFTFS_SETFL_MASK)) ++ return -EIO; ++ ++ flags &= SHIFTFS_SETFL_MASK; ++ ++ if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode)) ++ return -EPERM; ++ ++ if (flags & O_DIRECT) { ++ if (!file->f_mapping->a_ops || ++ !file->f_mapping->a_ops->direct_IO) ++ return -EINVAL; ++ } ++ ++ if (file->f_op->check_flags) { ++ err = file->f_op->check_flags(flags); ++ if (err) ++ return err; ++ } ++ ++ spin_lock(&file->f_lock); ++ file->f_flags = (file->f_flags & ~SHIFTFS_SETFL_MASK) | flags; ++ spin_unlock(&file->f_lock); ++ ++ return 0; ++} ++ ++static int shiftfs_open(struct inode *inode, struct file *file) ++{ ++ struct file *realfile; ++ ++ realfile = shiftfs_open_realfile(file, inode->i_private); ++ if (IS_ERR(realfile)) ++ return PTR_ERR(realfile); ++ ++ file->private_data = realfile; ++ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO. */ ++ file->f_mapping = realfile->f_mapping; ++ ++ return 0; ++} ++ ++static int shiftfs_dir_open(struct inode *inode, struct file *file) ++{ ++ struct file *realfile; ++ const struct cred *oldcred; ++ struct dentry *lowerd = file->f_path.dentry->d_fsdata; ++ struct shiftfs_super_info *info = inode->i_sb->s_fs_info; ++ struct path realpath = { .mnt = info->mnt, .dentry = lowerd }; ++ ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); ++ realfile = dentry_open(&realpath, file->f_flags | O_NOATIME, ++ info->creator_cred); ++ revert_creds(oldcred); ++ if (IS_ERR(realfile)) ++ return PTR_ERR(realfile); ++ ++ file->private_data = realfile; ++ ++ return 0; ++} ++ ++static int shiftfs_release(struct inode *inode, struct file *file) ++{ ++ struct file *realfile = file->private_data; ++ ++ if (realfile) ++ fput(realfile); ++ ++ return 0; ++} ++ ++static int shiftfs_dir_release(struct inode *inode, struct file *file) ++{ ++ return shiftfs_release(inode, file); ++} ++ ++static loff_t shiftfs_dir_llseek(struct file *file, loff_t offset, int whence) ++{ ++ struct file *realfile = file->private_data; ++ ++ return vfs_llseek(realfile, offset, whence); ++} ++ ++static loff_t shiftfs_file_llseek(struct file *file, loff_t offset, int whence) ++{ ++ struct inode *realinode = file_inode(file)->i_private; ++ ++ return generic_file_llseek_size(file, offset, whence, ++ realinode->i_sb->s_maxbytes, ++ i_size_read(realinode)); ++} ++ ++/* XXX: Need to figure out what to to about atime updates, maybe other ++ * timestamps too ... ref. ovl_file_accessed() */ ++ ++static rwf_t shiftfs_iocb_to_rwf(struct kiocb *iocb) ++{ ++ int ifl = iocb->ki_flags; ++ rwf_t flags = 0; ++ ++ if (ifl & IOCB_NOWAIT) ++ flags |= RWF_NOWAIT; ++ if (ifl & IOCB_HIPRI) ++ flags |= RWF_HIPRI; ++ if (ifl & IOCB_DSYNC) ++ flags |= RWF_DSYNC; ++ if (ifl & IOCB_SYNC) ++ flags |= RWF_SYNC; ++ ++ return flags; ++} ++ ++static int shiftfs_real_fdget(const struct file *file, struct fd *lowerfd) ++{ ++ struct file *realfile; ++ ++ if (file->f_op->open != shiftfs_open && ++ file->f_op->open != shiftfs_dir_open) ++ return -EINVAL; ++ ++ realfile = file->private_data; ++ lowerfd->flags = 0; ++ lowerfd->file = realfile; ++ ++ /* Did the flags change since open? */ ++ if (unlikely(file->f_flags & ~lowerfd->file->f_flags)) ++ return shiftfs_change_flags(lowerfd->file, file->f_flags); ++ ++ return 0; ++} ++ ++static ssize_t shiftfs_read_iter(struct kiocb *iocb, struct iov_iter *iter) ++{ ++ struct file *file = iocb->ki_filp; ++ struct fd lowerfd; ++ const struct cred *oldcred; ++ ssize_t ret; ++ ++ if (!iov_iter_count(iter)) ++ return 0; ++ ++ ret = shiftfs_real_fdget(file, &lowerfd); ++ if (ret) ++ return ret; ++ ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); ++ ret = vfs_iter_read(lowerfd.file, iter, &iocb->ki_pos, ++ shiftfs_iocb_to_rwf(iocb)); ++ revert_creds(oldcred); ++ ++ shiftfs_file_accessed(file); ++ ++ fdput(lowerfd); ++ return ret; ++} ++ ++static ssize_t shiftfs_write_iter(struct kiocb *iocb, struct iov_iter *iter) ++{ ++ struct file *file = iocb->ki_filp; ++ struct inode *inode = file_inode(file); ++ struct fd lowerfd; ++ const struct cred *oldcred; ++ ssize_t ret; ++ ++ if (!iov_iter_count(iter)) ++ return 0; ++ ++ inode_lock(inode); ++ /* Update mode */ ++ shiftfs_copyattr(inode->i_private, inode); ++ ret = file_remove_privs(file); ++ if (ret) ++ goto out_unlock; ++ ++ ret = shiftfs_real_fdget(file, &lowerfd); ++ if (ret) ++ goto out_unlock; ++ ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); ++ file_start_write(lowerfd.file); ++ ret = vfs_iter_write(lowerfd.file, iter, &iocb->ki_pos, ++ shiftfs_iocb_to_rwf(iocb)); ++ file_end_write(lowerfd.file); ++ revert_creds(oldcred); ++ ++ /* Update size */ ++ shiftfs_copyattr(inode->i_private, inode); ++ ++ fdput(lowerfd); ++ ++out_unlock: ++ inode_unlock(inode); ++ return ret; ++} ++ ++static int shiftfs_fsync(struct file *file, loff_t start, loff_t end, ++ int datasync) ++{ ++ struct fd lowerfd; ++ const struct cred *oldcred; ++ int ret; ++ ++ ret = shiftfs_real_fdget(file, &lowerfd); ++ if (ret) ++ return ret; ++ ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); ++ ret = vfs_fsync_range(lowerfd.file, start, end, datasync); ++ revert_creds(oldcred); ++ ++ fdput(lowerfd); ++ return ret; ++} ++ ++static int shiftfs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ struct file *realfile = file->private_data; ++ const struct cred *oldcred; ++ int ret; ++ ++ if (!realfile->f_op->mmap) ++ return -ENODEV; ++ ++ if (WARN_ON(file != vma->vm_file)) ++ return -EIO; ++ ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); ++ vma->vm_file = get_file(realfile); ++ ret = call_mmap(vma->vm_file, vma); ++ revert_creds(oldcred); ++ ++ shiftfs_file_accessed(file); ++ ++ if (ret) { ++ /* ++ * Drop refcount from new vm_file value and restore original ++ * vm_file value ++ */ ++ vma->vm_file = file; ++ fput(realfile); ++ } else { ++ /* Drop refcount from previous vm_file value */ ++ fput(file); ++ } ++ ++ return ret; ++} ++ ++static long shiftfs_fallocate(struct file *file, int mode, loff_t offset, ++ loff_t len) ++{ ++ struct inode *inode = file_inode(file); ++ struct inode *loweri = inode->i_private; ++ struct fd lowerfd; ++ const struct cred *oldcred; ++ int ret; ++ ++ ret = shiftfs_real_fdget(file, &lowerfd); ++ if (ret) ++ return ret; ++ ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); ++ ret = vfs_fallocate(lowerfd.file, mode, offset, len); ++ revert_creds(oldcred); ++ ++ /* Update size */ ++ shiftfs_copyattr(loweri, inode); ++ ++ fdput(lowerfd); ++ return ret; ++} ++ ++static int shiftfs_fadvise(struct file *file, loff_t offset, loff_t len, ++ int advice) ++{ ++ struct fd lowerfd; ++ const struct cred *oldcred; ++ int ret; ++ ++ ret = shiftfs_real_fdget(file, &lowerfd); ++ if (ret) ++ return ret; ++ ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); ++ ret = vfs_fadvise(lowerfd.file, offset, len, advice); ++ revert_creds(oldcred); ++ ++ fdput(lowerfd); ++ return ret; ++} ++ ++static int shiftfs_override_ioctl_creds(int cmd, const struct super_block *sb, ++ const struct cred **oldcred, ++ struct cred **newcred) ++{ ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info; ++ kuid_t fsuid = current_fsuid(); ++ kgid_t fsgid = current_fsgid(); ++ ++ *oldcred = shiftfs_override_creds(sb); ++ ++ *newcred = prepare_creds(); ++ if (!*newcred) { ++ revert_creds(*oldcred); ++ return -ENOMEM; ++ } ++ ++ (*newcred)->fsuid = shift_kuid(sb->s_user_ns, sbinfo->userns, fsuid); ++ (*newcred)->fsgid = shift_kgid(sb->s_user_ns, sbinfo->userns, fsgid); ++ ++ /* clear all caps to prevent bypassing capable() checks */ ++ cap_clear((*newcred)->cap_bset); ++ cap_clear((*newcred)->cap_effective); ++ cap_clear((*newcred)->cap_inheritable); ++ cap_clear((*newcred)->cap_permitted); ++ ++ if (cmd == BTRFS_IOC_SNAP_DESTROY) { ++ kuid_t kuid_root = make_kuid(sb->s_user_ns, 0); ++ /* ++ * Allow the root user in the container to remove subvolumes ++ * from other users. ++ */ ++ if (uid_valid(kuid_root) && uid_eq(fsuid, kuid_root)) ++ cap_raise((*newcred)->cap_effective, CAP_DAC_OVERRIDE); ++ } ++ ++ put_cred(override_creds(*newcred)); ++ return 0; ++} ++ ++static inline void shiftfs_revert_ioctl_creds(const struct cred *oldcred, ++ struct cred *newcred) ++{ ++ return shiftfs_revert_object_creds(oldcred, newcred); ++} ++ ++static inline bool is_btrfs_snap_ioctl(int cmd) ++{ ++ if ((cmd == BTRFS_IOC_SNAP_CREATE) || (cmd == BTRFS_IOC_SNAP_CREATE_V2)) ++ return true; ++ ++ return false; ++} ++ ++static int shiftfs_btrfs_ioctl_fd_restore(int cmd, int fd, void __user *arg, ++ struct btrfs_ioctl_vol_args *v1, ++ struct btrfs_ioctl_vol_args_v2 *v2) ++{ ++ int ret; ++ ++ if (!is_btrfs_snap_ioctl(cmd)) ++ return 0; ++ ++ if (cmd == BTRFS_IOC_SNAP_CREATE) ++ ret = copy_to_user(arg, v1, sizeof(*v1)); ++ else ++ ret = copy_to_user(arg, v2, sizeof(*v2)); ++ ++ close_fd(fd); ++ kfree(v1); ++ kfree(v2); ++ ++ return ret ? -EFAULT: 0; ++} ++ ++static int shiftfs_btrfs_ioctl_fd_replace(int cmd, void __user *arg, ++ struct btrfs_ioctl_vol_args **b1, ++ struct btrfs_ioctl_vol_args_v2 **b2, ++ int *newfd) ++{ ++ int oldfd, ret; ++ struct fd src; ++ struct fd lfd = {}; ++ struct btrfs_ioctl_vol_args *v1 = NULL; ++ struct btrfs_ioctl_vol_args_v2 *v2 = NULL; ++ ++ *b1 = NULL; ++ *b2 = NULL; ++ ++ if (!is_btrfs_snap_ioctl(cmd)) ++ return 0; ++ ++ if (cmd == BTRFS_IOC_SNAP_CREATE) { ++ v1 = memdup_user(arg, sizeof(*v1)); ++ if (IS_ERR(v1)) ++ return PTR_ERR(v1); ++ oldfd = v1->fd; ++ } else { ++ v2 = memdup_user(arg, sizeof(*v2)); ++ if (IS_ERR(v2)) ++ return PTR_ERR(v2); ++ oldfd = v2->fd; ++ } ++ ++ src = fdget(oldfd); ++ if (!src.file) { ++ ret = -EINVAL; ++ goto err_free; ++ } ++ ++ ret = shiftfs_real_fdget(src.file, &lfd); ++ if (ret) { ++ fdput(src); ++ goto err_free; ++ } ++ ++ /* ++ * shiftfs_real_fdget() does not take a reference to lfd.file, so ++ * take a reference here to offset the one which will be put by ++ * close_fd(), and make sure that reference is put on fdput(lfd). ++ */ ++ get_file(lfd.file); ++ lfd.flags |= FDPUT_FPUT; ++ fdput(src); ++ ++ *newfd = get_unused_fd_flags(lfd.file->f_flags); ++ if (*newfd < 0) { ++ fdput(lfd); ++ ret = *newfd; ++ goto err_free; ++ } ++ ++ fd_install(*newfd, lfd.file); ++ ++ if (cmd == BTRFS_IOC_SNAP_CREATE) { ++ v1->fd = *newfd; ++ ret = copy_to_user(arg, v1, sizeof(*v1)); ++ v1->fd = oldfd; ++ } else { ++ v2->fd = *newfd; ++ ret = copy_to_user(arg, v2, sizeof(*v2)); ++ v2->fd = oldfd; ++ } ++ ++ if (!ret) { ++ *b1 = v1; ++ *b2 = v2; ++ } else { ++ shiftfs_btrfs_ioctl_fd_restore(cmd, *newfd, arg, v1, v2); ++ ret = -EFAULT; ++ } ++ ++ return ret; ++ ++err_free: ++ kfree(v1); ++ kfree(v2); ++ ++ return ret; ++} ++ ++static long shiftfs_real_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ struct fd lowerfd; ++ struct cred *newcred; ++ const struct cred *oldcred; ++ int newfd = -EBADF; ++ long err = 0, ret = 0; ++ void __user *argp = (void __user *)arg; ++ struct super_block *sb = file->f_path.dentry->d_sb; ++ struct btrfs_ioctl_vol_args *btrfs_v1 = NULL; ++ struct btrfs_ioctl_vol_args_v2 *btrfs_v2 = NULL; ++ ++ ret = shiftfs_btrfs_ioctl_fd_replace(cmd, argp, &btrfs_v1, &btrfs_v2, ++ &newfd); ++ if (ret < 0) ++ return ret; ++ ++ ret = shiftfs_real_fdget(file, &lowerfd); ++ if (ret) ++ goto out_restore; ++ ++ ret = shiftfs_override_ioctl_creds(cmd, sb, &oldcred, &newcred); ++ if (ret) ++ goto out_fdput; ++ ++ ret = vfs_ioctl(lowerfd.file, cmd, arg); ++ ++ shiftfs_revert_ioctl_creds(oldcred, newcred); ++ ++ shiftfs_copyattr(file_inode(lowerfd.file), file_inode(file)); ++ shiftfs_copyflags(file_inode(lowerfd.file), file_inode(file)); ++ ++out_fdput: ++ fdput(lowerfd); ++ ++out_restore: ++ err = shiftfs_btrfs_ioctl_fd_restore(cmd, newfd, argp, ++ btrfs_v1, btrfs_v2); ++ if (!ret) ++ ret = err; ++ ++ return ret; ++} ++ ++static bool in_ioctl_whitelist(int flag, unsigned long arg) ++{ ++ void __user *argp = (void __user *)arg; ++ u64 flags = 0; ++ ++ switch (flag) { ++ case BTRFS_IOC_FS_INFO: ++ return true; ++ case BTRFS_IOC_SNAP_CREATE: ++ return true; ++ case BTRFS_IOC_SNAP_CREATE_V2: ++ return true; ++ case BTRFS_IOC_SUBVOL_CREATE: ++ return true; ++ case BTRFS_IOC_SUBVOL_CREATE_V2: ++ return true; ++ case BTRFS_IOC_SUBVOL_GETFLAGS: ++ return true; ++ case BTRFS_IOC_SUBVOL_SETFLAGS: ++ if (copy_from_user(&flags, argp, sizeof(flags))) ++ return false; ++ ++ if (flags & ~BTRFS_SUBVOL_RDONLY) ++ return false; ++ ++ return true; ++ case BTRFS_IOC_SNAP_DESTROY: ++ return true; ++ } ++ ++ return false; ++} ++ ++static long shiftfs_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ switch (cmd) { ++ case FS_IOC_GETVERSION: ++ /* fall through */ ++ case FS_IOC_GETFLAGS: ++ /* fall through */ ++ case FS_IOC_SETFLAGS: ++ break; ++ default: ++ if (!in_ioctl_whitelist(cmd, arg) || ++ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info)) ++ return -ENOTTY; ++ } ++ ++ return shiftfs_real_ioctl(file, cmd, arg); ++} ++ ++static long shiftfs_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ switch (cmd) { ++ case FS_IOC32_GETVERSION: ++ /* fall through */ ++ case FS_IOC32_GETFLAGS: ++ /* fall through */ ++ case FS_IOC32_SETFLAGS: ++ break; ++ default: ++ if (!in_ioctl_whitelist(cmd, arg) || ++ !shiftfs_passthrough_ioctls(file->f_path.dentry->d_sb->s_fs_info)) ++ return -ENOIOCTLCMD; ++ } ++ ++ return shiftfs_real_ioctl(file, cmd, arg); ++} ++ ++enum shiftfs_copyop { ++ SHIFTFS_COPY, ++ SHIFTFS_CLONE, ++ SHIFTFS_DEDUPE, ++}; ++ ++static ssize_t shiftfs_copyfile(struct file *file_in, loff_t pos_in, ++ struct file *file_out, loff_t pos_out, u64 len, ++ unsigned int flags, enum shiftfs_copyop op) ++{ ++ ssize_t ret; ++ struct fd real_in, real_out; ++ const struct cred *oldcred; ++ struct inode *inode_out = file_inode(file_out); ++ struct inode *loweri = inode_out->i_private; ++ ++ ret = shiftfs_real_fdget(file_out, &real_out); ++ if (ret) ++ return ret; ++ ++ ret = shiftfs_real_fdget(file_in, &real_in); ++ if (ret) { ++ fdput(real_out); ++ return ret; ++ } ++ ++ oldcred = shiftfs_override_creds(inode_out->i_sb); ++ switch (op) { ++ case SHIFTFS_COPY: ++ ret = vfs_copy_file_range(real_in.file, pos_in, real_out.file, ++ pos_out, len, flags); ++ break; ++ ++ case SHIFTFS_CLONE: ++ ret = vfs_clone_file_range(real_in.file, pos_in, real_out.file, ++ pos_out, len, flags); ++ break; ++ ++ case SHIFTFS_DEDUPE: ++ ret = vfs_dedupe_file_range_one(real_in.file, pos_in, ++ real_out.file, pos_out, len, ++ flags); ++ break; ++ } ++ revert_creds(oldcred); ++ ++ /* Update size */ ++ shiftfs_copyattr(loweri, inode_out); ++ ++ fdput(real_in); ++ fdput(real_out); ++ ++ return ret; ++} ++ ++static ssize_t shiftfs_copy_file_range(struct file *file_in, loff_t pos_in, ++ struct file *file_out, loff_t pos_out, ++ size_t len, unsigned int flags) ++{ ++ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, flags, ++ SHIFTFS_COPY); ++} ++ ++static loff_t shiftfs_remap_file_range(struct file *file_in, loff_t pos_in, ++ struct file *file_out, loff_t pos_out, ++ loff_t len, unsigned int remap_flags) ++{ ++ enum shiftfs_copyop op; ++ ++ if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) ++ return -EINVAL; ++ ++ if (remap_flags & REMAP_FILE_DEDUP) ++ op = SHIFTFS_DEDUPE; ++ else ++ op = SHIFTFS_CLONE; ++ ++ return shiftfs_copyfile(file_in, pos_in, file_out, pos_out, len, ++ remap_flags, op); ++} ++ ++static int shiftfs_iterate_shared(struct file *file, struct dir_context *ctx) ++{ ++ const struct cred *oldcred; ++ int err = -ENOTDIR; ++ struct file *realfile = file->private_data; ++ ++ oldcred = shiftfs_override_creds(file->f_path.dentry->d_sb); ++ err = iterate_dir(realfile, ctx); ++ revert_creds(oldcred); ++ ++ return err; ++} ++ ++const struct file_operations shiftfs_file_operations = { ++ .open = shiftfs_open, ++ .release = shiftfs_release, ++ .llseek = shiftfs_file_llseek, ++ .read_iter = shiftfs_read_iter, ++ .write_iter = shiftfs_write_iter, ++ .fsync = shiftfs_fsync, ++ .mmap = shiftfs_mmap, ++ .fallocate = shiftfs_fallocate, ++ .fadvise = shiftfs_fadvise, ++ .unlocked_ioctl = shiftfs_ioctl, ++ .compat_ioctl = shiftfs_compat_ioctl, ++ .copy_file_range = shiftfs_copy_file_range, ++ .remap_file_range = shiftfs_remap_file_range, ++ .splice_read = generic_file_splice_read, ++ .splice_write = iter_file_splice_write, ++}; ++ ++const struct file_operations shiftfs_dir_operations = { ++ .open = shiftfs_dir_open, ++ .release = shiftfs_dir_release, ++ .compat_ioctl = shiftfs_compat_ioctl, ++ .fsync = shiftfs_fsync, ++ .iterate_shared = shiftfs_iterate_shared, ++ .llseek = shiftfs_dir_llseek, ++ .read = generic_read_dir, ++ .unlocked_ioctl = shiftfs_ioctl, ++}; ++ ++static const struct address_space_operations shiftfs_aops = { ++ /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ ++ .direct_IO = noop_direct_IO, ++}; ++ ++static void shiftfs_fill_inode(struct inode *inode, unsigned long ino, ++ umode_t mode, dev_t dev, struct dentry *dentry) ++{ ++ struct inode *loweri; ++ ++ inode->i_ino = ino; ++ inode->i_flags |= S_NOCMTIME; ++ ++ mode &= S_IFMT; ++ inode->i_mode = mode; ++ switch (mode & S_IFMT) { ++ case S_IFDIR: ++ inode->i_op = &shiftfs_dir_inode_operations; ++ inode->i_fop = &shiftfs_dir_operations; ++ break; ++ case S_IFLNK: ++ inode->i_op = &shiftfs_symlink_inode_operations; ++ break; ++ case S_IFREG: ++ inode->i_op = &shiftfs_file_inode_operations; ++ inode->i_fop = &shiftfs_file_operations; ++ inode->i_mapping->a_ops = &shiftfs_aops; ++ break; ++ default: ++ inode->i_op = &shiftfs_special_inode_operations; ++ init_special_inode(inode, mode, dev); ++ break; ++ } ++ ++ if (!dentry) ++ return; ++ ++ loweri = dentry->d_inode; ++ if (!loweri->i_op->get_link) ++ inode->i_opflags |= IOP_NOFOLLOW; ++ ++ shiftfs_copyattr(loweri, inode); ++ shiftfs_copyflags(loweri, inode); ++ set_nlink(inode, loweri->i_nlink); ++} ++ ++static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info; ++ ++ if (sbinfo->mark) ++ seq_show_option(m, "mark", NULL); ++ ++ if (sbinfo->passthrough) ++ seq_printf(m, ",passthrough=%u", sbinfo->passthrough); ++ ++ return 0; ++} ++ ++static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info; ++ struct dentry *root = sb->s_root; ++ struct dentry *realroot = root->d_fsdata; ++ struct path realpath = { .mnt = sbinfo->mnt, .dentry = realroot }; ++ int err; ++ ++ err = vfs_statfs(&realpath, buf); ++ if (err) ++ return err; ++ ++ if (!shiftfs_passthrough_statfs(sbinfo)) ++ buf->f_type = sb->s_magic; ++ ++ return 0; ++} ++ ++static void shiftfs_evict_inode(struct inode *inode) ++{ ++ struct inode *loweri = inode->i_private; ++ ++ clear_inode(inode); ++ ++ if (loweri) ++ iput(loweri); ++} ++ ++static void shiftfs_put_super(struct super_block *sb) ++{ ++ struct shiftfs_super_info *sbinfo = sb->s_fs_info; ++ ++ if (sbinfo) { ++ mntput(sbinfo->mnt); ++ put_cred(sbinfo->creator_cred); ++ kfree(sbinfo); ++ } ++} ++ ++static const struct xattr_handler shiftfs_xattr_handler = { ++ .prefix = "", ++ .get = shiftfs_xattr_get, ++ .set = shiftfs_xattr_set, ++}; ++ ++const struct xattr_handler *shiftfs_xattr_handlers[] = { ++#ifdef CONFIG_SHIFT_FS_POSIX_ACL ++ &shiftfs_posix_acl_access_xattr_handler, ++ &shiftfs_posix_acl_default_xattr_handler, ++#endif ++ &shiftfs_xattr_handler, ++ NULL ++}; ++ ++static inline bool passthrough_is_subset(int old_flags, int new_flags) ++{ ++ if ((new_flags & old_flags) != new_flags) ++ return false; ++ ++ return true; ++} ++ ++static int shiftfs_super_check_flags(unsigned long old_flags, ++ unsigned long new_flags) ++{ ++ if ((old_flags & SB_RDONLY) && !(new_flags & SB_RDONLY)) ++ return -EPERM; ++ ++ if ((old_flags & SB_NOSUID) && !(new_flags & SB_NOSUID)) ++ return -EPERM; ++ ++ if ((old_flags & SB_NODEV) && !(new_flags & SB_NODEV)) ++ return -EPERM; ++ ++ if ((old_flags & SB_NOEXEC) && !(new_flags & SB_NOEXEC)) ++ return -EPERM; ++ ++ if ((old_flags & SB_NOATIME) && !(new_flags & SB_NOATIME)) ++ return -EPERM; ++ ++ if ((old_flags & SB_NODIRATIME) && !(new_flags & SB_NODIRATIME)) ++ return -EPERM; ++ ++ if (!(old_flags & SB_POSIXACL) && (new_flags & SB_POSIXACL)) ++ return -EPERM; ++ ++ return 0; ++} ++ ++static int shiftfs_remount(struct super_block *sb, int *flags, char *data) ++{ ++ int err; ++ struct shiftfs_super_info new = {}; ++ struct shiftfs_super_info *info = sb->s_fs_info; ++ ++ err = shiftfs_parse_mount_options(&new, data); ++ if (err) ++ return err; ++ ++ err = shiftfs_super_check_flags(sb->s_flags, *flags); ++ if (err) ++ return err; ++ ++ /* Mark mount option cannot be changed. */ ++ if (info->mark || (info->mark != new.mark)) ++ return -EPERM; ++ ++ if (info->passthrough != new.passthrough) { ++ /* Don't allow exceeding passthrough options of mark mount. */ ++ if (!passthrough_is_subset(info->passthrough_mark, ++ info->passthrough)) ++ return -EPERM; ++ ++ info->passthrough = new.passthrough; ++ } ++ ++ return 0; ++} ++ ++static const struct super_operations shiftfs_super_ops = { ++ .put_super = shiftfs_put_super, ++ .show_options = shiftfs_show_options, ++ .statfs = shiftfs_statfs, ++ .remount_fs = shiftfs_remount, ++ .evict_inode = shiftfs_evict_inode, ++}; ++ ++struct shiftfs_data { ++ void *data; ++ const char *path; ++}; ++ ++static void shiftfs_super_force_flags(struct super_block *sb, ++ unsigned long lower_flags) ++{ ++ sb->s_flags |= lower_flags & (SB_RDONLY | SB_NOSUID | SB_NODEV | ++ SB_NOEXEC | SB_NOATIME | SB_NODIRATIME); ++ ++ if (!(lower_flags & SB_POSIXACL)) ++ sb->s_flags &= ~SB_POSIXACL; ++} ++ ++static int shiftfs_fill_super(struct super_block *sb, void *raw_data, ++ int silent) ++{ ++ int err; ++ struct path path = {}; ++ struct shiftfs_super_info *sbinfo_mp; ++ char *name = NULL; ++ struct inode *inode = NULL; ++ struct dentry *dentry = NULL; ++ struct shiftfs_data *data = raw_data; ++ struct shiftfs_super_info *sbinfo = NULL; ++ ++ if (!data->path) ++ return -EINVAL; ++ ++ sb->s_fs_info = kzalloc(sizeof(*sbinfo), GFP_KERNEL); ++ if (!sb->s_fs_info) ++ return -ENOMEM; ++ sbinfo = sb->s_fs_info; ++ ++ err = shiftfs_parse_mount_options(sbinfo, data->data); ++ if (err) ++ return err; ++ ++ /* to mount a mark, must be userns admin */ ++ if (!sbinfo->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ name = kstrdup(data->path, GFP_KERNEL); ++ if (!name) ++ return -ENOMEM; ++ ++ err = kern_path(name, LOOKUP_FOLLOW, &path); ++ if (err) ++ goto out_free_name; ++ ++ if (!S_ISDIR(path.dentry->d_inode->i_mode)) { ++ err = -ENOTDIR; ++ goto out_put_path; ++ } ++ ++ /* ++ * It makes no sense to handle idmapped layers from shiftfs. ++ * And we didn't support it properly anyway. ++ */ ++ if (is_idmapped_mnt(path.mnt)) { ++ err = -EINVAL; ++ pr_err("idmapped layers are currently not supported\n"); ++ goto out_put_path; ++ } ++ ++ sb->s_flags |= SB_POSIXACL; ++ ++ if (sbinfo->mark) { ++ struct cred *cred_tmp; ++ struct super_block *lower_sb = path.mnt->mnt_sb; ++ ++ /* to mark a mount point, must root wrt lower s_user_ns */ ++ if (!ns_capable(lower_sb->s_user_ns, CAP_SYS_ADMIN)) { ++ err = -EPERM; ++ goto out_put_path; ++ } ++ ++ /* ++ * this part is visible unshifted, so make sure no ++ * executables that could be used to give suid ++ * privileges ++ */ ++ sb->s_iflags = SB_I_NOEXEC; ++ ++ shiftfs_super_force_flags(sb, lower_sb->s_flags); ++ ++ /* ++ * Handle nesting of shiftfs mounts by referring this mark ++ * mount back to the original mark mount. This is more ++ * efficient and alleviates concerns about stack depth. ++ */ ++ if (lower_sb->s_magic == SHIFTFS_MAGIC) { ++ sbinfo_mp = lower_sb->s_fs_info; ++ ++ /* Doesn't make sense to mark a mark mount */ ++ if (sbinfo_mp->mark) { ++ err = -EINVAL; ++ goto out_put_path; ++ } ++ ++ if (!passthrough_is_subset(sbinfo_mp->passthrough, ++ sbinfo->passthrough)) { ++ err = -EPERM; ++ goto out_put_path; ++ } ++ ++ sbinfo->mnt = mntget(sbinfo_mp->mnt); ++ dentry = dget(path.dentry->d_fsdata); ++ /* ++ * Copy up the passthrough mount options from the ++ * parent mark mountpoint. ++ */ ++ sbinfo->passthrough_mark = sbinfo_mp->passthrough_mark; ++ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred); ++ } else { ++ sbinfo->mnt = mntget(path.mnt); ++ dentry = dget(path.dentry); ++ /* ++ * For a new mark passthrough_mark and passthrough ++ * are identical. ++ */ ++ sbinfo->passthrough_mark = sbinfo->passthrough; ++ ++ cred_tmp = prepare_creds(); ++ if (!cred_tmp) { ++ err = -ENOMEM; ++ goto out_put_path; ++ } ++ /* Don't override disk quota limits or use reserved space. */ ++ cap_lower(cred_tmp->cap_effective, CAP_SYS_RESOURCE); ++ sbinfo->creator_cred = cred_tmp; ++ } ++ } else { ++ /* ++ * This leg executes if we're admin capable in the namespace, ++ * so be very careful. ++ */ ++ err = -EPERM; ++ if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC) ++ goto out_put_path; ++ ++ sbinfo_mp = path.dentry->d_sb->s_fs_info; ++ if (!sbinfo_mp->mark) ++ goto out_put_path; ++ ++ if (!passthrough_is_subset(sbinfo_mp->passthrough, ++ sbinfo->passthrough)) ++ goto out_put_path; ++ ++ sbinfo->mnt = mntget(sbinfo_mp->mnt); ++ sbinfo->creator_cred = get_cred(sbinfo_mp->creator_cred); ++ dentry = dget(path.dentry->d_fsdata); ++ /* ++ * Copy up passthrough settings from mark mountpoint so we can ++ * verify when the overlay wants to remount with different ++ * passthrough settings. ++ */ ++ sbinfo->passthrough_mark = sbinfo_mp->passthrough; ++ shiftfs_super_force_flags(sb, path.mnt->mnt_sb->s_flags); ++ } ++ ++ sb->s_stack_depth = dentry->d_sb->s_stack_depth + 1; ++ if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { ++ printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n"); ++ err = -EINVAL; ++ goto out_put_path; ++ } ++ ++ inode = new_inode(sb); ++ if (!inode) { ++ err = -ENOMEM; ++ goto out_put_path; ++ } ++ shiftfs_fill_inode(inode, dentry->d_inode->i_ino, S_IFDIR, 0, dentry); ++ ++ ihold(dentry->d_inode); ++ inode->i_private = dentry->d_inode; ++ ++ sb->s_magic = SHIFTFS_MAGIC; ++ sb->s_maxbytes = MAX_LFS_FILESIZE; ++ sb->s_op = &shiftfs_super_ops; ++ sb->s_xattr = shiftfs_xattr_handlers; ++ sb->s_d_op = &shiftfs_dentry_ops; ++ sb->s_root = d_make_root(inode); ++ if (!sb->s_root) { ++ err = -ENOMEM; ++ goto out_put_path; ++ } ++ ++ sb->s_root->d_fsdata = dentry; ++ sbinfo->userns = get_user_ns(dentry->d_sb->s_user_ns); ++ shiftfs_copyattr(dentry->d_inode, sb->s_root->d_inode); ++ ++ dentry = NULL; ++ err = 0; ++ ++out_put_path: ++ path_put(&path); ++ ++out_free_name: ++ kfree(name); ++ ++ dput(dentry); ++ ++ return err; ++} ++ ++static struct dentry *shiftfs_mount(struct file_system_type *fs_type, ++ int flags, const char *dev_name, void *data) ++{ ++ struct shiftfs_data d = { data, dev_name }; ++ ++ return mount_nodev(fs_type, flags, &d, shiftfs_fill_super); ++} ++ ++static struct file_system_type shiftfs_type = { ++ .owner = THIS_MODULE, ++ .name = "shiftfs", ++ .mount = shiftfs_mount, ++ .kill_sb = kill_anon_super, ++ .fs_flags = FS_USERNS_MOUNT, ++}; ++ ++static int __init shiftfs_init(void) ++{ ++ return register_filesystem(&shiftfs_type); ++} ++ ++static void __exit shiftfs_exit(void) ++{ ++ unregister_filesystem(&shiftfs_type); ++} ++ ++MODULE_ALIAS_FS("shiftfs"); ++MODULE_AUTHOR("James Bottomley"); ++MODULE_AUTHOR("Seth Forshee <seth.forshee@canonical.com>"); ++MODULE_AUTHOR("Christian Brauner <christian.brauner@ubuntu.com>"); ++MODULE_DESCRIPTION("id shifting filesystem"); ++MODULE_LICENSE("GPL v2"); ++module_init(shiftfs_init) ++module_exit(shiftfs_exit) +--- a/include/uapi/linux/magic.h 2023-03-06 11:56:37.299048477 -0500 ++++ b/include/uapi/linux/magic.h 2023-03-06 11:57:37.466014822 -0500 +@@ -102,4 +102,6 @@ + #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ + #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ + ++#define SHIFTFS_MAGIC 0x6a656a62 ++ + #endif /* __LINUX_MAGIC_H__ */ +--- a/fs/Makefile 2023-03-06 11:54:08.154952284 -0500 ++++ b/fs/Makefile 2023-03-06 11:55:21.982000456 -0500 +@@ -137,3 +137,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ + obj-$(CONFIG_EROFS_FS) += erofs/ + obj-$(CONFIG_VBOXSF_FS) += vboxsf/ + obj-$(CONFIG_ZONEFS_FS) += zonefs/ ++obj-$(CONFIG_SHIFT_FS) += shiftfs.o +--- a/fs/Kconfig 2023-03-06 11:52:29.331094254 -0500 ++++ b/fs/Kconfig 2023-03-06 11:52:43.127835240 -0500 +@@ -128,6 +128,24 @@ source "fs/autofs/Kconfig" + source "fs/fuse/Kconfig" + source "fs/overlayfs/Kconfig" + ++config SHIFT_FS ++ tristate "UID/GID shifting overlay filesystem for containers" ++ help ++ This filesystem can overlay any mounted filesystem and shift ++ the uid/gid the files appear at. The idea is that ++ unprivileged containers can use this to mount root volumes ++ using this technique. ++ ++config SHIFT_FS_POSIX_ACL ++ bool "shiftfs POSIX Access Control Lists" ++ depends on SHIFT_FS ++ select FS_POSIX_ACL ++ help ++ POSIX Access Control Lists (ACLs) support permissions for users and ++ groups beyond the owner/group/world scheme. ++ ++ If you don't know what Access Control Lists are, say N. ++ + menu "Caches" + + source "fs/netfs/Kconfig" |