diff --git a/arch/um/include/as-layout.h b/arch/um/include/as-layout.h index cac542d..929d053 100644 --- a/arch/um/include/as-layout.h +++ b/arch/um/include/as-layout.h @@ -23,16 +23,15 @@ */ #ifdef __ASSEMBLY__ -#define _AC(X, Y) (Y) +#define _C(Y) (Y) #else -#define __AC(X, Y) (X (Y)) -#define _AC(X, Y) __AC(X, Y) +#define _C(Y) ((unsigned long) (Y)) #endif -#define STUB_START _AC(, 0x100000) -#define STUB_CODE _AC((unsigned long), STUB_START) -#define STUB_DATA _AC((unsigned long), STUB_CODE + UM_KERN_PAGE_SIZE) -#define STUB_END _AC((unsigned long), STUB_DATA + UM_KERN_PAGE_SIZE) +#define STUB_START _C(0x100000) +#define STUB_CODE STUB_START +#define STUB_DATA (STUB_CODE + UM_KERN_PAGE_SIZE) +#define STUB_END (STUB_DATA + UM_KERN_PAGE_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h index 3c34122..3421c47 100644 --- a/arch/um/include/kern_util.h +++ b/arch/um/include/kern_util.h @@ -20,9 +20,9 @@ extern int kmalloc_ok; extern unsigned long alloc_stack(int order, int atomic); extern void free_stack(unsigned long stack, int order); -extern int do_signal(void); +extern void do_signal(void); extern void copy_sc(struct uml_pt_regs *regs, void *from); -extern void interrupt_end(void); +extern int interrupt_end(void); extern void relay_signal(int sig, struct uml_pt_regs *regs); extern unsigned long segv(struct faultinfo fi, unsigned long ip, diff --git a/arch/um/include/os.h b/arch/um/include/os.h index 32c799e..309dd51 100644 --- a/arch/um/include/os.h +++ b/arch/um/include/os.h @@ -265,6 +265,7 @@ extern int is_skas_winch(int pid, int fd, void *data); extern int start_userspace(unsigned long stub_stack); extern int copy_context_skas0(unsigned long stack, int pid); extern void userspace(struct uml_pt_regs *regs); +extern void vcpu_userspace(struct uml_pt_regs *regs, int mm_fd); extern int map_stub_pages(int fd, unsigned long code, unsigned long data, unsigned long stack); extern void new_thread(void *stack, jmp_buf 
*buf, void (*handler)(void)); diff --git a/arch/um/include/siginfo_segv.h b/arch/um/include/siginfo_segv.h new file mode 100644 index 0000000..c000267 --- /dev/null +++ b/arch/um/include/siginfo_segv.h @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#ifndef __SIGINFO_SIGSEGV_H_ +#define __SIGINFO_SIGSEGV_H_ + +/* + * Provide signal.h, except for replacing siginfo_t with one that has + * the CPU trap number and error code in the SIGSEGV case. + */ + +#include + +/* Rename the signal.h siginfo and siginfo_t out of the way */ +#define siginfo old_siginfo +#define siginfo_t old_siginfo_t + +#include + +#undef siginfo +#undef siginfo_t + +#define __ARCH_SI_TRAPNO +#define __ARCH_SI_ERROR + +/* The new siginfo_t, plus associated definitions */ + +/* + * This is the size (including padding) of the part of the + * struct siginfo that is before the union. + */ +#ifndef __ARCH_SI_PREAMBLE_SIZE +#define __ARCH_SI_PREAMBLE_SIZE (3 * sizeof(int)) +#endif + +#define SI_MAX_SIZE 128 +#ifndef SI_PAD_SIZE +#define SI_PAD_SIZE ((SI_MAX_SIZE - __ARCH_SI_PREAMBLE_SIZE) / sizeof(int)) +#endif + +#ifndef __ARCH_SI_UID_T +#define __ARCH_SI_UID_T uid_t +#endif + +/* + * The default "si_band" type is "long", as specified by POSIX. + * However, some architectures want to override this to "int" + * for historical compatibility reasons, so we allow that. 
+ */ +#ifndef __ARCH_SI_BAND_T +#define __ARCH_SI_BAND_T long +#endif + +#define __user + +typedef struct siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + pid_t _pid; /* sender's pid */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)]; + sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + pid_t _pid; /* sender's pid */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + pid_t _pid; /* which child */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + int _status; /* exit code */ + clock_t _utime; + clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + void __user *_addr; /* faulting insn/memory ref. 
*/ +#ifdef __ARCH_SI_TRAPNO + int _trapno; /* TRAP # which caused the signal */ +#endif +#ifdef __ARCH_SI_ERROR + int _error; /* CPU error code */ +#endif + } _sigfault; + + /* SIGPOLL */ + struct { + __ARCH_SI_BAND_T _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} siginfo_t; + +#ifdef __ARCH_SI_TRAPNO +#define si_trapno _sifields._sigfault._trapno +#endif +#ifdef __ARCH_SI_ERROR +#define si_error _sifields._sigfault._error +#endif + +#undef si_addr +#define si_addr _sifields._sigfault._addr + +#define GET_FAULTINFO_FROM_SI(fi, si) \ + { \ + (fi).cr2 = (unsigned long) (si).si_addr; \ + (fi).error_code = (si).si_error; \ + (fi).trap_no = (si).si_trapno; \ + } + +#endif diff --git a/arch/um/include/skas/mm_id.h b/arch/um/include/skas/mm_id.h index 48dd098..a2e7643 100644 --- a/arch/um/include/skas/mm_id.h +++ b/arch/um/include/skas/mm_id.h @@ -7,7 +7,7 @@ #define __MM_ID_H struct mm_id { - union { + struct { int mm_fd; int pid; } u; diff --git a/arch/um/include/skas/skas.h b/arch/um/include/skas/skas.h index b073f8a..590fcff 100644 --- a/arch/um/include/skas/skas.h +++ b/arch/um/include/skas/skas.h @@ -6,18 +6,128 @@ #ifndef __SKAS_H #define __SKAS_H +#ifndef __KERNEL__ +#include +#include +#endif +#include "uml-config.h" + +#ifdef UML_CONFIG_X86_32 +#define __NR_new_mm 327 +#define __NR_switch_mm 328 +#define __NR_vcpu 329 +#else +#define __NR_new_mm 288 +#define __NR_switch_mm 289 +#define __NR_vcpu 290 +#endif + +#define PTRACE_SWITCH_MM 34 + +#ifndef __ASSEMBLY__ + +#include #include "sysdep/ptrace.h" +#define STUB_ADDR(x) (STUB_CODE + (unsigned long) (x) - \ + (unsigned long) &__syscall_stub_start) + extern int userspace_pid[]; extern int proc_mm, ptrace_faultinfo, ptrace_ldt; extern int skas_needs_stub; +extern int have_switch_mm; +extern int have_ptrace_switch_mm; +extern int have_siginfo_segv; +extern int have_vcpu; +extern int self_mm_fd; + extern int user_thread(unsigned long stack, int flags); extern void 
new_thread_handler(void); extern void handle_syscall(struct uml_pt_regs *regs); -extern int new_mm(unsigned long stack); +extern int make_new_mm(unsigned long stack); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); extern unsigned long current_stub_stack(void); +#ifndef __KERNEL__ +#include +#include +#include "siginfo_segv.h" + +#ifdef UML_CONFIG_X86_32 +#define GDT_ENTRY_TLS_ENTRIES 3 + +struct vcpu_arch { + struct user_desc tls_array[GDT_ENTRY_TLS_ENTRIES]; +}; +#else +struct vcpu_arch { }; +#endif + +struct user_regs { + unsigned long regs[MAX_REG_NR]; +#ifdef UML_CONFIG_X86_32 + struct user_fxsr_struct *fp_state; + struct user_fxsr_struct fpregs; +#else + struct user_i387_struct *fp_state; + struct user_i387_struct fpregs; +#endif +}; + +struct vcpu_user { + enum { VCPU_SYSCALL, VCPU_SIGNAL } event; + struct user_regs regs; + siginfo_t siginfo; + struct vcpu_arch arch; +}; + +static inline long new_mm(void) +{ + int ret = syscall(__NR_new_mm, 0, 0, 0, 0, 0, 0); + + if (ret < 0) + return -errno; + + return ret; +} + +static inline long switch_mm(int mm_fd, struct user_regs *save_regs, + struct user_regs *new_regs, unsigned long ip, + unsigned long sp) +{ + int ret = syscall(__NR_switch_mm, mm_fd, save_regs, new_regs, ip, sp, + 0); + + if (ret < 0) + return -errno; + + return 0; +} + +static inline long vcpu(long mm_fd, struct vcpu_user *vcpu) +{ + int ret = syscall(__NR_vcpu, mm_fd, vcpu, 0, 0, 0, 0); + + if (ret < 0) + return -errno; + + return ret; +} + +static inline int get_thread_area(struct user_desc *u_info) +{ + int ret = syscall(__NR_get_thread_area, u_info, 0, 0, 0, 0, 0); + + if (ret < 0) + return -errno; + + return ret; +} + +#endif + +#endif + #endif diff --git a/arch/um/include/skas_ptrace.h b/arch/um/include/skas_ptrace.h index cd2327d..38ec9fd 100644 --- a/arch/um/include/skas_ptrace.h +++ b/arch/um/include/skas_ptrace.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2001, 2002 Jeff Dike 
(jdike@karaya.com) + * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -7,19 +7,10 @@ #define __SKAS_PTRACE_H #define PTRACE_FAULTINFO 52 -#define PTRACE_SWITCH_MM 55 +#ifndef OLD_PTRACE_SWITCH_MM +#define OLD_PTRACE_SWITCH_MM 55 +#endif #include "sysdep/skas_ptrace.h" #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/include/sysdep-i386/ptrace.h b/arch/um/include/sysdep-i386/ptrace.h index 11c0896..510c80f 100644 --- a/arch/um/include/sysdep-i386/ptrace.h +++ b/arch/um/include/sysdep-i386/ptrace.h @@ -156,7 +156,7 @@ struct syscall_args { } while (0) #define UPT_SET_SYSCALL_RETURN(r, res) \ - REGS_SET_SYSCALL_RETURN((r)->regs, (res)) + REGS_SET_SYSCALL_RETURN((r)->gp, (res)) #define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) diff --git a/arch/um/include/sysdep-i386/ptrace_user.h b/arch/um/include/sysdep-i386/ptrace_user.h index 7565072..9a4892d 100644 --- a/arch/um/include/sysdep-i386/ptrace_user.h +++ b/arch/um/include/sysdep-i386/ptrace_user.h @@ -43,6 +43,8 @@ #define FP_SIZE ((HOST_XFP_SIZE > HOST_FP_SIZE) ? HOST_XFP_SIZE : HOST_FP_SIZE) +#define FP_SIZE ((HOST_XFP_SIZE > HOST_FP_SIZE) ? 
HOST_XFP_SIZE : HOST_FP_SIZE) + #ifndef FRAME_SIZE #define FRAME_SIZE (17) #endif diff --git a/arch/um/include/sysdep-i386/tls.h b/arch/um/include/sysdep-i386/tls.h index 918fd3c..844f0c2 100644 --- a/arch/um/include/sysdep-i386/tls.h +++ b/arch/um/include/sysdep-i386/tls.h @@ -1,7 +1,7 @@ #ifndef _SYSDEP_TLS_H #define _SYSDEP_TLS_H -# ifndef __KERNEL__ +#ifndef __KERNEL__ /* Change name to avoid conflicts with the original one from , which * may be named user_desc (but in 2.4 and in header matching its API was named @@ -19,13 +19,19 @@ typedef struct um_dup_user_desc { unsigned int useable:1; } user_desc_t; -# else /* __KERNEL__ */ +#else /* __KERNEL__ */ -# include +#include typedef struct user_desc user_desc_t; # endif /* __KERNEL__ */ +struct uml_tls_struct { + user_desc_t tls; + unsigned flushed:1; + unsigned present:1; +}; + #define GDT_ENTRY_TLS_MIN_I386 6 #define GDT_ENTRY_TLS_MIN_X86_64 12 diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h index 9ea44d1..18ad3a8 100644 --- a/arch/um/include/sysdep-x86_64/ptrace.h +++ b/arch/um/include/sysdep-x86_64/ptrace.h @@ -225,16 +225,14 @@ struct syscall_args { }) #define UPT_SET_SYSCALL_RETURN(r, res) \ - REGS_SET_SYSCALL_RETURN((r)->regs, (res)) + REGS_SET_SYSCALL_RETURN((r)->gp, (res)) #define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp) -#define UPT_SEGV_IS_FIXABLE(r) REGS_SEGV_IS_FIXABLE(&r->skas) +#define UPT_SEGV_IS_FIXABLE(r) REGS_SEGV_IS_FIXABLE(&(r)->skas) #define UPT_FAULTINFO(r) (&(r)->faultinfo) -static inline void arch_init_registers(int pid) -{ -} +extern void arch_init_registers(int pid); #endif diff --git a/arch/um/include/sysdep-x86_64/ptrace_user.h b/arch/um/include/sysdep-x86_64/ptrace_user.h index 45c0bd8..4e10c60 100644 --- a/arch/um/include/sysdep-x86_64/ptrace_user.h +++ b/arch/um/include/sysdep-x86_64/ptrace_user.h @@ -72,6 +72,8 @@ #define FP_SIZE (HOST_FP_SIZE) +#define FP_SIZE (HOST_FP_SIZE) + #endif /* diff --git a/arch/um/kernel/process.c 
b/arch/um/kernel/process.c index e8cb9ff..7f07ad3 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -111,12 +111,13 @@ void *_switch_to(void *prev, void *next, void *last) } -void interrupt_end(void) +int interrupt_end(void) { if (need_resched()) schedule(); - if (test_tsk_thread_flag(current, TIF_SIGPENDING)) + if (test_thread_flag(TIF_SIGPENDING)) do_signal(); + return current->mm->context.id.u.mm_fd; } void exit_thread(void) @@ -152,7 +153,11 @@ void new_thread_handler(void) if (n == 1) { /* Handle any immediate reschedules or signals */ interrupt_end(); - userspace(&current->thread.regs.regs); + if (have_vcpu) + vcpu_userspace(&current->thread.regs.regs, + current->mm->context.id.u.mm_fd); + else + userspace(&current->thread.regs.regs); } else do_exit(0); } @@ -176,7 +181,11 @@ void fork_handler(void) /* Handle any immediate reschedules or signals */ interrupt_end(); - userspace(&current->thread.regs.regs); + if (have_vcpu) + vcpu_userspace(&current->thread.regs.regs, + current->mm->context.id.u.mm_fd); + else + userspace(&current->thread.regs.regs); } int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 47b57b4..6b6855a 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -192,7 +192,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) } #endif #ifdef CONFIG_PROC_MM - case PTRACE_SWITCH_MM: { + case OLD_PTRACE_SWITCH_MM: { struct mm_struct *old = child->mm; struct mm_struct *new = proc_mm_get_mm(data); @@ -292,3 +292,36 @@ void syscall_trace(struct uml_pt_regs *regs, int entryexit) current->exit_code = 0; } } + +int ptrace_to_pt_regs(struct pt_regs *to, struct user_regs __user *from) +{ + struct user_regs regs; + int rem; + + rem = copy_from_user(&regs, from, sizeof(regs)); + if (rem) + return -EFAULT; + + memcpy(&to->regs.gp, &regs.regs, sizeof(to->regs.gp)); + + return put_fp_registers(userspace_pid[0], + (unsigned long *) &regs.fpregs); +} + +int 
pt_regs_to_ptrace(struct user_regs __user *to, struct pt_regs *from) +{ + struct user_regs regs; + int err; + + err = get_fp_registers(userspace_pid[0], + (unsigned long *) &regs.fpregs); + if (err) + return err; + + memcpy(&regs.regs, &from->regs.gp, sizeof(regs.regs)); + + if(copy_to_user(to, &regs, sizeof(regs))) + return -EFAULT; + + return 0; +} diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 00197d3..a597b5d 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -12,7 +12,7 @@ void (*pm_power_off)(void); static void kill_off_processes(void) { - if (proc_mm) + if (proc_mm || have_switch_mm) /* * FIXME: need to loop over userspace_pids */ diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index b0fce72..b1fcfde 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -85,8 +85,11 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr, return err; } -static int kern_do_signal(struct pt_regs *regs) +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + +void do_signal(void) { + struct pt_regs *regs = &current->thread.regs; struct k_sigaction ka_copy; siginfo_t info; sigset_t *oldset; @@ -98,6 +101,11 @@ static int kern_do_signal(struct pt_regs *regs) oldset = &current->blocked; while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) { + if (test_thread_flag(TIF_VCPU)) { + PT_REGS_SET_SYSCALL_RETURN(regs, unvcpu(regs, &info)); + return; + } + handled_sig = 1; /* Whee! Actually deliver the signal. 
*/ if (!handle_signal(regs, sig, &ka_copy, &info, oldset)) { @@ -150,12 +158,6 @@ static int kern_do_signal(struct pt_regs *regs) clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); } - return handled_sig; -} - -int do_signal(void) -{ - return kern_do_signal(&current->thread.regs); } /* diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 2c8583c..6b19d0a 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -3,8 +3,8 @@ * Licensed under the GPL */ -#include #include +#include #include #include #include "as-layout.h" diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 0cd9a7a..5f4c32e 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -46,6 +46,9 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, return -ENOMEM; } +extern int copy_context_skas4(struct mm_id *id); +extern int get_new_mm(void); + int init_new_context(struct task_struct *task, struct mm_struct *mm) { struct mm_context *from_mm = NULL; @@ -64,13 +67,26 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm) from_mm = &current->mm->context; if (proc_mm) { - ret = new_mm(stack); + ret = make_new_mm(stack); if (ret < 0) { printk(KERN_ERR "init_new_context_skas - " - "new_mm failed, errno = %d\n", ret); + "make_new_mm failed, errno = %d\n", ret); goto out_free; } to_mm->id.u.mm_fd = ret; + } else if (have_switch_mm) { + to_mm->id.u.mm_fd = get_new_mm(); + if (to_mm->id.u.mm_fd < 0) { + ret = to_mm->id.u.mm_fd; + goto out_free; + } + + ret = copy_context_skas4(&to_mm->id); + if (ret < 0) { + os_close_file(to_mm->id.u.mm_fd); + to_mm->id.u.mm_fd = -1; + goto out_free; + } } else { if (from_mm) @@ -167,7 +183,7 @@ void destroy_context(struct mm_struct *mm) { struct mm_context *mmu = &mm->context; - if (proc_mm) + if (proc_mm || have_switch_mm) os_close_file(mmu->id.u.mm_fd); else { /* diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c 
index 2e9852c..ec82db3 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -10,7 +10,7 @@ #include "os.h" #include "skas.h" -int new_mm(unsigned long stack) +int make_new_mm(unsigned long stack) { int fd, err; @@ -55,7 +55,8 @@ int __init start_uml(void) { stack_protections((unsigned long) &cpu0_irqstack); set_sigstack(cpu0_irqstack, THREAD_SIZE); - if (proc_mm) { + + if (!have_vcpu && (proc_mm || have_switch_mm)) { userspace_pid[0] = start_userspace(0); if (userspace_pid[0] < 0) { printf("start_uml - start_userspace returned %d\n", diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c index 4e3b820..c677b8e 100644 --- a/arch/um/kernel/skas/syscall.c +++ b/arch/um/kernel/skas/syscall.c @@ -12,12 +12,19 @@ extern int syscall_table_size; #define NR_syscalls (syscall_table_size / sizeof(void *)) +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + void handle_syscall(struct uml_pt_regs *r) { struct pt_regs *regs = container_of(r, struct pt_regs, regs); long result; int syscall; + if (test_thread_flag(TIF_VCPU)) { + REGS_SET_SYSCALL_RETURN(r->gp, unvcpu(regs, NULL)); + return; + } + syscall_trace(r, 0); /* diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 9cffc62..63c782d 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -1,17 +1,17 @@ /* - * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2000 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ -#include "linux/file.h" -#include "linux/fs.h" -#include "linux/mm.h" -#include "linux/sched.h" -#include "linux/utsname.h" -#include "asm/current.h" -#include "asm/mman.h" -#include "asm/uaccess.h" -#include "asm/unistd.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include long sys_fork(void) { @@ -148,3 +148,21 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]) return ret; } + +extern 
long do_switch_mm(int fd, long __user *save, long __user *new, + unsigned long ip, unsigned long sp, + struct pt_regs *regs); + +long sys_switch_mm(int fd, long __user *save, long __user *new, + unsigned long ip, unsigned long sp) +{ + return do_switch_mm(fd, save, new, ip, sp, &current->thread.regs); +} + +extern long do_vcpu(int mm_fd, struct vcpu_user __user *new, + struct pt_regs *regs); + +long sys_vcpu(int mm_fd, struct vcpu_user __user *new) +{ + return do_vcpu(mm_fd, new, &current->thread.regs); +} diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index a6c1dd1..d00ebbd 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -284,7 +284,9 @@ int __init linux_main(int argc, char **argv) can_do_skas(); - if (proc_mm && ptrace_faultinfo) + if (have_switch_mm) + mode = "SKAS4"; + else if (proc_mm && ptrace_faultinfo) mode = "SKAS3"; else mode = "SKAS0"; diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index 484e68f..73b1dff 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -22,7 +22,7 @@ #include "sysdep/stub.h" #include "uml-config.h" -extern unsigned long batch_syscall_stub, __syscall_stub_start; +extern unsigned long batch_syscall_stub, switch_mm_stub, __syscall_stub_start; extern void wait_stub_done(int pid); @@ -41,34 +41,63 @@ static unsigned long syscall_regs[MAX_REG_NR]; static int __init init_syscall_regs(void) { get_safe_registers(syscall_regs); - syscall_regs[REGS_IP_INDEX] = STUB_CODE + - ((unsigned long) &batch_syscall_stub - - (unsigned long) &__syscall_stub_start); + + syscall_regs[REGS_IP_INDEX] = STUB_ADDR(&batch_syscall_stub); return 0; } __initcall(init_syscall_regs); -extern int proc_mm; +static int syscall_stub_done(unsigned long stack) +{ + unsigned long *syscall, *data, offset; + int ret, n; + + /* + * When the stub stops, we find the following values on the + * beginning of the stack: + * (long) return_value + * (long) offset to failed syscall data (0 if no error) + */ + 
ret = *((unsigned long *) stack); + offset = *((unsigned long *) stack + 1); + if (offset == 0) + return 0; + + data = (unsigned long *)(stack + offset - STUB_DATA); + printk(UM_KERN_ERR "syscall_stub_done : ret = %d, offset = %ld, " + "data = %p\n", ret, offset, data); + syscall = (unsigned long *)((unsigned long)data + data[0]); + printk(UM_KERN_ERR "syscall_stub_done : syscall %ld failed, " + "return value = 0x%x, expected return value = 0x%lx\n", + syscall[0], ret, syscall[7]); + printk(UM_KERN_ERR " syscall parameters: " + "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + syscall[1], syscall[2], syscall[3], + syscall[4], syscall[5], syscall[6]); + for (n = 1; n < data[0]/sizeof(long); n++) { + if (n == 1) + printk(UM_KERN_ERR " additional syscall " + "data:"); + if (n % 4 == 1) + printk("\n" UM_KERN_ERR " "); + printk(UM_KERN_CONT " 0x%lx", data[n]); + } + if (n > 1) + printk("\n"); -int single_count = 0; -int multi_count = 0; -int multi_op_count = 0; + return ret; +} -static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) +static long do_syscall_stub(struct mm_id *mm_idp, void **addr) { - int n, i; - long ret, offset; - unsigned long * data; - unsigned long * syscall; - int err, pid = mm_idp->u.pid; + long ret; + int n, i, err, pid = mm_idp->u.pid; if (proc_mm) /* FIXME: Need to look up userspace_pid by cpu */ pid = userspace_pid[0]; - multi_count++; - n = ptrace_setregs(pid, syscall_regs); if (n < 0) { printk(UM_KERN_ERR "Registers - \n"); @@ -85,52 +114,73 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr) wait_stub_done(pid); - /* - * When the stub stops, we find the following values on the - * beginning of the stack: - * (long )return_value - * (long )offset to failed sycall-data (0, if no error) - */ - ret = *((unsigned long *) mm_idp->stack); - offset = *((unsigned long *) mm_idp->stack + 1); - if (offset) { - data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA); - printk(UM_KERN_ERR "do_syscall_stub : ret = %ld, 
offset = %ld, " - "data = %p\n", ret, offset, data); - syscall = (unsigned long *)((unsigned long)data + data[0]); - printk(UM_KERN_ERR "do_syscall_stub: syscall %ld failed, " - "return value = 0x%lx, expected return value = 0x%lx\n", - syscall[0], ret, syscall[7]); - printk(UM_KERN_ERR " syscall parameters: " - "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", - syscall[1], syscall[2], syscall[3], - syscall[4], syscall[5], syscall[6]); - for (n = 1; n < data[0]/sizeof(long); n++) { - if (n == 1) - printk(UM_KERN_ERR " additional syscall " - "data:"); - if (n % 4 == 1) - printk("\n" UM_KERN_ERR " "); - printk(" 0x%lx", data[n]); - } - if (n > 1) - printk("\n"); - } - else ret = 0; + ret = syscall_stub_done(mm_idp->stack); *addr = check_init_stack(mm_idp, NULL); return ret; } -long run_syscall_stub(struct mm_id * mm_idp, int syscall, +static struct user_regs return_regs; + +long do_syscall_stub_skas4(struct mm_id *mm_idp, void **addr, unsigned long ip, + unsigned long sp) +{ + long ret; + unsigned long *ptr; + int err; + sigset_t sigs, old; + + ptr = (unsigned long *) (mm_idp->stack + UM_KERN_PAGE_SIZE - + sizeof(long)); + *ptr = (unsigned long) &return_regs; + *(ptr - 1) = self_mm_fd; + + sigfillset(&sigs); + sigprocmask(SIG_SETMASK, &sigs, &old); + err = switch_mm(mm_idp->u.mm_fd, &return_regs, NULL, ip, sp); + sigprocmask(SIG_SETMASK, &old, NULL); + + ret = syscall_stub_done(mm_idp->stack); + + *addr = check_init_stack(mm_idp, NULL); + + return ret; +} + +static int flush_syscalls(struct mm_id *mm_idp, void **addr, int extra) +{ + unsigned long *stack = check_init_stack(mm_idp, *addr); + int current, end; + + current = ((unsigned long) stack) & ~UM_KERN_PAGE_MASK; + end = UM_KERN_PAGE_SIZE; + + if (have_switch_mm) + end -= 2 * sizeof(long); + + if (current + (10 + extra) * sizeof(long) < end) + return 0; + + if (have_switch_mm) + return do_syscall_stub_skas4(mm_idp, addr, + STUB_ADDR(&switch_mm_stub), 0); + else + return do_syscall_stub(mm_idp, addr); +} + +long 
run_syscall_stub(struct mm_id *mm_idp, int syscall, unsigned long *args, long expected, void **addr, int done) { - unsigned long *stack = check_init_stack(mm_idp, *addr); + unsigned long *stack; + int ret; - if (done && *addr == NULL) - single_count++; + ret = flush_syscalls(mm_idp, addr, 0); + if (ret) + return ret; + + stack = check_init_stack(mm_idp, *addr); *stack += sizeof(long); stack += *stack / sizeof(long); @@ -144,45 +194,40 @@ long run_syscall_stub(struct mm_id * mm_idp, int syscall, *stack++ = args[5]; *stack++ = expected; *stack = 0; - multi_op_count++; - if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) < - UM_KERN_PAGE_SIZE - 10 * sizeof(long))) { + if (!done) { *addr = stack; return 0; } - return do_syscall_stub(mm_idp, addr); + if (have_switch_mm) + return do_syscall_stub_skas4(mm_idp, addr, + STUB_ADDR(&switch_mm_stub), 0); + else + return do_syscall_stub(mm_idp, addr); + + *addr = stack; + return 0; } -long syscall_stub_data(struct mm_id * mm_idp, - unsigned long *data, int data_count, - void **addr, void **stub_addr) +long syscall_stub_data(struct mm_id *mm_idp, unsigned long *data, + int data_count, void **addr, void **stub_addr) { unsigned long *stack; - int ret = 0; + int ret; - /* - * If *addr still is uninitialized, it *must* contain NULL. - * Thus in this case do_syscall_stub correctly won't be called. 
- */ - if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >= - UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) { - ret = do_syscall_stub(mm_idp, addr); - /* in case of error, don't overwrite data on stack */ - if (ret) - return ret; - } + ret = flush_syscalls(mm_idp, addr, data_count); + if (ret) + return ret; stack = check_init_stack(mm_idp, *addr); - *addr = stack; - - *stack = data_count * sizeof(long); + *stack = data_count; + *addr = stack++; - memcpy(stack + 1, data, data_count * sizeof(long)); + memcpy(stack, data, data_count); - *stub_addr = (void *)(((unsigned long)(stack + 1) & - ~UM_KERN_PAGE_MASK) + STUB_DATA); + *stub_addr = (void *)(((unsigned long) stack & ~UM_KERN_PAGE_MASK) + + STUB_DATA); return 0; } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 1e8cba6..593df24 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -3,6 +3,9 @@ * Licensed under the GPL */ +/* Include this first, before anything else includes */ +#include "siginfo_segv.h" + #include #include #include @@ -96,11 +99,23 @@ bad_wait: extern unsigned long current_stub_stack(void); +#ifndef PTRACE_GETSIGINFO +#define PTRACE_GETSIGINFO 0x4202 +#endif + void get_skas_faultinfo(int pid, struct faultinfo * fi) { + siginfo_t si; int err; - if (ptrace_faultinfo) { + if (have_siginfo_segv) { + err = ptrace(PTRACE_GETSIGINFO, pid, 0, &si); + if (err) + printk(UM_KERN_ERR "PTRACE_GETSIGINFO failed, " + "err = %d\n", errno); + + GET_FAULTINFO_FROM_SI(*fi, si); + } else if (ptrace_faultinfo) { err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); if (err) { printk(UM_KERN_ERR "get_skas_faultinfo - " @@ -113,8 +128,7 @@ void get_skas_faultinfo(int pid, struct faultinfo * fi) memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, sizeof(struct faultinfo) - sizeof(struct ptrace_faultinfo)); - } - else { + } else { unsigned long fpregs[FP_SIZE]; err = get_fp_registers(pid, fpregs); @@ -248,12 +262,9 @@ static int userspace_tramp(void 
*stack) } } } - if (!ptrace_faultinfo && (stack != NULL)) { + if (!ptrace_faultinfo) { struct sigaction sa; - - unsigned long v = STUB_CODE + - (unsigned long) stub_segv_handler - - (unsigned long) &__syscall_stub_start; + unsigned long v = STUB_ADDR(stub_segv_handler); set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE); sigemptyset(&sa.sa_mask); @@ -295,7 +306,7 @@ int start_userspace(unsigned long stub_stack) sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *); flags = CLONE_FILES; - if (proc_mm) + if (proc_mm || have_switch_mm) flags |= CLONE_VM; else flags |= SIGCHLD; @@ -347,6 +358,85 @@ int start_userspace(unsigned long stub_stack) return err; } +#ifdef UML_CONFIG_X86_32 +extern void init_vcpu_tls(struct user_desc *tls); + +static void arch_init_vcpu(struct vcpu_arch *vcpu) +{ + init_vcpu_tls(vcpu->tls_array); +} +#else +static void arch_init_vcpu(struct vcpu_arch *vcpu) +{ +} +#endif + +extern unsigned long fp_regs[FP_SIZE]; + +void vcpu_userspace(struct uml_pt_regs *regs, int mm_fd) +{ + struct vcpu_user vcpu_state; + int err; + + memcpy(&vcpu_state.regs.fpregs, fp_regs, sizeof(fp_regs)); + vcpu_state.regs.fp_state = &vcpu_state.regs.fpregs; + while (1) { + memcpy(&vcpu_state.regs.regs, &regs->gp, + sizeof(vcpu_state.regs.regs)); + arch_init_vcpu(&vcpu_state.arch); + + err = vcpu(mm_fd, &vcpu_state); + if (err) + panic("userspace - could not resume userspace process, " + "errno = %d\n", errno); + + regs->is_user = 1; + memcpy(&regs->gp, &vcpu_state.regs.regs, + sizeof(vcpu_state.regs.regs)); + + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + if (vcpu_state.event == VCPU_SYSCALL) { + UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp); + handle_syscall(regs); + } + else if (vcpu_state.event == VCPU_SIGNAL){ + int sig = vcpu_state.siginfo.si_signo; + switch(sig) { + case SIGSEGV: + GET_FAULTINFO_FROM_SI(regs->faultinfo, + vcpu_state.siginfo); + (*sig_info[SIGSEGV])(SIGSEGV, regs); + break; + case SIGTRAP: + relay_signal(SIGTRAP, regs); + 
break; + case SIGVTALRM: + block_signals(); + (*sig_info[sig])(sig, regs); + unblock_signals(); + break; + case SIGIO: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + block_signals(); + (*sig_info[sig])(sig, regs); + unblock_signals(); + break; + default: + printk(UM_KERN_ERR "userspace - child stopped " + "with signal %d\n", sig); + } + /* Avoid -ERESTARTSYS handling in host */ + if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) + PT_SYSCALL_NR(regs->gp) = -1; + } + + mm_fd = interrupt_end(); + } +} + void userspace(struct uml_pt_regs *regs) { struct itimerval timer; @@ -446,8 +536,14 @@ void userspace(struct uml_pt_regs *regs) "with signal %d\n", sig); fatal_sigsegv(); } - pid = userspace_pid[0]; + + /* + * userspace_pid can change in interrupt_end since + * PTRACE_SWITCH_MM can cause a process to change + * address spaces + */ interrupt_end(); + pid = userspace_pid[0]; /* Avoid -ERESTARTSYS handling in host */ if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET) @@ -462,9 +558,7 @@ static int __init init_thread_regs(void) { get_safe_registers(thread_regs); /* Set parent's instruction pointer to start of clone-stub */ - thread_regs[REGS_IP_INDEX] = STUB_CODE + - (unsigned long) stub_clone_handler - - (unsigned long) &__syscall_stub_start; + thread_regs[REGS_IP_INDEX] = STUB_ADDR(stub_clone_handler); thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE - sizeof(void *); #ifdef __SIGNAL_FRAMESIZE @@ -554,6 +648,56 @@ int copy_context_skas0(unsigned long new_stack, int pid) return err; } +extern unsigned long switch_mm_stub; +extern long task_size; + +static void unmap_new_as(void) +{ + void (*p)(void); + void *addr; + unsigned long stack = (unsigned long) &stack & ~(UM_KERN_PAGE_SIZE - 1); + unsigned long long data_offset, code_offset; + int fd = phys_mapping(to_phys((void *) stack), &data_offset); + + addr = mmap((void *) STUB_DATA, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, + data_offset); + if (addr == 
MAP_FAILED) + panic("Failed to remap stack"); + + fd = phys_mapping(to_phys(&__syscall_stub_start), &code_offset); + addr = mmap((void *) STUB_CODE, UM_KERN_PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, + code_offset); + if (addr == MAP_FAILED) + panic("Failed to remap code"); + + p = (void (*)(void)) (STUB_ADDR(&switch_mm_stub)); + (*p)(); +} + +extern long do_syscall_stub_skas4(struct mm_id *mm_idp, void **addr, + unsigned long ip, unsigned long sp); + +int copy_context_skas4(struct mm_id *id) +{ + void *data = NULL; + int err; + + err = unmap(id, 0, STUB_START, 0, &data); + if (err) + return err; + + if (STUB_END < task_size) { + err = unmap(id, STUB_END, task_size - STUB_END, 0, &data); + if (err) + return err; + } + + return do_syscall_stub_skas4(id, &data, (unsigned long) unmap_new_as, + id->stack + UM_KERN_PAGE_SIZE / 2); +} + /* * This is used only, if stub pages are needed, while proc_mm is * available. Opening /proc/mm creates a new mm_context, which lacks @@ -713,16 +857,24 @@ void reboot_skas(void) void __switch_mm(struct mm_id *mm_idp) { int err; - /* FIXME: need cpu pid in __switch_mm */ + + if (have_vcpu) + return; + if (proc_mm) { - err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + err = ptrace(OLD_PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_idp->u.mm_fd); if (err) { printk(UM_KERN_ERR "__switch_mm - PTRACE_SWITCH_MM " "failed, errno = %d\n", errno); fatal_sigsegv(); } - } - else userspace_pid[0] = mm_idp->u.pid; + } else if (have_ptrace_switch_mm) { + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + mm_idp->u.mm_fd); + if (err) + panic("__switch_mm - PTRACE_SWITCH_MM " + "failed, errno = %d\n", errno); + } else userspace_pid[0] = mm_idp->u.pid; } diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index b616e15..28a7984 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -3,6 +3,9 @@ * Licensed under the GPL */ +/* Include this first, before anything else includes */ +#include 
"siginfo_segv.h" + #include #include #include @@ -23,7 +26,10 @@ #include "mem_user.h" #include "ptrace_user.h" #include "registers.h" +#include "skas.h" #include "skas_ptrace.h" +#include "sysdep/sigcontext.h" +#include "user.h" static int ptrace_child(void) { @@ -142,14 +148,40 @@ static int stop_ptraced_child(int pid, int exitcode, int mustexit) } /* Changed only during early boot */ -int ptrace_faultinfo = 1; -int ptrace_ldt = 1; -int proc_mm = 1; -int skas_needs_stub = 0; +int ptrace_faultinfo; +static int disable_ptrace_faultinfo; + +int ptrace_ldt; +static int disable_ptrace_ldt; + +int proc_mm; +static int disable_proc_mm; + +int have_switch_mm; +static int disable_switch_mm; + +int have_siginfo_segv; +static int disable_siginfo_segv; + +int have_ptrace_switch_mm; +static int disable_ptrace_switch_mm; + +int have_vcpu; +static int disable_vcpu; + +int skas_needs_stub; static int __init skas0_cmd_param(char *str, int* add) { - ptrace_faultinfo = proc_mm = 0; + disable_ptrace_faultinfo = 1; + disable_ptrace_ldt = 1; + disable_proc_mm = 1; + + disable_switch_mm = 1; + disable_siginfo_segv = 1; + disable_ptrace_switch_mm = 1; + disable_vcpu = 1; + return 0; } @@ -159,15 +191,12 @@ static int __init mode_skas0_cmd_param(char *str, int* add) __attribute__((alias("skas0_cmd_param"))); __uml_setup("skas0", skas0_cmd_param, - "skas0\n" - " Disables SKAS3 usage, so that SKAS0 is used, unless \n" - " you specify mode=tt.\n\n"); +"skas0\n" +" Disables SKAS3 and SKAS4 usage, so that SKAS0 is used\n\n"); __uml_setup("mode=skas0", mode_skas0_cmd_param, - "mode=skas0\n" - " Disables SKAS3 usage, so that SKAS0 is used, unless you \n" - " specify mode=tt. 
Note that this was recently added - on \n" - " older kernels you must use simply \"skas0\".\n\n"); +"mode=skas0\n" +" Disables SKAS3 and SKAS4 usage, so that SKAS0 is used.\n\n"); /* Changed only during early boot */ static int force_sysemu_disabled = 0; @@ -362,7 +391,7 @@ void __init os_early_checks(void) static int __init noprocmm_cmd_param(char *str, int* add) { - proc_mm = 0; + disable_proc_mm = 1; return 0; } @@ -374,7 +403,7 @@ __uml_setup("noprocmm", noprocmm_cmd_param, static int __init noptracefaultinfo_cmd_param(char *str, int* add) { - ptrace_faultinfo = 0; + disable_ptrace_faultinfo = 1; return 0; } @@ -386,7 +415,7 @@ __uml_setup("noptracefaultinfo", noptracefaultinfo_cmd_param, static int __init noptraceldt_cmd_param(char *str, int* add) { - ptrace_ldt = 0; + disable_ptrace_ldt = 1; return 0; } @@ -396,7 +425,7 @@ __uml_setup("noptraceldt", noptraceldt_cmd_param, " To support PTRACE_LDT, the host needs to be patched using\n" " the current skas3 patch.\n\n"); -static inline void check_skas3_ptrace_faultinfo(void) +static inline void __init check_skas3_ptrace_faultinfo(void) { struct ptrace_faultinfo fi; int pid, n; @@ -406,23 +435,21 @@ static inline void check_skas3_ptrace_faultinfo(void) n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); if (n < 0) { - ptrace_faultinfo = 0; if (errno == EIO) non_fatal("not found\n"); else perror("not found"); - } + } else if (disable_ptrace_faultinfo) + non_fatal("found but disabled on command line\n"); else { - if (!ptrace_faultinfo) - non_fatal("found but disabled on command line\n"); - else - non_fatal("found\n"); + ptrace_faultinfo = 1; + non_fatal("found\n"); } stop_ptraced_child(pid, 1, 1); } -static inline void check_skas3_ptrace_ldt(void) +static inline void __init check_skas3_ptrace_ldt(void) { #ifdef PTRACE_LDT int pid, n; @@ -442,38 +469,31 @@ static inline void check_skas3_ptrace_ldt(void) else { perror("not found"); } - ptrace_ldt = 0; - } + } else if (disable_ptrace_ldt) + non_fatal("found, but use is 
disabled\n"); else { - if (ptrace_ldt) - non_fatal("found\n"); - else - non_fatal("found, but use is disabled\n"); + ptrace_ldt = 1; + non_fatal("found\n"); } stop_ptraced_child(pid, 1, 1); -#else - /* PTRACE_LDT might be disabled via cmdline option. - * We want to override this, else we might use the stub - * without real need - */ - ptrace_ldt = 1; #endif } -static inline void check_skas3_proc_mm(void) +static inline void __init check_skas3_proc_mm(void) { non_fatal(" - /proc/mm..."); - if (access("/proc/mm", W_OK) < 0) { - proc_mm = 0; + if (access("/proc/mm", W_OK) < 0) perror("not found"); - } - else if (!proc_mm) + else if (disable_proc_mm) non_fatal("found but disabled on command line\n"); - else non_fatal("found\n"); + else { + proc_mm = 1; + non_fatal("found\n"); + } } -void can_do_skas(void) +static void __init can_do_skas3(void) { non_fatal("Checking for the skas3 patch in the host:\n"); @@ -481,8 +501,417 @@ void can_do_skas(void) check_skas3_ptrace_faultinfo(); check_skas3_ptrace_ldt(); - if (!proc_mm || !ptrace_faultinfo || !ptrace_ldt) + if (!proc_mm || (!ptrace_faultinfo && !have_siginfo_segv) || + !ptrace_ldt) + skas_needs_stub = 1; +} + +static void *fault_address; + +static __init int check_fault_info(struct faultinfo *fi) +{ + return (FAULT_ADDRESS(*fi) == (unsigned long) fault_address) && + FAULT_WRITE(*fi) && SEGV_IS_FIXABLE(fi); +} + +static jmp_buf siginfo_buf; + +static void __init segv_handler(int sig, siginfo_t *si, void *foo) +{ + struct faultinfo fi; + int n; + + GET_FAULTINFO_FROM_SI(fi, *si); + n = check_fault_info(&fi) ? 1 : 2; + longjmp(siginfo_buf, n); +} + +static int __init fault(void) +{ + struct sigaction sa, old; + int err, n; + + /* + * The cast is needed because the CPP manipulations of + * siginfo_t resulted in sa_sigaction having an old_siginfo_t + * parameter. 
+ */ + sa.sa_sigaction = (void (*)(int, old_siginfo_t *, void *)) segv_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO | SA_NODEFER; + + err = sigaction(SIGSEGV, &sa, &old); + if (err) + fatal_perror("sigaction"); + + /* + * Provide a guaranteed invalid address by mapping a page into + * a hole in the address space and then unmapping it. + */ + fault_address = mmap(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (fault_address == MAP_FAILED) + fatal_perror("mmap failed"); + + if (munmap(fault_address, UM_KERN_PAGE_SIZE) < 0) + fatal_perror("munmap failed"); + + n = setjmp(siginfo_buf); + if (n == 0) + *((unsigned long *) fault_address) = 0; + + err = sigaction(SIGSEGV, &old, NULL); + + return n; +} + +static int __init nogetsiginfo_cmd_param(char *str, int *add) +{ + disable_siginfo_segv = 1; + return 0; +} + +__uml_setup("nogetsiginfo", nogetsiginfo_cmd_param, +"nogetsiginfo\n" +" Turns off usage of PTRACE_GETSIGINFO to read page fault information\n" +" from a child process, even if the host supports it.\n\n"); + +#ifndef PTRACE_GETSIGINFO +#define PTRACE_GETSIGINFO 0x4202 +#endif + +static int __init check_siginfo(void) +{ + siginfo_t si; + struct faultinfo fi; + int ok, pid, err, status; + + non_fatal("\tFull CPU fault information in siginfo_t ... "); + ok = fault(); + if (ok) + non_fatal("OK\n"); + else { + non_fatal("Failed\n"); + return 0; + } + + non_fatal("\tFull CPU fault information in PTRACE_GETSIGINFO ... 
"); + + pid = fork(); + if (pid < 0) + fatal_perror("fork failed"); + else if (pid == 0) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + fault(); + exit(1); + } + + while (1) { + err = waitpid(pid, &status, WUNTRACED); + if (err < 0) + fatal_perror("wait failed"); + + if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGSEGV)) + break; + } + + err = ptrace(PTRACE_GETSIGINFO, pid, 0, &si); + if (err < 0) + fatal_perror("PTRACE_GETSIGINFO failed"); + + ptrace(PTRACE_KILL, pid, 0, 0); + + GET_FAULTINFO_FROM_SI(fi, si); + ok = check_fault_info(&fi); + if (ok) + non_fatal("OK\n"); + else + non_fatal("Failed\n"); + + if (disable_siginfo_segv) + non_fatal("Extended PTRACE_GETSIGINFO disabled on command " + "line\n"); + else + have_siginfo_segv = 1; + + return ok; +} + +static struct user_regs return_regs; +int self_mm_fd; + +static int switch_mm_works; + +static __init void after_switch(void) +{ + /* + * If we are really in a new address space, setting this to + * zero won't affect the value of 1 already set in the old + * address space. + */ + switch_mm_works = 0; + + switch_mm(self_mm_fd, NULL, &return_regs, 0, 0); +} + +static int __init check_switch_mm(void) +{ + char *mm_stack; + int err, there = -1; + + non_fatal("\t/proc/self/mm ... "); + self_mm_fd = open("/proc/self/mm", O_RDONLY); + if (self_mm_fd < 0) + goto bad; + non_fatal("OK\n"); + + mm_stack = mmap(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mm_stack == MAP_FAILED) + goto bad; + + non_fatal("\tnew_mm ... "); + there = new_mm(); + if (there < 0) + goto bad_unmap; + non_fatal("OK\n"); + + switch_mm_works = 1; + + non_fatal("\tswitching over ... "); + err = switch_mm(there, &return_regs, NULL, (unsigned long) after_switch, + ((unsigned long) &mm_stack[UM_KERN_PAGE_SIZE]) - + sizeof(void *)); + if (err < 0) + goto bad_close; + non_fatal("switched back ... 
"); + if (!switch_mm_works) + goto bad_close; + else + non_fatal("OK\n"); + + munmap(mm_stack, UM_KERN_PAGE_SIZE); + close(there); + + if (disable_switch_mm) + non_fatal("switch_mm support disabled on command line\n"); + else + have_switch_mm = 1; + + return 1; + bad_close: + if (there > 0) + close(there); + bad_unmap: + munmap(mm_stack, UM_KERN_PAGE_SIZE); + bad: + non_fatal("Failed - \n"); + perror(""); + return 0; +} + +static int ptrace_switch_mm_works; + +static int __init after_ptrace_switch(void) +{ + ptrace_switch_mm_works = 1; + exit(0); +} + +static int __init check_ptrace_switch_mm(void) +{ + void *stack; + unsigned long regs[MAX_REG_NR]; + int pid, here, err, status; + + non_fatal("\tPTRACE_SWITCH_MM ... "); + pid = fork(); + if (pid == 0){ + ptrace(PTRACE_TRACEME, 0, 0, 0); + kill(getpid(), SIGSTOP); + + exit(0); + } + else if (pid < 0) + goto bad; + + stack = mmap(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (stack == MAP_FAILED) + goto bad; + + here = open("/proc/self/mm", O_RDONLY); + if (here < 0) + goto bad_unmap; + + err = waitpid(pid, &status, WUNTRACED); + if (err < 0) + goto bad_close; + else if (err != pid) { + non_fatal("waitpid returned %d, expected %d\n", err, pid); + goto bad_close; + } else if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { + non_fatal("waitpid returned status 0x%d\n", status); + goto bad_close; + } + + err = ptrace(PTRACE_GETREGS, pid, 0, regs); + if (err < 0) + goto bad_close; + + regs[REGS_IP_INDEX] = (unsigned long) after_ptrace_switch; + regs[REGS_SP_INDEX] = (unsigned long) stack + UM_KERN_PAGE_SIZE - + sizeof(void *); + + if (ptrace(PTRACE_SETREGS, pid, 0, regs) < 0) + goto bad_close; + + if (ptrace(PTRACE_SWITCH_MM, pid, NULL, here) < 0) + goto bad_close; + + if (ptrace(PTRACE_CONT, pid, NULL, 0) < 0) + goto bad_close; + + err = waitpid(pid, &status, WUNTRACED); + if (err < 0) + goto bad_close; + else if(err != pid) { + non_fatal("waitpid returned %d, 
expected %d\n", err, pid); + goto bad_close; + } else if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) { + non_fatal("waitpid returned status 0x%d\n", status); + goto bad_close; + } + + if (!ptrace_switch_mm_works) + goto bad_close; + else + non_fatal("OK\n"); + + if (disable_ptrace_switch_mm) + non_fatal("PTRACE_SWITCH_MM support disabled on command " + "line\n"); + else + have_ptrace_switch_mm = 1; + + close(here); + munmap(stack, UM_KERN_PAGE_SIZE); + + return 1; + + bad_close: + close(here); + bad_unmap: + munmap(stack, UM_KERN_PAGE_SIZE); + bad: + non_fatal("Failed - \n"); + perror(""); + return 0; +} + +#ifdef UML_CONFIG_X86_32 +extern int host_gdt_entry_tls_min; +extern void host_tls_support(void); + +static __init int init_vcpu_arch(struct vcpu_arch *vcpu){ + struct user_desc *tls = vcpu->tls_array; + int i, err; + + host_tls_support(); + memset(tls, 0, sizeof(vcpu->tls_array)); + for (i = 0; i < ARRAY_SIZE(vcpu->tls_array); i++) { + tls[i].entry_number = host_gdt_entry_tls_min + i; + err = get_thread_area(&tls[i]); + if (err) { + perror("get_thread_area"); + return err; + } + } + return 0; +} +#else +static int init_vcpu_arch(struct vcpu_arch *vcpu){ + return 0; +} +#endif + +static struct vcpu_user vcpu_data; + +static __init int check_vcpu(void) +{ + void *stack; + int err; + + non_fatal("\tvcpu ... 
"); + + stack = mmap(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (stack == MAP_FAILED) + goto bad; + + get_safe_registers(vcpu_data.regs.regs); + vcpu_data.regs.regs[REGS_IP_INDEX] = (unsigned long) ptrace_child; + vcpu_data.regs.regs[REGS_SP_INDEX] = (unsigned long) stack + + UM_KERN_PAGE_SIZE - sizeof(void *); + + if (init_vcpu_arch(&vcpu_data.arch)) + goto bad; + + err = vcpu(-1, &vcpu_data); + munmap(stack, UM_KERN_PAGE_SIZE); + if (err) { + non_fatal("vcpu failed with errno %d\n", err); + goto bad; + } + + if (vcpu_data.event != VCPU_SYSCALL) { + non_fatal("vcpu returned with event = %d\n", vcpu_data.event); + goto bad; + } + + non_fatal("OK\n"); + + if (disable_vcpu) + non_fatal("vcpu support disabled on command line\n"); + else + have_vcpu = 1; + + return 1; + + bad: + non_fatal("Failed\n"); + return 0; +} + +static int __init can_do_skas4(void) +{ + int ret; + + non_fatal("Checking for SKAS4 support in the host:\n"); + + ret = check_switch_mm() && check_ptrace_switch_mm() && check_siginfo() + && check_vcpu(); + if (ret) skas_needs_stub = 1; + + return ret; +} + +void __init can_do_skas(void) +{ + if (!can_do_skas4()) + can_do_skas3(); +} + +int get_new_mm(void) +{ + int err; + + err = new_mm(); + if (err < 0) + err = -errno; + + return err; } int __init parse_iomem(char *str, int *add) diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c index b613473..6dfd56f 100644 --- a/arch/um/os-Linux/sys-i386/registers.c +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -4,10 +4,16 @@ * Licensed under the GPL */ +#include +#include #include +#include +#include +#include #include "kern_constants.h" #include "longjmp.h" #include "user.h" +#include "skas.h" #include "sysdep/ptrace_user.h" int save_fp_registers(int pid, unsigned long *fp_regs) @@ -72,12 +78,32 @@ int put_fp_registers(int pid, unsigned long *regs) return restore_fp_registers(pid, regs); } +extern int host_gdt_entry_tls_min; 
+ +#define GDT_ENTRY_TLS_ENTRIES 3 +#define GDT_ENTRY_TLS_MIN 6 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) + +struct user_desc tls[GDT_ENTRY_TLS_ENTRIES]; + +unsigned long fp_regs[FP_SIZE]; + void arch_init_registers(int pid) { - unsigned long fpx_regs[HOST_XFP_SIZE]; - int err; + struct user_desc *entry; + int err, i; - err = ptrace(PTRACE_GETFPXREGS, pid, 0, fpx_regs); + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) { + entry = &tls[i]; + entry->entry_number = i + GDT_ENTRY_TLS_MIN; + err = get_thread_area(entry); + if (err) { + perror("get_thread_area"); + exit(1); + } + } + + err = ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs); if (!err) return; @@ -87,3 +113,4 @@ void arch_init_registers(int pid) have_fpx_regs = 0; } + diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c index 594d97a..43731fe 100644 --- a/arch/um/os-Linux/sys-x86_64/registers.c +++ b/arch/um/os-Linux/sys-x86_64/registers.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Copyright (C) 2006 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ @@ -10,6 +10,7 @@ #include "kern_constants.h" #include "longjmp.h" #include "user.h" +#include "sysdep/ptrace_user.h" int save_fp_registers(int pid, unsigned long *fp_regs) { @@ -50,3 +51,15 @@ int put_fp_registers(int pid, unsigned long *regs) { return restore_fp_registers(pid, regs); } + +unsigned long fp_regs[FP_SIZE]; + +void arch_init_registers(int pid) +{ + int err; + + err = ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs); + if(err) + panic("arch_init_registers : PTRACE_GETFPREGS failed, " + "errno = %d", errno); +} diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c index a34263e..8642b56 100644 --- a/arch/um/sys-i386/ldt.c +++ b/arch/um/sys-i386/ldt.c @@ -437,7 +437,7 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm) /* * We have a valid from_mm, so we now have to copy the LDT 
of * from_mm to new_mm, because using proc_mm an new mm with - * an empty/default LDT was created in new_mm() + * an empty/default LDT was created in make_new_mm() */ copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, .u = diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c index fd0c25a..68251f2 100644 --- a/arch/um/sys-i386/signal.c +++ b/arch/um/sys-i386/signal.c @@ -164,6 +164,8 @@ static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave, extern int have_fpx_regs; +extern unsigned long fp_regs[FP_SIZE]; + static int copy_sc_from_user(struct pt_regs *regs, struct sigcontext __user *from) { @@ -177,24 +179,12 @@ static int copy_sc_from_user(struct pt_regs *regs, pid = userspace_pid[current_thread_info()->cpu]; copy_sc(®s->regs, &sc); if (have_fpx_regs) { - struct user_fxsr_struct fpx; - - err = copy_from_user(&fpx, &sc.fpstate->_fxsr_env[0], - sizeof(struct user_fxsr_struct)); - if (err) - return 1; + struct user_fxsr_struct *fpx = + (struct user_fxsr_struct *) &fp_regs; - err = convert_fxsr_from_user(&fpx, sc.fpstate); + err = convert_fxsr_from_user(fpx, sc.fpstate); if (err) return 1; - - err = restore_fpx_registers(pid, (unsigned long *) &fpx); - if (err < 0) { - printk(KERN_ERR "copy_sc_from_user - " - "restore_fpx_registers failed, errno = %d\n", - -err); - return 1; - } } else { struct user_i387_struct fp; @@ -250,25 +240,19 @@ static int copy_sc_to_user(struct sigcontext __user *to, pid = userspace_pid[current_thread_info()->cpu]; if (have_fpx_regs) { - struct user_fxsr_struct fpx; - - err = save_fpx_registers(pid, (unsigned long *) &fpx); - if (err < 0){ - printk(KERN_ERR "copy_sc_to_user - save_fpx_registers " - "failed, errno = %d\n", err); - return 1; - } + struct user_fxsr_struct *fpx = + (struct user_fxsr_struct *) &fp_regs; - err = convert_fxsr_to_user(to_fp, &fpx); + err = convert_fxsr_to_user(to_fp, fpx); if (err) return 1; - err |= __put_user(fpx.swd, &to_fp->status); + err |= __put_user(fpx->swd, &to_fp->status); err 
|= __put_user(X86_FXSR_MAGIC, &to_fp->magic); if (err) return 1; - if (copy_to_user(&to_fp->_fxsr_env[0], &fpx, + if (copy_to_user(&to_fp->_fxsr_env[0], fpx, sizeof(struct user_fxsr_struct))) return 1; } diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S index 7699e89..117462e 100644 --- a/arch/um/sys-i386/stub.S +++ b/arch/um/sys-i386/stub.S @@ -1,52 +1,60 @@ #include "uml-config.h" #include "as-layout.h" +#include "skas/skas.h" + +#define PROCESS_LIST \ + /* load pointer to first operation */ \ + mov $(STUB_DATA + 8), %esp; \ +1: \ + /* load length of additional data */ \ + mov 0x0(%esp), %eax; \ + /* if(length == 0) : end of list */ \ + /* write possible 0 to header */ \ + mov %eax, STUB_DATA + 4; \ + cmpl $0, %eax; \ + jz 2f; \ + /* save current pointer */ \ + mov %esp, STUB_DATA + 4; \ + /* skip additional data */ \ + add %eax, %esp; \ + /* load syscall-# */ \ + pop %eax; \ + /* load syscall params */ \ + pop %ebx; \ + pop %ecx; \ + pop %edx; \ + pop %esi; \ + pop %edi; \ + pop %ebp; \ + /* execute syscall */ \ + int $0x80; \ + /* check return value */ \ + pop %ebx; \ + cmp %ebx, %eax; \ + je 1b; \ +2: \ + /* save return value */ \ + mov %eax, STUB_DATA; .globl syscall_stub .section .__syscall_stub, "x" .globl batch_syscall_stub batch_syscall_stub: - /* load pointer to first operation */ - mov $(STUB_DATA+8), %esp - -again: - /* load length of additional data */ - mov 0x0(%esp), %eax - - /* if(length == 0) : end of list */ - /* write possible 0 to header */ - mov %eax, STUB_DATA+4 - cmpl $0, %eax - jz done - - /* save current pointer */ - mov %esp, STUB_DATA+4 - - /* skip additional data */ - add %eax, %esp - - /* load syscall-# */ - pop %eax + PROCESS_LIST + /* stop */ + int3 - /* load syscall params */ - pop %ebx - pop %ecx - pop %edx - pop %esi - pop %edi - pop %ebp + .globl switch_mm_stub +switch_mm_stub: + PROCESS_LIST - /* execute syscall */ + mov $__NR_switch_mm, %eax + mov STUB_DATA + UM_KERN_PAGE_SIZE - 8, %ebx + xor %ecx, %ecx + mov 
STUB_DATA + UM_KERN_PAGE_SIZE - 4, %edx + xor %esi, %esi + xor %edi, %edi int $0x80 - /* check return value */ - pop %ebx - cmp %ebx, %eax - je again - -done: - /* save return value */ - mov %eax, STUB_DATA - - /* stop */ int3 diff --git a/arch/um/sys-i386/tls.c b/arch/um/sys-i386/tls.c index c6c7131..a45d7ab 100644 --- a/arch/um/sys-i386/tls.c +++ b/arch/um/sys-i386/tls.c @@ -6,10 +6,19 @@ #include "linux/percpu.h" #include "linux/sched.h" #include "asm/uaccess.h" +#include +#include +#include "kern.h" #include "os.h" #include "skas.h" #include "sysdep/tls.h" +void copy_tls(struct user_desc *to) +{ + memcpy(to, current->thread.arch.tls_array, + sizeof(current->thread.arch.tls_array)); +} + /* * If needed we can detect when it's uninitialized. * @@ -18,11 +27,14 @@ static int host_supports_tls = -1; int host_gdt_entry_tls_min; -int do_set_thread_area(struct user_desc *info) +static int do_set_thread_area(struct user_desc *info) { int ret; u32 cpu; + if(have_vcpu) + return 0; + cpu = get_cpu(); ret = os_set_thread_area(info, userspace_pid[cpu]); put_cpu(); @@ -300,6 +312,7 @@ int sys_set_thread_area(struct user_desc __user *user_desc) ret = do_set_thread_area(&info); if (ret) return ret; + return set_tls_entry(current, &info, idx, 1); } @@ -366,31 +379,38 @@ out: return ret; } +extern struct user_desc tls[GDT_ENTRY_TLS_ENTRIES]; + /* * This code is really i386-only, but it detects and logs x86_64 GDT indexes * if a 32-bit UML is running on a 64-bit host. 
*/ -static int __init __setup_host_supports_tls(void) +void __init host_tls_support(void) { check_host_supports_tls(&host_supports_tls, &host_gdt_entry_tls_min); if (host_supports_tls) { - printk(KERN_INFO "Host TLS support detected\n"); - printk(KERN_INFO "Detected host type: "); + printf("Host TLS support detected\n"); + printf("Detected host type: "); switch (host_gdt_entry_tls_min) { case GDT_ENTRY_TLS_MIN_I386: - printk(KERN_CONT "i386"); + printf("i386\n"); break; case GDT_ENTRY_TLS_MIN_X86_64: - printk(KERN_CONT "x86_64"); + printf("x86_64\n"); break; } - printk(KERN_CONT " (GDT indexes %d to %d)\n", - host_gdt_entry_tls_min, + printf(" (GDT indexes %d to %d)\n", host_gdt_entry_tls_min, host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES); } else - printk(KERN_ERR " Host TLS support NOT detected! " - "TLS support inside UML will not work\n"); - return 0; + printf("Host TLS support NOT detected! " + "TLS support inside UML will not work\n"); } -__initcall(__setup_host_supports_tls); +void init_vcpu_tls(struct user_desc *to) +{ + struct uml_tls_struct *tls = current->thread.arch.tls_array; + int i; + + for (i = 0; i < ARRAY_SIZE(current->thread.arch.tls_array); i++) + to[i] = tls[i].tls; +} diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c index 1a899a7..1e426f8 100644 --- a/arch/um/sys-x86_64/signal.c +++ b/arch/um/sys-x86_64/signal.c @@ -42,6 +42,8 @@ void copy_sc(struct uml_pt_regs *regs, void *from) #undef GETREG } +static unsigned long fp_regs[HOST_FP_SIZE]; + static int copy_sc_from_user(struct pt_regs *regs, struct sigcontext __user *from, struct _fpstate __user *fpp) @@ -81,13 +83,17 @@ static int copy_sc_from_user(struct pt_regs *regs, if (err) return 1; - err = restore_fp_registers(userspace_pid[current_thread_info()->cpu], - (unsigned long *) &fp); - if (err < 0) { - printk(KERN_ERR "copy_sc_from_user - " - "restore_fp_registers failed, errno = %d\n", - -err); - return 1; + if (have_vcpu) + memcpy(fp_regs, &fp, sizeof(fp_regs)); + 
else { + err = restore_fp_registers(userspace_pid[current_thread_info()->cpu], + (unsigned long *) &fp); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + "restore_fp_registers failed, errno = %d\n", + -err); + return 1; + } } return 0; @@ -143,14 +149,18 @@ static int copy_sc_to_user(struct sigcontext __user *to, if (err) return 1; - err = save_fp_registers(userspace_pid[current_thread_info()->cpu], - (unsigned long *) &fp); - if (err < 0) { - printk(KERN_ERR "copy_sc_from_user - restore_fp_registers " - "failed, errno = %d\n", -err); - return 1; + if (have_vcpu) + memcpy(&fp, fp_regs, sizeof(fp)); + else { + err = save_fp_registers(userspace_pid[current_thread_info()->cpu], + (unsigned long *) &fp); + if (err < 0) { + printk(KERN_ERR "copy_sc_from_user - " + "restore_fp_registers failed, errno = %d\n", + -err); + return 1; + } } - if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct))) return 1; diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S index 5687687..b4043b0 100644 --- a/arch/um/sys-x86_64/stub.S +++ b/arch/um/sys-x86_64/stub.S @@ -1,67 +1,68 @@ #include "uml-config.h" #include "as-layout.h" +#include "skas/skas.h" - .globl syscall_stub -.section .__syscall_stub, "x" -syscall_stub: - syscall - /* We don't have 64-bit constants, so this constructs the address - * we need. 
- */ - movq $(STUB_DATA >> 32), %rbx - salq $32, %rbx - movq $(STUB_DATA & 0xffffffff), %rcx - or %rcx, %rbx - movq %rax, (%rbx) - int3 +#define PROCESS_LIST \ + mov $(STUB_DATA >> 32), %rbx; \ + sal $32, %rbx; \ + mov $(STUB_DATA & 0xffffffff), %rax; \ + or %rax, %rbx; \ + /* load pointer to first operation */ \ + mov %rbx, %rsp; \ + add $0x10, %rsp; \ +1: \ + /* load length of additional data */ \ + mov 0x0(%rsp), %rax; \ + /* if(length == 0) : end of list */ \ + /* write possible 0 to header */ \ + mov %rax, 8(%rbx); \ + cmp $0, %rax; \ + jz 2f; \ + /* save current pointer */ \ + mov %rsp, 8(%rbx); \ + /* skip additional data */ \ + add %rax, %rsp; \ + /* load syscall-# */ \ + pop %rax; \ + /* load syscall params */ \ + pop %rdi; \ + pop %rsi; \ + pop %rdx; \ + pop %r10; \ + pop %r8; \ + pop %r9; \ + /* execute syscall */ \ + syscall; \ + /* check return value */ \ + pop %rcx; \ + cmp %rcx, %rax; \ + je 1b; \ +2: \ + /* save return value */ \ + mov %rax, (%rbx); \ +.section .__syscall_stub, "x" .globl batch_syscall_stub batch_syscall_stub: - mov $(STUB_DATA >> 32), %rbx - sal $32, %rbx - mov $(STUB_DATA & 0xffffffff), %rax - or %rax, %rbx - /* load pointer to first operation */ - mov %rbx, %rsp - add $0x10, %rsp -again: - /* load length of additional data */ - mov 0x0(%rsp), %rax - - /* if(length == 0) : end of list */ - /* write possible 0 to header */ - mov %rax, 8(%rbx) - cmp $0, %rax - jz done - - /* save current pointer */ - mov %rsp, 8(%rbx) - - /* skip additional data */ - add %rax, %rsp - - /* load syscall-# */ - pop %rax + PROCESS_LIST + /* stop */ + int3 - /* load syscall params */ - pop %rdi - pop %rsi - pop %rdx - pop %r10 - pop %r8 - pop %r9 + .globl switch_mm_stub +switch_mm_stub: + PROCESS_LIST - /* execute syscall */ + mov $__NR_switch_mm, %rax + mov $(STUB_DATA >> 32), %rdi + sal $32, %rdi + mov $(STUB_DATA & 0xffffffff + 4096 - 8), %rsi + add %rdi, %rsi + mov (%rsi), %rdx + sub $8, %rsi + mov (%rsi), %rdi + xor %rsi, %rsi + xor %r10, %r10 + xor 
%r8, %r8 syscall - /* check return value */ - pop %rcx - cmp %rcx, %rax - je again - -done: - /* save return value */ - mov %rax, (%rbx) - - /* stop */ int3 diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c index c128eb8..9bb72fc 100644 --- a/arch/um/sys-x86_64/syscall_table.c +++ b/arch/um/sys-x86_64/syscall_table.c @@ -39,6 +39,8 @@ #define stub_rt_sigsuspend sys_rt_sigsuspend #define stub_sigaltstack sys_sigaltstack #define stub_rt_sigreturn sys_rt_sigreturn +#define stub_switch_mm sys_switch_mm +#define stub_vcpu sys_vcpu #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; #undef _ASM_X86_64_UNISTD_H_ diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c index f1199fd..fbbc903 100644 --- a/arch/um/sys-x86_64/syscalls.c +++ b/arch/um/sys-x86_64/syscalls.c @@ -28,61 +28,78 @@ asmlinkage long sys_uname64(struct new_utsname __user * name) long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr) { - unsigned long *ptr = addr, tmp; - long ret; - int pid = task->mm->context.id.u.pid; + long ret = 0; + + if (have_vcpu) { + unsigned long *regs = task->thread.regs.regs.gp; + switch (code) { + case ARCH_SET_FS: + task->thread.arch.fs = (unsigned long) addr; + regs[HOST_FS_BASE] = (unsigned long) addr; + break; + case ARCH_SET_GS: + regs[HOST_GS_BASE] = (unsigned long) addr; + break; + case ARCH_GET_FS: + ret = put_user(regs[HOST_FS_BASE], addr); + break; + case ARCH_GET_GS: + ret = put_user(regs[HOST_GS_BASE], addr); + break; + } + } else { + unsigned long *ptr = addr, tmp; + int pid = userspace_pid[0]; - /* - * With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to - * be safe), we need to call arch_prctl on the host because - * setting %fs may result in something else happening (like a - * GDT or thread.fs being set instead). So, we let the host - * fiddle the registers and thread struct and restore the - * registers afterwards. 
- * - * So, the saved registers are stored to the process (this - * needed because a stub may have been the last thing to run), - * arch_prctl is run on the host, then the registers are read - * back. - */ - switch (code) { - case ARCH_SET_FS: - case ARCH_SET_GS: - ret = restore_registers(pid, ¤t->thread.regs.regs); - if (ret) - return ret; - break; - case ARCH_GET_FS: - case ARCH_GET_GS: /* - * With these two, we read to a local pointer and - * put_user it to the userspace pointer that we were - * given. If addr isn't valid (because it hasn't been - * faulted in or is just bogus), we want put_user to - * fault it in (or return -EFAULT) instead of having - * the host return -EFAULT. + * With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to + * be safe), we need to call arch_prctl on the host because + * setting %fs may result in something else happening (like a + * GDT or thread.fs being set instead). So, we let the host + * fiddle the registers and thread struct and restore the + * registers afterwards. + * + * So, the saved registers are stored to the process (this + * needed because a stub may have been the last thing to run), + * arch_prctl is run on the host, then the registers are read + * back. */ - ptr = &tmp; - } - - ret = os_arch_prctl(pid, code, ptr); - if (ret) - return ret; + switch (code) { + case ARCH_SET_FS: + case ARCH_SET_GS: + restore_registers(pid, ¤t->thread.regs.regs); + break; + case ARCH_GET_FS: + case ARCH_GET_GS: + /* + * With these two, we read to a local pointer and + * put_user it to the userspace pointer that we were + * given. If addr isn't valid (because it hasn't been + * faulted in or is just bogus), we want put_user to + * fault it in (or return -EFAULT) instead of having + * the host return -EFAULT. 
+ */ + ptr = &tmp; + } - switch (code) { - case ARCH_SET_FS: - current->thread.arch.fs = (unsigned long) ptr; - ret = save_registers(pid, &current->thread.regs.regs); - break; - case ARCH_SET_GS: - ret = save_registers(pid, &current->thread.regs.regs); - break; - case ARCH_GET_FS: - ret = put_user(tmp, addr); - break; - case ARCH_GET_GS: - ret = put_user(tmp, addr); - break; + ret = os_arch_prctl(pid, code, ptr); + if (ret) + return ret; + switch (code) { + case ARCH_SET_FS: + current->thread.arch.fs = (unsigned long) ptr; + save_registers(pid, &current->thread.regs.regs); + break; + case ARCH_SET_GS: + save_registers(pid, &current->thread.regs.regs); + break; + case ARCH_GET_FS: + ret = put_user(tmp, addr); + break; + case ARCH_GET_GS: + ret = put_user(tmp, addr); + break; + } } return ret; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 5e7771a..a2a4c1c 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -68,6 +68,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: + err |= __put_user(from->si_trapno, &to->si_trapno); + err |= __put_user(from->si_error, &to->si_error); break; case __SI_CHLD >> 16: err |= __put_user(from->si_utime, &to->si_utime); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 8022d3c..8273782 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -373,6 +373,7 @@ quiet_ni_syscall: PTREGSCALL stub32_vfork, sys_vfork, %rdi PTREGSCALL stub32_iopl, sys_iopl, %rsi PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx + PTREGSCALL stub32_switch_mm, sys_switch_mm, %r9 ENTRY(ia32_ptregs_common) popq %r11 @@ -727,4 +728,6 @@ ia32_sys_call_table: .quad sys32_fallocate .quad compat_sys_timerfd_settime /* 325 */ .quad compat_sys_timerfd_gettime + .quad sys_new_mm + .quad stub32_switch_mm ia32_syscall_end: diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4b87c32..1e2adae 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -371,7 +371,7 @@ ENTRY(system_call) GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + testl $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_VCPU),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c20c9e7..f3f403a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -244,7 +244,7 @@ ENTRY(system_call_after_swapgs) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP|_TIF_VCPU),threadinfo_flags(%rcx) jnz tracesys cmpq $__NR_syscall_max,%rax ja badsys @@ -323,6 +323,12 @@ tracesys: FIXUP_TOP_OF_STACK %rdi movq %rsp,%rdi call syscall_trace_enter + testl %eax, %eax + jz 2f + LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST + jmp int_ret_from_sys_call +2: LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST cmpq $__NR_syscall_max,%rax @@ -425,6 +431,7 @@ END(\label) PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx PTREGSCALL stub_iopl, sys_iopl, %rsi + PTREGSCALL stub_switch_mm, sys_switch_mm, %r9 ENTRY(ptregscall_common) popq %r11 @@ -481,6 +488,23 @@ ENTRY(stub_rt_sigreturn) END(stub_rt_sigreturn) /* + * vcpu is special too + */ +ENTRY(stub_vcpu) + CFI_STARTPROC + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 + SAVE_REST + movq %rsp,%rdx + FIXUP_TOP_OF_STACK %r11 + call sys_vcpu + movq %rax,RAX(%rsp) # fixme, this could be 
done at the higher layer + RESTORE_REST + jmp int_ret_from_sys_call + CFI_ENDPROC +END(stub_vcpu) + +/* * initial frame state for interrupts and exceptions */ .macro _frame ref diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index eb92ccb..44334e2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -307,8 +307,7 @@ static int set_flags(struct task_struct *task, unsigned long value) return 0; } -static int putreg(struct task_struct *child, - unsigned long offset, unsigned long value) +int putreg(struct task_struct *child, unsigned long offset, unsigned long value) { switch (offset) { case offsetof(struct user_regs_struct, cs): @@ -360,7 +359,7 @@ static int putreg(struct task_struct *child, return 0; } -static unsigned long getreg(struct task_struct *task, unsigned long offset) +unsigned long getreg(struct task_struct *task, unsigned long offset) { switch (offset) { case offsetof(struct user_regs_struct, cs): @@ -1036,7 +1035,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) value); \ break -static int putreg32(struct task_struct *child, unsigned regno, u32 value) +int putreg32(struct task_struct *child, unsigned regno, u32 value) { struct pt_regs *regs = task_pt_regs(child); @@ -1101,7 +1100,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) offsetof(struct user_regs_struct, rs)); \ break -static int getreg32(struct task_struct *child, unsigned regno, u32 *val) +int getreg32(struct task_struct *child, unsigned regno, u32 *val) { struct pt_regs *regs = task_pt_regs(child); @@ -1254,6 +1253,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) case PTRACE_SETOPTIONS: case PTRACE_SET_THREAD_AREA: case PTRACE_GET_THREAD_AREA: + case PTRACE_SWITCH_MM: #ifdef X86_BTS case PTRACE_BTS_CONFIG: case PTRACE_BTS_STATUS: @@ -1453,6 +1453,8 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) force_sig_info(SIGTRAP, &info, 
tsk); } +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + /* notification of system call entry/exit * - triggered by current->work.syscall_trace */ @@ -1489,6 +1491,14 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit) goto out; } + if (test_thread_flag(TIF_VCPU)) { + if (entryexit) + return 0; + + regs->ax = unvcpu(regs, NULL); + return 1; + } + if (!(current->ptrace & PT_PTRACED)) goto out; @@ -1533,6 +1543,64 @@ out: return 1; } +int ptrace_to_pt_regs(struct pt_regs *regs, struct __user user_regs *ptrace) +{ + struct user_fxsr_struct *fp; + int i, err; + + if (!access_ok(VERIFY_READ, ptrace, sizeof(*ptrace))) + return -EFAULT; + + for (i = 0; i < FRAME_SIZE; i++) { + unsigned long n; + + if (__get_user(n, &ptrace->regs[i])) + return -EFAULT; + err = putreg(current, i * 4, n); + if (err) + return err; + } + + if (__get_user(fp, &ptrace->fp_state)) + return -EFAULT; + + if (fp == NULL) { + clear_used_math(); + return 0; + } + + set_used_math(); + + return xfpregs_set(current, NULL, 0, sizeof(*fp), NULL, fp); +} + +int pt_regs_to_ptrace(struct __user user_regs *ptrace, struct pt_regs *regs) +{ + int i; + + if (!access_ok(VERIFY_WRITE, ptrace, sizeof(*ptrace))) + return -EFAULT; + + for (i = 0; i < FRAME_SIZE; i++) { + unsigned long n = getreg(current, i * 4); + if (__put_user(n, &ptrace->regs[i])) + return -EFAULT; + } + + if (!used_math()) { + if (__put_user(NULL, &ptrace->fp_state)) + return -EFAULT; + return 0; + } + + if (__put_user(&ptrace->fpregs, &ptrace->fp_state)) + return -EFAULT; + + clear_used_math(); + + return xfpregs_get(current, NULL, 0, sizeof(ptrace->fpregs), NULL, + &ptrace->fpregs); +} #else /* CONFIG_X86_64 */ static void syscall_trace(struct pt_regs *regs) @@ -1558,11 +1626,18 @@ static void syscall_trace(struct pt_regs *regs) } } -asmlinkage void syscall_trace_enter(struct pt_regs *regs) +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) { /* do the 
secure computing check first */ secure_computing(regs->orig_ax); + if (test_thread_flag(TIF_VCPU)) { + regs->ax = unvcpu(regs, NULL); + return 1; + } + if (test_thread_flag(TIF_SYSCALL_TRACE) && (current->ptrace & PT_PTRACED)) syscall_trace(regs); @@ -1580,6 +1655,8 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) regs->dx, regs->r10); } } + + return 0; } asmlinkage void syscall_trace_leave(struct pt_regs *regs) @@ -1593,4 +1670,115 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs) syscall_trace(regs); } +int ptrace_to_pt_regs(struct pt_regs *regs, struct user_regs *ptrace) +{ + struct user_i387_struct *fp; + int i, err; + +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) { + for (i = 0; i < MAX_REG32_NR; i++) { + err = putreg32(current, i * 4, ptrace->u.regs32[i]); + if (err) + return err; + } + + return 0; + } +#endif + for (i = 0; i < MAX_REG_NR; i++){ + if(i * 8 == offsetof(struct user_regs_struct, fs)) + continue; + + err = putreg(current, i * 8, ptrace->u.regs64.regs[i]); + if (err) + return err; + } + + if (__get_user(fp, &ptrace->u.regs64.fp_state)) + return -EFAULT; + + if (fp == NULL) { + clear_used_math(); + return 0; + } + + set_used_math(); + + return xfpregs_set(current, NULL, 0, sizeof(*fp), NULL, fp); +} + +extern int getreg32(struct task_struct *child, unsigned regno, u32 *val); + +int pt_regs_to_ptrace(struct __user user_regs *ptrace, struct pt_regs *regs) +{ + int i, err; + +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) { + if (!access_ok(VERIFY_WRITE, &ptrace->u.regs32, + sizeof(ptrace->u.regs32))) + return -EFAULT; + + for (i = 0; i < ARRAY_SIZE(ptrace->u.regs32); i++) { + u32 n; + + err = getreg32(current, i * 4, &n); + if (err) + return err; + + err = __put_user(n, &ptrace->u.regs32[i]); + if (err) + return err; + } + + return 0; + } +#endif + if (!access_ok(VERIFY_WRITE, &ptrace->u.regs64, + sizeof(ptrace->u.regs64))) + return -EFAULT; + + for (i = 0; i <
ARRAY_SIZE(ptrace->u.regs64.regs); i++) { + unsigned long n = getreg(current, i * 8); + err = __put_user(n, &ptrace->u.regs64.regs[i]); + if (err) + return err; + } + + if (!used_math()) { + if (__put_user(NULL, &ptrace->u.regs64.fp_state)) + return -EFAULT; + return 0; + } + + if (__put_user(&ptrace->u.regs64.fpregs, &ptrace->u.regs64.fp_state)) + return -EFAULT; + + clear_used_math(); + + return xfpregs_get(current, NULL, 0, sizeof(ptrace->u.regs64.fpregs), + NULL, &ptrace->u.regs64.fpregs); +} + +#define RIP_INDEX (128 / sizeof(long)) +#define RSP_INDEX (152 / sizeof(long)) + +unsigned long ptrace_ip(struct user_regs *regs) +{ +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) + return ptrace_ip32(regs->u.regs32); +#endif + return regs->u.regs64.regs[RIP_INDEX]; +} + +unsigned long ptrace_sp(struct user_regs *regs) +{ +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) + return ptrace_sp32(regs->u.regs32); +#endif + return regs->u.regs64.regs[RSP_INDEX]; +} #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 0157a6f..73b5d21 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -573,6 +573,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, return ret; } +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by @@ -603,6 +605,11 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { + if (test_thread_flag(TIF_VCPU)) { + regs->ax = unvcpu(regs, &info); + return; + } + /* Re-enable any watchpoints before delivering the * signal to user space. 
The processor register will * have been cleared if the watchpoint triggered diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index 1c83e51..8978b40 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -407,6 +407,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, return ret; } +extern int unvcpu(struct pt_regs *regs, siginfo_t *siginfo); + /* * Note that 'init' is a special process: it doesn't get signals it doesn't * want to handle. Thus you cannot kill init even with a SIGKILL even by @@ -435,6 +437,11 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { + if (test_thread_flag(TIF_VCPU)) { + regs->ax = unvcpu(regs, &info); + return; + } + /* Re-enable any watchpoints before delivering the * signal to user space. The processor register will * have been cleared if the watchpoint triggered diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index a86d26f..d5d54f6 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -21,6 +21,7 @@ #include #include +#include /* * sys_pipe() is the normal C calling standard for creating @@ -261,3 +262,28 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]) : "0" (__NR_execve),"ri" (filename),"c" (argv), "d" (envp) : "memory"); return __res; } + +extern long do_switch_mm(int fd, struct __user user_regs *save, + struct __user user_regs *new, unsigned long ip, + unsigned long sp, struct pt_regs *regs); + +asmlinkage long sys_switch_mm(struct pt_regs regs) +{ + return do_switch_mm(regs.bx, (struct __user user_regs *) regs.cx, + (struct __user user_regs *) regs.dx, regs.si, + regs.di, &regs); +} + +extern long do_vcpu(int mm_fd, struct vcpu_user __user *new, + struct pt_regs *regs); + +asmlinkage long sys_vcpu(struct pt_regs regs) +{ + int err; + + err = do_vcpu(regs.bx, (struct vcpu_user __user *) regs.cx, &regs); + if (err)
+ return err; + + return regs.ax; +} diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index bd802a5..aab9121 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -251,3 +251,29 @@ asmlinkage long sys_uname(struct new_utsname __user * name) err |= copy_to_user(&name->machine, "i686", 5); return err ? -EFAULT : 0; } + +extern long do_switch_mm(int fd, struct __user user_regs *save, + struct __user user_regs *new, unsigned long ip, + unsigned long sp, struct pt_regs *regs); + +asmlinkage long sys_switch_mm(int fd, struct __user user_regs *save, + struct __user user_regs *new, unsigned long ip, + unsigned long sp, struct pt_regs *regs) +{ + return do_switch_mm(fd, save, new, ip, sp, regs); +} + +extern long do_vcpu(int mm_fd, struct vcpu_user __user *new, + struct pt_regs *regs); + +asmlinkage long sys_vcpu(int mm_fd, struct vcpu_user __user *new, + struct pt_regs *regs) +{ + int err; + + err = do_vcpu(mm_fd, new, regs); + if (err) + return err; + + return regs->ax; +} diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index adff556..5b9803a 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -326,3 +326,6 @@ ENTRY(sys_call_table) .long sys_fallocate .long sys_timerfd_settime /* 325 */ .long sys_timerfd_gettime + .long sys_new_mm + .long sys_switch_mm + .long sys_vcpu diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ec08d83..f6f3990 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -173,6 +173,8 @@ static void force_sig_info_fault(int si_signo, int si_code, info.si_errno = 0; info.si_code = si_code; info.si_addr = (void __user *)address; + info.si_trapno = tsk->thread.trap_no; + info.si_error = tsk->thread.error_code; force_sig_info(si_signo, &info, tsk); } diff --git a/fs/proc/base.c b/fs/proc/base.c index 81d7d14..082f349 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2279,6 +2279,37 @@ static int 
proc_pid_io_accounting(struct task_struct *task, char *buffer) } #endif +static int proc_pid_mm_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = pid_task(proc_pid(inode), PIDTYPE_PID); + struct mm_struct *mm; + + if (task == NULL) + return -ENOENT; + + mm = get_task_mm(task); + if (mm == NULL) + return -EINVAL; + + file->private_data = mm; + return 0; +} + +static int proc_pid_mm_release(struct inode *inode, struct file *file) +{ + struct mm_struct *mm = file->private_data; + + if(mm != NULL) + mmput(mm); + + return 0; +} + +const struct file_operations proc_pid_mm_operations = { + .open = proc_pid_mm_open, + .release = proc_pid_mm_release, +}; + /* * Thread groups */ @@ -2350,6 +2381,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUGO, pid_io_accounting), #endif + REG("mm", S_IRUSR | S_IWUSR, pid_mm), }; static int proc_tgid_base_readdir(struct file * filp, diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index 8786e01..b295e86 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h @@ -82,6 +82,9 @@ typedef struct siginfo { #ifdef __ARCH_SI_TRAPNO int _trapno; /* TRAP # which caused the signal */ #endif +#ifdef __ARCH_SI_ERROR + int _error; /* CPU error code */ +#endif } _sigfault; /* SIGPOLL */ @@ -112,6 +115,9 @@ typedef struct siginfo { #ifdef __ARCH_SI_TRAPNO #define si_trapno _sifields._sigfault._trapno #endif +#ifdef __ARCH_SI_ERROR +#define si_error _sifields._sigfault._error +#endif #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd diff --git a/include/asm-um/desc.h b/include/asm-um/desc.h index 4ec34a5..efbabaf 100644 --- a/include/asm-um/desc.h +++ b/include/asm-um/desc.h @@ -1,6 +1,11 @@ #ifndef __UM_DESC_H #define __UM_DESC_H +#ifdef CONFIG_64BIT +#define LM(info) (info)->lm == 0 +#else +#define LM(info) (1) +#endif /* Taken from asm-i386/desc.h, it's the only thing we need. 
The rest wouldn't * compile, and has never been used. */ #define LDT_empty(info) (\ @@ -11,6 +16,7 @@ (info)->seg_32bit == 0 && \ (info)->limit_in_pages == 0 && \ (info)->seg_not_present == 1 && \ + LM(info) && \ (info)->useable == 0 ) #endif diff --git a/include/asm-um/host_ldt-i386.h b/include/asm-um/host_ldt-i386.h index b27cb0a..e2ad59c 100644 --- a/include/asm-um/host_ldt-i386.h +++ b/include/asm-um/host_ldt-i386.h @@ -1,7 +1,8 @@ #ifndef __ASM_HOST_LDT_I386_H #define __ASM_HOST_LDT_I386_H -#include "asm/arch/ldt.h" +#include +#include /* * macros stolen from include/asm-i386/desc.h @@ -21,14 +22,4 @@ ((info)->useable << 20) | \ 0x7000) -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 ) - #endif diff --git a/include/asm-um/host_ldt-x86_64.h b/include/asm-um/host_ldt-x86_64.h index 74a63f7..585c162 100644 --- a/include/asm-um/host_ldt-x86_64.h +++ b/include/asm-um/host_ldt-x86_64.h @@ -1,7 +1,8 @@ #ifndef __ASM_HOST_LDT_X86_64_H #define __ASM_HOST_LDT_X86_64_H -#include "asm/arch/ldt.h" +#include +#include /* * macros stolen from include/asm-x86_64/desc.h @@ -24,15 +25,4 @@ /* ((info)->lm << 21) | */ \ 0x7000) -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 && \ - (info)->lm == 0) - #endif diff --git a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h index a2b7fe1..d7bca3e 100644 --- a/include/asm-um/processor-i386.h +++ b/include/asm-um/processor-i386.h @@ -1,25 +1,19 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) * 
Licensed under the GPL */ #ifndef __UM_PROCESSOR_I386_H #define __UM_PROCESSOR_I386_H -#include "linux/string.h" -#include "asm/host_ldt.h" -#include "asm/segment.h" - -extern int host_has_cmov; - -/* include faultinfo structure */ +#include +#include +#include +#include #include "sysdep/faultinfo.h" +#include "sysdep/tls.h" -struct uml_tls_struct { - struct user_desc tls; - unsigned flushed:1; - unsigned present:1; -}; +extern int host_has_cmov; struct arch_thread { struct uml_tls_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -38,8 +32,12 @@ struct arch_thread { static inline void arch_flush_thread(struct arch_thread *thread) { + int i; + /* Clear any TLS still hanging */ memset(&thread->tls_array, 0, sizeof(thread->tls_array)); + for (i = 0; i < ARRAY_SIZE(thread->tls_array); i++) + thread->tls_array[i].tls.entry_number = GDT_ENTRY_TLS_MIN + i; } static inline void arch_copy_thread(struct arch_thread *from, diff --git a/include/asm-um/ptrace-generic.h b/include/asm-um/ptrace-generic.h index 6aefcd3..46f8a3f 100644 --- a/include/asm-um/ptrace-generic.h +++ b/include/asm-um/ptrace-generic.h @@ -34,6 +34,15 @@ struct pt_regs { #define instruction_pointer(regs) PT_REGS_IP(regs) +struct user_regs { + unsigned long regs[MAX_REG_NR]; + void *ptr; + unsigned long fpregs[FP_SIZE]; +}; + +extern int ptrace_to_pt_regs(struct pt_regs *to, struct user_regs __user *from); +extern int pt_regs_to_ptrace(struct user_regs __user *to, struct pt_regs *from); + struct task_struct; extern long subarch_ptrace(struct task_struct *child, long request, long addr, diff --git a/include/asm-um/ptrace-i386.h b/include/asm-um/ptrace-i386.h index b2d24c5..8c9c160 100644 --- a/include/asm-um/ptrace-i386.h +++ b/include/asm-um/ptrace-i386.h @@ -8,8 +8,11 @@ #define HOST_AUDIT_ARCH AUDIT_ARCH_I386 -#include "linux/compiler.h" -#include "asm/ptrace-generic.h" +#include "user_constants.h" +#define FP_SIZE ((HOST_XFP_SIZE > HOST_FP_SIZE) ? 
HOST_XFP_SIZE : HOST_FP_SIZE) + +#include +#include #include #include "sysdep/ptrace.h" @@ -40,6 +43,12 @@ #define user_mode(r) UPT_IS_USER(&(r)->regs) +#define pt_regs_ip(r) (r).regs.gp[EIP] +#define pt_regs_sp(r) (r).regs.gp[UESP] + +#define ptrace_ip(r) (r)->regs[EIP] +#define ptrace_sp(r) (r)->regs[UESP] + /* * Forward declaration to avoid including sysdep/tls.h, which causes a * circular include, and compilation failures. diff --git a/include/asm-um/ptrace-x86_64.h b/include/asm-um/ptrace-x86_64.h index 4c47535..21345b5 100644 --- a/include/asm-um/ptrace-x86_64.h +++ b/include/asm-um/ptrace-x86_64.h @@ -7,6 +7,9 @@ #ifndef __UM_PTRACE_X86_64_H #define __UM_PTRACE_X86_64_H +#include "user_constants.h" +#define FP_SIZE (HOST_FP_SIZE) + #include "linux/compiler.h" #include "asm/errno.h" #include "asm/host_ldt.h" @@ -62,6 +65,12 @@ #define PT_FIX_EXEC_STACK(sp) do ; while(0) +#define pt_regs_ip(r) (r).regs.gp[RIP / sizeof(long)] +#define pt_regs_sp(r) (r).regs.gp[RSP / sizeof(long)] + +#define ptrace_ip(r) (r)->regs[RIP / sizeof(long)] +#define ptrace_sp(r) (r)->regs[RSP / sizeof(long)] + #define profile_pc(regs) PT_REGS_IP(regs) static inline int ptrace_get_thread_area(struct task_struct *child, int idx, diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h index 356b83e..6aa19f3 100644 --- a/include/asm-um/thread_info.h +++ b/include/asm-um/thread_info.h @@ -83,6 +83,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 5 #define TIF_SYSCALL_AUDIT 6 #define TIF_RESTORE_SIGMASK 7 +#define TIF_VCPU 8 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -91,5 +92,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_VCPU (1 << TIF_VCPU) #endif diff --git a/include/asm-x86/Kbuild 
b/include/asm-x86/Kbuild index 3b8160a..45f5d02 100644 --- a/include/asm-x86/Kbuild +++ b/include/asm-x86/Kbuild @@ -21,5 +21,6 @@ unifdef-y += posix_types_64.h unifdef-y += ptrace.h unifdef-y += unistd_32.h unifdef-y += unistd_64.h +unifdef-y += user.h unifdef-y += vm86.h unifdef-y += vsyscall.h diff --git a/include/asm-x86/ia32.h b/include/asm-x86/ia32.h index aa97332..b1c76ef 100644 --- a/include/asm-x86/ia32.h +++ b/include/asm-x86/ia32.h @@ -119,6 +119,8 @@ typedef struct compat_siginfo{ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ struct { unsigned int _addr; /* faulting insn/memory ref. */ + int _trapno; /* TRAP # which caused the signal */ + int _error; /* CPU error code */ } _sigfault; /* SIGPOLL */ diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index d9e04b4..046fb58 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -3,7 +3,7 @@ #include /* For __user */ #include - +#include #ifndef __ASSEMBLY__ @@ -55,6 +55,24 @@ struct pt_regs { int ss; }; +#define pt_regs_ip(r) (r).ip +#define pt_regs_sp(r) (r).sp + +struct user_regs { + unsigned long regs[FRAME_SIZE]; + struct user_fxsr_struct *fp_state; + struct user_fxsr_struct fpregs; +}; + +#define ptrace_ip(r) (r)->regs.ip +#define ptrace_sp(r) (r)->regs.sp + +struct pt_regs; +extern int ptrace_to_pt_regs(struct pt_regs *regs, + struct user_regs __user *ptrace); +extern int pt_regs_to_ptrace(struct __user user_regs *ptrace, + struct pt_regs *regs); + #include #include @@ -227,6 +245,46 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); +#ifdef CONFIG_X86_64 +#ifdef CONFIG_IA32_EMULATION +#define MAX_REG32_NR 17 + +#define EIP 12 +#define UESP 15 + +#define ptrace_ip32(regs) (unsigned long) (regs)[EIP] +#define ptrace_sp32(regs) (unsigned long) (regs)[UESP] + +#endif + +#define MAX_REG_NR (sizeof(struct user_regs_struct) / sizeof(long)) + +struct user_regs { 
+ union { + struct { + unsigned long regs[MAX_REG_NR]; + struct user_i387_struct *fp_state; + struct user_i387_struct fpregs; + } regs64; +#ifdef CONFIG_IA32_EMULATION + u32 regs32[MAX_REG32_NR]; +#endif + } u; +}; + +#define pt_regs_ip(regs) (regs).ip +#define pt_regs_sp(regs) (regs).sp + +extern unsigned long ptrace_ip(struct user_regs *regs); +extern unsigned long ptrace_sp(struct user_regs *regs); + +extern int ptrace_to_pt_regs(struct pt_regs *regs, + struct user_regs __user *ptrace); +extern int pt_regs_to_ptrace(struct __user user_regs *ptrace, + struct pt_regs *regs); +#else +#endif + #endif /* __KERNEL__ */ #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-x86/siginfo.h b/include/asm-x86/siginfo.h index a477bea..59c8d37 100644 --- a/include/asm-x86/siginfo.h +++ b/include/asm-x86/siginfo.h @@ -5,6 +5,9 @@ # define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) #endif +#define __ARCH_SI_TRAPNO +#define __ARCH_SI_ERROR + #include #endif diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h index 5bd5082..920c94a 100644 --- a/include/asm-x86/thread_info_32.h +++ b/include/asm-x86/thread_info_32.h @@ -142,6 +142,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_DEBUGCTLMSR 22 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 23 /* uses thread_struct.ds_area_msr */ #define TIF_BTS_TRACE_TS 24 /* record scheduling event timestamps */ +#define TIF_VCPU 25 #define _TIF_SYSCALL_TRACE (1< #include #include +#include #include #include @@ -991,6 +992,24 @@ struct sched_rt_entity { #endif }; +struct vcpu_user { + enum { VCPU_SYSCALL, VCPU_SIGNAL } event; + struct user_regs regs; + siginfo_t siginfo; +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) + struct user_desc tls_array[GDT_ENTRY_TLS_ENTRIES]; +#endif +}; + +struct vcpu { + struct vcpu_user user; + struct mm_struct *mm; + struct vcpu_user __user *state; +#if defined(CONFIG_X86_32) && !defined(CONFIG_UML) + struct user_desc 
tls[GDT_ENTRY_TLS_ENTRIES]; +#endif +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1103,6 +1122,7 @@ struct task_struct { cputime_t it_prof_expires, it_virt_expires; unsigned long long it_sched_expires; struct list_head cpu_timers[3]; + struct vcpu *vcpu; /* process credentials */ uid_t uid,euid,suid,fsuid; @@ -1750,6 +1770,7 @@ static inline int sas_ss_flags(unsigned long sp) * Routines for handling mm_structs */ extern struct mm_struct * mm_alloc(void); +extern struct mm_struct *dup_mm(struct task_struct *tsk); /* mmdrop drops the mm and the page tables */ extern void __mmdrop(struct mm_struct *); diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index ea037f2..dd6ca3e 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -26,6 +26,8 @@ struct signalfd_siginfo { __u64 ssi_utime; __u64 ssi_stime; __u64 ssi_addr; + __u32 ssi_trap_no; + __u32 ssi_error_code; /* * Pad strcture to 128 bytes. Remember to update the @@ -36,7 +38,7 @@ struct signalfd_siginfo { * comes out of a read(2) and we really don't want to have * a compat on read(2). 
*/ - __u8 __pad[48]; + __u8 __pad[40]; }; diff --git a/kernel/Makefile b/kernel/Makefile index 6c584c5..0119a37 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o \ - notifier.o ksysfs.o pm_qos_params.o + notifier.o ksysfs.o pm_qos_params.o vcpu.o obj-$(CONFIG_SYSCTL) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/kernel/exit.c b/kernel/exit.c index 073005b..bda5e7f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -175,6 +175,11 @@ repeat: write_unlock_irq(&tasklist_lock); release_thread(p); + + if (p->vcpu && p->vcpu->mm) + mmput(p->vcpu->mm); + kfree(p->vcpu); + call_rcu(&p->rcu, delayed_put_task_struct); p = leader; diff --git a/kernel/fork.c b/kernel/fork.c index 9c042f9..3b8ed4c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -498,7 +498,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) * Allocate a new mm structure and copy contents from the * mm structure of the passed in task structure. 
*/
-static struct mm_struct *dup_mm(struct task_struct *tsk)
+struct mm_struct *dup_mm(struct task_struct *tsk)
 {
 	struct mm_struct *mm, *oldmm = current->mm;
 	int err;
@@ -1086,6 +1086,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	clear_tsk_thread_flag(p, TIF_SIGPENDING);
 	init_sigpending(&p->pending);
 
+	p->vcpu = NULL;
+
 	p->utime = cputime_zero;
 	p->stime = cputime_zero;
 	p->gtime = cputime_zero;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index fdb34e8..2200f84 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -420,6 +420,8 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
 	return 0;
 }
 
+extern int do_switch(struct task_struct *task, int fd);
+
 int ptrace_request(struct task_struct *child, long request,
 		   long addr, long data)
 {
@@ -471,6 +473,10 @@ int ptrace_request(struct task_struct *child, long request,
 			return 0;
 		return ptrace_resume(child, request, SIGKILL);
 
+	case PTRACE_SWITCH_MM:
+		ret = do_switch(child, data);
+		break;
+
 	default:
 		break;
 	}
diff --git a/kernel/signal.c b/kernel/signal.c
index 6af1210..67b5ec5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1785,6 +1785,9 @@ relock:
 		if (!signr)
 			break; /* will return 0 */
 
+		if (test_thread_flag(TIF_VCPU))
+			break;
+
 		if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
 			ptrace_signal_deliver(regs, cookie);
 
@@ -2106,7 +2109,7 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
 	 */
 	err = __put_user(from->si_signo, &to->si_signo);
 	err |= __put_user(from->si_errno, &to->si_errno);
-	err |= __put_user((short)from->si_code, &to->si_code);
+	err |= __put_user(from->si_code, &to->si_code);
 	switch (from->si_code & __SI_MASK) {
 	case __SI_KILL:
 		err |= __put_user(from->si_pid, &to->si_pid);
@@ -2126,6 +2129,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
 #ifdef __ARCH_SI_TRAPNO
 		err |= __put_user(from->si_trapno, &to->si_trapno);
 #endif
+#ifdef __ARCH_SI_ERROR
+		err |= __put_user(from->si_error, &to->si_error);
+#endif
 		break;
 	case __SI_CHLD:
 		err |= __put_user(from->si_pid, &to->si_pid);
diff --git a/kernel/vcpu.c b/kernel/vcpu.c
new file mode 100644
index 0000000..5ca259e
--- /dev/null
+++ b/kernel/vcpu.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include
+#include
+#include
+
+extern asmlinkage int sys_get_thread_area(struct user_desc __user *u_info);
+extern asmlinkage int sys_set_thread_area(struct user_desc __user *u_info);
+extern int do_switch(struct task_struct *task, int fd);
+
+/*
+ * Enter vcpu mode for the current task.
+ *
+ * The current registers are stored in current->vcpu (allocated on first
+ * use) and replaced with the ones read from new->regs.  When mm_fd is
+ * not -1 the task is also switched to the address space behind that
+ * descriptor, and the previous mm is kept in vcpu->mm.
+ *
+ * Returns 0 with TIF_VCPU set on success, -ENOMEM if the vcpu cannot be
+ * allocated, or the error reported by the failing helper.
+ */
+long do_vcpu(int mm_fd, struct vcpu_user __user *new, struct pt_regs *regs)
+{
+	mm_segment_t fs;
+	struct vcpu *vcpu;
+	int err;
+
+	if (current->vcpu == NULL) {
+		current->vcpu = kmalloc(sizeof(struct vcpu), GFP_KERNEL);
+		if (current->vcpu == NULL)
+			return -ENOMEM;
+	}
+
+	vcpu = current->vcpu;
+	vcpu->mm = NULL;
+	vcpu->state = new;
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = pt_regs_to_ptrace(&vcpu->user.regs, regs);
+	set_fs(fs);
+	if (err)
+		return err;
+
+	err = ptrace_to_pt_regs(regs, &new->regs);
+	if (err)
+		return err;
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
+	{ int i;
+
+		memcpy(vcpu->tls, current->thread.tls_array, sizeof(vcpu->tls));
+		for (i = 0; i < ARRAY_SIZE(new->tls_array); i++){
+			fs = get_fs();
+			set_fs(KERNEL_DS);
+			vcpu->tls[i].entry_number = GDT_ENTRY_TLS_MIN + i;
+			err = sys_get_thread_area(&vcpu->tls[i]);
+			set_fs(fs);
+			if (err)
+				return err;
+
+			err = sys_set_thread_area(&new->tls_array[i]);
+			if (err)
+				return err;
+		}
+	}
+#endif
+
+	if (mm_fd != -1) {
+		vcpu->mm = current->mm;
+		atomic_inc(&vcpu->mm->mm_users);
+
+		err = do_switch(current, mm_fd);
+		if (err) {
+			/*
+			 * Nothing was switched, so drop the mm_users
+			 * reference taken above rather than leak it.
+			 */
+			mmput(vcpu->mm);
+			vcpu->mm = NULL;
+			return err;
+		}
+	}
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
+	loadsegment(gs, current->thread.gs);
+#endif
+	set_thread_flag(TIF_VCPU);
+
+	return 0;
+}
+
+extern void do_switch_mm_struct(struct task_struct *task,
+				struct mm_struct *new);
+
+/*
+ * Leave vcpu mode for the current task.
+ *
+ * Clears TIF_VCPU, switches back to the mm kept in vcpu->mm (if any),
+ * then writes the current registers, the optional siginfo and the exit
+ * event (VCPU_SIGNAL when siginfo is non-NULL, VCPU_SYSCALL otherwise)
+ * to the user-supplied vcpu->state.  Finally the registers stored in
+ * vcpu->user.regs by do_vcpu() are loaded back.
+ *
+ * Returns 0 on success, -EFAULT if a copy to userspace fails, or the
+ * error reported by the failing helper.
+ */
+int unvcpu(struct pt_regs *regs, siginfo_t *siginfo)
+{
+	mm_segment_t fs;
+	struct vcpu *vcpu;
+	int err, event;
+
+	clear_thread_flag(TIF_VCPU);
+
+	vcpu = current->vcpu;
+	if (vcpu->mm != NULL) {
+		do_switch_mm_struct(current, vcpu->mm);
+		mmput(vcpu->mm);
+		vcpu->mm = NULL;
+	}
+
+	err = pt_regs_to_ptrace(&vcpu->state->regs, regs);
+	if (err)
+		return err;
+
+	err = -EFAULT;
+	if ((siginfo != NULL) &&
+	    (copy_to_user(&vcpu->state->siginfo, siginfo,
+			  sizeof(siginfo_t)) != 0))
+		return err;
+
+	event = (siginfo != NULL) ? VCPU_SIGNAL : VCPU_SYSCALL;
+	if (copy_to_user(&vcpu->state->event, &event, sizeof(event)) != 0)
+		return err;
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
+	{ int i;
+		for (i = 0; i < ARRAY_SIZE(vcpu->state->tls_array); i++){
+			fs = get_fs();
+			set_fs(KERNEL_DS);
+			err = sys_set_thread_area(&vcpu->tls[i]);
+			set_fs(fs);
+			if (err)
+				return err;
+		}
+	}
+#endif
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	err = ptrace_to_pt_regs(regs, &vcpu->user.regs);
+	set_fs(fs);
+
+	return err;
+}
diff --git a/mm/Makefile b/mm/Makefile
index a5b0dd9..123ca7d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -4,8 +4,8 @@
 
 mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= fremap.o highmem.o madvise.o memory.o mincore.o \
-			   mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
-			   vmalloc.o
+			   mlock.o mmap.o mmfs.o mprotect.o mremap.o msync.o \
+			   rmap.o vmalloc.o
 
 obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   page_alloc.o page-writeback.o pdflush.o \
diff --git a/mm/mmfs.c b/mm/mmfs.c
new file mode 100644
index 0000000..247f7a3
--- /dev/null
+++ b/mm/mmfs.c
@@ -0,0 +1,242 @@
+#define __FRAME_OFFSETS
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Drop the reference on the mm held in file->private_data. */
+static int release_mm(struct inode *inode, struct file *file)
+{
+	struct mm_struct *mm = file->private_data;
+
+	mmput(mm);
+	return 0;
+}
+
+#define MM_MAGIC 0xE0AAC500
+
+static int mm_get_sb(struct file_system_type *fs_type,
+		     int flags, const char *dev_name, void *data,
+		     struct vfsmount *mnt)
+{
+	return get_sb_pseudo(fs_type, "mm:", NULL, MM_MAGIC, mnt);
+}
+
+static struct vfsmount *mm_mnt;
+
+static struct file_system_type mm_fs_type = {
+	.name		= "mm",
+	.get_sb		= mm_get_sb,
+	.kill_sb	= kill_anon_super,
+};
+
+static int __init init_mm_fs(void)
+{
+	int err;
+
+	err = register_filesystem(&mm_fs_type);
+	if (err)
+		return err;
+
+	mm_mnt = kern_mount(&mm_fs_type);
+	if (IS_ERR(mm_mnt)) {
+		err = PTR_ERR(mm_mnt);
+		unregister_filesystem(&mm_fs_type);
+	}
+
+	return err;
+}
+
+static void __exit exit_mm_fs(void)
+{
+	unregister_filesystem(&mm_fs_type);
+	mntput(mm_mnt);
+}
+
+fs_initcall(init_mm_fs);
+module_exit(exit_mm_fs);
+
+static int mm_delete_dentry(struct dentry *dentry)
+{
+	/*
+	 * At creation time, we pretended this dentry was hashed
+	 * (by clearing DCACHE_UNHASHED bit in d_flags)
+	 * At delete time, we restore the truth : not hashed.
+	 * (so that dput() can proceed correctly)
+	 */
+	dentry->d_flags |= DCACHE_UNHASHED;
+	return 0;
+}
+
+/*
+ * mm_dname() is called from d_path().
+ */
+static char *mm_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+	return dynamic_dname(dentry, buffer, buflen, "mm:[%lu]",
+			     dentry->d_inode->i_ino);
+}
+
+static struct dentry_operations mm_dentry_operations = {
+	.d_delete	= mm_delete_dentry,
+	.d_dname	= mm_dname,
+};
+
+static struct file_operations mm_fops = {
+	.release	= release_mm,
+};
+
+/*
+ * Duplicate the current address space with dup_mm() and return a
+ * read-only file descriptor whose file->private_data points at the copy.
+ *
+ * Returns the new descriptor, or a negative errno on failure.
+ */
+asmlinkage long sys_new_mm(void)
+{
+	struct file *file;
+	struct mm_struct *mm;
+	struct inode *inode;
+	struct dentry *dentry;
+	struct qstr name = { .name = "" };
+	int err, fd;
+
+	mm = dup_mm(current);
+	if (mm == NULL)
+		return -ENOMEM;
+
+	fd = get_unused_fd();
+	if (fd < 0) {
+		err = fd;
+		goto out_free;
+	}
+
+	err = -ENOMEM;
+	dentry = d_alloc(mm_mnt->mnt_sb->s_root, &name);
+	if (dentry == NULL)
+		goto out_put;
+
+	dentry->d_op = &mm_dentry_operations;
+	dentry->d_flags &= ~DCACHE_UNHASHED;
+
+	inode = new_inode(mm_mnt->mnt_sb);
+	if (inode == NULL)
+		goto out_dput;
+
+	inode->i_mode = S_IRUSR;
+	inode->i_uid = current->fsuid;
+	inode->i_gid = current->fsgid;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+	d_instantiate(dentry, inode);
+
+	file = alloc_file(mm_mnt, dentry, FMODE_READ, &mm_fops);
+	if (file == NULL)
+		goto out_dput;
+
+	file->f_flags = O_RDONLY;
+	file->private_data = mm;
+
+	fd_install(fd, file);
+
+	return fd;
+
+ out_dput:
+	dput(dentry);
+ out_put:
+	put_unused_fd(fd);
+ out_free:
+	mmput(mm);
+	return err;
+}
+
+/*
+ * Make 'new' the mm of 'task': take a reference on it, install it, and
+ * drop the task's reference on the mm it had before.
+ */
+void do_switch_mm_struct(struct task_struct *task, struct mm_struct *new)
+{
+	struct mm_struct *old = task->mm;
+
+	task_lock(task);
+
+	atomic_inc(&new->mm_users);
+	task->mm = new;
+	task->active_mm = new;
+
+	if (task == current)
+		switch_mm(old, task->mm, task);
+
+	task_unlock(task);
+
+	mmput(old);
+}
+
+extern const struct file_operations proc_pid_mm_operations;
+
+/*
+ * Switch 'task' to the address space behind file descriptor 'fd'.
+ *
+ * Returns 0 on success, -EBADF if fd is not open, or -EINVAL if the
+ * file uses neither mm_fops nor proc_pid_mm_operations.
+ */
+int do_switch(struct task_struct *task, int fd)
+{
+	struct file *file = fget(fd);
+	int err;
+
+	if (!file)
+		return -EBADF;
+
+	err = -EINVAL;
+	if ((file->f_op != &mm_fops) && (file->f_op != &proc_pid_mm_operations))
+		goto out;
+
+	do_switch_mm_struct(task, file->private_data);
+
+	err = 0;
+
+ out:
+	fput(file);
+	return err;
+}
+
+/*
+ * Switch the current task to the address space behind 'fd'.
+ *
+ * When 'save' is non-NULL the current registers are written there
+ * first.  After the switch the registers are loaded from 'new' if it is
+ * non-NULL; otherwise only the instruction and stack pointers are set,
+ * from 'ip' and 'sp'.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+long do_switch_mm(int fd, struct __user user_regs *save,
+		  struct __user user_regs *new, unsigned long ip,
+		  unsigned long sp, struct pt_regs *regs)
+{
+	int ret;
+
+	if (current->mm == NULL)
+		return -EINVAL;
+
+	if ((save != NULL) && pt_regs_to_ptrace(save, regs))
+		return -EFAULT;
+
+	ret = do_switch(current, fd);
+	if (ret)
+		return ret;
+
+	if (new != NULL)
+		ret = ptrace_to_pt_regs(regs, new);
+	else {
+		pt_regs_ip(*regs) = ip;
+		pt_regs_sp(*regs) = sp;
+	}
+
+	return ret;
+}