@@ -, +, @@ /dev/vmmon[7386]: /dev/rtc enable interrupt failed: -25 --- vmmon-only/include/compat_sched.h 2008-10-31 01:17:48.000000000 +0000 +++ vmmon-only/include/compat_sched.h 2008-12-29 22:07:40.000000000 +0000 @@ -154,4 +154,31 @@ static inline void daemonize(void) { #define compat_set_user_nice(task, n) do { (task)->nice = (n); } while (0) #endif +/* + * Since 2.6.27-rc2 kill_proc() is gone... Replacement (GPL-only!) + * API is available since 2.6.19. Use them from 2.6.27-rc1 up. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 27) +typedef int compat_pid; +#define compat_find_get_pid(pid) (pid) +#define compat_put_pid(pid) do { } while (0) +#define compat_kill_pid(pid, sig, flag) kill_proc(pid, sig, flag) +#else +typedef struct pid * compat_pid; +#define compat_find_get_pid(pid) find_get_pid(pid) +#define compat_put_pid(pid) put_pid(pid) +#define compat_kill_pid(pid, sig, flag) kill_pid(pid, sig, flag) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 5, 61) +#define compat_allow_signal(signr) do { \ + spin_lock_irq(&current->compat_sigmask_lock); \ + sigdelset(&current->blocked, signr); \ + compat_recalc_sigpending(); \ + spin_unlock_irq(&current->compat_sigmask_lock); \ + } while (0) +#else +#define compat_allow_signal(signr) allow_signal(signr) +#endif + #endif /* __COMPAT_SCHED_H__ */ --- vmmon-only/linux/driver.c 2008-12-29 22:06:15.000000000 +0000 +++ vmmon-only/linux/driver.c 2008-12-29 22:07:40.000000000 +0000 @@ -378,12 +378,9 @@ init_module(void) linuxState.pollTimer.data = 0; linuxState.pollTimer.function = LinuxDriverPollTimeout; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24) - linuxState.fastClockTask = NULL; -#else linuxState.fastClockThread = 0; -#endif linuxState.fastClockRate = 0; + linuxState.fastClockPriority = -20; #ifdef POLLSPINLOCK spin_lock_init(&linuxState.pollListLock); @@ -908,12 +905,7 @@ LinuxDriverPoll(struct file *filp, mask = POLLIN; } } else { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24) - if 
((linuxState.fastClockTask!=NULL) && - vmLinux->pollTimeoutPtr != NULL) { -#else if (linuxState.fastClockThread && vmLinux->pollTimeoutPtr != NULL) { -#endif struct timeval tv; do_gettimeofday(&tv); poll_wait(filp, &vmLinux->pollQueue, wait); @@ -2383,3 +2375,4 @@ static int LinuxDriverAPMCallback(apm_ev MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Machine Monitor."); +MODULE_LICENSE("GPL v2"); --- vmmon-only/linux/driver.h 2008-12-29 22:06:15.000000000 +0000 +++ vmmon-only/linux/driver.h 2008-12-29 22:07:40.000000000 +0000 @@ -19,7 +19,7 @@ #include "compat_spinlock.h" #include "compat_wait.h" #include "driver_vmcore.h" - +#include "compat_sched.h" /* * Per-instance driver state @@ -101,12 +101,9 @@ typedef struct VMXLinuxState { spinlock_t pollListLock; #endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24) - struct task_struct *fastClockTask; -#else - volatile int fastClockThread; -#endif + volatile compat_pid fastClockThread; unsigned fastClockRate; + long fastClockPriority; } VMXLinuxState; extern VMXLinuxState linuxState; --- vmmon-only/linux/hostif.c 2008-12-29 22:06:15.000000000 +0000 +++ vmmon-only/linux/hostif.c 2008-12-29 22:07:40.000000000 +0000 @@ -3421,6 +3421,44 @@ HostIF_NumOnlineLogicalCPUs(void) #endif } +/* + *---------------------------------------------------------------------- + * + * HostIFDoIoctl -- + * + * Issue ioctl. Assume kernel is not locked. It is not true now, + * but it makes things easier to understand, and won't surprise us + * later when we get rid of kernel lock from our code. + * + * Results: + * Same as ioctl method. + * + * Side effects: + * none. 
+ * + *---------------------------------------------------------------------- + */ + +static long +HostIFDoIoctl(struct file *filp, + u_int iocmd, + unsigned long ioarg) +{ +#ifdef HAVE_UNLOCKED_IOCTL + if (filp->f_op->unlocked_ioctl) { + return filp->f_op->unlocked_ioctl(filp, iocmd, ioarg); + } +#endif + if (filp->f_op->ioctl) { + long err; + + lock_kernel(); + err = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, iocmd, ioarg); + unlock_kernel(); + return err; + } + return -ENOIOCTLCMD; +} /* *---------------------------------------------------------------------- @@ -3443,23 +3481,21 @@ HostIF_NumOnlineLogicalCPUs(void) */ static int -HostIFFastClockThread(void *data) +HostIFFastClockThread(void *data) // IN: { struct file *filp = (struct file *) data; int res; mm_segment_t oldFS; unsigned rate = 0; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24) compat_daemonize("vmware-rtc"); -#endif oldFS = get_fs(); set_fs(KERNEL_DS); + compat_allow_signal(SIGKILL); cap_raise(current->cap_effective, CAP_SYS_RESOURCE); - compat_set_user_nice(current, -20); + compat_set_user_nice(current, linuxState.fastClockPriority); - while (linuxState.fastClockRate > HZ + HZ/16 && - !signal_pending(current)) { + while (linuxState.fastClockRate > HZ + HZ/16) { unsigned long buf; loff_t pos = 0; unsigned p2rate; @@ -3473,8 +3509,7 @@ HostIFFastClockThread(void *data) p2rate <<= 1; } - res = filp->f_op->ioctl(filp->f_dentry->d_inode, - filp, RTC_IRQP_SET, p2rate); + res = HostIFDoIoctl(filp, RTC_IRQP_SET, p2rate); if (res < 0) { Warning("/dev/rtc set rate %d failed: %d\n", p2rate, res); goto out; @@ -3542,20 +3577,44 @@ HostIFFastClockThread(void *data) */ int -HostIF_SetFastClockRate(unsigned rate) +HostIF_SetFastClockRate(unsigned int rate) // IN: Frequency in Hz. 
{ ASSERT(MutexIsLocked(&fastClockMutex)); linuxState.fastClockRate = rate; + + /* + * Overview + * -------- + * An SMP Linux kernel programs the 8253 timer (to increment the 'jiffies' + * counter) _and_ all local APICs (to run the scheduler code) to deliver + * interrupts HZ times a second. + * + * Time + * ---- + * The kernel tries very hard to spread all these interrupts evenly over + * time, i.e. on a 1 CPU system, the 1 local APIC phase is shifted by 1/2 + * period compared to the 8253, and on a 2 CPU system, the 2 local APIC + * phases are respectively shifted by 1/3 and 2/3 period compared to the + * 8253. This is done to reduce contention on locks guarding the global task + * queue. + * + * Space + * ----- + * The 8253 interrupts are distributed between physical CPUs, evenly on a P3 + * system, whereas on a P4 system physical CPU 0 gets all of them. + * + * Long story short, unless the monitor requested rate is significantly + * higher than HZ, we don't need to send IPIs or exclusively grab /dev/rtc + * to periodically kick vCPU threads running in the monitor on all physical + * CPUs. 
+ */ + if (rate > HZ + HZ/16) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24) - if (linuxState.fastClockTask==NULL) { - struct task_struct *t; -#else if (!linuxState.fastClockThread) { -#endif struct file *filp; int fsuid, res; Bool cap; + long pid; fsuid = current->fsuid; current->fsuid = 0; @@ -3567,7 +3626,7 @@ HostIF_SetFastClockRate(unsigned rate) } cap = cap_raised(current->cap_effective, CAP_SYS_RESOURCE); cap_raise(current->cap_effective, CAP_SYS_RESOURCE); - res = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, RTC_PIE_ON, 0); + res = HostIFDoIoctl(filp, RTC_PIE_ON, 0); if (!cap) { cap_lower(current->cap_effective, CAP_SYS_RESOURCE); } @@ -3576,38 +3635,32 @@ HostIF_SetFastClockRate(unsigned rate) compat_filp_close(filp, current->files); return -res; } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24) - t = kthread_create(HostIFFastClockThread, filp, "vmware-rtc"); - if (IS_ERR(t)) { - compat_filp_close(filp, current->files); - return -PTR_ERR(t); - } - linuxState.fastClockTask=t; - wake_up_process(t); -#else - linuxState.fastClockThread = - kernel_thread(HostIFFastClockThread, filp, 0); -#endif + pid = kernel_thread(HostIFFastClockThread, filp, 0); + if (pid < 0) { + /* + * Ignore ERESTARTNOINTR silently, it occurs when signal is + * pending, and syscall layer automatically reissues operation + * after signal is handled. 
+ */ + if (pid != -ERESTARTNOINTR) { + Warning("/dev/rtc cannot start watch thread: %ld\n", pid); + } + compat_filp_close(filp, current->files); + return -pid; + } + linuxState.fastClockThread = compat_find_get_pid(pid); } } else { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 24) - if (linuxState.fastClockTask!=NULL) { - kthread_stop(linuxState.fastClockTask); - linuxState.fastClockTask = NULL; - compat_wait_for_completion(&fastClockExited); - } -#else if (linuxState.fastClockThread) { - kill_proc(linuxState.fastClockThread, SIGKILL, 1); + compat_kill_pid(linuxState.fastClockThread, SIGKILL, 1); + compat_put_pid(linuxState.fastClockThread); linuxState.fastClockThread = 0; compat_wait_for_completion(&fastClockExited); } -#endif } return 0; } - /* *----------------------------------------------------------------------------- *