Gentoo's Bugzilla – Attachment 282139 Details for Bug 377831: sys-kernel/rtai-sources - Real Time Application Interface kernel sources
[patch] hal-linux-2.6.32.11-x86-2.6-03.patch
hal-linux-2.6.32.11-x86-2.6-03.patch.txt.crdownload (text/plain), 319.18 KB, created by Chris Brown on 2011-08-05 04:03:15 UTC
Description: hal-linux-2.6.32.11-x86-2.6-03.patch
Filename: hal-linux-2.6.32.11-x86-2.6-03.patch.txt.crdownload
MIME Type: text/plain
Creator: Chris Brown
Created: 2011-08-05 04:03:15 UTC
Size: 319.18 KB
Flags: patch, obsolete
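For orientation before the diff: under the I-pipe (Adeos interrupt pipeline) model this patch implements, plain local_irq_save()/local_irq_restore() only stall the virtualized root (Linux) stage, while the local_irq_save_hw*() variants added in asm/irqflags.h really mask hardware interrupts for every pipeline domain. The sketch below is not part of the patch; the function and register names are invented for illustration, and it simply shows the bracketing pattern the patch applies to code such as kernel_fpu_begin() and the APIC IPI paths.

	/* Illustrative sketch only -- not from the patch. Assumes the
	 * local_irq_save_hw()/local_irq_restore_hw() primitives that this
	 * patch adds; demo_reg and demo_program_device() are hypothetical. */
	#include <linux/io.h>
	#include <linux/irqflags.h>
	#include <linux/types.h>

	static void __iomem *demo_reg;	/* hypothetical device register mapping */

	static void demo_program_device(u32 val)
	{
		unsigned long flags;

		local_irq_save_hw(flags);	/* mask real IRQs across all domains */
		writel(val, demo_reg);		/* access that must not be preempted,
						   not even by a real-time domain */
		local_irq_restore_hw(flags);	/* restore previous hardware IRQ state */
	}

The raw attachment content follows.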
>diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig >index 4fdb669..89b72a9 100644 >--- a/arch/x86/Kconfig >+++ b/arch/x86/Kconfig >@@ -480,6 +480,7 @@ config SCHED_OMIT_FRAME_POINTER > > menuconfig PARAVIRT_GUEST > bool "Paravirtualized guest support" >+ depends on !IPIPE > ---help--- > Say Y here to get to see options related to running Linux under > various hypervisors. This option alone does not add any kernel code. >@@ -531,6 +532,7 @@ source "arch/x86/lguest/Kconfig" > > config PARAVIRT > bool "Enable paravirtualization code" >+ depends on !IPIPE > ---help--- > This changes the kernel so it can modify itself when it is run > under a hypervisor, potentially improving performance significantly >@@ -750,6 +752,8 @@ config SCHED_MC > > source "kernel/Kconfig.preempt" > >+source "kernel/ipipe/Kconfig" >+ > config X86_UP_APIC > bool "Local APIC support on uniprocessors" > depends on X86_32 && !SMP && !X86_32_NON_STANDARD >diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h >index 474d80d..0b33b55 100644 >--- a/arch/x86/include/asm/apic.h >+++ b/arch/x86/include/asm/apic.h >@@ -404,7 +404,13 @@ static inline u32 safe_apic_wait_icr_idle(void) > } > > >+#ifdef CONFIG_IPIPE >+#define ack_APIC_irq() do { } while(0) >+static inline void __ack_APIC_irq(void) >+#else /* !CONFIG_IPIPE */ >+#define __ack_APIC_irq() ack_APIC_irq() > static inline void ack_APIC_irq(void) >+#endif /* CONFIG_IPIPE */ > { > #ifdef CONFIG_X86_LOCAL_APIC > /* >diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h >index 3b62da9..855534f 100644 >--- a/arch/x86/include/asm/apicdef.h >+++ b/arch/x86/include/asm/apicdef.h >@@ -143,6 +143,7 @@ > # define MAX_LOCAL_APIC 32768 > #endif > >+#ifndef __ASSEMBLY__ > /* > * All x86-64 systems are xAPIC compatible. > * In the following, "apicid" is a physical APIC ID. 
>@@ -418,4 +419,7 @@ struct local_apic { > #else > #define BAD_APICID 0xFFFFu > #endif >+ >+#endif /* !__ASSEMBLY__ */ >+ > #endif /* _ASM_X86_APICDEF_H */ >diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h >index f5693c8..b45303a 100644 >--- a/arch/x86/include/asm/entry_arch.h >+++ b/arch/x86/include/asm/entry_arch.h >@@ -22,6 +22,7 @@ BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1, > smp_invalidate_interrupt) > BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2, > smp_invalidate_interrupt) >+#ifndef CONFIG_IPIPE > BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3, > smp_invalidate_interrupt) > BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4, >@@ -32,6 +33,7 @@ BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6, > smp_invalidate_interrupt) > BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, > smp_invalidate_interrupt) >+#endif /* !CONFIG_IPIPE */ > #endif > > BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR) >diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h >index ba180d9..6a7c6bc 100644 >--- a/arch/x86/include/asm/hw_irq.h >+++ b/arch/x86/include/asm/hw_irq.h >@@ -35,6 +35,13 @@ extern void spurious_interrupt(void); > extern void thermal_interrupt(void); > extern void reschedule_interrupt(void); > extern void mce_self_interrupt(void); >+#ifdef CONFIG_IPIPE >+void ipipe_ipi0(void); >+void ipipe_ipi1(void); >+void ipipe_ipi2(void); >+void ipipe_ipi3(void); >+void ipipe_ipiX(void); >+#endif > > extern void invalidate_interrupt(void); > extern void invalidate_interrupt0(void); >@@ -115,6 +122,7 @@ extern void smp_invalidate_interrupt(struct pt_regs *); > #else > extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); > #endif >+extern asmlinkage void smp_reboot_interrupt(void); > #endif > > extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); >diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h >index 0b20bbb..b8a7638 100644 >--- a/arch/x86/include/asm/i387.h >+++ b/arch/x86/include/asm/i387.h >@@ -289,11 +289,14 @@ static inline void __clear_fpu(struct task_struct *tsk) > static inline void kernel_fpu_begin(void) > { > struct thread_info *me = current_thread_info(); >+ unsigned long flags; > preempt_disable(); >+ local_irq_save_hw_cond(flags); > if (me->status & TS_USEDFPU) > __save_init_fpu(me->task); > else > clts(); >+ local_irq_restore_hw_cond(flags); > } > > static inline void kernel_fpu_end(void) >diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h >index 58d7091..ac8bd15 100644 >--- a/arch/x86/include/asm/i8259.h >+++ b/arch/x86/include/asm/i8259.h >@@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask; > #define SLAVE_ICW4_DEFAULT 0x01 > #define PIC_ICW4_AEOI 2 > >-extern spinlock_t i8259A_lock; >+extern ipipe_spinlock_t i8259A_lock; > > extern void init_8259A(int auto_eoi); > extern void enable_8259A_irq(unsigned int irq); >diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h >index 0b72282..6574056 100644 >--- a/arch/x86/include/asm/ipi.h >+++ b/arch/x86/include/asm/ipi.h >@@ -68,6 +68,9 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest > * to the APIC. > */ > unsigned int cfg; >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); > > /* > * Wait for idle. 
>@@ -83,6 +86,8 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest > * Send the IPI. The write to APIC_ICR fires this off. > */ > native_apic_mem_write(APIC_ICR, cfg); >+ >+ local_irq_restore_hw(flags); > } > > /* >diff --git a/arch/x86/include/asm/ipipe.h b/arch/x86/include/asm/ipipe.h >new file mode 100644 >index 0000000..7d29f03 >--- /dev/null >+++ b/arch/x86/include/asm/ipipe.h >@@ -0,0 +1,158 @@ >+/* -*- linux-c -*- >+ * arch/x86/include/asm/ipipe.h >+ * >+ * Copyright (C) 2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __X86_IPIPE_H >+#define __X86_IPIPE_H >+ >+#ifdef CONFIG_IPIPE >+ >+#ifndef IPIPE_ARCH_STRING >+#define IPIPE_ARCH_STRING "2.6-03" >+#define IPIPE_MAJOR_NUMBER 2 >+#define IPIPE_MINOR_NUMBER 6 >+#define IPIPE_PATCH_NUMBER 3 >+#endif >+ >+DECLARE_PER_CPU(struct pt_regs, __ipipe_tick_regs); >+ >+DECLARE_PER_CPU(unsigned long, __ipipe_cr2); >+ >+static inline unsigned __ipipe_get_irq_vector(int irq) >+{ >+#ifdef CONFIG_X86_IO_APIC >+ unsigned __ipipe_get_ioapic_irq_vector(int irq); >+ return __ipipe_get_ioapic_irq_vector(irq); >+#elif defined(CONFIG_X86_LOCAL_APIC) >+ return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ? >+ ipipe_apic_irq_vector(irq) : irq + IRQ0_VECTOR; >+#else >+ return irq + IRQ0_VECTOR; >+#endif >+} >+ >+#ifdef CONFIG_X86_32 >+# include "ipipe_32.h" >+#else >+# include "ipipe_64.h" >+#endif >+ >+/* >+ * The logical processor id and the current Linux task are read from the PDA, >+ * so this is always safe, regardless of the underlying stack. 
>+ */ >+#define ipipe_processor_id() raw_smp_processor_id() >+#define ipipe_safe_current() current >+ >+#define prepare_arch_switch(next) \ >+do { \ >+ ipipe_schedule_notify(current, next); \ >+ local_irq_disable_hw(); \ >+} while(0) >+ >+#define task_hijacked(p) \ >+ ({ int x = __ipipe_root_domain_p; \ >+ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_root_cpudom_var(status)); \ >+ if (x) local_irq_enable_hw(); !x; }) >+ >+struct ipipe_domain; >+ >+struct ipipe_sysinfo { >+ >+ int ncpus; /* Number of CPUs on board */ >+ u64 cpufreq; /* CPU frequency (in Hz) */ >+ >+ /* Arch-dependent block */ >+ >+ struct { >+ unsigned tmirq; /* Timer tick IRQ */ >+ u64 tmfreq; /* Timer frequency */ >+ } archdep; >+}; >+ >+/* Private interface -- Internal use only */ >+ >+#define __ipipe_check_platform() do { } while(0) >+#define __ipipe_init_platform() do { } while(0) >+#define __ipipe_enable_irq(irq) irq_to_desc(irq)->chip->enable(irq) >+#define __ipipe_disable_irq(irq) irq_to_desc(irq)->chip->disable(irq) >+ >+#ifdef CONFIG_SMP >+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd); >+#else >+#define __ipipe_hook_critical_ipi(ipd) do { } while(0) >+#endif >+ >+#define __ipipe_disable_irqdesc(ipd, irq) do { } while(0) >+ >+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq); >+ >+void __ipipe_enable_pipeline(void); >+ >+void __ipipe_do_critical_sync(unsigned irq, void *cookie); >+ >+void __ipipe_serial_debug(const char *fmt, ...); >+ >+extern int __ipipe_tick_irq; >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+#define ipipe_update_tick_evtdev(evtdev) \ >+ do { \ >+ if (strcmp((evtdev)->name, "lapic") == 0) \ >+ __ipipe_tick_irq = \ >+ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR); \ >+ else \ >+ __ipipe_tick_irq = 0; \ >+ } while (0) >+#else >+#define ipipe_update_tick_evtdev(evtdev) \ >+ __ipipe_tick_irq = 0 >+#endif >+ >+int __ipipe_check_lapic(void); >+ >+int __ipipe_check_tickdev(const char *devname); >+ >+#define __ipipe_syscall_watched_p(p, sc) \ >+ (((p)->flags & PF_EVNOTIFY) || (unsigned long)sc >= NR_syscalls) >+ >+#define __ipipe_root_tick_p(regs) ((regs)->flags & X86_EFLAGS_IF) >+ >+#else /* !CONFIG_IPIPE */ >+ >+#define ipipe_update_tick_evtdev(evtdev) do { } while (0) >+#define task_hijacked(p) 0 >+ >+#endif /* CONFIG_IPIPE */ >+ >+#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) >+#define __ipipe_move_root_irq(irq) \ >+ do { \ >+ if (irq < NR_IRQS) { \ >+ struct irq_chip *chip = irq_to_desc(irq)->chip; \ >+ if (chip->move) \ >+ chip->move(irq); \ >+ } \ >+ } while (0) >+#else /* !(CONFIG_SMP && CONFIG_IPIPE) */ >+#define __ipipe_move_root_irq(irq) do { } while (0) >+#endif /* !(CONFIG_SMP && CONFIG_IPIPE) */ >+ >+#endif /* !__X86_IPIPE_H */ >diff --git a/arch/x86/include/asm/ipipe_32.h b/arch/x86/include/asm/ipipe_32.h >new file mode 100644 >index 0000000..8d1f4b5 >--- /dev/null >+++ b/arch/x86/include/asm/ipipe_32.h >@@ -0,0 +1,156 @@ >+/* -*- linux-c -*- >+ * arch/x86/include/asm/ipipe_32.h >+ * >+ * Copyright (C) 2002-2005 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __X86_IPIPE_32_H >+#define __X86_IPIPE_32_H >+ >+#include <linux/cpumask.h> >+#include <linux/list.h> >+#include <linux/threads.h> >+#include <linux/ipipe_percpu.h> >+#include <asm/ptrace.h> >+ >+#define ipipe_read_tsc(t) __asm__ __volatile__("rdtsc" : "=A" (t)) >+#define ipipe_cpu_freq() ({ unsigned long long __freq = cpu_has_tsc?(1000LL * cpu_khz):CLOCK_TICK_RATE; __freq; }) >+ >+#define ipipe_tsc2ns(t) \ >+({ \ >+ unsigned long long delta = (t)*1000; \ >+ do_div(delta, cpu_khz/1000+1); \ >+ (unsigned long)delta; \ >+}) >+ >+#define ipipe_tsc2us(t) \ >+({ \ >+ unsigned long long delta = (t); \ >+ do_div(delta, cpu_khz/1000+1); \ >+ (unsigned long)delta; \ >+}) >+ >+/* Private interface -- Internal use only */ >+ >+int __ipipe_handle_irq(struct pt_regs *regs); >+ >+static inline unsigned long __ipipe_ffnz(unsigned long ul) >+{ >+ __asm__("bsrl %1, %0":"=r"(ul) >+ : "r"(ul)); >+ return ul; >+} >+ >+struct irq_desc; >+ >+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc); >+ >+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc); >+ >+static inline void __ipipe_call_root_xirq_handler(unsigned irq, >+ ipipe_irq_handler_t handler) >+{ >+ struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs); >+ >+ regs->orig_ax = ~__ipipe_get_irq_vector(irq); >+ >+ __asm__ __volatile__("pushfl\n\t" >+ "pushl %%cs\n\t" >+ "pushl $__xirq_end\n\t" >+ "pushl %%eax\n\t" >+ "pushl %%gs\n\t" >+ "pushl %%fs\n\t" >+ "pushl %%es\n\t" >+ "pushl %%ds\n\t" >+ "pushl %%eax\n\t" >+ "pushl %%ebp\n\t" >+ "pushl %%edi\n\t" >+ "pushl %%esi\n\t" >+ "pushl %%edx\n\t" >+ "pushl %%ecx\n\t" >+ "pushl %%ebx\n\t" >+ "movl %2,%%eax\n\t" >+ "call *%1\n\t" >+ "jmp ret_from_intr\n\t" >+ "__xirq_end: cli\n" >+ : /* no output */ >+ : "a" (~irq), "r" (handler), "rm" (regs)); >+} >+ >+void irq_enter(void); >+void irq_exit(void); >+ >+static inline void __ipipe_call_root_virq_handler(unsigned irq, >+ ipipe_irq_handler_t handler, >+ void *cookie) >+{ >+ irq_enter(); >+ __asm__ __volatile__("pushfl\n\t" >+ "pushl %%cs\n\t" >+ "pushl $__virq_end\n\t" >+ "pushl $-1\n\t" >+ "pushl %%gs\n\t" >+ "pushl %%fs\n\t" >+ "pushl %%es\n\t" >+ "pushl %%ds\n\t" >+ "pushl %%eax\n\t" >+ "pushl %%ebp\n\t" >+ "pushl %%edi\n\t" >+ "pushl %%esi\n\t" >+ "pushl %%edx\n\t" >+ "pushl %%ecx\n\t" >+ "pushl %%ebx\n\t" >+ "pushl %2\n\t" >+ "pushl %%eax\n\t" >+ "call *%1\n\t" >+ "addl $8,%%esp\n" >+ : /* no output */ >+ : "a" (irq), "r" (handler), "d" (cookie)); >+ irq_exit(); >+ __asm__ __volatile__("jmp ret_from_intr\n\t" >+ "__virq_end: cli\n" >+ : /* no output */ >+ : /* no input */); >+} >+ >+/* >+ * When running handlers, enable hw interrupts for all domains but the >+ * one heading the pipeline, so that IRQs can never be significantly >+ * deferred for the latter. 
>+ */ >+#define __ipipe_run_isr(ipd, irq) \ >+do { \ >+ if (!__ipipe_pipeline_head_p(ipd)) \ >+ local_irq_enable_hw(); \ >+ if (ipd == ipipe_root_domain) { \ >+ if (likely(!ipipe_virtual_irq_p(irq))) \ >+ __ipipe_call_root_xirq_handler(irq, \ >+ ipd->irqs[irq].handler); \ >+ else \ >+ __ipipe_call_root_virq_handler(irq, \ >+ ipd->irqs[irq].handler, \ >+ ipd->irqs[irq].cookie); \ >+ } else { \ >+ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ >+ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \ >+ __set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ >+ } \ >+ local_irq_disable_hw(); \ >+} while(0) >+ >+#endif /* !__X86_IPIPE_32_H */ >diff --git a/arch/x86/include/asm/ipipe_64.h b/arch/x86/include/asm/ipipe_64.h >new file mode 100644 >index 0000000..bc427b8 >--- /dev/null >+++ b/arch/x86/include/asm/ipipe_64.h >@@ -0,0 +1,161 @@ >+/* -*- linux-c -*- >+ * arch/x86/include/asm/ipipe_64.h >+ * >+ * Copyright (C) 2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __X86_IPIPE_64_H >+#define __X86_IPIPE_64_H >+ >+#include <asm/ptrace.h> >+#include <asm/irq.h> >+#include <linux/cpumask.h> >+#include <linux/list.h> >+#include <linux/ipipe_percpu.h> >+#ifdef CONFIG_SMP >+#include <asm/mpspec.h> >+#include <linux/thread_info.h> >+#endif >+ >+#define ipipe_read_tsc(t) do { \ >+ unsigned int __a,__d; \ >+ asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ >+ (t) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ >+} while(0) >+ >+extern unsigned cpu_khz; >+#define ipipe_cpu_freq() ({ unsigned long __freq = (1000UL * cpu_khz); __freq; }) >+#define ipipe_tsc2ns(t) (((t) * 1000UL) / (ipipe_cpu_freq() / 1000000UL)) >+#define ipipe_tsc2us(t) ((t) / (ipipe_cpu_freq() / 1000000UL)) >+ >+/* Private interface -- Internal use only */ >+ >+int __ipipe_handle_irq(struct pt_regs *regs); >+ >+static inline unsigned long __ipipe_ffnz(unsigned long ul) >+{ >+ __asm__("bsrq %1, %0":"=r"(ul) >+ : "rm"(ul)); >+ return ul; >+} >+ >+struct irq_desc; >+ >+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc); >+ >+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc); >+ >+static inline void __ipipe_call_root_xirq_handler(unsigned irq, >+ void (*handler)(unsigned, void *)) >+{ >+ struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs); >+ >+ regs->orig_ax = ~__ipipe_get_irq_vector(irq); >+ >+ __asm__ __volatile__("movq %%rsp, %%rax\n\t" >+ "pushq $0\n\t" >+ "pushq %%rax\n\t" >+ "pushfq\n\t" >+ "pushq %[kernel_cs]\n\t" >+ "pushq $__xirq_end\n\t" >+ "pushq %[vector]\n\t" >+ "subq $9*8,%%rsp\n\t" >+ "movq %%rdi,8*8(%%rsp)\n\t" >+ "movq %%rsi,7*8(%%rsp)\n\t" >+ "movq %%rdx,6*8(%%rsp)\n\t" >+ "movq %%rcx,5*8(%%rsp)\n\t" >+ "movq %%rax,4*8(%%rsp)\n\t" >+ "movq %%r8,3*8(%%rsp)\n\t" >+ "movq 
%%r9,2*8(%%rsp)\n\t" >+ "movq %%r10,1*8(%%rsp)\n\t" >+ "movq %%r11,(%%rsp)\n\t" >+ "call *%[handler]\n\t" >+ "cli\n\t" >+ "jmp exit_intr\n\t" >+ "__xirq_end: cli\n" >+ : /* no output */ >+ : [kernel_cs] "i" (__KERNEL_CS), >+ [vector] "rm" (regs->orig_ax), >+ [handler] "r" (handler), "D" (regs) >+ : "rax"); >+} >+ >+void irq_enter(void); >+void irq_exit(void); >+ >+static inline void __ipipe_call_root_virq_handler(unsigned irq, >+ void (*handler)(unsigned, void *), >+ void *cookie) >+{ >+ irq_enter(); >+ __asm__ __volatile__("movq %%rsp, %%rax\n\t" >+ "pushq $0\n\t" >+ "pushq %%rax\n\t" >+ "pushfq\n\t" >+ "pushq %[kernel_cs]\n\t" >+ "pushq $__virq_end\n\t" >+ "pushq $-1\n\t" >+ "subq $9*8,%%rsp\n\t" >+ "movq %%rdi,8*8(%%rsp)\n\t" >+ "movq %%rsi,7*8(%%rsp)\n\t" >+ "movq %%rdx,6*8(%%rsp)\n\t" >+ "movq %%rcx,5*8(%%rsp)\n\t" >+ "movq %%rax,4*8(%%rsp)\n\t" >+ "movq %%r8,3*8(%%rsp)\n\t" >+ "movq %%r9,2*8(%%rsp)\n\t" >+ "movq %%r10,1*8(%%rsp)\n\t" >+ "movq %%r11,(%%rsp)\n\t" >+ "call *%[handler]\n\t" >+ : /* no output */ >+ : [kernel_cs] "i" (__KERNEL_CS), >+ [handler] "r" (handler), "D" (irq), "S" (cookie) >+ : "rax"); >+ irq_exit(); >+ __asm__ __volatile__("cli\n\t" >+ "jmp exit_intr\n\t" >+ "__virq_end: cli\n" >+ : /* no output */ >+ : /* no input */); >+} >+ >+/* >+ * When running handlers, enable hw interrupts for all domains but the >+ * one heading the pipeline, so that IRQs can never be significantly >+ * deferred for the latter. >+ */ >+#define __ipipe_run_isr(ipd, irq) \ >+ do { \ >+ if (!__ipipe_pipeline_head_p(ipd)) \ >+ local_irq_enable_hw(); \ >+ if (ipd == ipipe_root_domain) { \ >+ if (likely(!ipipe_virtual_irq_p(irq))) \ >+ __ipipe_call_root_xirq_handler( \ >+ irq, (ipd)->irqs[irq].handler); \ >+ else \ >+ __ipipe_call_root_virq_handler( \ >+ irq, (ipd)->irqs[irq].handler, \ >+ (ipd)->irqs[irq].cookie); \ >+ } else { \ >+ __clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ >+ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \ >+ __set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ >+ } \ >+ local_irq_disable_hw(); \ >+ } while(0) >+ >+#endif /* !__X86_IPIPE_64_H */ >diff --git a/arch/x86/include/asm/ipipe_base.h b/arch/x86/include/asm/ipipe_base.h >new file mode 100644 >index 0000000..1098d6f >--- /dev/null >+++ b/arch/x86/include/asm/ipipe_base.h >@@ -0,0 +1,210 @@ >+/* -*- linux-c -*- >+ * arch/x86/include/asm/ipipe_base.h >+ * >+ * Copyright (C) 2007-2009 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
>+ */ >+ >+#ifndef __X86_IPIPE_BASE_H >+#define __X86_IPIPE_BASE_H >+ >+#include <linux/threads.h> >+#include <asm/apicdef.h> >+#include <asm/irq_vectors.h> >+ >+#ifdef CONFIG_X86_32 >+#define IPIPE_NR_FAULTS 33 /* 32 from IDT + iret_error */ >+#else >+#define IPIPE_NR_FAULTS 32 >+#endif >+ >+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) >+/* >+ * System interrupts are mapped beyond the last defined external IRQ >+ * number. >+ */ >+#define IPIPE_NR_XIRQS (NR_IRQS + 32) >+#define IPIPE_FIRST_APIC_IRQ NR_IRQS >+#define IPIPE_SERVICE_VECTOR0 (INVALIDATE_TLB_VECTOR_END + 1) >+#define IPIPE_SERVICE_IPI0 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0) >+#define IPIPE_SERVICE_VECTOR1 (INVALIDATE_TLB_VECTOR_END + 2) >+#define IPIPE_SERVICE_IPI1 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1) >+#define IPIPE_SERVICE_VECTOR2 (INVALIDATE_TLB_VECTOR_END + 3) >+#define IPIPE_SERVICE_IPI2 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2) >+#define IPIPE_SERVICE_VECTOR3 (INVALIDATE_TLB_VECTOR_END + 4) >+#define IPIPE_SERVICE_IPI3 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3) >+#ifdef CONFIG_SMP >+#define IPIPE_CRITICAL_VECTOR (INVALIDATE_TLB_VECTOR_END + 5) >+#define IPIPE_CRITICAL_IPI ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR) >+#endif >+#define ipipe_apic_irq_vector(irq) ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR) >+#define ipipe_apic_vector_irq(vec) ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ) >+#else /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */ >+#define IPIPE_NR_XIRQS NR_IRQS >+#endif /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */ >+ >+/* Pseudo-vectors used for kernel events */ >+#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS >+#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT) >+#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1) >+#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2) >+#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3) >+#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4) >+#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5) >+#define IPIPE_EVENT_CLEANUP (IPIPE_FIRST_EVENT + 6) >+#define IPIPE_LAST_EVENT IPIPE_EVENT_CLEANUP >+#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1) >+ >+#define ex_do_divide_error 0 >+#define ex_do_debug 1 >+/* NMI not pipelined. */ >+#define ex_do_int3 3 >+#define ex_do_overflow 4 >+#define ex_do_bounds 5 >+#define ex_do_invalid_op 6 >+#define ex_do_device_not_available 7 >+/* Double fault not pipelined. 
*/ >+#define ex_do_coprocessor_segment_overrun 9 >+#define ex_do_invalid_TSS 10 >+#define ex_do_segment_not_present 11 >+#define ex_do_stack_segment 12 >+#define ex_do_general_protection 13 >+#define ex_do_page_fault 14 >+#define ex_do_spurious_interrupt_bug 15 >+#define ex_do_coprocessor_error 16 >+#define ex_do_alignment_check 17 >+#define ex_machine_check_vector 18 >+#define ex_reserved ex_machine_check_vector >+#define ex_do_simd_coprocessor_error 19 >+#define ex_do_iret_error 32 >+ >+#ifndef __ASSEMBLY__ >+ >+#ifdef CONFIG_SMP >+ >+#include <asm/alternative.h> >+ >+#ifdef CONFIG_X86_32 >+#define GET_ROOT_STATUS_ADDR \ >+ "pushfl; cli;" \ >+ "movl %%fs:per_cpu__this_cpu_off, %%eax;" \ >+ "lea per_cpu__ipipe_percpu_darray(%%eax), %%eax;" >+#define PUT_ROOT_STATUS_ADDR "popfl;" >+#define TEST_AND_SET_ROOT_STATUS \ >+ "btsl $0,(%%eax);" >+#define TEST_ROOT_STATUS \ >+ "btl $0,(%%eax);" >+#define ROOT_TEST_CLOBBER_LIST "eax" >+#else /* CONFIG_X86_64 */ >+#define GET_ROOT_STATUS_ADDR \ >+ "pushfq; cli;" \ >+ "movq %%gs:per_cpu__this_cpu_off, %%rax;" \ >+ "lea per_cpu__ipipe_percpu_darray(%%rax), %%rax;" >+#define PUT_ROOT_STATUS_ADDR "popfq;" >+#define TEST_AND_SET_ROOT_STATUS \ >+ "btsl $0,(%%rax);" >+#define TEST_ROOT_STATUS \ >+ "btl $0,(%%rax);" >+#define ROOT_TEST_CLOBBER_LIST "rax" >+#endif /* CONFIG_X86_64 */ >+ >+static inline void __ipipe_stall_root(void) >+{ >+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR >+ LOCK_PREFIX >+ TEST_AND_SET_ROOT_STATUS >+ PUT_ROOT_STATUS_ADDR >+ : : : ROOT_TEST_CLOBBER_LIST, "memory"); >+} >+ >+static inline unsigned long __ipipe_test_and_stall_root(void) >+{ >+ int oldbit; >+ >+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR >+ LOCK_PREFIX >+ TEST_AND_SET_ROOT_STATUS >+ "sbbl %0,%0;" >+ PUT_ROOT_STATUS_ADDR >+ :"=r" (oldbit) >+ : : ROOT_TEST_CLOBBER_LIST, "memory"); >+ return oldbit; >+} >+ >+static inline unsigned long __ipipe_test_root(void) >+{ >+ int oldbit; >+ >+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR >+ TEST_ROOT_STATUS >+ "sbbl %0,%0;" >+ PUT_ROOT_STATUS_ADDR >+ :"=r" (oldbit) >+ : : ROOT_TEST_CLOBBER_LIST); >+ return oldbit; >+} >+ >+#else /* !CONFIG_SMP */ >+ >+#if __GNUC__ >= 4 >+/* Alias to ipipe_root_cpudom_var(status) */ >+extern unsigned long __ipipe_root_status; >+#else >+extern unsigned long *const __ipipe_root_status_addr; >+#define __ipipe_root_status (*__ipipe_root_status_addr) >+#endif >+ >+static inline void __ipipe_stall_root(void) >+{ >+ volatile unsigned long *p = &__ipipe_root_status; >+ __asm__ __volatile__("btsl $0,%0;" >+ :"+m" (*p) : : "memory"); >+} >+ >+static inline unsigned long __ipipe_test_and_stall_root(void) >+{ >+ volatile unsigned long *p = &__ipipe_root_status; >+ int oldbit; >+ >+ __asm__ __volatile__("btsl $0,%1;" >+ "sbbl %0,%0;" >+ :"=r" (oldbit), "+m" (*p) >+ : : "memory"); >+ return oldbit; >+} >+ >+static inline unsigned long __ipipe_test_root(void) >+{ >+ volatile unsigned long *p = &__ipipe_root_status; >+ int oldbit; >+ >+ __asm__ __volatile__("btl $0,%1;" >+ "sbbl %0,%0;" >+ :"=r" (oldbit) >+ :"m" (*p)); >+ return oldbit; >+} >+ >+#endif /* !CONFIG_SMP */ >+ >+void __ipipe_halt_root(void); >+ >+void __ipipe_serial_debug(const char *fmt, ...); >+ >+#endif /* !__ASSEMBLY__ */ >+ >+#endif /* !__X86_IPIPE_BASE_H */ >diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h >index 6e90a04..6178f92 100644 >--- a/arch/x86/include/asm/irq_vectors.h >+++ b/arch/x86/include/asm/irq_vectors.h >@@ -91,10 +91,17 @@ > #define THRESHOLD_APIC_VECTOR 0xf9 > #define REBOOT_VECTOR 0xf8 > >+#ifdef 
CONFIG_IPIPE >+/* f0-f2 used for TLB flush, f3-f7 reserved for the I-pipe */ >+#define INVALIDATE_TLB_VECTOR_END 0xf2 >+#define INVALIDATE_TLB_VECTOR_START 0xf0 >+#define NUM_INVALIDATE_TLB_VECTORS 3 >+#else /* !CONFIG_IPIPE */ > /* f0-f7 used for spreading out TLB flushes: */ > #define INVALIDATE_TLB_VECTOR_END 0xf7 > #define INVALIDATE_TLB_VECTOR_START 0xf0 > #define NUM_INVALIDATE_TLB_VECTORS 8 >+#endif > > /* > * Local APIC timer IRQ vector is on a different priority level, >@@ -120,6 +127,9 @@ > */ > #define MCE_SELF_VECTOR 0xeb > >+/* I-pipe: Lowest number of vectors above */ >+#define FIRST_SYSTEM_VECTOR 0xea >+ > /* > * First APIC vector available to drivers: (vectors 0x30-0xee) we > * start at 0x31(0x41) to spread out vectors evenly between priority >diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h >index 9e2b952..0d8d5e5 100644 >--- a/arch/x86/include/asm/irqflags.h >+++ b/arch/x86/include/asm/irqflags.h >@@ -4,6 +4,10 @@ > #include <asm/processor-flags.h> > > #ifndef __ASSEMBLY__ >+ >+#include <linux/ipipe_base.h> >+#include <linux/ipipe_trace.h> >+ > /* > * Interrupt control: > */ >@@ -12,6 +16,10 @@ static inline unsigned long native_save_fl(void) > { > unsigned long flags; > >+#ifdef CONFIG_IPIPE >+ flags = (!__ipipe_test_root()) << 9; >+ barrier(); >+#else > /* > * "=rm" is safe here, because "pop" adjusts the stack before > * it evaluates its effective address -- this is part of the >@@ -22,31 +30,53 @@ static inline unsigned long native_save_fl(void) > : "=rm" (flags) > : /* no input */ > : "memory"); >+#endif > > return flags; > } > > static inline void native_restore_fl(unsigned long flags) > { >+#ifdef CONFIG_IPIPE >+ barrier(); >+ __ipipe_restore_root(!(flags & X86_EFLAGS_IF)); >+#else > asm volatile("push %0 ; popf" > : /* no output */ > :"g" (flags) > :"memory", "cc"); >+#endif > } > > static inline void native_irq_disable(void) > { >+#ifdef CONFIG_IPIPE >+ ipipe_check_context(ipipe_root_domain); >+ __ipipe_stall_root(); >+ barrier(); >+#else > asm volatile("cli": : :"memory"); >+#endif > } > > static inline void native_irq_enable(void) > { >+#ifdef CONFIG_IPIPE >+ barrier(); >+ __ipipe_unstall_root(); >+#else > asm volatile("sti": : :"memory"); >+#endif > } > > static inline void native_safe_halt(void) > { >+#ifdef CONFIG_IPIPE >+ barrier(); >+ __ipipe_halt_root(); >+#else > asm volatile("sti; hlt": : :"memory"); >+#endif > } > > static inline void native_halt(void) >@@ -71,6 +101,71 @@ static inline void raw_local_irq_restore(unsigned long flags) > native_restore_fl(flags); > } > >+static inline unsigned long raw_mangle_irq_bits(int virt, unsigned long real) >+{ >+ /* >+ * Merge virtual and real interrupt mask bits into a single >+ * (32bit) word. 
>+ */ >+ return (real & ~(1L << 31)) | ((virt != 0) << 31); >+} >+ >+static inline int raw_demangle_irq_bits(unsigned long *x) >+{ >+ int virt = (*x & (1L << 31)) != 0; >+ *x &= ~(1L << 31); >+ return virt; >+} >+ >+#define local_irq_save_hw_notrace(x) \ >+ __asm__ __volatile__("pushf ; pop %0 ; cli":"=g" (x): /* no input */ :"memory") >+#define local_irq_restore_hw_notrace(x) \ >+ __asm__ __volatile__("push %0 ; popf": /* no output */ :"g" (x):"memory", "cc") >+ >+#define local_save_flags_hw(x) __asm__ __volatile__("pushf ; pop %0":"=g" (x): /* no input */) >+ >+#define irqs_disabled_hw() \ >+ ({ \ >+ unsigned long x; \ >+ local_save_flags_hw(x); \ >+ !((x) & X86_EFLAGS_IF); \ >+ }) >+ >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+#define local_irq_disable_hw() do { \ >+ if (!irqs_disabled_hw()) { \ >+ local_irq_disable_hw_notrace(); \ >+ ipipe_trace_begin(0x80000000); \ >+ } \ >+ } while (0) >+#define local_irq_enable_hw() do { \ >+ if (irqs_disabled_hw()) { \ >+ ipipe_trace_end(0x80000000); \ >+ local_irq_enable_hw_notrace(); \ >+ } \ >+ } while (0) >+#define local_irq_save_hw(x) do { \ >+ local_save_flags_hw(x); \ >+ if ((x) & X86_EFLAGS_IF) { \ >+ local_irq_disable_hw_notrace(); \ >+ ipipe_trace_begin(0x80000001); \ >+ } \ >+ } while (0) >+#define local_irq_restore_hw(x) do { \ >+ if ((x) & X86_EFLAGS_IF) \ >+ ipipe_trace_end(0x80000001); \ >+ local_irq_restore_hw_notrace(x); \ >+ } while (0) >+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ >+#define local_irq_save_hw(x) local_irq_save_hw_notrace(x) >+#define local_irq_restore_hw(x) local_irq_restore_hw_notrace(x) >+#define local_irq_enable_hw() local_irq_enable_hw_notrace() >+#define local_irq_disable_hw() local_irq_disable_hw_notrace() >+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ >+ >+#define local_irq_disable_hw_notrace() __asm__ __volatile__("cli": : :"memory") >+#define local_irq_enable_hw_notrace() __asm__ __volatile__("sti": : :"memory") >+ > static inline void raw_local_irq_disable(void) > { > native_irq_disable(); >@@ -104,16 +199,40 @@ static inline void halt(void) > */ > static inline unsigned long __raw_local_irq_save(void) > { >+#ifdef CONFIG_IPIPE >+ unsigned long flags = (!__ipipe_test_and_stall_root()) << 9; >+ barrier(); >+#else > unsigned long flags = __raw_local_save_flags(); > > raw_local_irq_disable(); >+#endif > > return flags; > } > #else > >-#define ENABLE_INTERRUPTS(x) sti >-#define DISABLE_INTERRUPTS(x) cli >+#ifdef CONFIG_IPIPE >+#ifdef CONFIG_X86_32 >+#define DISABLE_INTERRUPTS(clobbers) PER_CPU(ipipe_percpu_darray, %eax); btsl $0,(%eax); sti >+#define ENABLE_INTERRUPTS(clobbers) call __ipipe_unstall_root >+#else /* CONFIG_X86_64 */ >+/* Not worth virtualizing in x86_64 mode. 
*/ >+#define DISABLE_INTERRUPTS(clobbers) cli >+#define ENABLE_INTERRUPTS(clobbers) sti >+#endif /* CONFIG_X86_64 */ >+#define ENABLE_INTERRUPTS_HW_COND sti >+#define DISABLE_INTERRUPTS_HW_COND cli >+#define DISABLE_INTERRUPTS_HW(clobbers) cli >+#define ENABLE_INTERRUPTS_HW(clobbers) sti >+#else /* !CONFIG_IPIPE */ >+#define ENABLE_INTERRUPTS(x) sti >+#define DISABLE_INTERRUPTS(x) cli >+#define ENABLE_INTERRUPTS_HW_COND >+#define DISABLE_INTERRUPTS_HW_COND >+#define DISABLE_INTERRUPTS_HW(clobbers) DISABLE_INTERRUPTS(clobbers) >+#define ENABLE_INTERRUPTS_HW(clobbers) ENABLE_INTERRUPTS(clobbers) >+#endif /* !CONFIG_IPIPE */ > > #ifdef CONFIG_X86_64 > #define SWAPGS swapgs >@@ -156,8 +275,10 @@ static inline unsigned long __raw_local_irq_save(void) > #define raw_local_save_flags(flags) \ > do { (flags) = __raw_local_save_flags(); } while (0) > >-#define raw_local_irq_save(flags) \ >- do { (flags) = __raw_local_irq_save(); } while (0) >+#define raw_local_irq_save(flags) do { \ >+ ipipe_check_context(ipipe_root_domain); \ >+ (flags) = __raw_local_irq_save(); \ >+ } while (0) > > static inline int raw_irqs_disabled_flags(unsigned long flags) > { >diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h >index 4a2d4e0..1ee45d4 100644 >--- a/arch/x86/include/asm/mmu_context.h >+++ b/arch/x86/include/asm/mmu_context.h >@@ -30,11 +30,14 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) > #endif > } > >-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, >- struct task_struct *tsk) >+static inline void __switch_mm(struct mm_struct *prev, struct mm_struct *next, >+ struct task_struct *tsk) > { > unsigned cpu = smp_processor_id(); > >+#ifdef CONFIG_IPIPE_DEBUG_INTERNAL >+ WARN_ON_ONCE(!irqs_disabled_hw()); >+#endif > if (likely(prev != next)) { > /* stop flush ipis for the previous mm */ > cpumask_clear_cpu(cpu, mm_cpumask(prev)); >@@ -70,10 +73,23 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, > #endif > } > >+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, >+ struct task_struct *tsk) >+{ >+ unsigned long flags; >+ local_irq_save_hw_cond(flags); >+ __switch_mm(prev, next, tsk); >+ local_irq_restore_hw_cond(flags); >+} >+ >+#define ipipe_mm_switch_protect(flags) local_irq_save_hw_cond(flags) >+#define ipipe_mm_switch_unprotect(flags) \ >+ local_irq_restore_hw_cond(flags) >+ > #define activate_mm(prev, next) \ > do { \ > paravirt_activate_mm((prev), (next)); \ >- switch_mm((prev), (next), NULL); \ >+ __switch_mm((prev), (next), NULL); \ > } while (0); > > #ifdef CONFIG_X86_32 >diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h >index 139d4c1..3914d19 100644 >--- a/arch/x86/include/asm/nmi.h >+++ b/arch/x86/include/asm/nmi.h >@@ -29,7 +29,7 @@ extern void setup_apic_nmi_watchdog(void *); > extern void stop_apic_nmi_watchdog(void *); > extern void disable_timer_nmi_watchdog(void); > extern void enable_timer_nmi_watchdog(void); >-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); >+extern int (*nmi_watchdog_tick)(struct pt_regs *regs, unsigned reason); > extern void cpu_nmi_set_wd_enabled(void); > > extern atomic_t nmi_active; >diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h >index 13b1885..3e80c19 100644 >--- a/arch/x86/include/asm/processor.h >+++ b/arch/x86/include/asm/processor.h >@@ -435,6 +435,7 @@ struct thread_struct { > unsigned short ds; > unsigned short fsindex; > unsigned short 
gsindex; >+ unsigned long rip; > #endif > #ifdef CONFIG_X86_32 > unsigned long ip; >diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h >index f08f973..093687e 100644 >--- a/arch/x86/include/asm/system.h >+++ b/arch/x86/include/asm/system.h >@@ -126,8 +126,10 @@ do { \ > #define switch_to(prev, next, last) \ > asm volatile(SAVE_CONTEXT \ > "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ >+ "movq $thread_return,%P[threadrip](%[prev])\n\t" /* save RIP */ \ > "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ >- "call __switch_to\n\t" \ >+ "pushq %P[threadrip](%[next])\n\t" /* restore RIP */ \ >+ "jmp __switch_to\n\t" \ > ".globl thread_return\n" \ > "thread_return:\n\t" \ > "movq "__percpu_arg([current_task])",%%rsi\n\t" \ >@@ -141,6 +143,7 @@ do { \ > __switch_canary_oparam \ > : [next] "S" (next), [prev] "D" (prev), \ > [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ >+ [threadrip] "i" (offsetof(struct task_struct, thread.rip)), \ > [ti_flags] "i" (offsetof(struct thread_info, flags)), \ > [_tif_fork] "i" (_TIF_FORK), \ > [thread_info] "i" (offsetof(struct task_struct, stack)), \ >@@ -305,8 +308,13 @@ static inline void native_wbinvd(void) > #else > #define read_cr0() (native_read_cr0()) > #define write_cr0(x) (native_write_cr0(x)) >+#ifdef CONFIG_IPIPE >+#define read_cr2() __raw_get_cpu_var(__ipipe_cr2) >+#define write_cr2(x) __raw_get_cpu_var(__ipipe_cr2) = (x) >+#else /* !CONFIG_IPIPE */ > #define read_cr2() (native_read_cr2()) > #define write_cr2(x) (native_write_cr2(x)) >+#endif /* !CONFIG_IPIPE */ > #define read_cr3() (native_read_cr3()) > #define write_cr3(x) (native_write_cr3(x)) > #define read_cr4() (native_read_cr4()) >diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h >index 4da91ad..25e346e 100644 >--- a/arch/x86/include/asm/traps.h >+++ b/arch/x86/include/asm/traps.h >@@ -82,8 +82,8 @@ extern int panic_on_unrecovered_nmi; > void math_error(void __user *); > void math_emulate(struct math_emu_info *); > #ifndef CONFIG_X86_32 >-asmlinkage void smp_thermal_interrupt(void); > asmlinkage void mce_threshold_interrupt(void); > #endif >+asmlinkage void smp_thermal_interrupt(void); > > #endif /* _ASM_X86_TRAPS_H */ >diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile >index d8e5d0c..847cc01 100644 >--- a/arch/x86/kernel/Makefile >+++ b/arch/x86/kernel/Makefile >@@ -85,6 +85,7 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o > obj-$(CONFIG_KGDB) += kgdb.o > obj-$(CONFIG_VM86) += vm86_32.o > obj-$(CONFIG_EARLY_PRINTK) += early_printk.o >+obj-$(CONFIG_IPIPE) += ipipe.o > > obj-$(CONFIG_HPET_TIMER) += hpet.o > >diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c >index 0e69e17..47586ca 100644 >--- a/arch/x86/kernel/apic/apic.c >+++ b/arch/x86/kernel/apic/apic.c >@@ -446,7 +446,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, > if (evt->features & CLOCK_EVT_FEAT_DUMMY) > return; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > > switch (mode) { > case CLOCK_EVT_MODE_PERIODIC: >@@ -466,7 +466,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, > break; > } > >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > /* >@@ -982,7 +982,7 @@ void lapic_shutdown(void) > if (!cpu_has_apic && !apic_from_smp_config()) > return; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > > #ifdef CONFIG_X86_32 > if (!enabled_via_apicbase) >@@ -992,7 +992,7 @@ void lapic_shutdown(void) > disable_local_APIC(); > > >- local_irq_restore(flags); >+ 
local_irq_restore_hw(flags); > } > > /* >@@ -1166,6 +1166,10 @@ static void __cpuinit lapic_setup_esr(void) > oldvalue, value); > } > >+int __ipipe_check_lapic(void) >+{ >+ return !(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY); >+} > > /** > * setup_local_APIC - setup the local APIC >@@ -1229,7 +1233,7 @@ void __cpuinit setup_local_APIC(void) > value = apic_read(APIC_ISR + i*0x10); > for (j = 31; j >= 0; j--) { > if (value & (1<<j)) >- ack_APIC_irq(); >+ __ack_APIC_irq(); > } > } > >@@ -1735,7 +1739,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) > */ > v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); > if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) >- ack_APIC_irq(); >+ __ack_APIC_irq(); > > inc_irq_stat(irq_spurious_count); > >@@ -2004,13 +2008,13 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) > apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); > #endif > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > disable_local_APIC(); > > if (intr_remapping_enabled) > disable_intr_remapping(); > >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > return 0; > } > >@@ -2025,7 +2029,7 @@ static int lapic_resume(struct sys_device *dev) > if (!apic_pm_state.active) > return 0; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > if (intr_remapping_enabled) { > ioapic_entries = alloc_ioapic_entries(); > if (!ioapic_entries) { >@@ -2091,7 +2095,7 @@ static int lapic_resume(struct sys_device *dev) > free_ioapic_entries(ioapic_entries); > } > restore: >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > return ret; > } >diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c >index 873f81f..aada533 100644 >--- a/arch/x86/kernel/apic/apic_flat_64.c >+++ b/arch/x86/kernel/apic/apic_flat_64.c >@@ -72,9 +72,9 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) > { > unsigned long flags; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > __default_send_IPI_dest_field(mask, vector, apic->dest_logical); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) >diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c >index dc4f486..2ed892b 100644 >--- a/arch/x86/kernel/apic/io_apic.c >+++ b/arch/x86/kernel/apic/io_apic.c >@@ -75,8 +75,11 @@ > */ > int sis_apic_bug = -1; > >-static DEFINE_SPINLOCK(ioapic_lock); >-static DEFINE_SPINLOCK(vector_lock); >+static IPIPE_DEFINE_SPINLOCK(ioapic_lock); >+static IPIPE_DEFINE_SPINLOCK(vector_lock); >+#ifdef CONFIG_IPIPE >+unsigned long bugous_edge_irq_triggers[(NR_IRQS + BITS_PER_LONG - 1) / BITS_PER_LONG]; >+#endif > > /* > * # of IRQ routing registers >@@ -417,6 +420,8 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned > writel(value, &io_apic->data); > } > >+#if !defined(CONFIG_IPIPE) || defined(CONFIG_SMP) >+ > static bool io_apic_level_ack_pending(struct irq_cfg *cfg) > { > struct irq_pin_list *entry; >@@ -440,6 +445,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) > return false; > } > >+#endif /* !CONFIG_IPIPE || CONFIG_SMP */ >+ > union entry_union { > struct { u32 w1, w2; }; > struct IO_APIC_route_entry entry; >@@ -615,6 +622,7 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) > BUG_ON(!cfg); > > spin_lock_irqsave(&ioapic_lock, flags); >+ ipipe_irq_lock(desc->irq); > __mask_IO_APIC_irq(cfg); > spin_unlock_irqrestore(&ioapic_lock, flags); > } >@@ -625,7 
+633,13 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) > unsigned long flags; > > spin_lock_irqsave(&ioapic_lock, flags); >+#ifdef CONFIG_IPIPE >+ if (test_and_clear_bit(desc->irq, &bugous_edge_irq_triggers[0])) >+ __unmask_and_level_IO_APIC_irq(cfg); >+ else >+#endif > __unmask_IO_APIC_irq(cfg); >+ ipipe_irq_unlock(desc->irq); > spin_unlock_irqrestore(&ioapic_lock, flags); > } > >@@ -2250,6 +2264,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) > } > cfg = irq_cfg(irq); > __unmask_IO_APIC_irq(cfg); >+ ipipe_irq_unlock(irq); > spin_unlock_irqrestore(&ioapic_lock, flags); > > return was_pending; >@@ -2529,23 +2544,61 @@ static void irq_complete_move(struct irq_desc **descp) > static inline void irq_complete_move(struct irq_desc **descp) {} > #endif > >+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) >+ >+#ifdef CONFIG_INTR_REMAP >+static void eoi_ioapic_irq(struct irq_desc *desc); >+#else /* !CONFIG_INTR_REMAP */ >+static inline void eoi_ioapic_irq(struct irq_desc *desc) {} >+#endif /* !CONFIG_INTR_REMAP */ >+ >+static void move_apic_irq(unsigned int irq) >+{ >+ struct irq_desc *desc = irq_to_desc(irq); >+ struct irq_cfg *cfg; >+ >+ if (desc->handle_irq == &handle_edge_irq) { >+ spin_lock(&desc->lock); >+ irq_complete_move(&desc); >+ move_native_irq(irq); >+ spin_unlock(&desc->lock); >+ } else if (desc->handle_irq == &handle_fasteoi_irq) { >+ spin_lock(&desc->lock); >+ irq_complete_move(&desc); >+ if (irq_remapped(irq)) >+ eoi_ioapic_irq(desc); >+ if (unlikely(desc->status & IRQ_MOVE_PENDING)) { >+ cfg = desc->chip_data; >+ if (!io_apic_level_ack_pending(cfg)) >+ move_masked_irq(irq); >+ unmask_IO_APIC_irq_desc(desc); >+ } >+ spin_unlock(&desc->lock); >+ } else >+ WARN_ON_ONCE(1); >+} >+#endif /* CONFIG_IPIPE && CONFIG_SMP */ >+ > static void ack_apic_edge(unsigned int irq) > { >+#ifndef CONFIG_IPIPE > struct irq_desc *desc = irq_to_desc(irq); > > irq_complete_move(&desc); > move_native_irq(irq); >- ack_APIC_irq(); >+#endif /* CONFIG_IPIPE */ >+ __ack_APIC_irq(); > } > > atomic_t irq_mis_count; > > static void ack_apic_level(unsigned int irq) > { >- struct irq_desc *desc = irq_to_desc(irq); > unsigned long v; > int i; > struct irq_cfg *cfg; >+#ifndef CONFIG_IPIPE >+ struct irq_desc *desc = irq_to_desc(irq); > int do_unmask_irq = 0; > > irq_complete_move(&desc); >@@ -2628,6 +2681,26 @@ static void ack_apic_level(unsigned int irq) > __unmask_and_level_IO_APIC_irq(cfg); > spin_unlock(&ioapic_lock); > } >+#else /* CONFIG_IPIPE */ >+ /* >+ * Prevent low priority IRQs grabbed by high priority domains >+ * from being delayed, waiting for a high priority interrupt >+ * handler running in a low priority domain to complete. >+ */ >+ cfg = irq_cfg(irq); >+ i = cfg->vector; >+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); >+ spin_lock(&ioapic_lock); >+ if (unlikely(!(v & (1 << (i & 0x1f))))) { >+ /* IO-APIC erratum: see comment above. 
*/ >+ atomic_inc(&irq_mis_count); >+ __mask_and_edge_IO_APIC_irq(cfg); >+ set_bit(irq, &bugous_edge_irq_triggers[0]); >+ } else >+ __mask_IO_APIC_irq(cfg); >+ spin_unlock(&ioapic_lock); >+ __ack_APIC_irq(); >+#endif /* CONFIG_IPIPE */ > } > > #ifdef CONFIG_INTR_REMAP >@@ -2656,14 +2729,14 @@ eoi_ioapic_irq(struct irq_desc *desc) > > static void ir_ack_apic_edge(unsigned int irq) > { >- ack_APIC_irq(); >+ __ack_APIC_irq(); > } > > static void ir_ack_apic_level(unsigned int irq) > { > struct irq_desc *desc = irq_to_desc(irq); > >- ack_APIC_irq(); >+ __ack_APIC_irq(); > eoi_ioapic_irq(desc); > } > #endif /* CONFIG_INTR_REMAP */ >@@ -2677,6 +2750,9 @@ static struct irq_chip ioapic_chip __read_mostly = { > .eoi = ack_apic_level, > #ifdef CONFIG_SMP > .set_affinity = set_ioapic_affinity_irq, >+#ifdef CONFIG_IPIPE >+ .move = move_apic_irq, >+#endif > #endif > .retrigger = ioapic_retrigger_irq, > }; >@@ -2691,6 +2767,9 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { > .eoi = ir_ack_apic_level, > #ifdef CONFIG_SMP > .set_affinity = set_ir_ioapic_affinity_irq, >+#ifdef CONFIG_IPIPE >+ .move = move_apic_irq, >+#endif > #endif > #endif > .retrigger = ioapic_retrigger_irq, >@@ -2736,23 +2815,29 @@ static inline void init_IO_APIC_traps(void) > > static void mask_lapic_irq(unsigned int irq) > { >- unsigned long v; >+ unsigned long v, flags; > >+ local_irq_save_hw_cond(flags); >+ ipipe_irq_lock(irq); > v = apic_read(APIC_LVT0); > apic_write(APIC_LVT0, v | APIC_LVT_MASKED); >+ local_irq_restore_hw_cond(flags); > } > > static void unmask_lapic_irq(unsigned int irq) > { >- unsigned long v; >+ unsigned long v, flags; > >+ local_irq_save_hw_cond(flags); > v = apic_read(APIC_LVT0); > apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); >+ ipipe_irq_unlock(irq); >+ local_irq_restore_hw_cond(flags); > } > > static void ack_lapic_irq(unsigned int irq) > { >- ack_APIC_irq(); >+ __ack_APIC_irq(); > } > > static struct irq_chip lapic_chip __read_mostly = { >@@ -2760,6 +2845,9 @@ static struct irq_chip lapic_chip __read_mostly = { > .mask = mask_lapic_irq, > .unmask = unmask_lapic_irq, > .ack = ack_lapic_irq, >+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) >+ .move = move_apic_irq, >+#endif > }; > > static void lapic_register_intr(int irq, struct irq_desc *desc) >@@ -3007,6 +3095,10 @@ static inline void __init check_timer(void) > "...trying to set up timer as Virtual Wire IRQ...\n"); > > lapic_register_intr(0, desc); >+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64) >+ irq_to_desc(0)->ipipe_ack = __ipipe_ack_edge_irq; >+ irq_to_desc(0)->ipipe_end = __ipipe_end_edge_irq; >+#endif > apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ > enable_8259A_irq(0); > >@@ -3404,6 +3496,9 @@ static struct irq_chip msi_chip = { > .ack = ack_apic_edge, > #ifdef CONFIG_SMP > .set_affinity = set_msi_irq_affinity, >+#ifdef CONFIG_IPIPE >+ .move = move_apic_irq, >+#endif > #endif > .retrigger = ioapic_retrigger_irq, > }; >@@ -3416,6 +3511,9 @@ static struct irq_chip msi_ir_chip = { > .ack = ir_ack_apic_edge, > #ifdef CONFIG_SMP > .set_affinity = ir_set_msi_irq_affinity, >+#ifdef CONFIG_IPIPE >+ .move = move_apic_irq, >+#endif > #endif > #endif > .retrigger = ioapic_retrigger_irq, >@@ -3704,6 +3802,9 @@ static struct irq_chip ht_irq_chip = { > .ack = ack_apic_edge, > #ifdef CONFIG_SMP > .set_affinity = set_ht_irq_affinity, >+#ifdef CONFIG_IPIPE >+ .move = move_apic_irq, >+#endif > #endif > .retrigger = ioapic_retrigger_irq, > }; >@@ -4075,6 +4176,14 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int 
*polarity) > return 0; > } > >+#ifdef CONFIG_IPIPE >+unsigned __ipipe_get_ioapic_irq_vector(int irq) >+{ >+ return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ? >+ ipipe_apic_irq_vector(irq) : irq_cfg(irq)->vector; >+} >+#endif /* CONFIG_IPIPE */ >+ > /* > * This function currently is only a helper for the i386 smp boot process where > * we need to reprogram the ioredtbls to cater for the cpus which have come online >diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c >index 08385e0..f5ad117 100644 >--- a/arch/x86/kernel/apic/ipi.c >+++ b/arch/x86/kernel/apic/ipi.c >@@ -29,12 +29,12 @@ void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) > * to an arbitrary mask, so I do a unicast to each CPU instead. > * - mbligh > */ >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, > query_cpu), vector, APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, >@@ -46,14 +46,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, > > /* See Hack comment above */ > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > if (query_cpu == this_cpu) > continue; > __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, > query_cpu), vector, APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, >@@ -68,12 +68,12 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, > * should be modified to do 1 message per cluster ID - mbligh > */ > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) > __default_send_IPI_dest_field( > apic->cpu_to_logical_apicid(query_cpu), vector, > apic->dest_logical); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, >@@ -85,7 +85,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, > > /* See Hack comment above */ > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > if (query_cpu == this_cpu) > continue; >@@ -93,7 +93,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, > apic->cpu_to_logical_apicid(query_cpu), vector, > apic->dest_logical); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > #ifdef CONFIG_X86_32 >@@ -109,10 +109,10 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) > if (WARN_ONCE(!mask, "empty IPI mask")) > return; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); > __default_send_IPI_dest_field(mask, vector, apic->dest_logical); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > void default_send_IPI_allbutself(int vector) >diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c >index 7ff61d6..a72056e 100644 >--- a/arch/x86/kernel/apic/nmi.c >+++ b/arch/x86/kernel/apic/nmi.c >@@ -59,6 +59,10 @@ static unsigned int nmi_hz = HZ; > static DEFINE_PER_CPU(short, wd_enabled); > static int endflag __initdata; > >+static int default_nmi_watchdog_tick(struct pt_regs * regs, unsigned reason); >+int (*nmi_watchdog_tick) (struct pt_regs * regs, unsigned reason) = 
&default_nmi_watchdog_tick; >+EXPORT_SYMBOL(nmi_watchdog_tick); >+ > static inline unsigned int get_nmi_count(int cpu) > { > return per_cpu(irq_stat, cpu).__nmi_count; >@@ -387,7 +391,7 @@ void touch_nmi_watchdog(void) > EXPORT_SYMBOL(touch_nmi_watchdog); > > notrace __kprobes int >-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) >+default_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) > { > /* > * Since current_thread_info()-> is always on the stack, and we >diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c >index a5371ec..442f45c 100644 >--- a/arch/x86/kernel/apic/x2apic_cluster.c >+++ b/arch/x86/kernel/apic/x2apic_cluster.c >@@ -61,13 +61,13 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > __x2apic_send_IPI_dest( > per_cpu(x86_cpu_to_logical_apicid, query_cpu), > vector, apic->dest_logical); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void >@@ -79,7 +79,7 @@ static void > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > if (query_cpu == this_cpu) > continue; >@@ -87,7 +87,7 @@ static void > per_cpu(x86_cpu_to_logical_apicid, query_cpu), > vector, apic->dest_logical); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void x2apic_send_IPI_allbutself(int vector) >@@ -98,7 +98,7 @@ static void x2apic_send_IPI_allbutself(int vector) > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_online_cpu(query_cpu) { > if (query_cpu == this_cpu) > continue; >@@ -106,7 +106,7 @@ static void x2apic_send_IPI_allbutself(int vector) > per_cpu(x86_cpu_to_logical_apicid, query_cpu), > vector, apic->dest_logical); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void x2apic_send_IPI_all(int vector) >diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c >index a8989aa..fba85fa 100644 >--- a/arch/x86/kernel/apic/x2apic_phys.c >+++ b/arch/x86/kernel/apic/x2apic_phys.c >@@ -62,12 +62,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), > vector, APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void >@@ -79,14 +79,14 @@ static void > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > if (query_cpu != this_cpu) > __x2apic_send_IPI_dest( > per_cpu(x86_cpu_to_apicid, query_cpu), > vector, APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void x2apic_send_IPI_allbutself(int vector) >@@ -97,14 +97,14 @@ static void x2apic_send_IPI_allbutself(int vector) > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_online_cpu(query_cpu) { > if (query_cpu == this_cpu) > continue; > __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), > vector, APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void x2apic_send_IPI_all(int vector) >diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c >index 
228d982..c249555 100644 >--- a/arch/x86/kernel/cpu/mtrr/cyrix.c >+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c >@@ -18,7 +18,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, > > arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > > ccr3 = getCx86(CX86_CCR3); > setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ >@@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, > rcr = getCx86(CX86_RCR_BASE + reg); > setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ > >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > shift = ((unsigned char *) base)[1] & 0x0f; > *base >>= PAGE_SHIFT; >@@ -178,6 +178,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, > unsigned long size, mtrr_type type) > { > unsigned char arr, arr_type, arr_size; >+ unsigned long flags; > > arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ > >@@ -221,6 +222,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, > } > } > >+ local_irq_save_hw(flags); >+ > prepare_set(); > > base <<= PAGE_SHIFT; >@@ -230,6 +233,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, > setCx86(CX86_RCR_BASE + reg, arr_type); > > post_set(); >+ >+ local_irq_restore_hw(flags); > } > > typedef struct { >@@ -247,8 +252,10 @@ static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; > > static void cyrix_set_all(void) > { >+ unsigned long flags; > int i; > >+ local_irq_save_hw(flags); > prepare_set(); > > /* the CCRs are not contiguous */ >@@ -263,6 +270,7 @@ static void cyrix_set_all(void) > } > > post_set(); >+ local_irq_restore_hw(flags); > } > > static struct mtrr_ops cyrix_mtrr_ops = { >diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c >index 55da0c5..5594a98 100644 >--- a/arch/x86/kernel/cpu/mtrr/generic.c >+++ b/arch/x86/kernel/cpu/mtrr/generic.c >@@ -635,7 +635,7 @@ static void generic_set_all(void) > unsigned long mask, count; > unsigned long flags; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > prepare_set(); > > /* Actually set the state */ >@@ -645,7 +645,7 @@ static void generic_set_all(void) > pat_init(); > > post_set(); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > /* Use the atomic bitops to update the global mask */ > for (count = 0; count < sizeof mask * 8; ++count) { >@@ -669,12 +669,12 @@ static void generic_set_all(void) > static void generic_set_mtrr(unsigned int reg, unsigned long base, > unsigned long size, mtrr_type type) > { >- unsigned long flags; >+ unsigned long flags, _flags; > struct mtrr_var_range *vr; > > vr = &mtrr_state.var_ranges[reg]; > >- local_irq_save(flags); >+ local_irq_save_full(flags, _flags); > prepare_set(); > > if (size == 0) { >@@ -695,7 +695,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, > } > > post_set(); >- local_irq_restore(flags); >+ local_irq_restore_full(flags, _flags); > } > > int generic_validate_add_page(unsigned long base, unsigned long size, >diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c >index 2d8a371..8c6afa5 100644 >--- a/arch/x86/kernel/dumpstack.c >+++ b/arch/x86/kernel/dumpstack.c >@@ -327,6 +327,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) > local_irq_enable(); > do_exit(SIGBUS); > } >+EXPORT_SYMBOL_GPL(die_nmi); > > static int __init oops_setup(char *s) > { >diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c >index f7dd2a7..37b2338 100644 >--- 
a/arch/x86/kernel/dumpstack_32.c >+++ b/arch/x86/kernel/dumpstack_32.c >@@ -108,6 +108,9 @@ void show_registers(struct pt_regs *regs) > printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", > TASK_COMM_LEN, current->comm, task_pid_nr(current), > current_thread_info(), current, task_thread_info(current)); >+#ifdef CONFIG_IPIPE >+ printk(KERN_EMERG "I-pipe domain %s\n", ipipe_current_domain->name); >+#endif /* CONFIG_IPIPE */ > /* > * When in-kernel, we also print out the stack and code at the > * time of the fault.. >diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c >index a071e6b..de2dde2 100644 >--- a/arch/x86/kernel/dumpstack_64.c >+++ b/arch/x86/kernel/dumpstack_64.c >@@ -254,6 +254,11 @@ void show_registers(struct pt_regs *regs) > sp = regs->sp; > printk("CPU %d ", cpu); > __show_regs(regs, 1); >+#ifdef CONFIG_IPIPE >+ if (ipipe_current_domain != ipipe_root_domain) >+ printk("I-pipe domain %s\n", ipipe_current_domain->name); >+ else >+#endif /* CONFIG_IPIPE */ > printk("Process %s (pid: %d, threadinfo %p, task %p)\n", > cur->comm, cur->pid, task_thread_info(cur), cur); > >diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S >index c097e7d..5918f48 100644 >--- a/arch/x86/kernel/entry_32.S >+++ b/arch/x86/kernel/entry_32.S >@@ -44,6 +44,7 @@ > #include <linux/linkage.h> > #include <asm/thread_info.h> > #include <asm/irqflags.h> >+#include <asm/ipipe_base.h> > #include <asm/errno.h> > #include <asm/segment.h> > #include <asm/smp.h> >@@ -79,6 +80,59 @@ > > #define nr_syscalls ((syscall_table_size)/4) > >+#ifdef CONFIG_IPIPE >+#define EMULATE_ROOT_IRET(bypass) \ >+ call __ipipe_unstall_iret_root ; \ >+ TRACE_IRQS_ON ; \ >+ bypass: \ >+ movl PT_EAX(%esp),%eax >+#define TEST_PREEMPTIBLE(regs) call __ipipe_kpreempt_root ; testl %eax,%eax >+#define CATCH_ROOT_SYSCALL(bypass1,bypass2) \ >+ movl %esp,%eax ; \ >+ call __ipipe_syscall_root ; \ >+ testl %eax,%eax ; \ >+ js bypass1 ; \ >+ jne bypass2 ; \ >+ movl PT_ORIG_EAX(%esp),%eax >+#define PUSH_XCODE(v) pushl $ ex_ ## v >+#define PUSH_XVEC(v) pushl $ ex_ ## v >+#define HANDLE_EXCEPTION(code) movl %code,%ecx ; \ >+ call __ipipe_handle_exception ; \ >+ testl %eax,%eax ; \ >+ jnz restore_ret >+#define DIVERT_EXCEPTION(code) movl $(__USER_DS), %ecx ; \ >+ movl %ecx, %ds ; \ >+ movl %ecx, %es ; \ >+ movl %esp, %eax ; \ >+ movl $ex_ ## code,%edx ; \ >+ call __ipipe_divert_exception ; \ >+ testl %eax,%eax ; \ >+ jnz restore_ret >+ >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+# define IPIPE_TRACE_IRQ_ENTER \ >+ lea PT_EIP-4(%esp), %ebp; \ >+ movl PT_ORIG_EAX(%esp), %eax; \ >+ call ipipe_trace_begin >+# define IPIPE_TRACE_IRQ_EXIT \ >+ pushl %eax; \ >+ movl PT_ORIG_EAX+4(%esp), %eax; \ >+ call ipipe_trace_end; \ >+ popl %eax >+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ >+#define IPIPE_TRACE_IRQ_ENTER >+#define IPIPE_TRACE_IRQ_EXIT >+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ >+#else /* !CONFIG_IPIPE */ >+#define EMULATE_ROOT_IRET(bypass) >+#define TEST_PREEMPTIBLE(regs) testl $X86_EFLAGS_IF,PT_EFLAGS(regs) >+#define CATCH_ROOT_SYSCALL(bypass1,bypass2) >+#define PUSH_XCODE(v) pushl $v >+#define PUSH_XVEC(v) pushl v >+#define HANDLE_EXCEPTION(code) call *%code >+#define DIVERT_EXCEPTION(code) >+#endif /* CONFIG_IPIPE */ >+ > #ifdef CONFIG_PREEMPT > #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF > #else >@@ -318,6 +372,7 @@ > .endm > > ENTRY(ret_from_fork) >+ ENABLE_INTERRUPTS_HW_COND > CFI_STARTPROC > pushl %eax > CFI_ADJUST_CFA_OFFSET 4 >@@ -345,7 +400,7 @@ END(ret_from_fork) > 
RING0_PTREGS_FRAME > ret_from_exception: > preempt_stop(CLBR_ANY) >-ret_from_intr: >+ENTRY(ret_from_intr) > GET_THREAD_INFO(%ebp) > check_userspace: > movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS >@@ -369,14 +424,14 @@ END(ret_from_exception) > > #ifdef CONFIG_PREEMPT > ENTRY(resume_kernel) >- DISABLE_INTERRUPTS(CLBR_ANY) >+ DISABLE_INTERRUPTS_HW(CLBR_ANY) > cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? > jnz restore_all > need_resched: > movl TI_flags(%ebp), %ecx # need_resched set ? > testb $_TIF_NEED_RESCHED, %cl > jz restore_all >- testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? >+ TEST_PREEMPTIBLE(%esp) # interrupts off (exception path) ? > jz restore_all > call preempt_schedule_irq > jmp need_resched >@@ -424,7 +479,7 @@ sysenter_past_esp: > pushl %eax > CFI_ADJUST_CFA_OFFSET 4 > SAVE_ALL >- ENABLE_INTERRUPTS(CLBR_NONE) >+ ENABLE_INTERRUPTS_HW(CLBR_NONE) > > /* > * Load the potential sixth argument from user stack. >@@ -440,6 +495,7 @@ sysenter_past_esp: > .previous > > GET_THREAD_INFO(%ebp) >+ CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_out) > > testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) > jnz sysenter_audit >@@ -448,6 +504,7 @@ sysenter_do_call: > jae syscall_badsys > call *sys_call_table(,%eax,4) > movl %eax,PT_EAX(%esp) >+sysenter_tail: > LOCKDEP_SYS_EXIT > DISABLE_INTERRUPTS(CLBR_ANY) > TRACE_IRQS_OFF >@@ -456,10 +513,13 @@ sysenter_do_call: > jne sysexit_audit > sysenter_exit: > /* if something modifies registers it must also disable sysexit */ >+ EMULATE_ROOT_IRET(sysenter_out) > movl PT_EIP(%esp), %edx > movl PT_OLDESP(%esp), %ecx > xorl %ebp,%ebp >- TRACE_IRQS_ON >+#ifndef CONFIG_IPIPE >+ TRACE_IRQS_ON >+#endif > 1: mov PT_FS(%esp), %fs > PTGS_TO_GS > ENABLE_INTERRUPTS_SYSEXIT >@@ -520,6 +580,7 @@ ENTRY(system_call) > CFI_ADJUST_CFA_OFFSET 4 > SAVE_ALL > GET_THREAD_INFO(%ebp) >+ CATCH_ROOT_SYSCALL(syscall_exit,restore_ret) > # system call tracing in operation / emulation > testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) > jnz syscall_trace_entry >@@ -552,6 +613,10 @@ restore_all_notrace: > CFI_REMEMBER_STATE > je ldt_ss # returning to user-space with LDT SS > restore_nocheck: >+#ifdef CONFIG_IPIPE >+ call __ipipe_unstall_iret_root >+#endif /* CONFIG_IPIPE */ >+restore_ret: > RESTORE_REGS 4 # skip orig_eax/error_code > CFI_ADJUST_CFA_OFFSET -4 > irq_return: >@@ -559,7 +624,7 @@ irq_return: > .section .fixup,"ax" > ENTRY(iret_exc) > pushl $0 # no error code >- pushl $do_iret_error >+ PUSH_XCODE(do_iret_error) > jmp error_code > .previous > .section __ex_table,"a" >@@ -613,7 +678,7 @@ ldt_ss: > /* Disable interrupts, but do not irqtrace this section: we > * will soon execute iret and the tracer was already set to > * the irqstate after the iret */ >- DISABLE_INTERRUPTS(CLBR_EAX) >+ DISABLE_INTERRUPTS_HW(CLBR_EAX) > lss (%esp), %esp /* switch to espfix segment */ > CFI_ADJUST_CFA_OFFSET -8 > jmp restore_nocheck >@@ -627,6 +692,7 @@ work_pending: > testb $_TIF_NEED_RESCHED, %cl > jz work_notifysig > work_resched: >+ ENABLE_INTERRUPTS_HW_COND > call schedule > LOCKDEP_SYS_EXIT > DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt >@@ -799,6 +865,48 @@ END(irq_entries_start) > END(interrupt) > .previous > >+#ifdef CONFIG_IPIPE >+ .p2align CONFIG_X86_L1_CACHE_SHIFT >+common_interrupt: >+ addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ >+ SAVE_ALL >+ IPIPE_TRACE_IRQ_ENTER >+ movl %esp, %eax >+ call *ipipe_irq_handler >+ IPIPE_TRACE_IRQ_EXIT >+ testl %eax,%eax >+ jnz ret_from_intr >+ jmp restore_ret >+ CFI_ENDPROC 
>+ >+#define BUILD_INTERRUPT3(name, nr, fn) \ >+ENTRY(name) \ >+ RING0_INT_FRAME; \ >+ pushl $~(nr); \ >+ CFI_ADJUST_CFA_OFFSET 4; \ >+ SAVE_ALL; \ >+ IPIPE_TRACE_IRQ_ENTER; \ >+ movl %esp, %eax; \ >+ call *ipipe_irq_handler; \ >+ IPIPE_TRACE_IRQ_EXIT; \ >+ testl %eax,%eax; \ >+ jnz ret_from_intr; \ >+ jmp restore_ret; \ >+ CFI_ENDPROC >+ >+#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+ BUILD_INTERRUPT(ipipe_ipi0,IPIPE_SERVICE_VECTOR0) >+ BUILD_INTERRUPT(ipipe_ipi1,IPIPE_SERVICE_VECTOR1) >+ BUILD_INTERRUPT(ipipe_ipi2,IPIPE_SERVICE_VECTOR2) >+ BUILD_INTERRUPT(ipipe_ipi3,IPIPE_SERVICE_VECTOR3) >+#ifdef CONFIG_SMP >+ BUILD_INTERRUPT(ipipe_ipiX,IPIPE_CRITICAL_VECTOR) >+#endif >+#endif >+ >+#else /* !CONFIG_IPIPE */ > /* > * the CPU automatically disables interrupts when executing an IRQ vector, > * so IRQ-flags tracing has to follow that: >@@ -829,6 +937,8 @@ ENDPROC(name) > > #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) > >+#endif /* !CONFIG_IPIPE */ >+ > /* The include is where all of the SMP etc. interrupts come from */ > #include <asm/entry_arch.h> > >@@ -836,7 +946,7 @@ ENTRY(coprocessor_error) > RING0_INT_FRAME > pushl $0 > CFI_ADJUST_CFA_OFFSET 4 >- pushl $do_coprocessor_error >+ PUSH_XCODE(do_coprocessor_error) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -846,7 +956,7 @@ ENTRY(simd_coprocessor_error) > RING0_INT_FRAME > pushl $0 > CFI_ADJUST_CFA_OFFSET 4 >- pushl $do_simd_coprocessor_error >+ PUSH_XCODE(do_simd_coprocessor_error) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -856,7 +966,7 @@ ENTRY(device_not_available) > RING0_INT_FRAME > pushl $-1 # mark this as an int > CFI_ADJUST_CFA_OFFSET 4 >- pushl $do_device_not_available >+ PUSH_XCODE(do_device_not_available) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -881,7 +991,7 @@ ENTRY(overflow) > RING0_INT_FRAME > pushl $0 > CFI_ADJUST_CFA_OFFSET 4 >- pushl $do_overflow >+ PUSH_XCODE(do_overflow) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -891,7 +1001,7 @@ ENTRY(bounds) > RING0_INT_FRAME > pushl $0 > CFI_ADJUST_CFA_OFFSET 4 >- pushl $do_bounds >+ PUSH_XCODE(do_bounds) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -901,7 +1011,7 @@ ENTRY(invalid_op) > RING0_INT_FRAME > pushl $0 > CFI_ADJUST_CFA_OFFSET 4 >- pushl $do_invalid_op >+ PUSH_XCODE(do_invalid_op) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -911,7 +1021,7 @@ ENTRY(coprocessor_segment_overrun) > RING0_INT_FRAME > pushl $0 > CFI_ADJUST_CFA_OFFSET 4 >- pushl $do_coprocessor_segment_overrun >+ PUSH_XCODE(do_coprocessor_segment_overrun) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -919,7 +1029,7 @@ END(coprocessor_segment_overrun) > > ENTRY(invalid_TSS) > RING0_EC_FRAME >- pushl $do_invalid_TSS >+ PUSH_XCODE(do_invalid_TSS) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -927,7 +1037,7 @@ END(invalid_TSS) > > ENTRY(segment_not_present) > RING0_EC_FRAME >- pushl $do_segment_not_present >+ PUSH_XCODE(do_segment_not_present) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -935,7 +1045,7 @@ END(segment_not_present) > > ENTRY(stack_segment) > RING0_EC_FRAME >- pushl $do_stack_segment >+ PUSH_XCODE(do_stack_segment) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -943,7 +1053,7 @@ END(stack_segment) > > ENTRY(alignment_check) > RING0_EC_FRAME >- pushl $do_alignment_check >+ PUSH_XCODE(do_alignment_check) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > 
CFI_ENDPROC >@@ -953,7 +1063,7 @@ ENTRY(divide_error) > RING0_INT_FRAME > pushl $0 # no error code > CFI_ADJUST_CFA_OFFSET 4 >- pushl $do_divide_error >+ PUSH_XCODE(do_divide_error) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -964,7 +1074,7 @@ ENTRY(machine_check) > RING0_INT_FRAME > pushl $0 > CFI_ADJUST_CFA_OFFSET 4 >- pushl machine_check_vector >+ PUSH_XVEC(machine_check_vector) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -975,7 +1085,7 @@ ENTRY(spurious_interrupt_bug) > RING0_INT_FRAME > pushl $0 > CFI_ADJUST_CFA_OFFSET 4 >- pushl $do_spurious_interrupt_bug >+ PUSH_XCODE(do_spurious_interrupt_bug) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >@@ -1210,7 +1320,7 @@ syscall_table_size=(.-sys_call_table) > > ENTRY(page_fault) > RING0_EC_FRAME >- pushl $do_page_fault >+ PUSH_XCODE(do_page_fault) > CFI_ADJUST_CFA_OFFSET 4 > ALIGN > error_code: >@@ -1260,7 +1370,7 @@ error_code: > movl %ecx, %es > TRACE_IRQS_OFF > movl %esp,%eax # pt_regs pointer >- call *%edi >+ HANDLE_EXCEPTION(edi) > jmp ret_from_exception > CFI_ENDPROC > END(page_fault) >@@ -1304,6 +1414,7 @@ debug_stack_correct: > CFI_ADJUST_CFA_OFFSET 4 > SAVE_ALL > TRACE_IRQS_OFF >+ DIVERT_EXCEPTION(do_debug) > xorl %edx,%edx # error code 0 > movl %esp,%eax # pt_regs pointer > call do_debug >@@ -1404,6 +1515,7 @@ ENTRY(int3) > CFI_ADJUST_CFA_OFFSET 4 > SAVE_ALL > TRACE_IRQS_OFF >+ DIVERT_EXCEPTION(do_int3) > xorl %edx,%edx # zero error code > movl %esp,%eax # pt_regs pointer > call do_int3 >@@ -1413,7 +1525,7 @@ END(int3) > > ENTRY(general_protection) > RING0_EC_FRAME >- pushl $do_general_protection >+ PUSH_XCODE(do_general_protection) > CFI_ADJUST_CFA_OFFSET 4 > jmp error_code > CFI_ENDPROC >diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S >index b5c061f..ce29b45 100644 >--- a/arch/x86/kernel/entry_64.S >+++ b/arch/x86/kernel/entry_64.S >@@ -48,6 +48,7 @@ > #include <asm/unistd.h> > #include <asm/thread_info.h> > #include <asm/hw_irq.h> >+#include <asm/ipipe_base.h> > #include <asm/page_types.h> > #include <asm/irqflags.h> > #include <asm/paravirt.h> >@@ -61,6 +62,13 @@ > #define __AUDIT_ARCH_LE 0x40000000 > > .code64 >+ >+#ifdef CONFIG_IPIPE >+#define PREEMPT_SCHEDULE_IRQ call __ipipe_preempt_schedule_irq >+#else /* !CONFIG_IPIPE */ >+#define PREEMPT_SCHEDULE_IRQ call preempt_schedule_irq >+#endif /* !CONFIG_IPIPE */ >+ > #ifdef CONFIG_FUNCTION_TRACER > #ifdef CONFIG_DYNAMIC_FTRACE > ENTRY(mcount) >@@ -336,7 +344,10 @@ ENTRY(save_args) > /* > * We entered an interrupt context - irqs are off: > */ >-2: TRACE_IRQS_OFF >+2: >+#ifndef CONFIG_IPIPE >+ TRACE_IRQS_OFF >+#endif > ret > CFI_ENDPROC > END(save_args) >@@ -402,6 +413,7 @@ ENTRY(ret_from_fork) > CFI_ADJUST_CFA_OFFSET 8 > popf # reset kernel eflags > CFI_ADJUST_CFA_OFFSET -8 >+ ENABLE_INTERRUPTS_HW_COND > > call schedule_tail # rdi: 'prev' task parameter > >@@ -477,6 +489,17 @@ ENTRY(system_call_after_swapgs) > movq %rax,ORIG_RAX-ARGOFFSET(%rsp) > movq %rcx,RIP-ARGOFFSET(%rsp) > CFI_REL_OFFSET rip,RIP-ARGOFFSET >+#ifdef CONFIG_IPIPE >+ pushq %rdi >+ pushq %rax >+ leaq -(ARGOFFSET-16)(%rsp),%rdi # regs for handler >+ call __ipipe_syscall_root_thunk >+ testl %eax, %eax >+ popq %rax >+ popq %rdi >+ js ret_from_sys_call >+ jnz sysret_fastexit >+#endif > GET_THREAD_INFO(%rcx) > testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) > jnz tracesys >@@ -506,6 +529,7 @@ sysret_check: > * sysretq will re-enable interrupts: > */ > TRACE_IRQS_ON >+sysret_fastexit: > movq RIP-ARGOFFSET(%rsp),%rcx > CFI_REGISTER rip,rcx > RESTORE_ARGS 
0,-ARG_SKIP,1 >@@ -517,6 +541,8 @@ sysret_check: > /* Handle reschedules */ > /* edx: work, edi: workmask */ > sysret_careful: >+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),%edx >+ jnz ret_from_sys_call_trace > bt $TIF_NEED_RESCHED,%edx > jnc sysret_signal > TRACE_IRQS_ON >@@ -528,6 +554,16 @@ sysret_careful: > CFI_ADJUST_CFA_OFFSET -8 > jmp sysret_check > >+ret_from_sys_call_trace: >+ TRACE_IRQS_ON >+ sti >+ SAVE_REST >+ FIXUP_TOP_OF_STACK %rdi >+ movq %rsp,%rdi >+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ >+ RESTORE_REST >+ jmp int_ret_from_sys_call >+ > /* Handle a signal */ > sysret_signal: > TRACE_IRQS_ON >@@ -800,7 +836,29 @@ END(interrupt) > CFI_ADJUST_CFA_OFFSET 10*8 > call save_args > PARTIAL_FRAME 0 >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+ pushq %rbp >+ leaq RIP-8(%rdi), %rbp # make interrupted address show up in trace >+ pushq %rdi >+ movq ORIG_RAX(%rdi), %rdi # IRQ number >+ notq %rdi # ...is inverted, fix up >+ call ipipe_trace_begin >+ popq %rdi >+ popq %rbp >+ >+ call \func >+ >+ pushq %rbp >+ pushq %rax >+ movq 8-ARGOFFSET+ORIG_RAX(%rbp), %rdi >+ leaq 8-ARGOFFSET+RIP-8(%rbp), %rbp >+ notq %rdi >+ call ipipe_trace_end >+ popq %rax >+ popq %rbp >+#else > call \func >+#endif > .endm > > /* >@@ -809,9 +867,24 @@ END(interrupt) > */ > .p2align CONFIG_X86_L1_CACHE_SHIFT > common_interrupt: >+#ifdef CONFIG_IPIPE >+ XCPT_FRAME >+ addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ >+ interrupt *ipipe_irq_handler >+ testl %eax, %eax >+ jnz ret_from_intr >+ decl PER_CPU_VAR(irq_count) >+ leaveq >+ CFI_DEF_CFA_REGISTER rsp >+ CFI_ADJUST_CFA_OFFSET -8 >+ testl $3,CS-ARGOFFSET(%rsp) >+ jz restore_args >+ jmp retint_swapgs_notrace >+#else /* !CONFIG_IPIPE */ > XCPT_FRAME > addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ > interrupt do_IRQ >+#endif /* !CONFIG_IPIPE */ > /* 0(%rsp): old_rsp-ARGOFFSET */ > ret_from_intr: > DISABLE_INTERRUPTS(CLBR_NONE) >@@ -820,7 +893,7 @@ ret_from_intr: > leaveq > CFI_DEF_CFA_REGISTER rsp > CFI_ADJUST_CFA_OFFSET -8 >-exit_intr: >+ENTRY(exit_intr) > GET_THREAD_INFO(%rcx) > testl $3,CS-ARGOFFSET(%rsp) > je retint_kernel >@@ -840,20 +913,20 @@ retint_check: > jnz retint_careful > > retint_swapgs: /* return to user-space */ >+ TRACE_IRQS_IRETQ > /* > * The iretq could re-enable interrupts: > */ >- DISABLE_INTERRUPTS(CLBR_ANY) >- TRACE_IRQS_IRETQ >+retint_swapgs_notrace: > SWAPGS >+retint_noswapgs: > jmp restore_args > > retint_restore_args: /* return to kernel space */ >- DISABLE_INTERRUPTS(CLBR_ANY) >+ TRACE_IRQS_IRETQ > /* > * The iretq could re-enable interrupts: > */ >- TRACE_IRQS_IRETQ > restore_args: > RESTORE_ARGS 0,8,0 > >@@ -935,7 +1008,15 @@ ENTRY(retint_kernel) > jnc retint_restore_args > bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ > jnc retint_restore_args >- call preempt_schedule_irq >+#ifdef CONFIG_IPIPE >+ /* >+ * We may have preempted call_softirq before __do_softirq raised or >+ * after it lowered the preemption counter. >+ */ >+ cmpl $0,PER_CPU_VAR(irq_count) >+ jge retint_restore_args >+#endif >+ PREEMPT_SCHEDULE_IRQ > jmp exit_intr > #endif > >@@ -945,16 +1026,31 @@ END(common_interrupt) > /* > * APIC interrupts. 
> */ >-.macro apicinterrupt num sym do_sym >+ .macro apicinterrupt num sym do_sym > ENTRY(\sym) > INTR_FRAME > pushq $~(\num) > CFI_ADJUST_CFA_OFFSET 8 >+#ifdef CONFIG_IPIPE >+ interrupt *ipipe_irq_handler >+ testl %eax, %eax >+ jnz ret_from_intr >+ decl PER_CPU_VAR(irq_count) >+ leaveq >+ CFI_DEF_CFA_REGISTER rsp >+ CFI_ADJUST_CFA_OFFSET -8 >+ testl $3,CS-ARGOFFSET(%rsp) >+ jz restore_args >+ jmp retint_swapgs_notrace >+ CFI_ENDPROC >+ .endm >+#else /* !CONFIG_IPIPE */ > interrupt \do_sym > jmp ret_from_intr > CFI_ENDPROC > END(\sym) > .endm >+#endif /* !CONFIG_IPIPE */ > > #ifdef CONFIG_SMP > apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ >@@ -979,6 +1075,7 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ > invalidate_interrupt1 smp_invalidate_interrupt > apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \ > invalidate_interrupt2 smp_invalidate_interrupt >+#ifndef CONFIG_IPIPE > apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \ > invalidate_interrupt3 smp_invalidate_interrupt > apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \ >@@ -989,6 +1086,7 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \ > invalidate_interrupt6 smp_invalidate_interrupt > apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ > invalidate_interrupt7 smp_invalidate_interrupt >+#endif /* !CONFIG_IPIPE */ > #endif > > apicinterrupt THRESHOLD_APIC_VECTOR \ >@@ -1023,7 +1121,7 @@ apicinterrupt LOCAL_PENDING_VECTOR \ > /* > * Exception entry points. > */ >-.macro zeroentry sym do_sym >+.macro zeroentry sym do_sym ex_code > ENTRY(\sym) > INTR_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1034,13 +1132,26 @@ ENTRY(\sym) > DEFAULT_FRAME 0 > movq %rsp,%rdi /* pt_regs pointer */ > xorl %esi,%esi /* no error code */ >+#ifdef CONFIG_IPIPE >+ movq $\ex_code,%rdx >+ call __ipipe_handle_exception /* handle(regs, error_code, ex_code) */ >+ testl %eax, %eax >+ jz error_exit >+ movl %ebx,%eax >+ RESTORE_REST >+ DISABLE_INTERRUPTS(CLBR_NONE) >+ testl %eax,%eax >+ jne retint_noswapgs >+ jmp retint_swapgs_notrace >+#else /* !CONFIG_IPIPE */ > call \do_sym >+#endif /* !CONFIG_IPIPE */ > jmp error_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > >-.macro paranoidzeroentry sym do_sym >+.macro paranoidzeroentry sym do_sym ex_code=0 > ENTRY(\sym) > INTR_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1050,14 +1161,27 @@ ENTRY(\sym) > call save_paranoid > TRACE_IRQS_OFF > movq %rsp,%rdi /* pt_regs pointer */ >+#ifdef CONFIG_IPIPE >+ .if \ex_code >+ movq $\ex_code,%rsi >+ call __ipipe_divert_exception /* handle(regs, ex_code) */ >+ testl %eax,%eax >+ jnz 1f >+ movq %rsp,%rdi >+ .endif >+#endif > xorl %esi,%esi /* no error code */ > call \do_sym >+#ifdef CONFIG_IPIPE >+ xorl %eax,%eax /* tell paranoid_exit to propagate the exception */ >+1: >+#endif > jmp paranoid_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > >-.macro paranoidzeroentry_ist sym do_sym ist >+.macro paranoidzeroentry_ist sym do_sym ist ex_code=0 > ENTRY(\sym) > INTR_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1067,17 +1191,30 @@ ENTRY(\sym) > call save_paranoid > TRACE_IRQS_OFF > movq %rsp,%rdi /* pt_regs pointer */ >+#ifdef CONFIG_IPIPE >+ .if \ex_code >+ movq $\ex_code,%rsi >+ call __ipipe_divert_exception /* handle(regs, ex_code) */ >+ testl %eax,%eax >+ jnz 1f >+ movq %rsp,%rdi >+ .endif >+#endif > xorl %esi,%esi /* no error code */ > PER_CPU(init_tss, %rbp) > subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) > call \do_sym > addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) >+#ifdef CONFIG_IPIPE >+ xorl %eax,%eax /* tell paranoid_exit to propagate the exception 
*/ >+1: >+#endif > jmp paranoid_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > >-.macro errorentry sym do_sym >+.macro errorentry sym do_sym ex_code > ENTRY(\sym) > XCPT_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1088,14 +1225,27 @@ ENTRY(\sym) > movq %rsp,%rdi /* pt_regs pointer */ > movq ORIG_RAX(%rsp),%rsi /* get error code */ > movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ >+#ifdef CONFIG_IPIPE >+ movq $\ex_code,%rdx >+ call __ipipe_handle_exception /* handle(regs, error_code, ex_code) */ >+ testl %eax, %eax >+ jz error_exit >+ movl %ebx,%eax >+ RESTORE_REST >+ DISABLE_INTERRUPTS(CLBR_NONE) >+ testl %eax,%eax >+ jne retint_noswapgs >+ jmp retint_swapgs_notrace >+#else /* !CONFIG_IPIPE */ > call \do_sym >+#endif /* !CONFIG_IPIPE */ > jmp error_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > > /* error code is on the stack already */ >-.macro paranoiderrorentry sym do_sym >+.macro paranoiderrorentry sym do_sym ex_code=0 > ENTRY(\sym) > XCPT_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1105,27 +1255,40 @@ ENTRY(\sym) > DEFAULT_FRAME 0 > TRACE_IRQS_OFF > movq %rsp,%rdi /* pt_regs pointer */ >+#ifdef CONFIG_IPIPE >+ .if \ex_code >+ movq $\ex_code,%rsi >+ call __ipipe_divert_exception /* handle(regs, ex_code) */ >+ testl %eax,%eax >+ jnz 1f >+ movq %rsp,%rdi >+ .endif >+#endif > movq ORIG_RAX(%rsp),%rsi /* get error code */ > movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ > call \do_sym >+#ifdef CONFIG_IPIPE >+ xorl %eax,%eax /* tell paranoid_exit to propagate the exception */ >+1: >+#endif > jmp paranoid_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > >-zeroentry divide_error do_divide_error >-zeroentry overflow do_overflow >-zeroentry bounds do_bounds >-zeroentry invalid_op do_invalid_op >-zeroentry device_not_available do_device_not_available >+zeroentry divide_error do_divide_error ex_do_divide_error >+zeroentry overflow do_overflow ex_do_overflow >+zeroentry bounds do_bounds ex_do_bounds >+zeroentry invalid_op do_invalid_op ex_do_invalid_op >+zeroentry device_not_available do_device_not_available ex_do_device_not_available > paranoiderrorentry double_fault do_double_fault >-zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun >-errorentry invalid_TSS do_invalid_TSS >-errorentry segment_not_present do_segment_not_present >-zeroentry spurious_interrupt_bug do_spurious_interrupt_bug >-zeroentry coprocessor_error do_coprocessor_error >-errorentry alignment_check do_alignment_check >-zeroentry simd_coprocessor_error do_simd_coprocessor_error >+zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun ex_do_coprocessor_segment_overrun >+errorentry invalid_TSS do_invalid_TSS ex_do_invalid_TSS >+errorentry segment_not_present do_segment_not_present ex_do_segment_not_present >+zeroentry spurious_interrupt_bug do_spurious_interrupt_bug ex_do_spurious_interrupt_bug >+zeroentry coprocessor_error do_coprocessor_error ex_do_coprocessor_error >+errorentry alignment_check do_alignment_check ex_do_alignment_check >+zeroentry simd_coprocessor_error do_simd_coprocessor_error ex_do_simd_coprocessor_error > > /* Reload gs selector with exception handling */ > /* edi: new selector */ >@@ -1255,14 +1418,18 @@ ENTRY(call_softirq) > CFI_REL_OFFSET rbp,0 > mov %rsp,%rbp > CFI_DEF_CFA_REGISTER rbp >+ DISABLE_INTERRUPTS_HW_COND > incl PER_CPU_VAR(irq_count) > cmove PER_CPU_VAR(irq_stack_ptr),%rsp >+ ENABLE_INTERRUPTS_HW_COND > push %rbp # backlink for old unwinder > call __do_softirq >+ DISABLE_INTERRUPTS_HW_COND > 
leaveq > CFI_DEF_CFA_REGISTER rsp > CFI_ADJUST_CFA_OFFSET -8 > decl PER_CPU_VAR(irq_count) >+ ENABLE_INTERRUPTS_HW_COND > ret > CFI_ENDPROC > END(call_softirq) >@@ -1371,16 +1538,16 @@ END(xen_failsafe_callback) > */ > .pushsection .kprobes.text, "ax" > >-paranoidzeroentry_ist debug do_debug DEBUG_STACK >-paranoidzeroentry_ist int3 do_int3 DEBUG_STACK >+paranoidzeroentry_ist debug do_debug DEBUG_STACK ex_do_debug >+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK ex_do_int3 > paranoiderrorentry stack_segment do_stack_segment > #ifdef CONFIG_XEN > zeroentry xen_debug do_debug > zeroentry xen_int3 do_int3 > errorentry xen_stack_segment do_stack_segment > #endif >-errorentry general_protection do_general_protection >-errorentry page_fault do_page_fault >+errorentry general_protection do_general_protection ex_do_general_protection >+errorentry page_fault do_page_fault ex_do_page_fault > #ifdef CONFIG_X86_MCE > paranoidzeroentry machine_check *machine_check_vector(%rip) > #endif >@@ -1403,8 +1570,13 @@ ENTRY(paranoid_exit) > INTR_FRAME > DISABLE_INTERRUPTS(CLBR_NONE) > TRACE_IRQS_OFF >+paranoid_notrace: > testl %ebx,%ebx /* swapgs needed? */ > jnz paranoid_restore >+#ifdef CONFIG_IPIPE >+ testl %eax,%eax >+ jnz paranoid_swapgs >+#endif > testl $3,CS(%rsp) > jnz paranoid_userspace > paranoid_swapgs: >diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c >index 23c1679..1c00022 100644 >--- a/arch/x86/kernel/i8253.c >+++ b/arch/x86/kernel/i8253.c >@@ -11,6 +11,7 @@ > #include <linux/delay.h> > #include <linux/init.h> > #include <linux/io.h> >+#include <linux/ipipe.h> > > #include <asm/i8253.h> > #include <asm/hpet.h> >@@ -130,6 +131,12 @@ static cycle_t pit_read(struct clocksource *cs) > int count; > u32 jifs; > >+#ifdef CONFIG_IPIPE >+ if (!__ipipe_pipeline_head_p(ipipe_root_domain)) >+ /* We don't really own the PIT. 
*/ >+ return (cycle_t)(jiffies * LATCH) + (LATCH - 1) - old_count; >+#endif /* CONFIG_IPIPE */ >+ > spin_lock_irqsave(&i8253_lock, flags); > /* > * Although our caller may have the read side of xtime_lock, >diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c >index df89102..cfb29a2 100644 >--- a/arch/x86/kernel/i8259.c >+++ b/arch/x86/kernel/i8259.c >@@ -32,7 +32,7 @@ > */ > > static int i8259A_auto_eoi; >-DEFINE_SPINLOCK(i8259A_lock); >+IPIPE_DEFINE_SPINLOCK(i8259A_lock); > static void mask_and_ack_8259A(unsigned int); > > struct irq_chip i8259A_chip = { >@@ -69,6 +69,7 @@ void disable_8259A_irq(unsigned int irq) > unsigned long flags; > > spin_lock_irqsave(&i8259A_lock, flags); >+ ipipe_irq_lock(irq); > cached_irq_mask |= mask; > if (irq & 8) > outb(cached_slave_mask, PIC_SLAVE_IMR); >@@ -79,15 +80,18 @@ void disable_8259A_irq(unsigned int irq) > > void enable_8259A_irq(unsigned int irq) > { >- unsigned int mask = ~(1 << irq); >+ unsigned int mask = (1 << irq); > unsigned long flags; > > spin_lock_irqsave(&i8259A_lock, flags); >- cached_irq_mask &= mask; >- if (irq & 8) >- outb(cached_slave_mask, PIC_SLAVE_IMR); >- else >- outb(cached_master_mask, PIC_MASTER_IMR); >+ if (cached_irq_mask & mask) { >+ cached_irq_mask &= ~mask; >+ if (irq & 8) >+ outb(cached_slave_mask, PIC_SLAVE_IMR); >+ else >+ outb(cached_master_mask, PIC_MASTER_IMR); >+ ipipe_irq_unlock(irq); >+ } > spin_unlock_irqrestore(&i8259A_lock, flags); > } > >@@ -168,6 +172,18 @@ static void mask_and_ack_8259A(unsigned int irq) > */ > if (cached_irq_mask & irqmask) > goto spurious_8259A_irq; >+#ifdef CONFIG_IPIPE >+ if (irq == 0) { >+ /* >+ * Fast timer ack -- don't mask (unless supposedly >+ * spurious). We trace outb's in order to detect >+ * broken hardware inducing large delays. >+ */ >+ outb(0x60, PIC_MASTER_CMD); /* Specific EOI to master. */ >+ spin_unlock_irqrestore(&i8259A_lock, flags); >+ return; >+ } >+#endif /* CONFIG_IPIPE */ > cached_irq_mask |= irqmask; > > handle_real_irq: >diff --git a/arch/x86/kernel/ipipe.c b/arch/x86/kernel/ipipe.c >new file mode 100644 >index 0000000..116fee7 >--- /dev/null >+++ b/arch/x86/kernel/ipipe.c >@@ -0,0 +1,1084 @@ >+/* -*- linux-c -*- >+ * linux/arch/x86/kernel/ipipe.c >+ * >+ * Copyright (C) 2002-2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ * >+ * Architecture-dependent I-PIPE support for x86. 
>+ */ >+ >+#include <linux/kernel.h> >+#include <linux/smp.h> >+#include <linux/module.h> >+#include <linux/sched.h> >+#include <linux/interrupt.h> >+#include <linux/slab.h> >+#include <linux/irq.h> >+#include <linux/clockchips.h> >+#include <linux/kprobes.h> >+#include <asm/unistd.h> >+#include <asm/system.h> >+#include <asm/atomic.h> >+#include <asm/hw_irq.h> >+#include <asm/irq.h> >+#include <asm/desc.h> >+#include <asm/io.h> >+#ifdef CONFIG_X86_LOCAL_APIC >+#include <asm/tlbflush.h> >+#include <asm/fixmap.h> >+#include <asm/bitops.h> >+#include <asm/mpspec.h> >+#ifdef CONFIG_X86_IO_APIC >+#include <asm/io_apic.h> >+#endif /* CONFIG_X86_IO_APIC */ >+#include <asm/apic.h> >+#endif /* CONFIG_X86_LOCAL_APIC */ >+#include <asm/traps.h> >+ >+int __ipipe_tick_irq = 0; /* Legacy timer */ >+ >+DEFINE_PER_CPU(struct pt_regs, __ipipe_tick_regs); >+ >+DEFINE_PER_CPU(unsigned long, __ipipe_cr2); >+EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_cr2); >+ >+#ifdef CONFIG_SMP >+ >+static cpumask_t __ipipe_cpu_sync_map; >+ >+static cpumask_t __ipipe_cpu_lock_map; >+ >+static unsigned long __ipipe_critical_lock; >+ >+static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier); >+ >+static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); >+ >+static void (*__ipipe_cpu_sync) (void); >+ >+#endif /* CONFIG_SMP */ >+ >+/* >+ * ipipe_trigger_irq() -- Push the interrupt at front of the pipeline >+ * just like if it has been actually received from a hw source. Also >+ * works for virtual interrupts. >+ */ >+int ipipe_trigger_irq(unsigned int irq) >+{ >+ struct pt_regs regs; >+ unsigned long flags; >+ >+#ifdef CONFIG_IPIPE_DEBUG >+ if (irq >= IPIPE_NR_IRQS) >+ return -EINVAL; >+ if (ipipe_virtual_irq_p(irq)) { >+ if (!test_bit(irq - IPIPE_VIRQ_BASE, >+ &__ipipe_virtual_irq_map)) >+ return -EINVAL; >+ } else if (irq_to_desc(irq) == NULL) >+ return -EINVAL; >+#endif >+ local_irq_save_hw(flags); >+ regs.flags = flags; >+ regs.orig_ax = irq; /* Positive value - IRQ won't be acked */ >+ regs.cs = __KERNEL_CS; >+ __ipipe_handle_irq(®s); >+ local_irq_restore_hw(flags); >+ >+ return 1; >+} >+ >+int ipipe_get_sysinfo(struct ipipe_sysinfo *info) >+{ >+ info->ncpus = num_online_cpus(); >+ info->cpufreq = ipipe_cpu_freq(); >+ info->archdep.tmirq = __ipipe_tick_irq; >+#ifdef CONFIG_X86_TSC >+ info->archdep.tmfreq = ipipe_cpu_freq(); >+#else /* !CONFIG_X86_TSC */ >+ info->archdep.tmfreq = CLOCK_TICK_RATE; >+#endif /* CONFIG_X86_TSC */ >+ >+ return 0; >+} >+ >+#ifdef CONFIG_X86_UV >+asmlinkage void uv_bau_message_interrupt(struct pt_regs *regs); >+#endif >+#ifdef CONFIG_X86_MCE_THRESHOLD >+asmlinkage void smp_threshold_interrupt(void); >+#endif >+#ifdef CONFIG_X86_NEW_MCE >+asmlinkage void smp_mce_self_interrupt(void); >+#endif >+ >+static void __ipipe_ack_irq(unsigned irq, struct irq_desc *desc) >+{ >+ desc->ipipe_ack(irq, desc); >+} >+ >+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq) >+{ >+ irq_to_desc(irq)->status &= ~IRQ_DISABLED; >+} >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+ >+static void __ipipe_noack_apic(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+static void __ipipe_ack_apic(unsigned irq, struct irq_desc *desc) >+{ >+ __ack_APIC_irq(); >+} >+ >+static void __ipipe_null_handler(unsigned irq, void *cookie) >+{ >+} >+ >+#endif /* CONFIG_X86_LOCAL_APIC */ >+ >+/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw >+ interrupts are off, and secondary CPUs are still lost in space. 
*/ >+ >+void __init __ipipe_enable_pipeline(void) >+{ >+ unsigned int vector, irq; >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+ >+ /* Map the APIC system vectors. */ >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR), >+ (ipipe_irq_handler_t)&smp_apic_timer_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR), >+ (ipipe_irq_handler_t)&smp_spurious_interrupt, >+ NULL, >+ &__ipipe_noack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(ERROR_APIC_VECTOR), >+ (ipipe_irq_handler_t)&smp_error_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0), >+ &__ipipe_null_handler, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1), >+ &__ipipe_null_handler, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2), >+ &__ipipe_null_handler, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3), >+ &__ipipe_null_handler, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+#ifdef CONFIG_X86_THERMAL_VECTOR >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(THERMAL_APIC_VECTOR), >+ (ipipe_irq_handler_t)&smp_thermal_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_X86_THERMAL_VECTOR */ >+ >+#ifdef CONFIG_X86_MCE_THRESHOLD >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(THRESHOLD_APIC_VECTOR), >+ (ipipe_irq_handler_t)&smp_threshold_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_X86_MCE_THRESHOLD */ >+ >+#ifdef CONFIG_X86_NEW_MCE >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(MCE_SELF_VECTOR), >+ (ipipe_irq_handler_t)&smp_mce_self_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_X86_MCE_THRESHOLD */ >+ >+#ifdef CONFIG_X86_UV >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(UV_BAU_MESSAGE), >+ (ipipe_irq_handler_t)&uv_bau_message_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_X86_UV */ >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(GENERIC_INTERRUPT_VECTOR), >+ (ipipe_irq_handler_t)&smp_generic_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+#ifdef CONFIG_PERF_COUNTERS >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(LOCAL_PENDING_VECTOR), >+ (ipipe_irq_handler_t)&perf_pending_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_PERF_COUNTERS */ >+ >+#endif /* CONFIG_X86_LOCAL_APIC */ >+ >+#ifdef CONFIG_SMP >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(RESCHEDULE_VECTOR), >+ (ipipe_irq_handler_t)&smp_reschedule_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ for (vector = INVALIDATE_TLB_VECTOR_START; >+ vector <= INVALIDATE_TLB_VECTOR_END; ++vector) >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(vector), >+ (ipipe_irq_handler_t)&smp_invalidate_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, 
>+ ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR), >+ (ipipe_irq_handler_t)&smp_call_function_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(CALL_FUNCTION_SINGLE_VECTOR), >+ (ipipe_irq_handler_t)&smp_call_function_single_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ IRQ_MOVE_CLEANUP_VECTOR, >+ (ipipe_irq_handler_t)&smp_irq_move_cleanup_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(REBOOT_VECTOR), >+ (ipipe_irq_handler_t)&smp_reboot_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#else >+ (void)vector; >+#endif /* CONFIG_SMP */ >+ >+ /* Finally, virtualize the remaining ISA and IO-APIC >+ * interrupts. Interrupts which have already been virtualized >+ * will just beget a silent -EPERM error since >+ * IPIPE_SYSTEM_MASK has been passed for them, that's ok. */ >+ >+ for (irq = 0; irq < NR_IRQS; irq++) >+ /* >+ * Fails for IPIPE_CRITICAL_IPI and IRQ_MOVE_CLEANUP_VECTOR, >+ * but that's ok. >+ */ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ irq, >+ (ipipe_irq_handler_t)&do_IRQ, >+ NULL, >+ &__ipipe_ack_irq, >+ IPIPE_STDROOT_MASK); >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+ /* Eventually allow these vectors to be reprogrammed. */ >+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &= ~IPIPE_SYSTEM_MASK; >+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &= ~IPIPE_SYSTEM_MASK; >+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &= ~IPIPE_SYSTEM_MASK; >+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &= ~IPIPE_SYSTEM_MASK; >+#endif /* CONFIG_X86_LOCAL_APIC */ >+} >+ >+#ifdef CONFIG_SMP >+ >+cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask) >+{ >+ cpumask_t oldmask; >+ >+ if (irq_to_desc(irq)->chip->set_affinity == NULL) >+ return CPU_MASK_NONE; >+ >+ if (cpus_empty(cpumask)) >+ return CPU_MASK_NONE; /* Return mask value -- no change. */ >+ >+ cpus_and(cpumask, cpumask, cpu_online_map); >+ if (cpus_empty(cpumask)) >+ return CPU_MASK_NONE; /* Error -- bad mask value or non-routable IRQ. */ >+ >+ cpumask_copy(&oldmask, irq_to_desc(irq)->affinity); >+ irq_to_desc(irq)->chip->set_affinity(irq, &cpumask); >+ >+ return oldmask; >+} >+ >+int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask) >+{ >+ unsigned long flags; >+ int self; >+ >+ if (ipi != IPIPE_SERVICE_IPI0 && >+ ipi != IPIPE_SERVICE_IPI1 && >+ ipi != IPIPE_SERVICE_IPI2 && >+ ipi != IPIPE_SERVICE_IPI3) >+ return -EINVAL; >+ >+ local_irq_save_hw(flags); >+ >+ self = cpu_isset(ipipe_processor_id(),cpumask); >+ cpu_clear(ipipe_processor_id(), cpumask); >+ >+ if (!cpus_empty(cpumask)) >+ apic->send_IPI_mask(&cpumask, ipipe_apic_irq_vector(ipi)); >+ >+ if (self) >+ ipipe_trigger_irq(ipi); >+ >+ local_irq_restore_hw(flags); >+ >+ return 0; >+} >+ >+/* Always called with hw interrupts off. */ >+ >+void __ipipe_do_critical_sync(unsigned irq, void *cookie) >+{ >+ int cpu = ipipe_processor_id(); >+ >+ cpu_set(cpu, __ipipe_cpu_sync_map); >+ >+ /* Now we are in sync with the lock requestor running on another >+ CPU. Enter a spinning wait until he releases the global >+ lock. */ >+ spin_lock(&__ipipe_cpu_barrier); >+ >+ /* Got it. Now get out. */ >+ >+ if (__ipipe_cpu_sync) >+ /* Call the sync routine if any. 
*/ >+ __ipipe_cpu_sync(); >+ >+ spin_unlock(&__ipipe_cpu_barrier); >+ >+ cpu_clear(cpu, __ipipe_cpu_sync_map); >+} >+ >+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd) >+{ >+ ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = &__ipipe_ack_apic; >+ ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync; >+ ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL; >+ /* Immediately handle in the current domain but *never* pass */ >+ ipd->irqs[IPIPE_CRITICAL_IPI].control = >+ IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK; >+} >+ >+#endif /* CONFIG_SMP */ >+ >+/* >+ * ipipe_critical_enter() -- Grab the superlock excluding all CPUs but >+ * the current one from a critical section. This lock is used when we >+ * must enforce a global critical section for a single CPU in a >+ * possibly SMP system whichever context the CPUs are running. >+ */ >+unsigned long ipipe_critical_enter(void (*syncfn) (void)) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ >+#ifdef CONFIG_SMP >+ if (unlikely(num_online_cpus() == 1)) >+ return flags; >+ >+ { >+ int cpu = ipipe_processor_id(); >+ cpumask_t lock_map; >+ >+ if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) { >+ while (test_and_set_bit(0, &__ipipe_critical_lock)) { >+ int n = 0; >+ do { >+ cpu_relax(); >+ } while (++n < cpu); >+ } >+ >+ spin_lock(&__ipipe_cpu_barrier); >+ >+ __ipipe_cpu_sync = syncfn; >+ >+ /* Send the sync IPI to all processors but the current one. */ >+ apic->send_IPI_allbutself(IPIPE_CRITICAL_VECTOR); >+ >+ cpus_andnot(lock_map, cpu_online_map, __ipipe_cpu_lock_map); >+ >+ while (!cpus_equal(__ipipe_cpu_sync_map, lock_map)) >+ cpu_relax(); >+ } >+ >+ atomic_inc(&__ipipe_critical_count); >+ } >+#endif /* CONFIG_SMP */ >+ >+ return flags; >+} >+ >+/* ipipe_critical_exit() -- Release the superlock. */ >+ >+void ipipe_critical_exit(unsigned long flags) >+{ >+#ifdef CONFIG_SMP >+ if (num_online_cpus() == 1) >+ goto out; >+ >+ if (atomic_dec_and_test(&__ipipe_critical_count)) { >+ spin_unlock(&__ipipe_cpu_barrier); >+ >+ while (!cpus_empty(__ipipe_cpu_sync_map)) >+ cpu_relax(); >+ >+ cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map); >+ clear_bit(0, &__ipipe_critical_lock); >+ smp_mb__after_clear_bit(); >+ } >+out: >+#endif /* CONFIG_SMP */ >+ >+ local_irq_restore_hw(flags); >+} >+ >+static inline void __fixup_if(int s, struct pt_regs *regs) >+{ >+ /* >+ * Have the saved hw state look like the domain stall bit, so >+ * that __ipipe_unstall_iret_root() restores the proper >+ * pipeline state for the root stage upon exit. >+ */ >+ if (s) >+ regs->flags &= ~X86_EFLAGS_IF; >+ else >+ regs->flags |= X86_EFLAGS_IF; >+} >+ >+#ifdef CONFIG_X86_32 >+ >+/* >+ * Check the stall bit of the root domain to make sure the existing >+ * preemption opportunity upon in-kernel resumption could be >+ * exploited. In case a rescheduling could take place, the root stage >+ * is stalled before the hw interrupts are re-enabled. This routine >+ * must be called with hw interrupts off. >+ */ >+ >+asmlinkage int __ipipe_kpreempt_root(struct pt_regs regs) >+{ >+ if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) >+ /* Root stage is stalled: rescheduling denied. */ >+ return 0; >+ >+ __ipipe_stall_root(); >+ trace_hardirqs_off(); >+ local_irq_enable_hw_notrace(); >+ >+ return 1; /* Ok, may reschedule now. */ >+} >+ >+asmlinkage void __ipipe_unstall_iret_root(struct pt_regs regs) >+{ >+ struct ipipe_percpu_domain_data *p; >+ >+ /* Emulate IRET's handling of the interrupt flag. 
*/ >+ >+ local_irq_disable_hw(); >+ >+ p = ipipe_root_cpudom_ptr(); >+ >+ /* >+ * Restore the software state as it used to be on kernel >+ * entry. CAUTION: NMIs must *not* return through this >+ * emulation. >+ */ >+ if (raw_irqs_disabled_flags(regs.flags)) { >+ if (!__test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) >+ trace_hardirqs_off(); >+ regs.flags |= X86_EFLAGS_IF; >+ } else { >+ if (test_bit(IPIPE_STALL_FLAG, &p->status)) { >+ trace_hardirqs_on(); >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ } >+ /* >+ * We could have received and logged interrupts while >+ * stalled in the syscall path: play the log now to >+ * release any pending event. The SYNC_BIT prevents >+ * infinite recursion in case of flooding. >+ */ >+ if (unlikely(__ipipe_ipending_p(p))) >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ } >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+ ipipe_trace_end(0x8000000D); >+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ >+} >+ >+#else /* !CONFIG_X86_32 */ >+ >+#ifdef CONFIG_PREEMPT >+ >+asmlinkage void preempt_schedule_irq(void); >+ >+void __ipipe_preempt_schedule_irq(void) >+{ >+ struct ipipe_percpu_domain_data *p; >+ unsigned long flags; >+ /* >+ * We have no IRQ state fixup on entry to exceptions in >+ * x86_64, so we have to stall the root stage before >+ * rescheduling. >+ */ >+ BUG_ON(!irqs_disabled_hw()); >+ local_irq_save(flags); >+ local_irq_enable_hw(); >+ preempt_schedule_irq(); /* Ok, may reschedule now. */ >+ local_irq_disable_hw(); >+ >+ /* >+ * Flush any pending interrupt that may have been logged after >+ * preempt_schedule_irq() stalled the root stage before >+ * returning to us, and now. >+ */ >+ p = ipipe_root_cpudom_ptr(); >+ if (unlikely(__ipipe_ipending_p(p))) { >+ add_preempt_count(PREEMPT_ACTIVE); >+ trace_hardirqs_on(); >+ clear_bit(IPIPE_STALL_FLAG, &p->status); >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ sub_preempt_count(PREEMPT_ACTIVE); >+ } >+ >+ __local_irq_restore_nosync(flags); >+} >+ >+#endif /* CONFIG_PREEMPT */ >+ >+#endif /* !CONFIG_X86_32 */ >+ >+void __ipipe_halt_root(void) >+{ >+ struct ipipe_percpu_domain_data *p; >+ >+ /* Emulate sti+hlt sequence over the root domain. */ >+ >+ local_irq_disable_hw(); >+ >+ p = ipipe_root_cpudom_ptr(); >+ >+ trace_hardirqs_on(); >+ clear_bit(IPIPE_STALL_FLAG, &p->status); >+ >+ if (unlikely(__ipipe_ipending_p(p))) { >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ local_irq_enable_hw(); >+ } else { >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+ ipipe_trace_end(0x8000000E); >+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ >+ asm volatile("sti; hlt": : :"memory"); >+ } >+} >+ >+static void do_machine_check_vector(struct pt_regs *regs, long error_code) >+{ >+#ifdef CONFIG_X86_MCE >+#ifdef CONFIG_X86_32 >+ extern void (*machine_check_vector)(struct pt_regs *, long error_code); >+ machine_check_vector(regs, error_code); >+#else >+ do_machine_check(regs, error_code); >+#endif >+#endif /* CONFIG_X86_MCE */ >+} >+ >+/* Work around genksyms's issue with over-qualification in decls. 
*/ >+ >+typedef void dotraplinkage __ipipe_exhandler(struct pt_regs *, long); >+ >+typedef __ipipe_exhandler *__ipipe_exptr; >+ >+static __ipipe_exptr __ipipe_std_extable[] = { >+ >+ [ex_do_divide_error] = &do_divide_error, >+ [ex_do_overflow] = &do_overflow, >+ [ex_do_bounds] = &do_bounds, >+ [ex_do_invalid_op] = &do_invalid_op, >+ [ex_do_coprocessor_segment_overrun] = &do_coprocessor_segment_overrun, >+ [ex_do_invalid_TSS] = &do_invalid_TSS, >+ [ex_do_segment_not_present] = &do_segment_not_present, >+ [ex_do_stack_segment] = &do_stack_segment, >+ [ex_do_general_protection] = do_general_protection, >+ [ex_do_page_fault] = (__ipipe_exptr)&do_page_fault, >+ [ex_do_spurious_interrupt_bug] = &do_spurious_interrupt_bug, >+ [ex_do_coprocessor_error] = &do_coprocessor_error, >+ [ex_do_alignment_check] = &do_alignment_check, >+ [ex_machine_check_vector] = &do_machine_check_vector, >+ [ex_do_simd_coprocessor_error] = &do_simd_coprocessor_error, >+ [ex_do_device_not_available] = &do_device_not_available, >+#ifdef CONFIG_X86_32 >+ [ex_do_iret_error] = &do_iret_error, >+#endif >+}; >+ >+#ifdef CONFIG_KGDB >+#include <linux/kgdb.h> >+ >+static int __ipipe_xlate_signo[] = { >+ >+ [ex_do_divide_error] = SIGFPE, >+ [ex_do_debug] = SIGTRAP, >+ [2] = -1, >+ [ex_do_int3] = SIGTRAP, >+ [ex_do_overflow] = SIGSEGV, >+ [ex_do_bounds] = SIGSEGV, >+ [ex_do_invalid_op] = SIGILL, >+ [ex_do_device_not_available] = -1, >+ [8] = -1, >+ [ex_do_coprocessor_segment_overrun] = SIGFPE, >+ [ex_do_invalid_TSS] = SIGSEGV, >+ [ex_do_segment_not_present] = SIGBUS, >+ [ex_do_stack_segment] = SIGBUS, >+ [ex_do_general_protection] = SIGSEGV, >+ [ex_do_page_fault] = SIGSEGV, >+ [ex_do_spurious_interrupt_bug] = -1, >+ [ex_do_coprocessor_error] = -1, >+ [ex_do_alignment_check] = SIGBUS, >+ [ex_machine_check_vector] = -1, >+ [ex_do_simd_coprocessor_error] = -1, >+ [20 ... 31] = -1, >+#ifdef CONFIG_X86_32 >+ [ex_do_iret_error] = SIGSEGV, >+#endif >+}; >+#endif /* CONFIG_KGDB */ >+ >+int __ipipe_handle_exception(struct pt_regs *regs, long error_code, int vector) >+{ >+ bool root_entry = false; >+ unsigned long flags = 0; >+ unsigned long cr2 = 0; >+ >+ if (ipipe_root_domain_p) { >+ root_entry = true; >+ >+ local_save_flags(flags); >+ /* >+ * Replicate hw interrupt state into the virtual mask >+ * before calling the I-pipe event handler over the >+ * root domain. Also required later when calling the >+ * Linux exception handler. >+ */ >+ if (irqs_disabled_hw()) >+ local_irq_disable(); >+ } >+#ifdef CONFIG_KGDB >+ /* catch exception KGDB is interested in over non-root domains */ >+ else if (__ipipe_xlate_signo[vector] >= 0 && >+ !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector], >+ error_code, regs)) >+ return 1; >+#endif /* CONFIG_KGDB */ >+ >+ if (vector == ex_do_page_fault) >+ cr2 = native_read_cr2(); >+ >+ if (unlikely(ipipe_trap_notify(vector, regs))) { >+ if (root_entry) >+ local_irq_restore_nosync(flags); >+ return 1; >+ } >+ >+ if (likely(ipipe_root_domain_p)) { >+ /* >+ * In case we faulted in the iret path, regs.flags do not >+ * match the root domain state. The fault handler or the >+ * low-level return code may evaluate it. Fix this up, either >+ * by the root state sampled on entry or, if we migrated to >+ * root, with the current state. >+ */ >+ __fixup_if(root_entry ? raw_irqs_disabled_flags(flags) : >+ raw_irqs_disabled(), regs); >+ } else { >+ /* Detect unhandled faults over non-root domains. */ >+ struct ipipe_domain *ipd = ipipe_current_domain; >+ >+ /* Switch to root so that Linux can handle the fault cleanly. 
*/ >+ __ipipe_current_domain = ipipe_root_domain; >+ >+ ipipe_trace_panic_freeze(); >+ >+ /* Always warn about user land and unfixable faults. */ >+ if ((error_code & 4) || !search_exception_tables(instruction_pointer(regs))) { >+ printk(KERN_ERR "BUG: Unhandled exception over domain" >+ " %s at 0x%lx - switching to ROOT\n", >+ ipd->name, instruction_pointer(regs)); >+ dump_stack(); >+ ipipe_trace_panic_dump(); >+#ifdef CONFIG_IPIPE_DEBUG >+ /* Also report fixable ones when debugging is enabled. */ >+ } else { >+ printk(KERN_WARNING "WARNING: Fixable exception over " >+ "domain %s at 0x%lx - switching to ROOT\n", >+ ipd->name, instruction_pointer(regs)); >+ dump_stack(); >+ ipipe_trace_panic_dump(); >+#endif /* CONFIG_IPIPE_DEBUG */ >+ } >+ } >+ >+ if (vector == ex_do_page_fault) >+ write_cr2(cr2); >+ >+ __ipipe_std_extable[vector](regs, error_code); >+ >+ /* >+ * Relevant for 64-bit: Restore root domain state as the low-level >+ * return code will not align it to regs.flags. >+ */ >+ if (root_entry) >+ local_irq_restore_nosync(flags); >+ >+ return 0; >+} >+ >+int __ipipe_divert_exception(struct pt_regs *regs, int vector) >+{ >+ bool root_entry = false; >+ unsigned long flags = 0; >+ >+ if (ipipe_root_domain_p) { >+ root_entry = true; >+ >+ local_save_flags(flags); >+ >+ if (irqs_disabled_hw()) { >+ /* >+ * Same root state handling as in >+ * __ipipe_handle_exception. >+ */ >+ local_irq_disable(); >+ } >+ } >+#ifdef CONFIG_KGDB >+ /* catch int1 and int3 over non-root domains */ >+ else { >+#ifdef CONFIG_X86_32 >+ if (vector != ex_do_device_not_available) >+#endif >+ { >+ unsigned int condition = 0; >+ >+ if (vector == 1) >+ get_debugreg(condition, 6); >+ if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs)) >+ return 1; >+ } >+ } >+#endif /* CONFIG_KGDB */ >+ >+ if (unlikely(ipipe_trap_notify(vector, regs))) { >+ if (root_entry) >+ local_irq_restore_nosync(flags); >+ return 1; >+ } >+ >+ /* see __ipipe_handle_exception */ >+ if (likely(ipipe_root_domain_p)) >+ __fixup_if(root_entry ? raw_irqs_disabled_flags(flags) : >+ raw_irqs_disabled(), regs); >+ /* >+ * No need to restore root state in the 64-bit case, the Linux handler >+ * and the return code will take care of it. >+ */ >+ >+ return 0; >+} >+ >+int __ipipe_syscall_root(struct pt_regs *regs) >+{ >+ struct ipipe_percpu_domain_data *p; >+ unsigned long flags; >+ int ret; >+ >+ /* >+ * This routine either returns: >+ * 0 -- if the syscall is to be passed to Linux; >+ * >0 -- if the syscall should not be passed to Linux, and no >+ * tail work should be performed; >+ * <0 -- if the syscall should not be passed to Linux but the >+ * tail work has to be performed (for handling signals etc). >+ */ >+ >+ if (!__ipipe_syscall_watched_p(current, regs->orig_ax) || >+ !__ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL)) >+ return 0; >+ >+ ret = __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL, regs); >+ if (!ipipe_root_domain_p) { >+#ifdef CONFIG_X86_64 >+ local_irq_disable_hw(); >+#endif >+ return 1; >+ } >+ >+ local_irq_save_hw(flags); >+ p = ipipe_root_cpudom_ptr(); >+#ifdef CONFIG_X86_32 >+ /* >+ * Fix-up only required on 32-bit as only here the IRET return code >+ * will evaluate the flags. >+ */ >+ __fixup_if(test_bit(IPIPE_STALL_FLAG, &p->status), regs); >+#endif >+ /* >+ * If allowed, sync pending VIRQs before _TIF_NEED_RESCHED is >+ * tested. 
>+ */ >+ if (__ipipe_ipending_p(p)) >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOVIRT); >+#ifdef CONFIG_X86_64 >+ if (!ret) >+#endif >+ local_irq_restore_hw(flags); >+ >+ return -ret; >+} >+ >+/* >+ * __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic >+ * interrupt protection log is maintained here for each domain. Hw >+ * interrupts are off on entry. >+ */ >+int __ipipe_handle_irq(struct pt_regs *regs) >+{ >+ struct ipipe_domain *this_domain, *next_domain; >+ unsigned int vector = regs->orig_ax, irq; >+ struct list_head *head, *pos; >+ int m_ack; >+ >+ if ((long)regs->orig_ax < 0) { >+ vector = ~vector; >+#ifdef CONFIG_X86_LOCAL_APIC >+ if (vector >= FIRST_SYSTEM_VECTOR) >+ irq = ipipe_apic_vector_irq(vector); >+#ifdef CONFIG_SMP >+ else if (vector == IRQ_MOVE_CLEANUP_VECTOR) >+ irq = vector; >+#endif /* CONFIG_SMP */ >+ else >+#endif /* CONFIG_X86_LOCAL_APIC */ >+ irq = __get_cpu_var(vector_irq)[vector]; >+ m_ack = 0; >+ } else { /* This is a self-triggered one. */ >+ irq = vector; >+ m_ack = 1; >+ } >+ >+ this_domain = ipipe_current_domain; >+ >+ if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control)) >+ head = &this_domain->p_link; >+ else { >+ head = __ipipe_pipeline.next; >+ next_domain = list_entry(head, struct ipipe_domain, p_link); >+ if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) { >+ if (!m_ack && next_domain->irqs[irq].acknowledge) >+ next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq)); >+ __ipipe_dispatch_wired(next_domain, irq); >+ goto finalize_nosync; >+ } >+ } >+ >+ /* Ack the interrupt. */ >+ >+ pos = head; >+ >+ while (pos != &__ipipe_pipeline) { >+ next_domain = list_entry(pos, struct ipipe_domain, p_link); >+ if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) { >+ __ipipe_set_irq_pending(next_domain, irq); >+ if (!m_ack && next_domain->irqs[irq].acknowledge) { >+ next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq)); >+ m_ack = 1; >+ } >+ } >+ if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control)) >+ break; >+ pos = next_domain->p_link.next; >+ } >+ >+ /* >+ * If the interrupt preempted the head domain, then do not >+ * even try to walk the pipeline, unless an interrupt is >+ * pending for it. >+ */ >+ if (test_bit(IPIPE_AHEAD_FLAG, &this_domain->flags) && >+ !__ipipe_ipending_p(ipipe_head_cpudom_ptr())) >+ goto finalize_nosync; >+ >+ /* >+ * Now walk the pipeline, yielding control to the highest >+ * priority domain that has pending interrupt(s) or >+ * immediately to the current domain if the interrupt has been >+ * marked as 'sticky'. This search does not go beyond the >+ * current domain in the pipeline. >+ */ >+ >+ __ipipe_walk_pipeline(head); >+ >+finalize_nosync: >+ >+ /* >+ * Given our deferred dispatching model for regular IRQs, we >+ * only record CPU regs for the last timer interrupt, so that >+ * the timer handler charges CPU times properly. It is assumed >+ * that other interrupt handlers don't actually care for such >+ * information. 
>+ */ >+ >+ if (irq == __ipipe_tick_irq) { >+ struct pt_regs *tick_regs = &__raw_get_cpu_var(__ipipe_tick_regs); >+ tick_regs->flags = regs->flags; >+ tick_regs->cs = regs->cs; >+ tick_regs->ip = regs->ip; >+ tick_regs->bp = regs->bp; >+#ifdef CONFIG_X86_64 >+ tick_regs->ss = regs->ss; >+ tick_regs->sp = regs->sp; >+#endif >+ if (!ipipe_root_domain_p) >+ tick_regs->flags &= ~X86_EFLAGS_IF; >+ } >+ >+ if (!ipipe_root_domain_p || >+ test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) >+ return 0; >+ >+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) >+ /* >+ * Prevent a spurious rescheduling from being triggered on >+ * preemptible kernels along the way out through >+ * ret_from_intr. >+ */ >+ if ((long)regs->orig_ax < 0) >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); >+#endif /* CONFIG_SMP */ >+ >+ return 1; >+} >+ >+int __ipipe_check_tickdev(const char *devname) >+{ >+#ifdef CONFIG_X86_LOCAL_APIC >+ if (!strcmp(devname, "lapic")) >+ return __ipipe_check_lapic(); >+#endif >+ >+ return 1; >+} >+ >+void *ipipe_irq_handler = __ipipe_handle_irq; >+EXPORT_SYMBOL(ipipe_irq_handler); >+EXPORT_SYMBOL(io_apic_irqs); >+EXPORT_PER_CPU_SYMBOL(__ipipe_tick_regs); >+__attribute__((regparm(3))) void do_notify_resume(struct pt_regs *, void *, __u32); >+EXPORT_SYMBOL(do_notify_resume); >+extern void *sys_call_table; >+EXPORT_SYMBOL(sys_call_table); >+#ifdef CONFIG_X86_32 >+extern void ret_from_intr(void); >+EXPORT_SYMBOL(ret_from_intr); >+extern spinlock_t i8259A_lock; >+extern struct desc_struct idt_table[]; >+#else >+extern ipipe_spinlock_t i8259A_lock; >+extern gate_desc idt_table[]; >+#endif >+EXPORT_PER_CPU_SYMBOL(vector_irq); >+EXPORT_SYMBOL(idt_table); >+EXPORT_SYMBOL(i8259A_lock); >+EXPORT_SYMBOL(__ipipe_sync_stage); >+EXPORT_SYMBOL(kill_proc_info); >+EXPORT_SYMBOL(find_task_by_pid_ns); >+ >+EXPORT_SYMBOL(__ipipe_tick_irq); >+ >+EXPORT_SYMBOL_GPL(irq_to_desc); >+struct task_struct *__switch_to(struct task_struct *prev_p, >+ struct task_struct *next_p); >+EXPORT_SYMBOL_GPL(__switch_to); >+EXPORT_SYMBOL_GPL(show_stack); >+ >+EXPORT_PER_CPU_SYMBOL_GPL(init_tss); >+#ifdef CONFIG_SMP >+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate); >+#endif /* CONFIG_SMP */ >+ >+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) >+EXPORT_SYMBOL(tasklist_lock); >+#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */ >+ >+#if defined(CONFIG_CC_STACKPROTECTOR) && defined(CONFIG_X86_64) >+EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); >+#endif >+ >+EXPORT_SYMBOL(__ipipe_halt_root); >diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c >index 04bbd52..76d2fa3 100644 >--- a/arch/x86/kernel/irq.c >+++ b/arch/x86/kernel/irq.c >@@ -38,7 +38,7 @@ void ack_bad_irq(unsigned int irq) > * completely. 
> * But only ack when the APIC is enabled -AK > */ >- ack_APIC_irq(); >+ __ack_APIC_irq(); > } > > #define irq_stats(x) (&per_cpu(irq_stat, x)) >@@ -231,11 +231,12 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) > unsigned vector = ~regs->orig_ax; > unsigned irq; > >+ irq = __get_cpu_var(vector_irq)[vector]; >+ __ipipe_move_root_irq(irq); >+ > exit_idle(); > irq_enter(); > >- irq = __get_cpu_var(vector_irq)[vector]; >- > if (!handle_irq(irq, regs)) { > ack_APIC_irq(); > >diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c >index 40f3077..e3604ee 100644 >--- a/arch/x86/kernel/irqinit.c >+++ b/arch/x86/kernel/irqinit.c >@@ -159,11 +159,13 @@ static void __init smp_intr_init(void) > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); >+#ifndef CONFIG_IPIPE > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); >+#endif > > /* IPI for generic function call */ > alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); >@@ -178,6 +180,10 @@ static void __init smp_intr_init(void) > > /* IPI used for rebooting/stopping */ > alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); >+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32) >+ /* IPI for critical lock */ >+ alloc_intr_gate(IPIPE_CRITICAL_VECTOR, ipipe_ipiX); >+#endif > #endif > #endif /* CONFIG_SMP */ > } >@@ -212,6 +218,12 @@ static void __init apic_intr_init(void) > alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); > # endif > >+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32) >+ alloc_intr_gate(IPIPE_SERVICE_VECTOR0, ipipe_ipi0); >+ alloc_intr_gate(IPIPE_SERVICE_VECTOR1, ipipe_ipi1); >+ alloc_intr_gate(IPIPE_SERVICE_VECTOR2, ipipe_ipi2); >+ alloc_intr_gate(IPIPE_SERVICE_VECTOR3, ipipe_ipi3); >+#endif > #endif > } > >diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c >index d0ba107..b624d46 100644 >--- a/arch/x86/kernel/process.c >+++ b/arch/x86/kernel/process.c >@@ -35,7 +35,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) > return -ENOMEM; > WARN_ON((unsigned long)dst->thread.xstate & 15); > memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); >+ } else { >+#ifdef CONFIG_IPIPE >+ dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, >+ GFP_KERNEL); >+ if (!dst->thread.xstate) >+ return -ENOMEM; >+#endif > } >+ > return 0; > } > >@@ -61,6 +69,10 @@ void arch_task_cache_init(void) > kmem_cache_create("task_xstate", xstate_size, > __alignof__(union thread_xstate), > SLAB_PANIC | SLAB_NOTRACK, NULL); >+#ifdef CONFIG_IPIPE >+ current->thread.xstate = kmem_cache_alloc(task_xstate_cachep, >+ GFP_KERNEL); >+#endif > } > > /* >@@ -309,7 +321,7 @@ EXPORT_SYMBOL(default_idle); > > void stop_this_cpu(void *dummy) > { >- local_irq_disable(); >+ local_irq_disable_hw(); > /* > * Remove this CPU: > */ >@@ -534,6 +546,11 @@ static void c1e_idle(void) > > void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) > { >+#ifdef CONFIG_IPIPE >+#define default_to_mwait force_mwait >+#else >+#define default_to_mwait 1 >+#endif > #ifdef CONFIG_SMP > if (pm_idle == poll_idle && 
smp_num_siblings > 1) { > printk(KERN_WARNING "WARNING: polling idle and HT enabled," >@@ -543,7 +560,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) > if (pm_idle) > return; > >- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { >+ if (default_to_mwait && cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { > /* > * One CPU supports mwait => All CPUs supports mwait > */ >diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c >index 4cf7956..fff349c 100644 >--- a/arch/x86/kernel/process_32.c >+++ b/arch/x86/kernel/process_32.c >@@ -305,10 +305,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) > regs->cs = __USER_CS; > regs->ip = new_ip; > regs->sp = new_sp; >+#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. */ > /* > * Free the old FP and other extended state > */ > free_thread_xstate(current); >+#endif > } > EXPORT_SYMBOL_GPL(start_thread); > >@@ -345,7 +347,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) > { > struct thread_struct *prev = &prev_p->thread, > *next = &next_p->thread; >- int cpu = smp_processor_id(); >+ int cpu = raw_smp_processor_id(); > struct tss_struct *tss = &per_cpu(init_tss, cpu); > bool preload_fpu; > >diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c >index 6eabe90..e96b01d 100644 >--- a/arch/x86/kernel/process_64.c >+++ b/arch/x86/kernel/process_64.c >@@ -58,6 +58,8 @@ asmlinkage extern void ret_from_fork(void); > DEFINE_PER_CPU(unsigned long, old_rsp); > static DEFINE_PER_CPU(unsigned char, is_idle); > >+asmlinkage extern void thread_return(void); >+ > unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; > > static ATOMIC_NOTIFIER_HEAD(idle_notifier); >@@ -292,6 +294,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, > p->thread.sp = (unsigned long) childregs; > p->thread.sp0 = (unsigned long) (childregs+1); > p->thread.usersp = me->thread.usersp; >+ p->thread.rip = (unsigned long) thread_return; > > set_tsk_thread_flag(p, TIF_FORK); > >@@ -358,10 +361,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) > regs->ss = __USER_DS; > regs->flags = 0x200; > set_fs(USER_DS); >+#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. */ > /* > * Free the old FP and other extended state > */ > free_thread_xstate(current); >+#endif > } > EXPORT_SYMBOL_GPL(start_thread); > >@@ -380,7 +385,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) > { > struct thread_struct *prev = &prev_p->thread; > struct thread_struct *next = &next_p->thread; >- int cpu = smp_processor_id(); >+ int cpu = raw_smp_processor_id(); > struct tss_struct *tss = &per_cpu(init_tss, cpu); > unsigned fsindex, gsindex; > bool preload_fpu; >diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c >index ec1de97..a3f5bd6 100644 >--- a/arch/x86/kernel/smp.c >+++ b/arch/x86/kernel/smp.c >@@ -184,9 +184,9 @@ static void native_smp_send_stop(void) > udelay(1); > } > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > disable_local_APIC(); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > /* >diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c >index 28e963d..9eee566 100644 >--- a/arch/x86/kernel/smpboot.c >+++ b/arch/x86/kernel/smpboot.c >@@ -266,7 +266,7 @@ static void __cpuinit smp_callin(void) > /* > * Activate a secondary processor. 
> */ >-notrace static void __cpuinit start_secondary(void *unused) >+static void __cpuinit start_secondary(void *unused) > { > /* > * Don't put *anything* before cpu_init(), SMP booting is too >@@ -837,7 +837,7 @@ do_rest: > int __cpuinit native_cpu_up(unsigned int cpu) > { > int apicid = apic->cpu_present_to_apicid(cpu); >- unsigned long flags; >+ unsigned long flags, _flags; > int err; > > WARN_ON(irqs_disabled()); >@@ -889,9 +889,9 @@ int __cpuinit native_cpu_up(unsigned int cpu) > * Check TSC synchronization with the AP (keep irqs disabled > * while doing so): > */ >- local_irq_save(flags); >+ local_irq_save_full(flags, _flags); > check_tsc_sync_source(cpu); >- local_irq_restore(flags); >+ local_irq_restore_full(flags, _flags); > > while (!cpu_online(cpu)) { > cpu_relax(); >diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c >index be25734..2b61ebd 100644 >--- a/arch/x86/kernel/time.c >+++ b/arch/x86/kernel/time.c >@@ -70,11 +70,12 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) > * manually to deassert NMI lines for the watchdog if run > * on an 82489DX-based system. > */ >- spin_lock(&i8259A_lock); >+ unsigned long flags; >+ spin_lock_irqsave_cond(&i8259A_lock,flags); > outb(0x0c, PIC_MASTER_OCW3); > /* Ack the IRQ; AEOI will end it automatically. */ > inb(PIC_MASTER_POLL); >- spin_unlock(&i8259A_lock); >+ spin_unlock_irqrestore_cond(&i8259A_lock,flags); > } > > global_clock_event->event_handler(global_clock_event); >diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c >index 7e37dce..38ff3e2 100644 >--- a/arch/x86/kernel/traps.c >+++ b/arch/x86/kernel/traps.c >@@ -805,6 +805,7 @@ void __math_state_restore(void) > */ > if (unlikely(restore_fpu_checking(tsk))) { > stts(); >+ local_irq_enable_hw_cond(); > force_sig(SIGSEGV, tsk); > return; > } >@@ -827,6 +828,7 @@ asmlinkage void math_state_restore(void) > { > struct thread_info *thread = current_thread_info(); > struct task_struct *tsk = thread->task; >+ unsigned long flags; > > if (!tsk_used_math(tsk)) { > local_irq_enable(); >@@ -843,9 +845,11 @@ asmlinkage void math_state_restore(void) > local_irq_disable(); > } > >+ local_irq_save_hw_cond(flags); > clts(); /* Allow maths ops (or we recurse) */ > > __math_state_restore(); >+ local_irq_restore_hw_cond(flags); > } > EXPORT_SYMBOL_GPL(math_state_restore); > >diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c >index 9c4e625..f0f25ab 100644 >--- a/arch/x86/kernel/vm86_32.c >+++ b/arch/x86/kernel/vm86_32.c >@@ -148,12 +148,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) > do_exit(SIGSEGV); > } > >+ local_irq_disable_hw_cond(); > tss = &per_cpu(init_tss, get_cpu()); > current->thread.sp0 = current->thread.saved_sp0; > current->thread.sysenter_cs = __KERNEL_CS; > load_sp0(tss, ¤t->thread); > current->thread.saved_sp0 = 0; > put_cpu(); >+ local_irq_enable_hw_cond(); > > ret = KVM86->regs32; > >@@ -324,12 +326,14 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk > tsk->thread.saved_fs = info->regs32->fs; > tsk->thread.saved_gs = get_user_gs(info->regs32); > >+ local_irq_disable_hw_cond(); > tss = &per_cpu(init_tss, get_cpu()); > tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; > if (cpu_has_sep) > tsk->thread.sysenter_cs = 0; > load_sp0(tss, &tsk->thread); > put_cpu(); >+ local_irq_enable_hw_cond(); > > tsk->thread.screen_bitmap = info->screen_bitmap; > if (info->flags & VM86_SCREEN_BITMAP) >diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c >index c9f2d9b..78d780a 100644 >--- 
a/arch/x86/lib/mmx_32.c >+++ b/arch/x86/lib/mmx_32.c >@@ -30,7 +30,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) > void *p; > int i; > >- if (unlikely(in_interrupt())) >+ if (unlikely(!ipipe_root_domain_p || in_interrupt())) > return __memcpy(to, from, len); > > p = to; >diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S >index bf9a7d5..98609ae 100644 >--- a/arch/x86/lib/thunk_64.S >+++ b/arch/x86/lib/thunk_64.S >@@ -65,6 +65,10 @@ > thunk lockdep_sys_exit_thunk,lockdep_sys_exit > #endif > >+#ifdef CONFIG_IPIPE >+ thunk_retrax __ipipe_syscall_root_thunk,__ipipe_syscall_root >+#endif >+ > /* SAVE_ARGS below is used only for the .cfi directives it contains. */ > CFI_STARTPROC > SAVE_ARGS >diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c >index f4cee90..d678a7c 100644 >--- a/arch/x86/mm/fault.c >+++ b/arch/x86/mm/fault.c >@@ -1,3 +1,4 @@ >+ > /* > * Copyright (C) 1995 Linus Torvalds > * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. >@@ -323,43 +324,9 @@ out: > > #else /* CONFIG_X86_64: */ > >-void vmalloc_sync_all(void) >-{ >- unsigned long address; >- >- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; >- address += PGDIR_SIZE) { >- >- const pgd_t *pgd_ref = pgd_offset_k(address); >- unsigned long flags; >- struct page *page; >- >- if (pgd_none(*pgd_ref)) >- continue; >- >- spin_lock_irqsave(&pgd_lock, flags); >- list_for_each_entry(page, &pgd_list, lru) { >- pgd_t *pgd; >- pgd = (pgd_t *)page_address(page) + pgd_index(address); >- if (pgd_none(*pgd)) >- set_pgd(pgd, *pgd_ref); >- else >- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); >- } >- spin_unlock_irqrestore(&pgd_lock, flags); >- } >-} >- >-/* >- * 64-bit: >- * >- * Handle a fault on the vmalloc area >- * >- * This assumes no large pages in there. >- */ >-static noinline int vmalloc_fault(unsigned long address) >+static inline int vmalloc_sync_one(pgd_t *pgd, unsigned long address) > { >- pgd_t *pgd, *pgd_ref; >+ pgd_t *pgd_ref; > pud_t *pud, *pud_ref; > pmd_t *pmd, *pmd_ref; > pte_t *pte, *pte_ref; >@@ -373,7 +340,6 @@ static noinline int vmalloc_fault(unsigned long address) > * happen within a race in page table update. In the later > * case just flush: > */ >- pgd = pgd_offset(current->active_mm, address); > pgd_ref = pgd_offset_k(address); > if (pgd_none(*pgd_ref)) > return -1; >@@ -421,6 +387,46 @@ static noinline int vmalloc_fault(unsigned long address) > return 0; > } > >+void vmalloc_sync_all(void) >+{ >+ unsigned long address; >+ >+ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; >+ address += PGDIR_SIZE) { >+ >+ const pgd_t *pgd_ref = pgd_offset_k(address); >+ unsigned long flags; >+ struct page *page; >+ >+ if (pgd_none(*pgd_ref)) >+ continue; >+ >+ spin_lock_irqsave(&pgd_lock, flags); >+ list_for_each_entry(page, &pgd_list, lru) { >+ pgd_t *pgd; >+ pgd = (pgd_t *)page_address(page) + pgd_index(address); >+ if (pgd_none(*pgd)) >+ set_pgd(pgd, *pgd_ref); >+ else >+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); >+ } >+ spin_unlock_irqrestore(&pgd_lock, flags); >+ } >+} >+ >+/* >+ * 64-bit: >+ * >+ * Handle a fault on the vmalloc area >+ * >+ * This assumes no large pages in there. 
>+ */ >+static noinline int vmalloc_fault(unsigned long address) >+{ >+ pgd_t *pgd = pgd_offset(current->active_mm, address); >+ return vmalloc_sync_one(pgd, address); >+} >+ > static const char errata93_warning[] = > KERN_ERR > "******* Your BIOS seems to not contain a fix for K8 errata #93\n" >@@ -958,6 +964,9 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) > /* Get the faulting address: */ > address = read_cr2(); > >+ if (!__ipipe_pipeline_head_p(ipipe_root_domain)) >+ local_irq_enable_hw_cond(); >+ > /* > * Detect and handle instructions that would cause a page fault for > * both a tracked kernel page and a userspace page. >@@ -1137,3 +1146,43 @@ good_area: > > up_read(&mm->mmap_sem); > } >+ >+#ifdef CONFIG_IPIPE >+void __ipipe_pin_range_globally(unsigned long start, unsigned long end) >+{ >+#ifdef CONFIG_X86_32 >+ unsigned long next, addr = start; >+ >+ do { >+ unsigned long flags; >+ struct page *page; >+ >+ next = pgd_addr_end(addr, end); >+ spin_lock_irqsave(&pgd_lock, flags); >+ list_for_each_entry(page, &pgd_list, lru) >+ vmalloc_sync_one(page_address(page), addr); >+ spin_unlock_irqrestore(&pgd_lock, flags); >+ >+ } while (addr = next, addr != end); >+#else >+ unsigned long next, addr = start; >+ int ret = 0; >+ >+ do { >+ struct page *page; >+ >+ next = pgd_addr_end(addr, end); >+ spin_lock(&pgd_lock); >+ list_for_each_entry(page, &pgd_list, lru) { >+ pgd_t *pgd; >+ pgd = (pgd_t *)page_address(page) + pgd_index(addr); >+ ret = vmalloc_sync_one(pgd, addr); >+ if (ret) >+ break; >+ } >+ spin_unlock(&pgd_lock); >+ addr = next; >+ } while (!ret && addr != end); >+#endif >+} >+#endif /* CONFIG_IPIPE */ >diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c >index 36fe08e..32adecd 100644 >--- a/arch/x86/mm/tlb.c >+++ b/arch/x86/mm/tlb.c >@@ -57,11 +57,15 @@ static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; > */ > void leave_mm(int cpu) > { >+ unsigned long flags; >+ > if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) > BUG(); >+ local_irq_save_hw_cond(flags); > cpumask_clear_cpu(cpu, > mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); > load_cr3(swapper_pg_dir); >+ local_irq_restore_hw_cond(flags); > } > EXPORT_SYMBOL_GPL(leave_mm); > >@@ -192,6 +196,9 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, > apic->send_IPI_mask(to_cpumask(f->flush_cpumask), > INVALIDATE_TLB_VECTOR_START + sender); > >+#ifdef CONFIG_IPIPE >+ WARN_ON_ONCE(irqs_disabled_hw()); >+#endif > while (!cpumask_empty(to_cpumask(f->flush_cpumask))) > cpu_relax(); > } >diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c >index 737a1c4..15e81de 100644 >--- a/drivers/pci/htirq.c >+++ b/drivers/pci/htirq.c >@@ -21,7 +21,7 @@ > * With multiple simultaneous hypertransport irq devices it might pay > * to make this more fine grained. But start with simple, stupid, and correct. > */ >-static DEFINE_SPINLOCK(ht_irq_lock); >+static IPIPE_DEFINE_SPINLOCK(ht_irq_lock); > > struct ht_irq_cfg { > struct pci_dev *dev; >diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c >index 5ed1b82..d57ad7d 100644 >--- a/drivers/serial/8250.c >+++ b/drivers/serial/8250.c >@@ -3016,6 +3016,53 @@ static int serial8250_resume(struct platform_device *dev) > return 0; > } > >+#if defined(CONFIG_IPIPE_DEBUG) && defined(CONFIG_SERIAL_8250_CONSOLE) >+ >+#include <stdarg.h> >+ >+void __weak __ipipe_serial_debug(const char *fmt, ...) 
>+{ >+ struct uart_8250_port *up = &serial8250_ports[0]; >+ unsigned int ier, count; >+ unsigned long flags; >+ char buf[128]; >+ va_list ap; >+ >+ va_start(ap, fmt); >+ vsprintf(buf, fmt, ap); >+ va_end(ap); >+ count = strlen(buf); >+ >+ touch_nmi_watchdog(); >+ >+ local_irq_save_hw(flags); >+ >+ /* >+ * First save the IER then disable the interrupts >+ */ >+ ier = serial_in(up, UART_IER); >+ >+ if (up->capabilities & UART_CAP_UUE) >+ serial_out(up, UART_IER, UART_IER_UUE); >+ else >+ serial_out(up, UART_IER, 0); >+ >+ uart_console_write(&up->port, buf, count, serial8250_console_putchar); >+ >+ /* >+ * Finally, wait for transmitter to become empty >+ * and restore the IER >+ */ >+ wait_for_xmitr(up, BOTH_EMPTY); >+ serial_out(up, UART_IER, ier); >+ >+ local_irq_restore_hw(flags); >+} >+ >+EXPORT_SYMBOL(__ipipe_serial_debug); >+ >+#endif >+ > static struct platform_driver serial8250_isa_driver = { > .probe = serial8250_probe, > .remove = __devexit_p(serial8250_remove), >diff --git a/fs/exec.c b/fs/exec.c >index a2a3944..0708ae7 100644 >--- a/fs/exec.c >+++ b/fs/exec.c >@@ -715,6 +715,7 @@ static int exec_mmap(struct mm_struct *mm) > { > struct task_struct *tsk; > struct mm_struct * old_mm, *active_mm; >+ unsigned long flags; > > /* Notify parent that we're no longer interested in the old VM */ > tsk = current; >@@ -737,8 +738,10 @@ static int exec_mmap(struct mm_struct *mm) > task_lock(tsk); > active_mm = tsk->active_mm; > tsk->mm = mm; >+ ipipe_mm_switch_protect(flags); > tsk->active_mm = mm; > activate_mm(active_mm, mm); >+ ipipe_mm_switch_unprotect(flags); > task_unlock(tsk); > arch_pick_mmap_layout(mm); > if (old_mm) { >diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h >index c99c64d..5d01b93 100644 >--- a/include/asm-generic/atomic.h >+++ b/include/asm-generic/atomic.h >@@ -60,11 +60,11 @@ static inline int atomic_add_return(int i, atomic_t *v) > unsigned long flags; > int temp; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > temp = v->counter; > temp += i; > v->counter = temp; >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > return temp; > } >@@ -82,11 +82,11 @@ static inline int atomic_sub_return(int i, atomic_t *v) > unsigned long flags; > int temp; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > temp = v->counter; > temp -= i; > v->counter = temp; >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > return temp; > } >@@ -139,9 +139,9 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) > unsigned long flags; > > mask = ~mask; >- local_irq_save(flags); >+ local_irq_save_hw(flags); > *addr &= mask; >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) >diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h >index c894646..8d42ffe 100644 >--- a/include/asm-generic/bitops/atomic.h >+++ b/include/asm-generic/bitops/atomic.h >@@ -21,20 +21,20 @@ extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; > * this is the substitute */ > #define _atomic_spin_lock_irqsave(l,f) do { \ > raw_spinlock_t *s = ATOMIC_HASH(l); \ >- local_irq_save(f); \ >+ local_irq_save_hw(f); \ > __raw_spin_lock(s); \ > } while(0) > > #define _atomic_spin_unlock_irqrestore(l,f) do { \ > raw_spinlock_t *s = ATOMIC_HASH(l); \ > __raw_spin_unlock(s); \ >- local_irq_restore(f); \ >+ local_irq_restore_hw(f); \ > } while(0) > > > #else >-# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } 
while (0) >-# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0) >+# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save_hw(f); } while (0) >+# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore_hw(f); } while (0) > #endif > > /* >diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h >index b2ba2fc..ed01ab9 100644 >--- a/include/asm-generic/cmpxchg-local.h >+++ b/include/asm-generic/cmpxchg-local.h >@@ -20,7 +20,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, > if (size == 8 && sizeof(unsigned long) != 8) > wrong_size_cmpxchg(ptr); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > switch (size) { > case 1: prev = *(u8 *)ptr; > if (prev == old) >@@ -41,7 +41,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, > default: > wrong_size_cmpxchg(ptr); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > return prev; > } > >@@ -54,11 +54,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr, > u64 prev; > unsigned long flags; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > prev = *(u64 *)ptr; > if (prev == old) > *(u64 *)ptr = new; >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > return prev; > } > >diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h >index 90079c3..65e872e 100644 >--- a/include/asm-generic/percpu.h >+++ b/include/asm-generic/percpu.h >@@ -56,6 +56,20 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; > #define __raw_get_cpu_var(var) \ > (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset)) > >+#ifdef CONFIG_IPIPE >+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) >+extern int __ipipe_check_percpu_access(void); >+#define __ipipe_local_cpu_offset \ >+ ({ \ >+ WARN_ON_ONCE(__ipipe_check_percpu_access()); \ >+ __my_cpu_offset; \ >+ }) >+#else >+#define __ipipe_local_cpu_offset __my_cpu_offset >+#endif >+#define __ipipe_get_cpu_var(var) \ >+ (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __ipipe_local_cpu_offset)) >+#endif /* CONFIG_IPIPE */ > > #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA > extern void setup_per_cpu_areas(void); >@@ -66,6 +80,7 @@ extern void setup_per_cpu_areas(void); > #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) > #define __get_cpu_var(var) per_cpu_var(var) > #define __raw_get_cpu_var(var) per_cpu_var(var) >+#define __ipipe_get_cpu_var(var) __raw_get_cpu_var(var) > > #endif /* SMP */ > >diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h >index 6d527ee..c997ef1 100644 >--- a/include/linux/hardirq.h >+++ b/include/linux/hardirq.h >@@ -183,24 +183,28 @@ extern void irq_enter(void); > */ > extern void irq_exit(void); > >-#define nmi_enter() \ >- do { \ >- ftrace_nmi_enter(); \ >- BUG_ON(in_nmi()); \ >- add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ >- lockdep_off(); \ >- rcu_nmi_enter(); \ >- trace_hardirq_enter(); \ >+#define nmi_enter() \ >+ do { \ >+ if (likely(!ipipe_test_foreign_stack())) { \ >+ ftrace_nmi_enter(); \ >+ BUG_ON(in_nmi()); \ >+ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ >+ lockdep_off(); \ >+ rcu_nmi_enter(); \ >+ trace_hardirq_enter(); \ >+ } \ > } while (0) > >-#define nmi_exit() \ >- do { \ >- trace_hardirq_exit(); \ >- rcu_nmi_exit(); \ >- lockdep_on(); \ >- BUG_ON(!in_nmi()); \ >- sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ >- ftrace_nmi_exit(); \ >+#define nmi_exit() \ >+ do { \ >+ if (likely(!ipipe_test_foreign_stack())) { \ >+ trace_hardirq_exit(); \ >+ rcu_nmi_exit(); \ >+ 
lockdep_on(); \ >+ BUG_ON(!in_nmi()); \ >+ sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ >+ ftrace_nmi_exit(); \ >+ } \ > } while (0) > > #endif /* LINUX_HARDIRQ_H */ >diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h >new file mode 100644 >index 0000000..1040a2b >--- /dev/null >+++ b/include/linux/ipipe.h >@@ -0,0 +1,690 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe.h >+ * >+ * Copyright (C) 2002-2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __LINUX_IPIPE_H >+#define __LINUX_IPIPE_H >+ >+#include <linux/spinlock.h> >+#include <linux/cache.h> >+#include <linux/percpu.h> >+#include <linux/mutex.h> >+#include <linux/linkage.h> >+#include <linux/ipipe_base.h> >+#include <linux/ipipe_compat.h> >+#include <asm/ipipe.h> >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+ >+#include <linux/cpumask.h> >+#include <asm/system.h> >+ >+static inline int ipipe_disable_context_check(int cpu) >+{ >+ return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0); >+} >+ >+static inline void ipipe_restore_context_check(int cpu, int old_state) >+{ >+ per_cpu(ipipe_percpu_context_check, cpu) = old_state; >+} >+ >+static inline void ipipe_context_check_off(void) >+{ >+ int cpu; >+ for_each_online_cpu(cpu) >+ per_cpu(ipipe_percpu_context_check, cpu) = 0; >+} >+ >+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+static inline int ipipe_disable_context_check(int cpu) >+{ >+ return 0; >+} >+ >+static inline void ipipe_restore_context_check(int cpu, int old_state) { } >+ >+static inline void ipipe_context_check_off(void) { } >+ >+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+#ifdef CONFIG_IPIPE >+ >+#define IPIPE_VERSION_STRING IPIPE_ARCH_STRING >+#define IPIPE_RELEASE_NUMBER ((IPIPE_MAJOR_NUMBER << 16) | \ >+ (IPIPE_MINOR_NUMBER << 8) | \ >+ (IPIPE_PATCH_NUMBER)) >+ >+#ifndef BROKEN_BUILTIN_RETURN_ADDRESS >+#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0)) >+#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1)) >+#endif /* !BUILTIN_RETURN_ADDRESS */ >+ >+#define IPIPE_ROOT_PRIO 100 >+#define IPIPE_ROOT_ID 0 >+#define IPIPE_ROOT_NPTDKEYS 4 /* Must be <= BITS_PER_LONG */ >+ >+#define IPIPE_RESET_TIMER 0x1 >+#define IPIPE_GRAB_TIMER 0x2 >+ >+/* Global domain flags */ >+#define IPIPE_SPRINTK_FLAG 0 /* Synchronous printk() allowed */ >+#define IPIPE_AHEAD_FLAG 1 /* Domain always heads the pipeline */ >+ >+/* Interrupt control bits */ >+#define IPIPE_HANDLE_FLAG 0 >+#define IPIPE_PASS_FLAG 1 >+#define IPIPE_ENABLE_FLAG 2 >+#define IPIPE_DYNAMIC_FLAG IPIPE_HANDLE_FLAG >+#define IPIPE_STICKY_FLAG 3 >+#define IPIPE_SYSTEM_FLAG 4 >+#define IPIPE_LOCK_FLAG 5 >+#define IPIPE_WIRED_FLAG 6 >+#define IPIPE_EXCLUSIVE_FLAG 7 >+ >+#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) >+#define IPIPE_PASS_MASK 
(1 << IPIPE_PASS_FLAG) >+#define IPIPE_ENABLE_MASK (1 << IPIPE_ENABLE_FLAG) >+#define IPIPE_DYNAMIC_MASK IPIPE_HANDLE_MASK >+#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) >+#define IPIPE_SYSTEM_MASK (1 << IPIPE_SYSTEM_FLAG) >+#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) >+#define IPIPE_WIRED_MASK (1 << IPIPE_WIRED_FLAG) >+#define IPIPE_EXCLUSIVE_MASK (1 << IPIPE_EXCLUSIVE_FLAG) >+ >+#define IPIPE_DEFAULT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK) >+#define IPIPE_STDROOT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK) >+ >+#define IPIPE_EVENT_SELF 0x80000000 >+ >+#define IPIPE_NR_CPUS NR_CPUS >+ >+/* This accessor assumes hw IRQs are off on SMP; allows assignment. */ >+#define __ipipe_current_domain __ipipe_get_cpu_var(ipipe_percpu_domain) >+/* This read-only accessor makes sure that hw IRQs are off on SMP. */ >+#define ipipe_current_domain \ >+ ({ \ >+ struct ipipe_domain *__ipd__; \ >+ unsigned long __flags__; \ >+ local_irq_save_hw_smp(__flags__); \ >+ __ipd__ = __ipipe_current_domain; \ >+ local_irq_restore_hw_smp(__flags__); \ >+ __ipd__; \ >+ }) >+ >+#define ipipe_virtual_irq_p(irq) ((irq) >= IPIPE_VIRQ_BASE && \ >+ (irq) < IPIPE_NR_IRQS) >+ >+#define IPIPE_SAME_HANDLER ((ipipe_irq_handler_t)(-1)) >+ >+struct irq_desc; >+ >+typedef void (*ipipe_irq_ackfn_t)(unsigned irq, struct irq_desc *desc); >+ >+typedef int (*ipipe_event_handler_t)(unsigned event, >+ struct ipipe_domain *from, >+ void *data); >+struct ipipe_domain { >+ >+ int slot; /* Slot number in percpu domain data array. */ >+ struct list_head p_link; /* Link in pipeline */ >+ ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */ >+ unsigned long long evself; /* Self-monitored event bits. */ >+ >+ struct irqdesc { >+ unsigned long control; >+ ipipe_irq_ackfn_t acknowledge; >+ ipipe_irq_handler_t handler; >+ void *cookie; >+ } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; >+ >+ int priority; >+ void *pdd; >+ unsigned long flags; >+ unsigned domid; >+ const char *name; >+ struct mutex mutex; >+}; >+ >+#define IPIPE_HEAD_PRIORITY (-1) /* For domains always heading the pipeline */ >+ >+struct ipipe_domain_attr { >+ >+ unsigned domid; /* Domain identifier -- Magic value set by caller */ >+ const char *name; /* Domain name -- Warning: won't be dup'ed! 
*/ >+ int priority; /* Priority in interrupt pipeline */ >+ void (*entry) (void); /* Domain entry point */ >+ void *pdd; /* Per-domain (opaque) data pointer */ >+}; >+ >+#define __ipipe_irq_cookie(ipd, irq) (ipd)->irqs[irq].cookie >+#define __ipipe_irq_handler(ipd, irq) (ipd)->irqs[irq].handler >+#define __ipipe_cpudata_irq_hits(ipd, cpu, irq) ipipe_percpudom(ipd, irqall, cpu)[irq] >+ >+extern unsigned __ipipe_printk_virq; >+ >+extern unsigned long __ipipe_virtual_irq_map; >+ >+extern struct list_head __ipipe_pipeline; >+ >+extern int __ipipe_event_monitors[]; >+ >+/* Private interface */ >+ >+void ipipe_init_early(void); >+ >+void ipipe_init(void); >+ >+#ifdef CONFIG_PROC_FS >+void ipipe_init_proc(void); >+ >+#ifdef CONFIG_IPIPE_TRACE >+void __ipipe_init_tracer(void); >+#else /* !CONFIG_IPIPE_TRACE */ >+#define __ipipe_init_tracer() do { } while(0) >+#endif /* CONFIG_IPIPE_TRACE */ >+ >+#else /* !CONFIG_PROC_FS */ >+#define ipipe_init_proc() do { } while(0) >+#endif /* CONFIG_PROC_FS */ >+ >+void __ipipe_init_stage(struct ipipe_domain *ipd); >+ >+void __ipipe_cleanup_domain(struct ipipe_domain *ipd); >+ >+void __ipipe_add_domain_proc(struct ipipe_domain *ipd); >+ >+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd); >+ >+void __ipipe_flush_printk(unsigned irq, void *cookie); >+ >+void __ipipe_walk_pipeline(struct list_head *pos); >+ >+void __ipipe_pend_irq(unsigned irq, struct list_head *head); >+ >+int __ipipe_dispatch_event(unsigned event, void *data); >+ >+void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq); >+ >+void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq); >+ >+void __ipipe_sync_stage(int dovirt); >+ >+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq); >+ >+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq); >+ >+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq); >+ >+void __ipipe_pin_range_globally(unsigned long start, unsigned long end); >+ >+/* Must be called hw IRQs off. */ >+static inline void ipipe_irq_lock(unsigned irq) >+{ >+ __ipipe_lock_irq(__ipipe_current_domain, ipipe_processor_id(), irq); >+} >+ >+/* Must be called hw IRQs off. */ >+static inline void ipipe_irq_unlock(unsigned irq) >+{ >+ __ipipe_unlock_irq(__ipipe_current_domain, irq); >+} >+ >+#ifndef __ipipe_sync_pipeline >+#define __ipipe_sync_pipeline(dovirt) __ipipe_sync_stage(dovirt) >+#endif >+ >+#ifndef __ipipe_run_irqtail >+#define __ipipe_run_irqtail() do { } while(0) >+#endif >+ >+#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next) >+ >+#define __ipipe_ipending_p(p) ((p)->irqpend_himap != 0) >+ >+/* >+ * Keep the following as a macro, so that client code could check for >+ * the support of the invariant pipeline head optimization. 
>+ */ >+#define __ipipe_pipeline_head() \ >+ list_entry(__ipipe_pipeline.next, struct ipipe_domain, p_link) >+ >+#define local_irq_enable_hw_cond() local_irq_enable_hw() >+#define local_irq_disable_hw_cond() local_irq_disable_hw() >+#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags) >+#define local_irq_restore_hw_cond(flags) local_irq_restore_hw(flags) >+ >+#ifdef CONFIG_SMP >+cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask); >+int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask); >+#define local_irq_save_hw_smp(flags) local_irq_save_hw(flags) >+#define local_irq_restore_hw_smp(flags) local_irq_restore_hw(flags) >+#else /* !CONFIG_SMP */ >+#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0) >+#define local_irq_restore_hw_smp(flags) do { } while(0) >+#endif /* CONFIG_SMP */ >+ >+#define local_irq_save_full(vflags, rflags) \ >+ do { \ >+ local_irq_save(vflags); \ >+ local_irq_save_hw(rflags); \ >+ } while(0) >+ >+#define local_irq_restore_full(vflags, rflags) \ >+ do { \ >+ local_irq_restore_hw(rflags); \ >+ local_irq_restore(vflags); \ >+ } while(0) >+ >+static inline void __local_irq_restore_nosync(unsigned long x) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_root_cpudom_ptr(); >+ >+ if (raw_irqs_disabled_flags(x)) { >+ set_bit(IPIPE_STALL_FLAG, &p->status); >+ trace_hardirqs_off(); >+ } else { >+ trace_hardirqs_on(); >+ clear_bit(IPIPE_STALL_FLAG, &p->status); >+ } >+} >+ >+static inline void local_irq_restore_nosync(unsigned long x) >+{ >+ unsigned long flags; >+ local_irq_save_hw_smp(flags); >+ __local_irq_restore_nosync(x); >+ local_irq_restore_hw_smp(flags); >+} >+ >+#define __ipipe_root_domain_p (__ipipe_current_domain == ipipe_root_domain) >+#define ipipe_root_domain_p (ipipe_current_domain == ipipe_root_domain) >+ >+static inline int __ipipe_event_monitored_p(int ev) >+{ >+ if (__ipipe_event_monitors[ev] > 0) >+ return 1; >+ >+ return (ipipe_current_domain->evself & (1LL << ev)) != 0; >+} >+ >+#define ipipe_sigwake_notify(p) \ >+do { \ >+ if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE, p); \ >+} while(0) >+ >+#define ipipe_exit_notify(p) \ >+do { \ >+ if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_EXIT, p); \ >+} while(0) >+ >+#define ipipe_setsched_notify(p) \ >+do { \ >+ if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_SETSCHED, p); \ >+} while(0) >+ >+#define ipipe_schedule_notify(prev, next) \ >+do { \ >+ if ((((prev)->flags|(next)->flags) & PF_EVNOTIFY) && \ >+ __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE,next); \ >+} while(0) >+ >+#define ipipe_trap_notify(ex, regs) \ >+({ \ >+ unsigned long __flags__; \ >+ int __ret__ = 0; \ >+ local_irq_save_hw_smp(__flags__); \ >+ if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \ >+ ((current)->flags & PF_EVNOTIFY)) && \ >+ __ipipe_event_monitored_p(ex)) { \ >+ local_irq_restore_hw_smp(__flags__); \ >+ __ret__ = __ipipe_dispatch_event(ex, regs); \ >+ } else \ >+ local_irq_restore_hw_smp(__flags__); \ >+ __ret__; \ >+}) >+ >+static inline void ipipe_init_notify(struct task_struct *p) >+{ >+ if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT)) >+ __ipipe_dispatch_event(IPIPE_EVENT_INIT, p); >+} >+ >+struct mm_struct; >+ >+static inline void ipipe_cleanup_notify(struct mm_struct *mm) >+{ >+ if 
(__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP)) >+ __ipipe_dispatch_event(IPIPE_EVENT_CLEANUP, mm); >+} >+ >+/* Public interface */ >+ >+int ipipe_register_domain(struct ipipe_domain *ipd, >+ struct ipipe_domain_attr *attr); >+ >+int ipipe_unregister_domain(struct ipipe_domain *ipd); >+ >+void ipipe_suspend_domain(void); >+ >+int ipipe_virtualize_irq(struct ipipe_domain *ipd, >+ unsigned irq, >+ ipipe_irq_handler_t handler, >+ void *cookie, >+ ipipe_irq_ackfn_t acknowledge, >+ unsigned modemask); >+ >+int ipipe_control_irq(unsigned irq, >+ unsigned clrmask, >+ unsigned setmask); >+ >+unsigned ipipe_alloc_virq(void); >+ >+int ipipe_free_virq(unsigned virq); >+ >+int ipipe_trigger_irq(unsigned irq); >+ >+static inline void __ipipe_propagate_irq(unsigned irq) >+{ >+ struct list_head *next = __ipipe_current_domain->p_link.next; >+ if (next == &ipipe_root.p_link) { >+ /* Fast path: root must handle all interrupts. */ >+ __ipipe_set_irq_pending(&ipipe_root, irq); >+ return; >+ } >+ __ipipe_pend_irq(irq, next); >+} >+ >+static inline void __ipipe_schedule_irq(unsigned irq) >+{ >+ __ipipe_pend_irq(irq, &__ipipe_current_domain->p_link); >+} >+ >+static inline void __ipipe_schedule_irq_head(unsigned irq) >+{ >+ __ipipe_set_irq_pending(__ipipe_pipeline_head(), irq); >+} >+ >+static inline void __ipipe_schedule_irq_root(unsigned irq) >+{ >+ __ipipe_set_irq_pending(&ipipe_root, irq); >+} >+ >+static inline void ipipe_propagate_irq(unsigned irq) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ __ipipe_propagate_irq(irq); >+ local_irq_restore_hw(flags); >+} >+ >+static inline void ipipe_schedule_irq(unsigned irq) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ __ipipe_schedule_irq(irq); >+ local_irq_restore_hw(flags); >+} >+ >+static inline void ipipe_schedule_irq_head(unsigned irq) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ __ipipe_schedule_irq_head(irq); >+ local_irq_restore_hw(flags); >+} >+ >+static inline void ipipe_schedule_irq_root(unsigned irq) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ __ipipe_schedule_irq_root(irq); >+ local_irq_restore_hw(flags); >+} >+ >+void ipipe_stall_pipeline_from(struct ipipe_domain *ipd); >+ >+unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd); >+ >+unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd); >+ >+static inline void ipipe_unstall_pipeline_from(struct ipipe_domain *ipd) >+{ >+ ipipe_test_and_unstall_pipeline_from(ipd); >+} >+ >+void ipipe_restore_pipeline_from(struct ipipe_domain *ipd, >+ unsigned long x); >+ >+static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd) >+{ >+ return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); >+} >+ >+static inline void ipipe_stall_pipeline_head(void) >+{ >+ local_irq_disable_hw(); >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status)); >+} >+ >+static inline unsigned long ipipe_test_and_stall_pipeline_head(void) >+{ >+ local_irq_disable_hw(); >+ return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status)); >+} >+ >+void ipipe_unstall_pipeline_head(void); >+ >+void __ipipe_restore_pipeline_head(unsigned long x); >+ >+static inline void ipipe_restore_pipeline_head(unsigned long x) >+{ >+ /* On some archs, __test_and_set_bit() might return different >+ * truth value than test_bit(), so we test the exclusive OR of >+ * both statuses, assuming that the lowest bit is always set in >+ * the truth value (if this is wrong, the failed optimization will >+ * be 
caught in __ipipe_restore_pipeline_head() if >+ * CONFIG_DEBUG_KERNEL is set). */ >+ if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status))) & 1) >+ __ipipe_restore_pipeline_head(x); >+} >+ >+#define ipipe_unstall_pipeline() \ >+ ipipe_unstall_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_test_and_unstall_pipeline() \ >+ ipipe_test_and_unstall_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_test_pipeline() \ >+ ipipe_test_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_test_and_stall_pipeline() \ >+ ipipe_test_and_stall_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_stall_pipeline() \ >+ ipipe_stall_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_restore_pipeline(x) \ >+ ipipe_restore_pipeline_from(ipipe_current_domain, (x)) >+ >+void ipipe_init_attr(struct ipipe_domain_attr *attr); >+ >+int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); >+ >+unsigned long ipipe_critical_enter(void (*syncfn) (void)); >+ >+void ipipe_critical_exit(unsigned long flags); >+ >+static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd) >+{ >+ set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); >+} >+ >+static inline void ipipe_set_printk_async(struct ipipe_domain *ipd) >+{ >+ clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); >+} >+ >+static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd) >+{ >+ /* Must be called hw interrupts off. */ >+ __set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); >+} >+ >+static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd) >+{ >+ /* Must be called hw interrupts off. */ >+ __clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); >+} >+ >+static inline int ipipe_test_foreign_stack(void) >+{ >+ /* Must be called hw interrupts off. */ >+ return test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)); >+} >+ >+#ifndef ipipe_safe_current >+#define ipipe_safe_current() \ >+({ \ >+ struct task_struct *p; \ >+ unsigned long flags; \ >+ local_irq_save_hw_smp(flags); \ >+ p = ipipe_test_foreign_stack() ? 
&init_task : current; \ >+ local_irq_restore_hw_smp(flags); \ >+ p; \ >+}) >+#endif >+ >+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, >+ unsigned event, >+ ipipe_event_handler_t handler); >+ >+cpumask_t ipipe_set_irq_affinity(unsigned irq, >+ cpumask_t cpumask); >+ >+int ipipe_send_ipi(unsigned ipi, >+ cpumask_t cpumask); >+ >+int ipipe_setscheduler_root(struct task_struct *p, >+ int policy, >+ int prio); >+ >+int ipipe_reenter_root(struct task_struct *prev, >+ int policy, >+ int prio); >+ >+int ipipe_alloc_ptdkey(void); >+ >+int ipipe_free_ptdkey(int key); >+ >+int ipipe_set_ptd(int key, >+ void *value); >+ >+void *ipipe_get_ptd(int key); >+ >+int ipipe_disable_ondemand_mappings(struct task_struct *tsk); >+ >+static inline void ipipe_nmi_enter(void) >+{ >+ int cpu = ipipe_processor_id(); >+ >+ per_cpu(ipipe_nmi_saved_root, cpu) = ipipe_root_cpudom_var(status); >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+ per_cpu(ipipe_saved_context_check_state, cpu) = >+ ipipe_disable_context_check(cpu); >+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ >+} >+ >+static inline void ipipe_nmi_exit(void) >+{ >+ int cpu = ipipe_processor_id(); >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+ ipipe_restore_context_check >+ (cpu, per_cpu(ipipe_saved_context_check_state, cpu)); >+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+ if (!test_bit(IPIPE_STALL_FLAG, &per_cpu(ipipe_nmi_saved_root, cpu))) >+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); >+} >+ >+#else /* !CONFIG_IPIPE */ >+ >+#define ipipe_init_early() do { } while(0) >+#define ipipe_init() do { } while(0) >+#define ipipe_suspend_domain() do { } while(0) >+#define ipipe_sigwake_notify(p) do { } while(0) >+#define ipipe_setsched_notify(p) do { } while(0) >+#define ipipe_init_notify(p) do { } while(0) >+#define ipipe_exit_notify(p) do { } while(0) >+#define ipipe_cleanup_notify(mm) do { } while(0) >+#define ipipe_trap_notify(t,r) 0 >+#define ipipe_init_proc() do { } while(0) >+ >+static inline void __ipipe_pin_range_globally(unsigned long start, >+ unsigned long end) >+{ >+} >+ >+static inline int ipipe_test_foreign_stack(void) >+{ >+ return 0; >+} >+ >+#define local_irq_enable_hw_cond() do { } while(0) >+#define local_irq_disable_hw_cond() do { } while(0) >+#define local_irq_save_hw_cond(flags) do { (void)(flags); } while(0) >+#define local_irq_restore_hw_cond(flags) do { } while(0) >+#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0) >+#define local_irq_restore_hw_smp(flags) do { } while(0) >+ >+#define ipipe_irq_lock(irq) do { } while(0) >+#define ipipe_irq_unlock(irq) do { } while(0) >+ >+#define __ipipe_root_domain_p 1 >+#define ipipe_root_domain_p 1 >+#define ipipe_safe_current current >+#define ipipe_processor_id() smp_processor_id() >+ >+#define ipipe_nmi_enter() do { } while (0) >+#define ipipe_nmi_exit() do { } while (0) >+ >+#define local_irq_disable_head() local_irq_disable() >+ >+#define local_irq_save_full(vflags, rflags) do { (void)(vflags); local_irq_save(rflags); } while(0) >+#define local_irq_restore_full(vflags, rflags) do { (void)(vflags); local_irq_restore(rflags); } while(0) >+#define local_irq_restore_nosync(vflags) local_irq_restore(vflags) >+ >+#define __ipipe_pipeline_head_p(ipd) 1 >+ >+#endif /* CONFIG_IPIPE */ >+ >+#endif /* !__LINUX_IPIPE_H */ >diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h >new file mode 100644 >index 0000000..9853df3 >--- /dev/null >+++ b/include/linux/ipipe_base.h >@@ -0,0 +1,118 @@ >+/* 
-*- linux-c -*- >+ * include/linux/ipipe_base.h >+ * >+ * Copyright (C) 2002-2007 Philippe Gerum. >+ * 2007 Jan Kiszka. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __LINUX_IPIPE_BASE_H >+#define __LINUX_IPIPE_BASE_H >+ >+#ifdef CONFIG_IPIPE >+ >+#include <asm/ipipe_base.h> >+ >+#define __bpl_up(x) (((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1)) >+/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */ >+#define IPIPE_NR_VIRQS BITS_PER_LONG >+/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */ >+#define IPIPE_VIRQ_BASE __bpl_up(IPIPE_NR_XIRQS) >+/* Total number of IRQ slots */ >+#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS) >+ >+#define IPIPE_IRQ_LOMAPSZ (IPIPE_NR_IRQS / BITS_PER_LONG) >+#if IPIPE_IRQ_LOMAPSZ > BITS_PER_LONG >+/* >+ * We need a 3-level mapping. This allows us to handle up to 32k IRQ >+ * vectors on 32bit machines, 256k on 64bit ones. >+ */ >+#define __IPIPE_3LEVEL_IRQMAP 1 >+#define IPIPE_IRQ_MDMAPSZ (__bpl_up(IPIPE_IRQ_LOMAPSZ) / BITS_PER_LONG) >+#else >+/* >+ * 2-level mapping is enough. This allows us to handle up to 1024 IRQ >+ * vectors on 32bit machines, 4096 on 64bit ones. 
>+ */ >+#define __IPIPE_2LEVEL_IRQMAP 1 >+#endif >+ >+#define IPIPE_IRQ_DOALL 0 >+#define IPIPE_IRQ_DOVIRT 1 >+ >+/* Per-cpu pipeline status */ >+#define IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at bit #0 */ >+#define IPIPE_SYNC_FLAG 1 /* The interrupt syncer is running for the domain */ >+#define IPIPE_NOSTACK_FLAG 2 /* Domain currently runs on a foreign stack */ >+ >+#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG) >+#define IPIPE_SYNC_MASK (1L << IPIPE_SYNC_FLAG) >+#define IPIPE_NOSTACK_MASK (1L << IPIPE_NOSTACK_FLAG) >+ >+typedef void (*ipipe_irq_handler_t)(unsigned int irq, >+ void *cookie); >+ >+extern struct ipipe_domain ipipe_root; >+ >+#define ipipe_root_domain (&ipipe_root) >+ >+void __ipipe_unstall_root(void); >+ >+void __ipipe_restore_root(unsigned long x); >+ >+#define ipipe_preempt_disable(flags) \ >+ do { \ >+ local_irq_save_hw(flags); \ >+ if (__ipipe_root_domain_p) \ >+ preempt_disable(); \ >+ } while (0) >+ >+#define ipipe_preempt_enable(flags) \ >+ do { \ >+ if (__ipipe_root_domain_p) { \ >+ preempt_enable_no_resched(); \ >+ local_irq_restore_hw(flags); \ >+ preempt_check_resched(); \ >+ } else \ >+ local_irq_restore_hw(flags); \ >+ } while (0) >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+void ipipe_check_context(struct ipipe_domain *border_ipd); >+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ >+static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { } >+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+/* Generic features */ >+ >+#ifdef CONFIG_GENERIC_CLOCKEVENTS >+#define __IPIPE_FEATURE_REQUEST_TICKDEV 1 >+#endif >+#define __IPIPE_FEATURE_DELAYED_ATOMICSW 1 >+#define __IPIPE_FEATURE_FASTPEND_IRQ 1 >+#define __IPIPE_FEATURE_TRACE_EVENT 1 >+ >+#else /* !CONFIG_IPIPE */ >+#define ipipe_preempt_disable(flags) do { \ >+ preempt_disable(); \ >+ (void)(flags); \ >+ } while (0) >+#define ipipe_preempt_enable(flags) preempt_enable() >+#define ipipe_check_context(ipd) do { } while(0) >+#endif /* CONFIG_IPIPE */ >+ >+#endif /* !__LINUX_IPIPE_BASE_H */ >diff --git a/include/linux/ipipe_compat.h b/include/linux/ipipe_compat.h >new file mode 100644 >index 0000000..50a245c >--- /dev/null >+++ b/include/linux/ipipe_compat.h >@@ -0,0 +1,54 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_compat.h >+ * >+ * Copyright (C) 2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __LINUX_IPIPE_COMPAT_H >+#define __LINUX_IPIPE_COMPAT_H >+ >+#ifdef CONFIG_IPIPE_COMPAT >+/* >+ * OBSOLETE: defined only for backward compatibility. Will be removed >+ * in future releases, please update client code accordingly. 
>+ */ >+ >+#ifdef CONFIG_SMP >+#define ipipe_declare_cpuid int cpuid >+#define ipipe_load_cpuid() do { \ >+ cpuid = ipipe_processor_id(); \ >+ } while(0) >+#define ipipe_lock_cpu(flags) do { \ >+ local_irq_save_hw(flags); \ >+ cpuid = ipipe_processor_id(); \ >+ } while(0) >+#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) >+#define ipipe_get_cpu(flags) ipipe_lock_cpu(flags) >+#define ipipe_put_cpu(flags) ipipe_unlock_cpu(flags) >+#else /* !CONFIG_SMP */ >+#define ipipe_declare_cpuid const int cpuid = 0 >+#define ipipe_load_cpuid() do { } while(0) >+#define ipipe_lock_cpu(flags) local_irq_save_hw(flags) >+#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) >+#define ipipe_get_cpu(flags) do { (void)(flags); } while(0) >+#define ipipe_put_cpu(flags) do { } while(0) >+#endif /* CONFIG_SMP */ >+ >+#endif /* CONFIG_IPIPE_COMPAT */ >+ >+#endif /* !__LINUX_IPIPE_COMPAT_H */ >diff --git a/include/linux/ipipe_lock.h b/include/linux/ipipe_lock.h >new file mode 100644 >index 0000000..b751d54 >--- /dev/null >+++ b/include/linux/ipipe_lock.h >@@ -0,0 +1,144 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_lock.h >+ * >+ * Copyright (C) 2009 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
>+ */ >+ >+#ifndef __LINUX_IPIPE_LOCK_H >+#define __LINUX_IPIPE_LOCK_H >+ >+typedef struct { >+ raw_spinlock_t bare_lock; >+} __ipipe_spinlock_t; >+ >+#define ipipe_lock_p(lock) \ >+ __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) >+ >+#define common_lock_p(lock) \ >+ __builtin_types_compatible_p(typeof(lock), spinlock_t *) >+ >+#define bare_lock(lock) (&((__ipipe_spinlock_t *)(lock))->bare_lock) >+#define std_lock(lock) ((spinlock_t *)(lock)) >+ >+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) >+ >+extern int __bad_spinlock_type(void); >+#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ >+ do { \ >+ if (ipipe_lock_p(lock)) \ >+ (flags) = __ipipe_spin_lock_irqsave(bare_lock(lock)); \ >+ else if (common_lock_p(lock)) \ >+ (flags) = _spin_lock_irqsave(std_lock(lock)); \ >+ else __bad_spinlock_type(); \ >+ } while (0) >+ >+#else /* !(CONFIG_SMP || CONFIG_DEBUG_SPINLOCK) */ >+ >+#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ >+ do { \ >+ if (ipipe_lock_p(lock)) \ >+ (flags) = __ipipe_spin_lock_irqsave(bare_lock(lock)); \ >+ else if (common_lock_p(lock)) \ >+ _spin_lock_irqsave(std_lock(lock), flags); \ >+ } while (0) >+ >+#endif /* !(CONFIG_SMP || CONFIG_DEBUG_SPINLOCK) */ >+ >+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ >+ do { \ >+ if (ipipe_lock_p(lock)) \ >+ __ipipe_spin_unlock_irqrestore(bare_lock(lock), flags); \ >+ else if (common_lock_p(lock)) \ >+ _spin_unlock_irqrestore(std_lock(lock), flags); \ >+ } while (0) >+ >+#define PICK_SPINOP(op, lock) \ >+ do { \ >+ if (ipipe_lock_p(lock)) \ >+ __raw_spin##op(bare_lock(lock)); \ >+ else if (common_lock_p(lock)) \ >+ _spin##op(std_lock(lock)); \ >+ } while (0) >+ >+#define PICK_SPINOP_IRQ(op, lock) \ >+ do { \ >+ if (ipipe_lock_p(lock)) \ >+ __ipipe_spin##op##_irq(bare_lock(lock)); \ >+ else if (common_lock_p(lock)) \ >+ _spin##op##_irq(std_lock(lock)); \ >+ } while (0) >+ >+#define __raw_spin_lock_init(lock) \ >+ do { \ >+ IPIPE_DEFINE_SPINLOCK(__lock__); \ >+ *((ipipe_spinlock_t *)lock) = __lock__; \ >+ } while (0) >+ >+#ifdef CONFIG_IPIPE >+ >+#define ipipe_spinlock_t __ipipe_spinlock_t >+#define IPIPE_DEFINE_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED >+#define IPIPE_DECLARE_SPINLOCK(x) extern ipipe_spinlock_t x >+#define IPIPE_SPIN_LOCK_UNLOCKED \ >+ (__ipipe_spinlock_t) { .bare_lock = __RAW_SPIN_LOCK_UNLOCKED } >+ >+#define spin_lock_irqsave_cond(lock, flags) \ >+ spin_lock_irqsave(lock, flags) >+ >+#define spin_unlock_irqrestore_cond(lock, flags) \ >+ spin_unlock_irqrestore(lock, flags) >+ >+void __ipipe_spin_lock_irq(raw_spinlock_t *lock); >+ >+void __ipipe_spin_unlock_irq(raw_spinlock_t *lock); >+ >+unsigned long __ipipe_spin_lock_irqsave(raw_spinlock_t *lock); >+ >+void __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, >+ unsigned long x); >+ >+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); >+ >+void __ipipe_spin_unlock_irqcomplete(unsigned long x); >+ >+#else /* !CONFIG_IPIPE */ >+ >+#define ipipe_spinlock_t spinlock_t >+#define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) >+#define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x >+#define IPIPE_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED >+ >+#define spin_lock_irqsave_cond(lock, flags) \ >+ do { \ >+ (void)(flags); \ >+ spin_lock(lock); \ >+ } while(0) >+ >+#define spin_unlock_irqrestore_cond(lock, flags) \ >+ spin_unlock(lock) >+ >+#define __ipipe_spin_lock_irq(lock) do { } while (0) >+#define __ipipe_spin_unlock_irq(lock) do { } while (0) >+#define __ipipe_spin_lock_irqsave(lock) 0 >+#define __ipipe_spin_unlock_irqrestore(lock, x) 
do { (void)(x); } while (0) >+#define __ipipe_spin_unlock_irqbegin(lock) do { } while (0) >+#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while (0) >+ >+#endif /* !CONFIG_IPIPE */ >+ >+#endif /* !__LINUX_IPIPE_LOCK_H */ >diff --git a/include/linux/ipipe_percpu.h b/include/linux/ipipe_percpu.h >new file mode 100644 >index 0000000..f6727e3 >--- /dev/null >+++ b/include/linux/ipipe_percpu.h >@@ -0,0 +1,89 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_percpu.h >+ * >+ * Copyright (C) 2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __LINUX_IPIPE_PERCPU_H >+#define __LINUX_IPIPE_PERCPU_H >+ >+#include <asm/percpu.h> >+#include <asm/ptrace.h> >+ >+struct ipipe_domain; >+ >+struct ipipe_percpu_domain_data { >+ unsigned long status; /* <= Must be first in struct. */ >+ unsigned long irqpend_himap; >+#ifdef __IPIPE_3LEVEL_IRQMAP >+ unsigned long irqpend_mdmap[IPIPE_IRQ_MDMAPSZ]; >+#endif >+ unsigned long irqpend_lomap[IPIPE_IRQ_LOMAPSZ]; >+ unsigned long irqheld_map[IPIPE_IRQ_LOMAPSZ]; >+ unsigned long irqall[IPIPE_NR_IRQS]; >+ u64 evsync; >+}; >+ >+/* >+ * CAREFUL: all accessors based on __raw_get_cpu_var() you may find in >+ * this file should be used only while hw interrupts are off, to >+ * prevent from CPU migration regardless of the running domain. 
>+ */ >+#ifdef CONFIG_SMP >+#define ipipe_percpudom_ptr(ipd, cpu) \ >+ (&per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot]) >+#define ipipe_cpudom_ptr(ipd) \ >+ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot]) >+#else >+DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]); >+#define ipipe_percpudom_ptr(ipd, cpu) \ >+ (per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot]) >+#define ipipe_cpudom_ptr(ipd) \ >+ (__ipipe_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot]) >+#endif >+#define ipipe_percpudom(ipd, var, cpu) (ipipe_percpudom_ptr(ipd, cpu)->var) >+#define ipipe_cpudom_var(ipd, var) (ipipe_cpudom_ptr(ipd)->var) >+ >+#define IPIPE_ROOT_SLOT 0 >+#define IPIPE_HEAD_SLOT (CONFIG_IPIPE_DOMAINS - 1) >+ >+DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]); >+ >+DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain); >+ >+DECLARE_PER_CPU(unsigned long, ipipe_nmi_saved_root); >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+DECLARE_PER_CPU(int, ipipe_percpu_context_check); >+DECLARE_PER_CPU(int, ipipe_saved_context_check_state); >+#endif >+ >+#define ipipe_root_cpudom_ptr(var) \ >+ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT]) >+ >+#define ipipe_root_cpudom_var(var) ipipe_root_cpudom_ptr()->var >+ >+#define ipipe_this_cpudom_var(var) \ >+ ipipe_cpudom_var(__ipipe_current_domain, var) >+ >+#define ipipe_head_cpudom_ptr() \ >+ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_HEAD_SLOT]) >+ >+#define ipipe_head_cpudom_var(var) ipipe_head_cpudom_ptr()->var >+ >+#endif /* !__LINUX_IPIPE_PERCPU_H */ >diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h >new file mode 100644 >index 0000000..4a1cb1b >--- /dev/null >+++ b/include/linux/ipipe_tickdev.h >@@ -0,0 +1,58 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_tickdev.h >+ * >+ * Copyright (C) 2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
>+ */ >+ >+#ifndef __LINUX_IPIPE_TICKDEV_H >+#define __LINUX_IPIPE_TICKDEV_H >+ >+#if defined(CONFIG_IPIPE) && defined(CONFIG_GENERIC_CLOCKEVENTS) >+ >+#include <linux/clockchips.h> >+ >+struct tick_device; >+ >+struct ipipe_tick_device { >+ >+ void (*emul_set_mode)(enum clock_event_mode, >+ struct clock_event_device *cdev); >+ int (*emul_set_tick)(unsigned long delta, >+ struct clock_event_device *cdev); >+ void (*real_set_mode)(enum clock_event_mode mode, >+ struct clock_event_device *cdev); >+ int (*real_set_tick)(unsigned long delta, >+ struct clock_event_device *cdev); >+ struct tick_device *slave; >+ unsigned long real_max_delta_ns; >+ unsigned long real_mult; >+ int real_shift; >+}; >+ >+int ipipe_request_tickdev(const char *devname, >+ void (*emumode)(enum clock_event_mode mode, >+ struct clock_event_device *cdev), >+ int (*emutick)(unsigned long evt, >+ struct clock_event_device *cdev), >+ int cpu, unsigned long *tmfreq); >+ >+void ipipe_release_tickdev(int cpu); >+ >+#endif /* CONFIG_IPIPE && CONFIG_GENERIC_CLOCKEVENTS */ >+ >+#endif /* !__LINUX_IPIPE_TICKDEV_H */ >diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h >new file mode 100644 >index 0000000..627b354 >--- /dev/null >+++ b/include/linux/ipipe_trace.h >@@ -0,0 +1,72 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_trace.h >+ * >+ * Copyright (C) 2005 Luotao Fu. >+ * 2005-2007 Jan Kiszka. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
>+ */ >+ >+#ifndef _LINUX_IPIPE_TRACE_H >+#define _LINUX_IPIPE_TRACE_H >+ >+#ifdef CONFIG_IPIPE_TRACE >+ >+#include <linux/types.h> >+ >+void ipipe_trace_begin(unsigned long v); >+void ipipe_trace_end(unsigned long v); >+void ipipe_trace_freeze(unsigned long v); >+void ipipe_trace_special(unsigned char special_id, unsigned long v); >+void ipipe_trace_pid(pid_t pid, short prio); >+void ipipe_trace_event(unsigned char id, unsigned long delay_tsc); >+int ipipe_trace_max_reset(void); >+int ipipe_trace_frozen_reset(void); >+ >+#else /* !CONFIG_IPIPE_TRACE */ >+ >+#define ipipe_trace_begin(v) do { (void)(v); } while(0) >+#define ipipe_trace_end(v) do { (void)(v); } while(0) >+#define ipipe_trace_freeze(v) do { (void)(v); } while(0) >+#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0) >+#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0) >+#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0) >+#define ipipe_trace_max_reset() do { } while(0) >+#define ipipe_trace_froze_reset() do { } while(0) >+ >+#endif /* !CONFIG_IPIPE_TRACE */ >+ >+#ifdef CONFIG_IPIPE_TRACE_PANIC >+void ipipe_trace_panic_freeze(void); >+void ipipe_trace_panic_dump(void); >+#else >+static inline void ipipe_trace_panic_freeze(void) { } >+static inline void ipipe_trace_panic_dump(void) { } >+#endif >+ >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq) >+#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq) >+#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL) >+#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL) >+#else >+#define ipipe_trace_irq_entry(irq) do { (void)(irq);} while(0) >+#define ipipe_trace_irq_exit(irq) do { (void)(irq);} while(0) >+#define ipipe_trace_irqsoff() do { } while(0) >+#define ipipe_trace_irqson() do { } while(0) >+#endif >+ >+#endif /* !__LINUX_IPIPE_TRACE_H */ >diff --git a/include/linux/irq.h b/include/linux/irq.h >index 9e5f45a..85642bc 100644 >--- a/include/linux/irq.h >+++ b/include/linux/irq.h >@@ -124,6 +124,9 @@ struct irq_chip { > void (*end)(unsigned int irq); > int (*set_affinity)(unsigned int irq, > const struct cpumask *dest); >+#ifdef CONFIG_IPIPE >+ void (*move)(unsigned int irq); >+#endif /* CONFIG_IPIPE */ > int (*retrigger)(unsigned int irq); > int (*set_type)(unsigned int irq, unsigned int flow_type); > int (*set_wake)(unsigned int irq, unsigned int on); >@@ -173,6 +176,12 @@ struct irq_2_iommu; > * @name: flow handler name for /proc/interrupts output > */ > struct irq_desc { >+#ifdef CONFIG_IPIPE >+ void (*ipipe_ack)(unsigned int irq, >+ struct irq_desc *desc); >+ void (*ipipe_end)(unsigned int irq, >+ struct irq_desc *desc); >+#endif /* CONFIG_IPIPE */ > unsigned int irq; > struct timer_rand_state *timer_rand_state; > unsigned int *kstat_irqs; >@@ -346,6 +355,10 @@ extern void > set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, > irq_flow_handler_t handle, const char *name); > >+extern irq_flow_handler_t >+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, >+ int is_chained); >+ > extern void > __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, > const char *name); >@@ -357,6 +370,7 @@ static inline void __set_irq_handler_unlocked(int irq, > struct irq_desc *desc; > > desc = irq_to_desc(irq); >+ handler = __fixup_irq_handler(desc, handler, 0); > desc->handle_irq = handler; > } > >diff --git a/include/linux/kernel.h b/include/linux/kernel.h >index f4e3184..3b80b7b 100644 
>--- a/include/linux/kernel.h >+++ b/include/linux/kernel.h >@@ -14,6 +14,7 @@ > #include <linux/compiler.h> > #include <linux/bitops.h> > #include <linux/log2.h> >+#include <linux/ipipe_base.h> > #include <linux/typecheck.h> > #include <linux/ratelimit.h> > #include <linux/dynamic_debug.h> >@@ -119,9 +120,12 @@ struct user; > > #ifdef CONFIG_PREEMPT_VOLUNTARY > extern int _cond_resched(void); >-# define might_resched() _cond_resched() >+# define might_resched() do { \ >+ ipipe_check_context(ipipe_root_domain); \ >+ _cond_resched(); \ >+ } while (0) > #else >-# define might_resched() do { } while (0) >+# define might_resched() ipipe_check_context(ipipe_root_domain) > #endif > > #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP >diff --git a/include/linux/preempt.h b/include/linux/preempt.h >index 72b1a10..80553be 100644 >--- a/include/linux/preempt.h >+++ b/include/linux/preempt.h >@@ -9,13 +9,20 @@ > #include <linux/thread_info.h> > #include <linux/linkage.h> > #include <linux/list.h> >+#include <linux/ipipe_base.h> > > #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) > extern void add_preempt_count(int val); > extern void sub_preempt_count(int val); > #else >-# define add_preempt_count(val) do { preempt_count() += (val); } while (0) >-# define sub_preempt_count(val) do { preempt_count() -= (val); } while (0) >+# define add_preempt_count(val) do { \ >+ ipipe_check_context(ipipe_root_domain); \ >+ preempt_count() += (val); \ >+ } while (0) >+# define sub_preempt_count(val) do { \ >+ ipipe_check_context(ipipe_root_domain); \ >+ preempt_count() -= (val); \ >+ } while (0) > #endif > > #define inc_preempt_count() add_preempt_count(1) >diff --git a/include/linux/sched.h b/include/linux/sched.h >index 70abfd3..efecc7a 100644 >--- a/include/linux/sched.h >+++ b/include/linux/sched.h >@@ -61,6 +61,7 @@ struct sched_param { > #include <linux/errno.h> > #include <linux/nodemask.h> > #include <linux/mm_types.h> >+#include <linux/ipipe.h> > > #include <asm/system.h> > #include <asm/page.h> >@@ -195,6 +196,13 @@ extern unsigned long long time_sync_thresh; > #define TASK_DEAD 64 > #define TASK_WAKEKILL 128 > #define TASK_WAKING 256 >+#ifdef CONFIG_IPIPE >+#define TASK_ATOMICSWITCH 512 >+#define TASK_NOWAKEUP 1024 >+#else /* !CONFIG_IPIPE */ >+#define TASK_ATOMICSWITCH 0 >+#define TASK_NOWAKEUP 0 >+#endif /* CONFIG_IPIPE */ > > /* Convenience macros for the sake of set_task_state */ > #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) >@@ -302,6 +310,15 @@ extern void trap_init(void); > extern void update_process_times(int user); > extern void scheduler_tick(void); > >+#ifdef CONFIG_IPIPE >+void update_root_process_times(struct pt_regs *regs); >+#else /* !CONFIG_IPIPE */ >+static inline void update_root_process_times(struct pt_regs *regs) >+{ >+ update_process_times(user_mode(regs)); >+} >+#endif /* CONFIG_IPIPE */ >+ > extern void sched_show_task(struct task_struct *p); > > #ifdef CONFIG_DETECT_SOFTLOCKUP >@@ -349,8 +366,8 @@ extern signed long schedule_timeout(signed long timeout); > extern signed long schedule_timeout_interruptible(signed long timeout); > extern signed long schedule_timeout_killable(signed long timeout); > extern signed long schedule_timeout_uninterruptible(signed long timeout); >-asmlinkage void __schedule(void); >-asmlinkage void schedule(void); >+asmlinkage int __schedule(void); >+asmlinkage int schedule(void); > extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); > > struct nsproxy; >@@ -475,6 +492,9 @@ extern int get_dumpable(struct 
mm_struct *mm); > #endif > /* leave room for more dump flags */ > #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ >+#ifdef CONFIG_IPIPE >+#define MMF_VM_PINNED 31 /* ondemand load up and COW disabled */ >+#endif > > #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) > >@@ -1496,6 +1516,9 @@ struct task_struct { > #endif > atomic_t fs_excl; /* holding fs exclusive resources */ > struct rcu_head rcu; >+#ifdef CONFIG_IPIPE >+ void *ptd[IPIPE_ROOT_NPTDKEYS]; >+#endif > > /* > * cache last used pipe for splice >@@ -1736,6 +1759,11 @@ extern cputime_t task_gtime(struct task_struct *p); > #define PF_EXITING 0x00000004 /* getting shut down */ > #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ > #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ >+#ifdef CONFIG_IPIPE >+#define PF_EVNOTIFY 0x00000020 /* Notify other domains about internal events */ >+#else >+#define PF_EVNOTIFY 0 >+#endif /* CONFIG_IPIPE */ > #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ > #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ > #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ >diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h >index f0ca7a7..3096642 100644 >--- a/include/linux/spinlock.h >+++ b/include/linux/spinlock.h >@@ -90,10 +90,12 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); > # include <linux/spinlock_up.h> > #endif > >+#include <linux/ipipe_lock.h> >+ > #ifdef CONFIG_DEBUG_SPINLOCK > extern void __spin_lock_init(spinlock_t *lock, const char *name, > struct lock_class_key *key); >-# define spin_lock_init(lock) \ >+# define _spin_lock_init(lock) \ > do { \ > static struct lock_class_key __key; \ > \ >@@ -101,10 +103,12 @@ do { \ > } while (0) > > #else >-# define spin_lock_init(lock) \ >+# define _spin_lock_init(lock) \ > do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) > #endif > >+# define spin_lock_init(lock) PICK_SPINOP(_lock_init, lock) >+ > #ifdef CONFIG_DEBUG_SPINLOCK > extern void __rwlock_init(rwlock_t *lock, const char *name, > struct lock_class_key *key); >@@ -186,7 +190,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } > #define read_trylock(lock) __cond_lock(lock, _read_trylock(lock)) > #define write_trylock(lock) __cond_lock(lock, _write_trylock(lock)) > >-#define spin_lock(lock) _spin_lock(lock) >+#define spin_lock(lock) PICK_SPINOP(_lock, lock) > > #ifdef CONFIG_DEBUG_LOCK_ALLOC > # define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass) >@@ -208,7 +212,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } > #define spin_lock_irqsave(lock, flags) \ > do { \ > typecheck(unsigned long, flags); \ >- flags = _spin_lock_irqsave(lock); \ >+ PICK_SPINLOCK_IRQSAVE(lock, flags); \ > } while (0) > #define read_lock_irqsave(lock, flags) \ > do { \ >@@ -240,7 +244,7 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } > #define spin_lock_irqsave(lock, flags) \ > do { \ > typecheck(unsigned long, flags); \ >- _spin_lock_irqsave(lock, flags); \ >+ PICK_SPINLOCK_IRQSAVE(lock, flags); \ > } while (0) > #define read_lock_irqsave(lock, flags) \ > do { \ >@@ -257,23 +261,23 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } > > #endif > >-#define spin_lock_irq(lock) _spin_lock_irq(lock) >+#define spin_lock_irq(lock) PICK_SPINOP_IRQ(_lock, lock) > #define spin_lock_bh(lock) _spin_lock_bh(lock) > #define read_lock_irq(lock) _read_lock_irq(lock) > #define read_lock_bh(lock) _read_lock_bh(lock) > #define write_lock_irq(lock) 
_write_lock_irq(lock) > #define write_lock_bh(lock) _write_lock_bh(lock) >-#define spin_unlock(lock) _spin_unlock(lock) >+#define spin_unlock(lock) PICK_SPINOP(_unlock, lock) > #define read_unlock(lock) _read_unlock(lock) > #define write_unlock(lock) _write_unlock(lock) >-#define spin_unlock_irq(lock) _spin_unlock_irq(lock) >+#define spin_unlock_irq(lock) PICK_SPINOP_IRQ(_unlock, lock) > #define read_unlock_irq(lock) _read_unlock_irq(lock) > #define write_unlock_irq(lock) _write_unlock_irq(lock) > > #define spin_unlock_irqrestore(lock, flags) \ > do { \ > typecheck(unsigned long, flags); \ >- _spin_unlock_irqrestore(lock, flags); \ >+ PICK_SPINUNLOCK_IRQRESTORE(lock, flags); \ > } while (0) > #define spin_unlock_bh(lock) _spin_unlock_bh(lock) > >diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h >index 7a7e18f..190bc0a 100644 >--- a/include/linux/spinlock_api_smp.h >+++ b/include/linux/spinlock_api_smp.h >@@ -229,7 +229,9 @@ static inline int __write_trylock(rwlock_t *lock) > * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are > * not re-enabled during lock-acquire (which the preempt-spin-ops do): > */ >-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) >+#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ >+ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ >+ defined(CONFIG_IPIPE) > > static inline void __read_lock(rwlock_t *lock) > { >@@ -250,7 +252,7 @@ static inline unsigned long __spin_lock_irqsave(spinlock_t *lock) > * _raw_spin_lock_flags() code, because lockdep assumes > * that interrupts are not re-enabled during lock-acquire: > */ >-#ifdef CONFIG_LOCKDEP >+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE) > LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); > #else > _raw_spin_lock_flags(lock, &flags); >diff --git a/init/Kconfig b/init/Kconfig >index eb4b337..a73e078 100644 >--- a/init/Kconfig >+++ b/init/Kconfig >@@ -78,6 +78,7 @@ config INIT_ENV_ARG_LIMIT > > config LOCALVERSION > string "Local version - append to kernel release" >+ default "-ipipe" > help > Append an extra string to the end of your kernel version. > This will show up when you type uname, for example. >diff --git a/init/main.c b/init/main.c >index bc109c7..4672e7d 100644 >--- a/init/main.c >+++ b/init/main.c >@@ -530,7 +530,7 @@ asmlinkage void __init start_kernel(void) > > cgroup_init_early(); > >- local_irq_disable(); >+ local_irq_disable_hw(); > early_boot_irqs_off(); > early_init_irq_lock_class(); > >@@ -565,6 +565,7 @@ asmlinkage void __init start_kernel(void) > pidhash_init(); > vfs_caches_init_early(); > sort_main_extable(); >+ ipipe_init_early(); > trap_init(); > mm_init(); > /* >@@ -593,6 +594,11 @@ asmlinkage void __init start_kernel(void) > softirq_init(); > timekeeping_init(); > time_init(); >+ /* >+ * We need to wait for the interrupt and time subsystems to be >+ * initialized before enabling the pipeline. 
>+ */ >+ ipipe_init(); > profile_init(); > if (!irqs_disabled()) > printk(KERN_CRIT "start_kernel(): bug: interrupts were " >@@ -774,6 +780,7 @@ static void __init do_basic_setup(void) > init_tmpfs(); > driver_init(); > init_irq_proc(); >+ ipipe_init_proc(); > do_ctors(); > do_initcalls(); > } >diff --git a/kernel/Makefile b/kernel/Makefile >index d7c13d2..b6a84ee 100644 >--- a/kernel/Makefile >+++ b/kernel/Makefile >@@ -83,6 +83,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o > obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o > obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o > obj-$(CONFIG_RELAY) += relay.o >+obj-$(CONFIG_IPIPE) += ipipe/ > obj-$(CONFIG_SYSCTL) += utsname_sysctl.o > obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o > obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o >diff --git a/kernel/exit.c b/kernel/exit.c >index f7864ac..f5c3129 100644 >--- a/kernel/exit.c >+++ b/kernel/exit.c >@@ -963,6 +963,7 @@ NORET_TYPE void do_exit(long code) > acct_process(); > trace_sched_process_exit(tsk); > >+ ipipe_exit_notify(tsk); > exit_sem(tsk); > exit_files(tsk); > exit_fs(tsk); >@@ -1766,3 +1767,37 @@ SYSCALL_DEFINE3(waitpid, pid_t, pid, int > } > > #endif >+ >+void rt_daemonize(void) >+{ >+ sigset_t blocked; >+ >+ /* >+ * We don't want to have TIF_FREEZE set if the system-wide hibernation >+ * or suspend transition begins right now. >+ */ >+ current->flags |= (PF_NOFREEZE | PF_KTHREAD); >+ >+ if (current->nsproxy != &init_nsproxy) { >+ get_nsproxy(&init_nsproxy); >+ switch_task_namespaces(current, &init_nsproxy); >+ } >+ set_special_pids(&init_struct_pid); >+ proc_clear_tty(current); >+ >+ /* Block and flush all signals */ >+ sigfillset(&blocked); >+ sigprocmask(SIG_BLOCK, &blocked, NULL); >+ flush_signals(current); >+ >+ /* Become as one with the init task */ >+ >+ daemonize_fs_struct(); >+ exit_files(current); >+ current->files = init_task.files; >+ atomic_inc(¤t->files->count); >+ >+ reparent_to_kthreadd(); >+} >+ >+EXPORT_SYMBOL(rt_daemonize); >diff --git a/kernel/fork.c b/kernel/fork.c >index 166b8c4..dff0f55 100644 >--- a/kernel/fork.c >+++ b/kernel/fork.c >@@ -511,6 +511,7 @@ void mmput(struct mm_struct *mm) > exit_aio(mm); > ksm_exit(mm); > exit_mmap(mm); >+ ipipe_cleanup_notify(mm); > set_mm_exe_file(mm, NULL); > if (!list_empty(&mm->mmlist)) { > spin_lock(&mmlist_lock); >@@ -918,7 +919,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) > { > unsigned long new_flags = p->flags; > >- new_flags &= ~PF_SUPERPRIV; >+ new_flags &= ~(PF_SUPERPRIV | PF_EVNOTIFY); > new_flags |= PF_FORKNOEXEC; > new_flags |= PF_STARTING; > p->flags = new_flags; >@@ -1303,6 +1304,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, > write_unlock_irq(&tasklist_lock); > proc_fork_connector(p); > cgroup_post_fork(p); >+#ifdef CONFIG_IPIPE >+ memset(p->ptd, 0, sizeof(p->ptd)); >+#endif /* CONFIG_IPIPE */ > perf_event_fork(p); > return p; > >@@ -1700,11 +1704,14 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) > } > > if (new_mm) { >+ unsigned long flags; > mm = current->mm; > active_mm = current->active_mm; > current->mm = new_mm; >+ ipipe_mm_switch_protect(flags); > current->active_mm = new_mm; > activate_mm(active_mm, new_mm); >+ ipipe_mm_switch_unprotect(flags); > new_mm = mm; > } > >diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig >new file mode 100644 >index 0000000..de5e6a3 >--- /dev/null >+++ b/kernel/ipipe/Kconfig >@@ -0,0 +1,35 @@ >+config IPIPE >+ bool "Interrupt pipeline" >+ default y >+ ---help--- >+ Activate this option if you want the interrupt 
pipeline to be >+ compiled in. >+ >+config IPIPE_DOMAINS >+ int "Max domains" >+ depends on IPIPE >+ default 4 >+ ---help--- >+ The maximum number of I-pipe domains to run concurrently. >+ >+config IPIPE_COMPAT >+ bool "Maintain code compatibility with older releases" >+ depends on IPIPE >+ default y >+ ---help--- >+ Activate this option if you want the compatibility code to be >+ defined, so that older I-pipe clients may use obsolete >+ constructs. WARNING: obsolete code will be eventually >+ deprecated in future I-pipe releases, and removed from the >+ compatibility support as time passes. Please fix I-pipe >+ clients to get rid of such uses as soon as possible. >+ >+config IPIPE_DELAYED_ATOMICSW >+ bool >+ depends on IPIPE >+ default n >+ >+config IPIPE_UNMASKED_CONTEXT_SWITCH >+ bool >+ depends on IPIPE >+ default n >diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug >new file mode 100644 >index 0000000..629c894 >--- /dev/null >+++ b/kernel/ipipe/Kconfig.debug >@@ -0,0 +1,97 @@ >+config IPIPE_DEBUG >+ bool "I-pipe debugging" >+ depends on IPIPE >+ >+config IPIPE_DEBUG_CONTEXT >+ bool "Check for illicit cross-domain calls" >+ depends on IPIPE_DEBUG >+ default y >+ ---help--- >+ Enable this feature to arm checkpoints in the kernel that >+ verify the correct invocation context. On entry of critical >+ Linux services a warning is issued if the caller is not >+ running over the root domain. >+ >+config IPIPE_DEBUG_INTERNAL >+ bool "Enable internal debug checks" >+ depends on IPIPE_DEBUG >+ default y >+ ---help--- >+ When this feature is enabled, I-pipe will perform internal >+ consistency checks of its subsystems, e.g. on per-cpu variable >+ access. >+ >+config IPIPE_TRACE >+ bool "Latency tracing" >+ depends on IPIPE_DEBUG >+ select FRAME_POINTER >+ select KALLSYMS >+ select PROC_FS >+ ---help--- >+ Activate this option if you want to use per-function tracing of >+ the kernel. The tracer will collect data via instrumentation >+ features like the one below or with the help of explicite calls >+ of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the >+ in-kernel tracing API. The collected data and runtime control >+ is available via /proc/ipipe/trace/*. >+ >+if IPIPE_TRACE >+ >+config IPIPE_TRACE_ENABLE >+ bool "Enable tracing on boot" >+ default y >+ ---help--- >+ Disable this option if you want to arm the tracer after booting >+ manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce >+ boot time on slow embedded devices due to the tracer overhead. >+ >+config IPIPE_TRACE_MCOUNT >+ bool "Instrument function entries" >+ default y >+ select FUNCTION_TRACER >+ select TRACING >+ select CONTEXT_SWITCH_TRACER >+ select FTRACE_MCOUNT_RECORD >+ select DYNAMIC_FTRACE >+ ---help--- >+ When enabled, records every kernel function entry in the tracer >+ log. While this slows down the system noticeably, it provides >+ the highest level of information about the flow of events. >+ However, it can be switch off in order to record only explicit >+ I-pipe trace points. >+ >+config IPIPE_TRACE_IRQSOFF >+ bool "Trace IRQs-off times" >+ default y >+ ---help--- >+ Activate this option if I-pipe shall trace the longest path >+ with hard-IRQs switched off. >+ >+config IPIPE_TRACE_SHIFT >+ int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)" >+ range 10 18 >+ default 14 >+ ---help--- >+ The number of trace points to hold tracing data for each >+ trace path, as a power of 2. 
>+ >+config IPIPE_TRACE_VMALLOC >+ bool "Use vmalloc'ed trace buffer" >+ default y if EMBEDDED >+ ---help--- >+ Instead of reserving static kernel data, the required buffer >+ is allocated via vmalloc during boot-up when this option is >+ enabled. This can help to start systems that are low on memory, >+ but it slightly degrades overall performance. Try this option >+ when a traced kernel hangs unexpectedly at boot time. >+ >+config IPIPE_TRACE_PANIC >+ bool "Enable panic back traces" >+ default y >+ ---help--- >+ Provides services to freeze and dump a back trace on panic >+ situations. This is used on IPIPE_DEBUG_CONTEXT exceptions >+ as well as ordinary kernel oopses. You can control the number >+ of printed back trace points via /proc/ipipe/trace. >+ >+endif >diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile >new file mode 100644 >index 0000000..6257dfa >--- /dev/null >+++ b/kernel/ipipe/Makefile >@@ -0,0 +1,3 @@ >+ >+obj-$(CONFIG_IPIPE) += core.o >+obj-$(CONFIG_IPIPE_TRACE) += tracer.o >diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c >new file mode 100644 >index 0000000..63deaf9 >--- /dev/null >+++ b/kernel/ipipe/core.c >@@ -0,0 +1,1955 @@ >+/* -*- linux-c -*- >+ * linux/kernel/ipipe/core.c >+ * >+ * Copyright (C) 2002-2005 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ * >+ * Architecture-independent I-PIPE core support. >+ */ >+ >+#include <linux/version.h> >+#include <linux/module.h> >+#include <linux/init.h> >+#include <linux/kernel.h> >+#include <linux/sched.h> >+#include <linux/sched.h> >+#include <linux/kallsyms.h> >+#include <linux/interrupt.h> >+#include <linux/bitops.h> >+#include <linux/tick.h> >+#include <linux/prefetch.h> >+#ifdef CONFIG_PROC_FS >+#include <linux/proc_fs.h> >+#include <linux/seq_file.h> >+#endif /* CONFIG_PROC_FS */ >+#include <linux/ipipe_trace.h> >+#include <linux/ipipe_tickdev.h> >+#include <linux/irq.h> >+ >+static int __ipipe_ptd_key_count; >+ >+static unsigned long __ipipe_ptd_key_map; >+ >+static unsigned long __ipipe_domain_slot_map; >+ >+struct ipipe_domain ipipe_root; >+ >+#ifndef CONFIG_SMP >+/* >+ * Create an alias to the unique root status, so that arch-dep code >+ * may get simple and easy access to this percpu variable. We also >+ * create an array of pointers to the percpu domain data; this tends >+ * to produce a better code when reaching non-root domains. We make >+ * sure that the early boot code would be able to dereference the >+ * pointer to the root domain data safely by statically initializing >+ * its value (local_irq*() routines depend on this). 
>+ */ >+#if __GNUC__ >= 4 >+extern unsigned long __ipipe_root_status >+__attribute__((alias(__stringify(__raw_get_cpu_var(ipipe_percpu_darray))))); >+EXPORT_SYMBOL(__ipipe_root_status); >+#else /* __GNUC__ < 4 */ >+/* >+ * Work around a GCC 3.x issue making alias symbols unusable as >+ * constant initializers. >+ */ >+unsigned long *const __ipipe_root_status_addr = >+ &__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT].status; >+EXPORT_SYMBOL(__ipipe_root_status_addr); >+#endif /* __GNUC__ < 4 */ >+ >+DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) = >+{ [IPIPE_ROOT_SLOT] = (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_percpu_darray) }; >+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr); >+#endif /* !CONFIG_SMP */ >+ >+DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) = >+{ [IPIPE_ROOT_SLOT] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. */ >+ >+DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root }; >+ >+DEFINE_PER_CPU(unsigned long, ipipe_nmi_saved_root); /* Copy of root status during NMI */ >+ >+static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock); >+ >+LIST_HEAD(__ipipe_pipeline); >+ >+unsigned long __ipipe_virtual_irq_map; >+ >+#ifdef CONFIG_PRINTK >+unsigned __ipipe_printk_virq; >+#endif /* CONFIG_PRINTK */ >+ >+int __ipipe_event_monitors[IPIPE_NR_EVENTS]; >+ >+#ifdef CONFIG_GENERIC_CLOCKEVENTS >+ >+DECLARE_PER_CPU(struct tick_device, tick_cpu_device); >+ >+static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device); >+ >+int ipipe_request_tickdev(const char *devname, >+ void (*emumode)(enum clock_event_mode mode, >+ struct clock_event_device *cdev), >+ int (*emutick)(unsigned long delta, >+ struct clock_event_device *cdev), >+ int cpu, unsigned long *tmfreq) >+{ >+ struct ipipe_tick_device *itd; >+ struct tick_device *slave; >+ struct clock_event_device *evtdev; >+ unsigned long long freq; >+ unsigned long flags; >+ int status; >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ itd = &per_cpu(ipipe_tick_cpu_device, cpu); >+ >+ if (itd->slave != NULL) { >+ status = -EBUSY; >+ goto out; >+ } >+ >+ slave = &per_cpu(tick_cpu_device, cpu); >+ >+ if (strcmp(slave->evtdev->name, devname)) { >+ /* >+ * No conflict so far with the current tick device, >+ * check whether the requested device is sane and has >+ * been blessed by the kernel. >+ */ >+ status = __ipipe_check_tickdev(devname) ? >+ CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN; >+ goto out; >+ } >+ >+ /* >+ * Our caller asks for using the same clock event device for >+ * ticking than we do, let's create a tick emulation device to >+ * interpose on the set_next_event() method, so that we may >+ * both manage the device in oneshot mode. Only the tick >+ * emulation code will actually program the clockchip hardware >+ * for the next shot, though. >+ * >+ * CAUTION: we still have to grab the tick device even when it >+ * current runs in periodic mode, since the kernel may switch >+ * to oneshot dynamically (highres/no_hz tick mode). 
>+ */ >+ >+ evtdev = slave->evtdev; >+ status = evtdev->mode; >+ >+ if (status == CLOCK_EVT_MODE_SHUTDOWN) >+ goto out; >+ >+ itd->slave = slave; >+ itd->emul_set_mode = emumode; >+ itd->emul_set_tick = emutick; >+ itd->real_set_mode = evtdev->set_mode; >+ itd->real_set_tick = evtdev->set_next_event; >+ itd->real_max_delta_ns = evtdev->max_delta_ns; >+ itd->real_mult = evtdev->mult; >+ itd->real_shift = evtdev->shift; >+ freq = (1000000000ULL * evtdev->mult) >> evtdev->shift; >+ *tmfreq = (unsigned long)freq; >+ evtdev->set_mode = emumode; >+ evtdev->set_next_event = emutick; >+ evtdev->max_delta_ns = ULONG_MAX; >+ evtdev->mult = 1; >+ evtdev->shift = 0; >+out: >+ ipipe_critical_exit(flags); >+ >+ return status; >+} >+ >+void ipipe_release_tickdev(int cpu) >+{ >+ struct ipipe_tick_device *itd; >+ struct tick_device *slave; >+ struct clock_event_device *evtdev; >+ unsigned long flags; >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ itd = &per_cpu(ipipe_tick_cpu_device, cpu); >+ >+ if (itd->slave != NULL) { >+ slave = &per_cpu(tick_cpu_device, cpu); >+ evtdev = slave->evtdev; >+ evtdev->set_mode = itd->real_set_mode; >+ evtdev->set_next_event = itd->real_set_tick; >+ evtdev->max_delta_ns = itd->real_max_delta_ns; >+ evtdev->mult = itd->real_mult; >+ evtdev->shift = itd->real_shift; >+ itd->slave = NULL; >+ } >+ >+ ipipe_critical_exit(flags); >+} >+ >+#endif /* CONFIG_GENERIC_CLOCKEVENTS */ >+ >+void __init ipipe_init_early(void) >+{ >+ struct ipipe_domain *ipd = &ipipe_root; >+ >+ /* >+ * Do the early init stuff. At this point, the kernel does not >+ * provide much services yet: be careful. >+ */ >+ __ipipe_check_platform(); /* Do platform dependent checks first. */ >+ >+ /* >+ * A lightweight registration code for the root domain. We are >+ * running on the boot CPU, hw interrupts are off, and >+ * secondary CPUs are still lost in space. >+ */ >+ >+ /* Reserve percpu data slot #0 for the root domain. */ >+ ipd->slot = 0; >+ set_bit(0, &__ipipe_domain_slot_map); >+ >+ ipd->name = "Linux"; >+ ipd->domid = IPIPE_ROOT_ID; >+ ipd->priority = IPIPE_ROOT_PRIO; >+ >+ __ipipe_init_stage(ipd); >+ >+ list_add_tail(&ipd->p_link, &__ipipe_pipeline); >+ >+ __ipipe_init_platform(); >+ >+#ifdef CONFIG_PRINTK >+ __ipipe_printk_virq = ipipe_alloc_virq(); /* Cannot fail here. */ >+ ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk; >+ ipd->irqs[__ipipe_printk_virq].cookie = NULL; >+ ipd->irqs[__ipipe_printk_virq].acknowledge = NULL; >+ ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; >+#endif /* CONFIG_PRINTK */ >+} >+ >+void __init ipipe_init(void) >+{ >+ /* Now we may engage the pipeline. 
*/ >+ __ipipe_enable_pipeline(); >+ >+ printk(KERN_INFO "I-pipe %s: pipeline enabled.\n", >+ IPIPE_VERSION_STRING); >+} >+ >+void __ipipe_init_stage(struct ipipe_domain *ipd) >+{ >+ struct ipipe_percpu_domain_data *p; >+ unsigned long status; >+ int cpu, n; >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ status = p->status; >+ memset(p, 0, sizeof(*p)); >+ p->status = status; >+ } >+ >+ for (n = 0; n < IPIPE_NR_IRQS; n++) { >+ ipd->irqs[n].acknowledge = NULL; >+ ipd->irqs[n].handler = NULL; >+ ipd->irqs[n].control = IPIPE_PASS_MASK; /* Pass but don't handle */ >+ } >+ >+ for (n = 0; n < IPIPE_NR_EVENTS; n++) >+ ipd->evhand[n] = NULL; >+ >+ ipd->evself = 0LL; >+ mutex_init(&ipd->mutex); >+ >+ __ipipe_hook_critical_ipi(ipd); >+} >+ >+void __ipipe_cleanup_domain(struct ipipe_domain *ipd) >+{ >+ ipipe_unstall_pipeline_from(ipd); >+ >+#ifdef CONFIG_SMP >+ { >+ struct ipipe_percpu_domain_data *p; >+ int cpu; >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ while (__ipipe_ipending_p(p)) >+ cpu_relax(); >+ } >+ } >+#else >+ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = NULL; >+#endif >+ >+ clear_bit(ipd->slot, &__ipipe_domain_slot_map); >+} >+ >+void __ipipe_unstall_root(void) >+{ >+ struct ipipe_percpu_domain_data *p; >+ >+ local_irq_disable_hw(); >+ >+#ifdef CONFIG_IPIPE_DEBUG_INTERNAL >+ /* This helps catching bad usage from assembly call sites. */ >+ BUG_ON(!__ipipe_root_domain_p); >+#endif >+ >+ p = ipipe_root_cpudom_ptr(); >+ >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ >+ if (unlikely(__ipipe_ipending_p(p))) >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ >+ local_irq_enable_hw(); >+} >+ >+void __ipipe_restore_root(unsigned long x) >+{ >+#ifdef CONFIG_IPIPE_DEBUG_INTERNAL >+ BUG_ON(!ipipe_root_domain_p); >+#endif >+ >+ if (x) >+ __ipipe_stall_root(); >+ else >+ __ipipe_unstall_root(); >+} >+ >+void ipipe_stall_pipeline_from(struct ipipe_domain *ipd) >+{ >+ unsigned long flags; >+ /* >+ * We have to prevent against race on updating the status >+ * variable _and_ CPU migration at the same time, so disable >+ * hw IRQs here. 
>+ */ >+ local_irq_save_hw(flags); >+ >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); >+ >+ if (!__ipipe_pipeline_head_p(ipd)) >+ local_irq_restore_hw(flags); >+} >+ >+unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd) >+{ >+ unsigned long flags, x; >+ >+ /* See ipipe_stall_pipeline_from() */ >+ local_irq_save_hw(flags); >+ >+ x = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); >+ >+ if (!__ipipe_pipeline_head_p(ipd)) >+ local_irq_restore_hw(flags); >+ >+ return x; >+} >+ >+unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd) >+{ >+ unsigned long flags, x; >+ struct list_head *pos; >+ >+ local_irq_save_hw(flags); >+ >+ x = __test_and_clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); >+ >+ if (ipd == __ipipe_current_domain) >+ pos = &ipd->p_link; >+ else >+ pos = __ipipe_pipeline.next; >+ >+ __ipipe_walk_pipeline(pos); >+ >+ if (likely(__ipipe_pipeline_head_p(ipd))) >+ local_irq_enable_hw(); >+ else >+ local_irq_restore_hw(flags); >+ >+ return x; >+} >+ >+void ipipe_restore_pipeline_from(struct ipipe_domain *ipd, >+ unsigned long x) >+{ >+ if (x) >+ ipipe_stall_pipeline_from(ipd); >+ else >+ ipipe_unstall_pipeline_from(ipd); >+} >+ >+void ipipe_unstall_pipeline_head(void) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr(); >+ struct ipipe_domain *head_domain; >+ >+ local_irq_disable_hw(); >+ >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ >+ if (unlikely(__ipipe_ipending_p(p))) { >+ head_domain = __ipipe_pipeline_head(); >+ if (likely(head_domain == __ipipe_current_domain)) >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ else >+ __ipipe_walk_pipeline(&head_domain->p_link); >+ } >+ >+ local_irq_enable_hw(); >+} >+ >+void __ipipe_restore_pipeline_head(unsigned long x) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr(); >+ struct ipipe_domain *head_domain; >+ >+ local_irq_disable_hw(); >+ >+ if (x) { >+#ifdef CONFIG_DEBUG_KERNEL >+ static int warned; >+ if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) { >+ /* >+ * Already stalled albeit ipipe_restore_pipeline_head() >+ * should have detected it? Send a warning once. 
>+ */ >+ warned = 1; >+ printk(KERN_WARNING >+ "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n"); >+ dump_stack(); >+ } >+#else /* !CONFIG_DEBUG_KERNEL */ >+ set_bit(IPIPE_STALL_FLAG, &p->status); >+#endif /* CONFIG_DEBUG_KERNEL */ >+ } >+ else { >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ if (unlikely(__ipipe_ipending_p(p))) { >+ head_domain = __ipipe_pipeline_head(); >+ if (likely(head_domain == __ipipe_current_domain)) >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ else >+ __ipipe_walk_pipeline(&head_domain->p_link); >+ } >+ local_irq_enable_hw(); >+ } >+} >+ >+void __ipipe_spin_lock_irq(raw_spinlock_t *lock) >+{ >+ local_irq_disable_hw(); >+ __raw_spin_lock(lock); >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+} >+ >+void __ipipe_spin_unlock_irq(raw_spinlock_t *lock) >+{ >+ __raw_spin_unlock(lock); >+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ local_irq_enable_hw(); >+} >+ >+unsigned long __ipipe_spin_lock_irqsave(raw_spinlock_t *lock) >+{ >+ unsigned long flags; >+ int s; >+ >+ local_irq_save_hw(flags); >+ __raw_spin_lock(lock); >+ s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ >+ return raw_mangle_irq_bits(s, flags); >+} >+ >+void __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long x) >+{ >+ __raw_spin_unlock(lock); >+ if (!raw_demangle_irq_bits(&x)) >+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ local_irq_restore_hw(x); >+} >+ >+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) >+{ >+ __raw_spin_unlock(&lock->bare_lock); >+} >+ >+void __ipipe_spin_unlock_irqcomplete(unsigned long x) >+{ >+ if (!raw_demangle_irq_bits(&x)) >+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ local_irq_restore_hw(x); >+} >+ >+#ifdef __IPIPE_3LEVEL_IRQMAP >+ >+/* Must be called hw IRQs off. */ >+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, >+ unsigned int irq) >+{ >+ __set_bit(irq, p->irqheld_map); >+ p->irqall[irq]++; >+} >+ >+/* Must be called hw IRQs off. */ >+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd); >+ int l0b, l1b; >+ >+ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); >+ l1b = irq / BITS_PER_LONG; >+ prefetchw(p); >+ >+ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { >+ __set_bit(irq, p->irqpend_lomap); >+ __set_bit(l1b, p->irqpend_mdmap); >+ __set_bit(l0b, &p->irqpend_himap); >+ } else >+ __set_bit(irq, p->irqheld_map); >+ >+ p->irqall[irq]++; >+} >+ >+/* Must be called hw IRQs off. */ >+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned int irq) >+{ >+ struct ipipe_percpu_domain_data *p; >+ int l0b, l1b; >+ >+ if (unlikely(test_and_set_bit(IPIPE_LOCK_FLAG, >+ &ipd->irqs[irq].control))) >+ return; >+ >+ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); >+ l1b = irq / BITS_PER_LONG; >+ >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ if (__test_and_clear_bit(irq, p->irqpend_lomap)) { >+ __set_bit(irq, p->irqheld_map); >+ if (p->irqpend_lomap[l1b] == 0) { >+ __clear_bit(l1b, p->irqpend_mdmap); >+ if (p->irqpend_mdmap[l0b] == 0) >+ __clear_bit(l0b, &p->irqpend_himap); >+ } >+ } >+} >+ >+/* Must be called hw IRQs off. 
*/ >+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned int irq) >+{ >+ struct ipipe_percpu_domain_data *p; >+ int l0b, l1b, cpu; >+ >+ if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG, >+ &ipd->irqs[irq].control))) >+ return; >+ >+ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); >+ l1b = irq / BITS_PER_LONG; >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ if (test_and_clear_bit(irq, p->irqheld_map)) { >+ /* We need atomic ops here: */ >+ set_bit(irq, p->irqpend_lomap); >+ set_bit(l1b, p->irqpend_mdmap); >+ set_bit(l0b, &p->irqpend_himap); >+ } >+ } >+} >+ >+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p, >+ int dovirt) >+{ >+ unsigned long l0m, l1m, l2m, himask, mdmask; >+ int l0b, l1b, l2b, vl0b, vl1b; >+ unsigned int irq; >+ >+ if (dovirt) { >+ /* >+ * All virtual IRQs are mapped by a single long word. >+ * There is exactly BITS_PER_LONG virqs, and they are >+ * always last in the interrupt map, starting at >+ * IPIPE_VIRQ_BASE. Therefore, we only need to test a >+ * single bit within the high and middle maps to check >+ * whether a virtual IRQ is pending (the computations >+ * below are constant). >+ */ >+ vl0b = IPIPE_VIRQ_BASE / (BITS_PER_LONG * BITS_PER_LONG); >+ himask = (1L << vl0b); >+ vl1b = IPIPE_VIRQ_BASE / BITS_PER_LONG; >+ mdmask = (1L << (vl1b & (BITS_PER_LONG-1))); >+ } else >+ himask = mdmask = ~0L; >+ >+ l0m = p->irqpend_himap & himask; >+ if (unlikely(l0m == 0)) >+ return -1; >+ >+ l0b = __ipipe_ffnz(l0m); >+ l1m = p->irqpend_mdmap[l0b] & mdmask; >+ if (unlikely(l1m == 0)) >+ return -1; >+ >+ l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG; >+ l2m = p->irqpend_lomap[l1b]; >+ if (unlikely(l2m == 0)) >+ return -1; >+ >+ l2b = __ipipe_ffnz(l2m); >+ irq = l1b * BITS_PER_LONG + l2b; >+ >+ __clear_bit(irq, p->irqpend_lomap); >+ if (p->irqpend_lomap[l1b] == 0) { >+ __clear_bit(l1b, p->irqpend_mdmap); >+ if (p->irqpend_mdmap[l0b] == 0) >+ __clear_bit(l0b, &p->irqpend_himap); >+ } >+ >+ return irq; >+} >+ >+#else /* __IPIPE_2LEVEL_IRQMAP */ >+ >+/* Must be called hw IRQs off. */ >+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, >+ unsigned int irq) >+{ >+ __set_bit(irq, p->irqheld_map); >+ p->irqall[irq]++; >+} >+ >+/* Must be called hw IRQs off. */ >+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd); >+ int l0b = irq / BITS_PER_LONG; >+ >+ prefetchw(p); >+ >+ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { >+ __set_bit(irq, p->irqpend_lomap); >+ __set_bit(l0b, &p->irqpend_himap); >+ } else >+ __set_bit(irq, p->irqheld_map); >+ >+ p->irqall[irq]++; >+} >+ >+/* Must be called hw IRQs off. */ >+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq) >+{ >+ struct ipipe_percpu_domain_data *p; >+ int l0b = irq / BITS_PER_LONG; >+ >+ if (unlikely(test_and_set_bit(IPIPE_LOCK_FLAG, >+ &ipd->irqs[irq].control))) >+ return; >+ >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ if (__test_and_clear_bit(irq, p->irqpend_lomap)) { >+ __set_bit(irq, p->irqheld_map); >+ if (p->irqpend_lomap[l0b] == 0) >+ __clear_bit(l0b, &p->irqpend_himap); >+ } >+} >+ >+/* Must be called hw IRQs off. 
*/ >+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq) >+{ >+ struct ipipe_percpu_domain_data *p; >+ int l0b = irq / BITS_PER_LONG, cpu; >+ >+ if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG, >+ &ipd->irqs[irq].control))) >+ return; >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ if (test_and_clear_bit(irq, p->irqheld_map)) { >+ /* We need atomic ops here: */ >+ set_bit(irq, p->irqpend_lomap); >+ set_bit(l0b, &p->irqpend_himap); >+ } >+ } >+} >+ >+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p, >+ int dovirt) >+{ >+ unsigned long l0m, l1m, himask = ~0L; >+ int l0b, l1b; >+ >+ himask <<= dovirt ? IPIPE_VIRQ_BASE/BITS_PER_LONG : 0; >+ >+ l0m = p->irqpend_himap & himask; >+ if (unlikely(l0m == 0)) >+ return -1; >+ >+ l0b = __ipipe_ffnz(l0m); >+ l1m = p->irqpend_lomap[l0b]; >+ if (unlikely(l1m == 0)) >+ return -1; >+ >+ l1b = __ipipe_ffnz(l1m); >+ __clear_bit(l1b, &p->irqpend_lomap[l0b]); >+ if (p->irqpend_lomap[l0b] == 0) >+ __clear_bit(l0b, &p->irqpend_himap); >+ >+ return l0b * BITS_PER_LONG + l1b; >+} >+ >+#endif /* __IPIPE_2LEVEL_IRQMAP */ >+ >+/* >+ * __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must >+ * be called with local hw interrupts disabled. >+ */ >+void __ipipe_walk_pipeline(struct list_head *pos) >+{ >+ struct ipipe_domain *this_domain = __ipipe_current_domain, *next_domain; >+ struct ipipe_percpu_domain_data *p, *np; >+ >+ p = ipipe_cpudom_ptr(this_domain); >+ >+ while (pos != &__ipipe_pipeline) { >+ >+ next_domain = list_entry(pos, struct ipipe_domain, p_link); >+ np = ipipe_cpudom_ptr(next_domain); >+ >+ if (test_bit(IPIPE_STALL_FLAG, &np->status)) >+ break; /* Stalled stage -- do not go further. */ >+ >+ if (__ipipe_ipending_p(np)) { >+ if (next_domain == this_domain) >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ else { >+ >+ p->evsync = 0; >+ __ipipe_current_domain = next_domain; >+ ipipe_suspend_domain(); /* Sync stage and propagate interrupts. */ >+ >+ if (__ipipe_current_domain == next_domain) >+ __ipipe_current_domain = this_domain; >+ /* >+ * Otherwise, something changed the current domain under our >+ * feet recycling the register set; do not override the new >+ * domain. >+ */ >+ >+ if (__ipipe_ipending_p(p) && >+ !test_bit(IPIPE_STALL_FLAG, &p->status)) >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ } >+ break; >+ } else if (next_domain == this_domain) >+ break; >+ >+ pos = next_domain->p_link.next; >+ } >+} >+ >+/* >+ * ipipe_suspend_domain() -- Suspend the current domain, switching to >+ * the next one which has pending work down the pipeline. 
>+ */ >+void ipipe_suspend_domain(void) >+{ >+ struct ipipe_domain *this_domain, *next_domain; >+ struct ipipe_percpu_domain_data *p; >+ struct list_head *ln; >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ >+ this_domain = next_domain = __ipipe_current_domain; >+ p = ipipe_cpudom_ptr(this_domain); >+ p->status &= ~(IPIPE_STALL_MASK|IPIPE_SYNC_MASK); >+ >+ if (__ipipe_ipending_p(p)) >+ goto sync_stage; >+ >+ for (;;) { >+ ln = next_domain->p_link.next; >+ >+ if (ln == &__ipipe_pipeline) >+ break; >+ >+ next_domain = list_entry(ln, struct ipipe_domain, p_link); >+ p = ipipe_cpudom_ptr(next_domain); >+ >+ if (p->status & IPIPE_STALL_MASK) >+ break; >+ >+ if (!__ipipe_ipending_p(p)) >+ continue; >+ >+ __ipipe_current_domain = next_domain; >+sync_stage: >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ >+ if (__ipipe_current_domain != next_domain) >+ /* >+ * Something has changed the current domain under our >+ * feet, recycling the register set; take note. >+ */ >+ this_domain = __ipipe_current_domain; >+ } >+ >+ __ipipe_current_domain = this_domain; >+ >+ local_irq_restore_hw(flags); >+} >+ >+ >+/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt. >+ * Virtual interrupts are handled in exactly the same way than their >+ * hw-generated counterparts wrt pipelining. >+ */ >+unsigned ipipe_alloc_virq(void) >+{ >+ unsigned long flags, irq = 0; >+ int ipos; >+ >+ spin_lock_irqsave(&__ipipe_pipelock, flags); >+ >+ if (__ipipe_virtual_irq_map != ~0) { >+ ipos = ffz(__ipipe_virtual_irq_map); >+ set_bit(ipos, &__ipipe_virtual_irq_map); >+ irq = ipos + IPIPE_VIRQ_BASE; >+ } >+ >+ spin_unlock_irqrestore(&__ipipe_pipelock, flags); >+ >+ return irq; >+} >+ >+/* >+ * ipipe_control_irq() -- Change modes of a pipelined interrupt for >+ * the current domain. >+ */ >+int ipipe_virtualize_irq(struct ipipe_domain *ipd, >+ unsigned irq, >+ ipipe_irq_handler_t handler, >+ void *cookie, >+ ipipe_irq_ackfn_t acknowledge, >+ unsigned modemask) >+{ >+ ipipe_irq_handler_t old_handler; >+ struct irq_desc *desc; >+ unsigned long flags; >+ int err; >+ >+ if (irq >= IPIPE_NR_IRQS) >+ return -EINVAL; >+ >+ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) >+ return -EPERM; >+ >+ if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags)) >+ /* Silently unwire interrupts for non-heading domains. */ >+ modemask &= ~IPIPE_WIRED_MASK; >+ >+ spin_lock_irqsave(&__ipipe_pipelock, flags); >+ >+ old_handler = ipd->irqs[irq].handler; >+ >+ if (handler != NULL) { >+ if (handler == IPIPE_SAME_HANDLER) { >+ handler = old_handler; >+ cookie = ipd->irqs[irq].cookie; >+ >+ if (handler == NULL) { >+ err = -EINVAL; >+ goto unlock_and_exit; >+ } >+ } else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 && >+ old_handler != NULL) { >+ err = -EBUSY; >+ goto unlock_and_exit; >+ } >+ >+ /* Wired interrupts can only be delivered to domains >+ * always heading the pipeline, and using dynamic >+ * propagation. */ >+ >+ if ((modemask & IPIPE_WIRED_MASK) != 0) { >+ if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) { >+ err = -EINVAL; >+ goto unlock_and_exit; >+ } >+ modemask |= (IPIPE_HANDLE_MASK); >+ } >+ >+ if ((modemask & IPIPE_STICKY_MASK) != 0) >+ modemask |= IPIPE_HANDLE_MASK; >+ } else >+ modemask &= >+ ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK | >+ IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK); >+ >+ if (acknowledge == NULL && !ipipe_virtual_irq_p(irq)) >+ /* >+ * Acknowledge handler unspecified for a hw interrupt: >+ * use the Linux-defined handler instead. 
>+ */ >+ acknowledge = ipipe_root_domain->irqs[irq].acknowledge; >+ >+ ipd->irqs[irq].handler = handler; >+ ipd->irqs[irq].cookie = cookie; >+ ipd->irqs[irq].acknowledge = acknowledge; >+ ipd->irqs[irq].control = modemask; >+ >+ if (irq < NR_IRQS && !ipipe_virtual_irq_p(irq)) { >+ desc = irq_to_desc(irq); >+ if (handler != NULL) { >+ if (desc) >+ __ipipe_enable_irqdesc(ipd, irq); >+ >+ if ((modemask & IPIPE_ENABLE_MASK) != 0) { >+ if (ipd != __ipipe_current_domain) { >+ /* >+ * IRQ enable/disable state is domain-sensitive, so we >+ * may not change it for another domain. What is >+ * allowed however is forcing some domain to handle an >+ * interrupt source, by passing the proper 'ipd' >+ * descriptor which thus may be different from >+ * __ipipe_current_domain. >+ */ >+ err = -EPERM; >+ goto unlock_and_exit; >+ } >+ if (desc) >+ __ipipe_enable_irq(irq); >+ } >+ } else if (old_handler != NULL && desc) >+ __ipipe_disable_irqdesc(ipd, irq); >+ } >+ >+ err = 0; >+ >+ unlock_and_exit: >+ >+ spin_unlock_irqrestore(&__ipipe_pipelock, flags); >+ >+ return err; >+} >+ >+/* ipipe_control_irq() -- Change modes of a pipelined interrupt for >+ * the current domain. */ >+ >+int ipipe_control_irq(unsigned irq, unsigned clrmask, unsigned setmask) >+{ >+ struct ipipe_domain *ipd; >+ unsigned long flags; >+ >+ if (irq >= IPIPE_NR_IRQS) >+ return -EINVAL; >+ >+ spin_lock_irqsave(&__ipipe_pipelock, flags); >+ >+ ipd = __ipipe_current_domain; >+ >+ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) { >+ spin_unlock_irqrestore(&__ipipe_pipelock, flags); >+ return -EPERM; >+ } >+ >+ if (ipd->irqs[irq].handler == NULL) >+ setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); >+ >+ if ((setmask & IPIPE_STICKY_MASK) != 0) >+ setmask |= IPIPE_HANDLE_MASK; >+ >+ if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0) /* If one goes, both go. */ >+ clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); >+ >+ ipd->irqs[irq].control &= ~clrmask; >+ ipd->irqs[irq].control |= setmask; >+ >+ if ((setmask & IPIPE_ENABLE_MASK) != 0) >+ __ipipe_enable_irq(irq); >+ else if ((clrmask & IPIPE_ENABLE_MASK) != 0) >+ __ipipe_disable_irq(irq); >+ >+ spin_unlock_irqrestore(&__ipipe_pipelock, flags); >+ >+ return 0; >+} >+ >+/* __ipipe_dispatch_event() -- Low-level event dispatcher. */ >+ >+int __ipipe_dispatch_event (unsigned event, void *data) >+{ >+extern void *ipipe_irq_handler; void *handler; if (ipipe_irq_handler != __ipipe_handle_irq && (handler = ipipe_root_domain->evhand[event])) { return ((int (*)(unsigned long, void *))handler)(event, data); } else { >+ struct ipipe_domain *start_domain, *this_domain, *next_domain; >+ struct ipipe_percpu_domain_data *np; >+ ipipe_event_handler_t evhand; >+ struct list_head *pos, *npos; >+ unsigned long flags; >+ int propagate = 1; >+ >+ local_irq_save_hw(flags); >+ >+ start_domain = this_domain = __ipipe_current_domain; >+ >+ list_for_each_safe(pos, npos, &__ipipe_pipeline) { >+ /* >+ * Note: Domain migration may occur while running >+ * event or interrupt handlers, in which case the >+ * current register set is going to be recycled for a >+ * different domain than the initiating one. We do >+ * care for that, always tracking the current domain >+ * descriptor upon return from those handlers. >+ */ >+ next_domain = list_entry(pos, struct ipipe_domain, p_link); >+ np = ipipe_cpudom_ptr(next_domain); >+ >+ /* >+ * Keep a cached copy of the handler's address since >+ * ipipe_catch_event() may clear it under our feet. 
>+ */ >+ evhand = next_domain->evhand[event]; >+ >+ if (evhand != NULL) { >+ __ipipe_current_domain = next_domain; >+ np->evsync |= (1LL << event); >+ local_irq_restore_hw(flags); >+ propagate = !evhand(event, start_domain, data); >+ local_irq_save_hw(flags); >+ /* >+ * We may have a migration issue here, if the >+ * current task is migrated to another CPU on >+ * behalf of the invoked handler, usually when >+ * a syscall event is processed. However, >+ * ipipe_catch_event() will make sure that a >+ * CPU that clears a handler for any given >+ * event will not attempt to wait for itself >+ * to clear the evsync bit for that event, >+ * which practically plugs the hole, without >+ * resorting to a much more complex strategy. >+ */ >+ np->evsync &= ~(1LL << event); >+ if (__ipipe_current_domain != next_domain) >+ this_domain = __ipipe_current_domain; >+ } >+ >+ /* NEVER sync the root stage here. */ >+ if (next_domain != ipipe_root_domain && >+ __ipipe_ipending_p(np) && >+ !test_bit(IPIPE_STALL_FLAG, &np->status)) { >+ __ipipe_current_domain = next_domain; >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ if (__ipipe_current_domain != next_domain) >+ this_domain = __ipipe_current_domain; >+ } >+ >+ __ipipe_current_domain = this_domain; >+ >+ if (next_domain == this_domain || !propagate) >+ break; >+ } >+ >+ local_irq_restore_hw(flags); >+ >+ return !propagate; >+} } >+ >+/* >+ * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired >+ * interrupts are immediately and unconditionally delivered to the >+ * domain heading the pipeline upon receipt, and such domain must have >+ * been registered as an invariant head for the system (priority == >+ * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is >+ * to get an extra-fast dispatching path for those IRQs, by relying on >+ * a straightforward logic based on assumptions that must always be >+ * true for invariant head domains. The following assumptions are >+ * made when dealing with such interrupts: >+ * >+ * 1- Wired interrupts are purely dynamic, i.e. the decision to >+ * propagate them down the pipeline must be done from the head domain >+ * ISR. >+ * 2- Wired interrupts cannot be shared or sticky. >+ * 3- The root domain cannot be an invariant pipeline head, in >+ * consequence of what the root domain cannot handle wired >+ * interrupts. >+ * 4- Wired interrupts must have a valid acknowledge handler for the >+ * head domain (if needed, see __ipipe_handle_irq). >+ * >+ * Called with hw interrupts off. >+ */ >+ >+void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head); >+ >+ prefetchw(p); >+ >+ if (unlikely(test_bit(IPIPE_LOCK_FLAG, &head->irqs[irq].control))) { >+ /* >+ * If we can't process this IRQ right now, we must >+ * mark it as held, so that it will get played during >+ * normal log sync when the corresponding interrupt >+ * source is eventually unlocked. >+ */ >+ __ipipe_set_irq_held(p, irq); >+ return; >+ } >+ >+ if (test_bit(IPIPE_STALL_FLAG, &p->status)) { >+ __ipipe_set_irq_pending(head, irq); >+ return; >+ } >+ >+ __ipipe_dispatch_wired_nocheck(head, irq); >+} >+ >+void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq) /* hw interrupts off */ >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head); >+ struct ipipe_domain *old; >+ >+ prefetchw(p); >+ >+ old = __ipipe_current_domain; >+ __ipipe_current_domain = head; /* Switch to the head domain. 
*/ >+ >+ p->irqall[irq]++; >+ __set_bit(IPIPE_STALL_FLAG, &p->status); >+ head->irqs[irq].handler(irq, head->irqs[irq].cookie); /* Call the ISR. */ >+ __ipipe_run_irqtail(); >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ >+ if (__ipipe_current_domain == head) { >+ __ipipe_current_domain = old; >+ if (old == head) { >+ if (__ipipe_ipending_p(p)) >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ return; >+ } >+ } >+ >+ __ipipe_walk_pipeline(&head->p_link); >+} >+ >+/* >+ * __ipipe_sync_stage() -- Flush the pending IRQs for the current >+ * domain (and processor). This routine flushes the interrupt log >+ * (see "Optimistic interrupt protection" from D. Stodolsky et al. for >+ * more on the deferred interrupt scheme). Every interrupt that >+ * occurred while the pipeline was stalled gets played. WARNING: >+ * callers on SMP boxen should always check for CPU migration on >+ * return of this routine. >+ * >+ * This routine must be called with hw interrupts off. >+ */ >+void __ipipe_sync_stage(int dovirt) >+{ >+ struct ipipe_percpu_domain_data *p; >+ struct ipipe_domain *ipd; >+ int cpu, irq; >+ >+ ipd = __ipipe_current_domain; >+ p = ipipe_cpudom_ptr(ipd); >+ >+ if (__test_and_set_bit(IPIPE_SYNC_FLAG, &p->status)) { >+ /* >+ * Some questionable code in the root domain may enter >+ * busy waits for IRQs over interrupt context, so we >+ * unfortunately have to allow piling up IRQs for >+ * them. Non-root domains are not allowed to do this. >+ */ >+ if (ipd != ipipe_root_domain) >+ return; >+ } >+ >+ cpu = ipipe_processor_id(); >+ >+ for (;;) { >+ irq = __ipipe_next_irq(p, dovirt); >+ if (irq < 0) >+ break; >+ /* >+ * Make sure the compiler does not reorder >+ * wrongly, so that all updates to maps are >+ * done before the handler gets called. >+ */ >+ barrier(); >+ >+ if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) >+ continue; >+ >+ __set_bit(IPIPE_STALL_FLAG, &p->status); >+ smp_wmb(); >+ >+ if (ipd == ipipe_root_domain) >+ trace_hardirqs_off(); >+ >+ __ipipe_run_isr(ipd, irq); >+ barrier(); >+ p = ipipe_cpudom_ptr(__ipipe_current_domain); >+#ifdef CONFIG_SMP >+ { >+ int newcpu = ipipe_processor_id(); >+ >+ if (newcpu != cpu) { /* Handle CPU migration. */ >+ /* >+ * We expect any domain to clear the SYNC bit each >+ * time it switches in a new task, so that preemptions >+ * and/or CPU migrations (in the SMP case) over the >+ * ISR do not lock out the log syncer for some >+ * indefinite amount of time. In the Linux case, >+ * schedule() handles this (see kernel/sched.c). For >+ * this reason, we don't bother clearing it here for >+ * the source CPU in the migration handling case, >+ * since it must have scheduled another task in by >+ * now. >+ */ >+ __set_bit(IPIPE_SYNC_FLAG, &p->status); >+ cpu = newcpu; >+ } >+ } >+#endif /* CONFIG_SMP */ >+#ifdef CONFIG_TRACE_IRQFLAGS >+ if (__ipipe_root_domain_p && >+ test_bit(IPIPE_STALL_FLAG, &p->status)) >+ trace_hardirqs_on(); >+#endif >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ } >+ >+ __clear_bit(IPIPE_SYNC_FLAG, &p->status); >+} >+ >+/* ipipe_register_domain() -- Link a new domain to the pipeline. 
*/ >+ >+int ipipe_register_domain(struct ipipe_domain *ipd, >+ struct ipipe_domain_attr *attr) >+{ >+ struct ipipe_percpu_domain_data *p; >+ struct list_head *pos = NULL; >+ struct ipipe_domain *_ipd; >+ unsigned long flags; >+ >+ if (!ipipe_root_domain_p) { >+ printk(KERN_WARNING >+ "I-pipe: Only the root domain may register a new domain.\n"); >+ return -EPERM; >+ } >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ if (attr->priority == IPIPE_HEAD_PRIORITY) { >+ if (test_bit(IPIPE_HEAD_SLOT, &__ipipe_domain_slot_map)) { >+ ipipe_critical_exit(flags); >+ return -EAGAIN; /* Cannot override current head. */ >+ } >+ ipd->slot = IPIPE_HEAD_SLOT; >+ } else >+ ipd->slot = ffz(__ipipe_domain_slot_map); >+ >+ if (ipd->slot < CONFIG_IPIPE_DOMAINS) { >+ set_bit(ipd->slot, &__ipipe_domain_slot_map); >+ list_for_each(pos, &__ipipe_pipeline) { >+ _ipd = list_entry(pos, struct ipipe_domain, p_link); >+ if (_ipd->domid == attr->domid) >+ break; >+ } >+ } >+ >+ ipipe_critical_exit(flags); >+ >+ if (pos != &__ipipe_pipeline) { >+ if (ipd->slot < CONFIG_IPIPE_DOMAINS) >+ clear_bit(ipd->slot, &__ipipe_domain_slot_map); >+ return -EBUSY; >+ } >+ >+#ifndef CONFIG_SMP >+ /* >+ * Set up the perdomain pointers for direct access to the >+ * percpu domain data. This saves a costly multiply each time >+ * we need to refer to the contents of the percpu domain data >+ * array. >+ */ >+ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = &__raw_get_cpu_var(ipipe_percpu_darray)[ipd->slot]; >+#endif >+ >+ ipd->name = attr->name; >+ ipd->domid = attr->domid; >+ ipd->pdd = attr->pdd; >+ ipd->flags = 0; >+ >+ if (attr->priority == IPIPE_HEAD_PRIORITY) { >+ ipd->priority = INT_MAX; >+ __set_bit(IPIPE_AHEAD_FLAG,&ipd->flags); >+ } >+ else >+ ipd->priority = attr->priority; >+ >+ __ipipe_init_stage(ipd); >+ >+ INIT_LIST_HEAD(&ipd->p_link); >+ >+#ifdef CONFIG_PROC_FS >+ __ipipe_add_domain_proc(ipd); >+#endif /* CONFIG_PROC_FS */ >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ list_for_each(pos, &__ipipe_pipeline) { >+ _ipd = list_entry(pos, struct ipipe_domain, p_link); >+ if (ipd->priority > _ipd->priority) >+ break; >+ } >+ >+ list_add_tail(&ipd->p_link, pos); >+ >+ ipipe_critical_exit(flags); >+ >+ printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name); >+ >+ if (attr->entry == NULL) >+ return 0; >+ >+ /* >+ * Finally, allow the new domain to perform its initialization >+ * duties. >+ */ >+ local_irq_save_hw_smp(flags); >+ __ipipe_current_domain = ipd; >+ local_irq_restore_hw_smp(flags); >+ attr->entry(); >+ local_irq_save_hw(flags); >+ __ipipe_current_domain = ipipe_root_domain; >+ p = ipipe_root_cpudom_ptr(); >+ >+ if (__ipipe_ipending_p(p) && >+ !test_bit(IPIPE_STALL_FLAG, &p->status)) >+ __ipipe_sync_pipeline(IPIPE_IRQ_DOALL); >+ >+ local_irq_restore_hw(flags); >+ >+ return 0; >+} >+ >+/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */ >+ >+int ipipe_unregister_domain(struct ipipe_domain *ipd) >+{ >+ unsigned long flags; >+ >+ if (!ipipe_root_domain_p) { >+ printk(KERN_WARNING >+ "I-pipe: Only the root domain may unregister a domain.\n"); >+ return -EPERM; >+ } >+ >+ if (ipd == ipipe_root_domain) { >+ printk(KERN_WARNING >+ "I-pipe: Cannot unregister the root domain.\n"); >+ return -EPERM; >+ } >+#ifdef CONFIG_SMP >+ { >+ struct ipipe_percpu_domain_data *p; >+ unsigned int irq; >+ int cpu; >+ >+ /* >+ * In the SMP case, wait for the logged events to drain on >+ * other processors before eventually removing the domain >+ * from the pipeline. 
>+ */ >+ >+ ipipe_unstall_pipeline_from(ipd); >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { >+ clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control); >+ clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control); >+ set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control); >+ } >+ >+ ipipe_critical_exit(flags); >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ while (__ipipe_ipending_p(p)) >+ cpu_relax(); >+ } >+ } >+#endif /* CONFIG_SMP */ >+ >+ mutex_lock(&ipd->mutex); >+ >+#ifdef CONFIG_PROC_FS >+ __ipipe_remove_domain_proc(ipd); >+#endif /* CONFIG_PROC_FS */ >+ >+ /* >+ * Simply remove the domain from the pipeline and we are almost done. >+ */ >+ >+ flags = ipipe_critical_enter(NULL); >+ list_del_init(&ipd->p_link); >+ ipipe_critical_exit(flags); >+ >+ __ipipe_cleanup_domain(ipd); >+ >+ mutex_unlock(&ipd->mutex); >+ >+ printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name); >+ >+ return 0; >+} >+ >+/* >+ * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of >+ * a running interrupt handler to the next domain down the pipeline. >+ * ipipe_schedule_irq() -- Does almost the same as above, but attempts >+ * to pend the interrupt for the current domain first. >+ * Must be called hw IRQs off. >+ */ >+void __ipipe_pend_irq(unsigned irq, struct list_head *head) >+{ >+ struct ipipe_domain *ipd; >+ struct list_head *ln; >+ >+#ifdef CONFIG_IPIPE_DEBUG >+ BUG_ON(irq >= IPIPE_NR_IRQS || >+ (ipipe_virtual_irq_p(irq) >+ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))); >+#endif >+ for (ln = head; ln != &__ipipe_pipeline; ln = ipd->p_link.next) { >+ ipd = list_entry(ln, struct ipipe_domain, p_link); >+ if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) { >+ __ipipe_set_irq_pending(ipd, irq); >+ return; >+ } >+ } >+} >+ >+/* ipipe_free_virq() -- Release a virtual/soft interrupt. */ >+ >+int ipipe_free_virq(unsigned virq) >+{ >+ if (!ipipe_virtual_irq_p(virq)) >+ return -EINVAL; >+ >+ clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); >+ >+ return 0; >+} >+ >+void ipipe_init_attr(struct ipipe_domain_attr *attr) >+{ >+ attr->name = "anon"; >+ attr->domid = 1; >+ attr->entry = NULL; >+ attr->priority = IPIPE_ROOT_PRIO; >+ attr->pdd = NULL; >+} >+ >+/* >+ * ipipe_catch_event() -- Interpose or remove an event handler for a >+ * given domain. 
>+ */ >+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, >+ unsigned event, >+ ipipe_event_handler_t handler) >+{ >+ ipipe_event_handler_t old_handler; >+ unsigned long flags; >+ int self = 0, cpu; >+ >+ if (event & IPIPE_EVENT_SELF) { >+ event &= ~IPIPE_EVENT_SELF; >+ self = 1; >+ } >+ >+ if (event >= IPIPE_NR_EVENTS) >+ return NULL; >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ if (!(old_handler = xchg(&ipd->evhand[event],handler))) { >+ if (handler) { >+ if (self) >+ ipd->evself |= (1LL << event); >+ else >+ __ipipe_event_monitors[event]++; >+ } >+ } >+ else if (!handler) { >+ if (ipd->evself & (1LL << event)) >+ ipd->evself &= ~(1LL << event); >+ else >+ __ipipe_event_monitors[event]--; >+ } else if ((ipd->evself & (1LL << event)) && !self) { >+ __ipipe_event_monitors[event]++; >+ ipd->evself &= ~(1LL << event); >+ } else if (!(ipd->evself & (1LL << event)) && self) { >+ __ipipe_event_monitors[event]--; >+ ipd->evself |= (1LL << event); >+ } >+ >+ ipipe_critical_exit(flags); >+ >+ if (!handler && ipipe_root_domain_p) { >+ /* >+ * If we cleared a handler on behalf of the root >+ * domain, we have to wait for any current invocation >+ * to drain, since our caller might subsequently unmap >+ * the target domain. To this aim, this code >+ * synchronizes with __ipipe_dispatch_event(), >+ * guaranteeing that either the dispatcher sees a null >+ * handler in which case it discards the invocation >+ * (which also prevents from entering a livelock), or >+ * finds a valid handler and calls it. Symmetrically, >+ * ipipe_catch_event() ensures that the called code >+ * won't be unmapped under our feet until the event >+ * synchronization flag is cleared for the given event >+ * on all CPUs. >+ */ >+ preempt_disable(); >+ cpu = smp_processor_id(); >+ /* >+ * Hack: this solves the potential migration issue >+ * raised in __ipipe_dispatch_event(). This is a >+ * work-around which makes the assumption that other >+ * CPUs will subsequently, either process at least one >+ * interrupt for the target domain, or call >+ * __ipipe_dispatch_event() without going through a >+ * migration while running the handler at least once; >+ * practically, this is safe on any normally running >+ * system. >+ */ >+ ipipe_percpudom(ipd, evsync, cpu) &= ~(1LL << event); >+ preempt_enable(); >+ >+ for_each_online_cpu(cpu) { >+ while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event)) >+ schedule_timeout_interruptible(HZ / 50); >+ } >+ } >+ >+ return old_handler; >+} >+ >+cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask) >+{ >+#ifdef CONFIG_SMP >+ if (irq >= NR_IRQS) // if (irq >= IPIPE_NR_XIRQS) >+ /* Allow changing affinity of external IRQs only. 
*/ >+ return CPU_MASK_NONE; >+ >+ if (num_online_cpus() > 1) >+ return __ipipe_set_irq_affinity(irq,cpumask); >+#endif /* CONFIG_SMP */ >+ >+ return CPU_MASK_NONE; >+} >+ >+int ipipe_send_ipi (unsigned ipi, cpumask_t cpumask) >+ >+{ >+#ifdef CONFIG_SMP >+ return __ipipe_send_ipi(ipi,cpumask); >+#else /* !CONFIG_SMP */ >+ return -EINVAL; >+#endif /* CONFIG_SMP */ >+} >+ >+int ipipe_alloc_ptdkey (void) >+{ >+ unsigned long flags; >+ int key = -1; >+ >+ spin_lock_irqsave(&__ipipe_pipelock,flags); >+ >+ if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) { >+ key = ffz(__ipipe_ptd_key_map); >+ set_bit(key,&__ipipe_ptd_key_map); >+ __ipipe_ptd_key_count++; >+ } >+ >+ spin_unlock_irqrestore(&__ipipe_pipelock,flags); >+ >+ return key; >+} >+ >+int ipipe_free_ptdkey (int key) >+{ >+ unsigned long flags; >+ >+ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) >+ return -EINVAL; >+ >+ spin_lock_irqsave(&__ipipe_pipelock,flags); >+ >+ if (test_and_clear_bit(key,&__ipipe_ptd_key_map)) >+ __ipipe_ptd_key_count--; >+ >+ spin_unlock_irqrestore(&__ipipe_pipelock,flags); >+ >+ return 0; >+} >+ >+int ipipe_set_ptd (int key, void *value) >+ >+{ >+ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) >+ return -EINVAL; >+ >+ current->ptd[key] = value; >+ >+ return 0; >+} >+ >+void *ipipe_get_ptd (int key) >+ >+{ >+ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) >+ return NULL; >+ >+ return current->ptd[key]; >+} >+ >+#ifdef CONFIG_PROC_FS >+ >+struct proc_dir_entry *ipipe_proc_root; >+ >+static int __ipipe_version_info_proc(char *page, >+ char **start, >+ off_t off, int count, int *eof, void *data) >+{ >+ int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING); >+ >+ len -= off; >+ >+ if (len <= off + count) >+ *eof = 1; >+ >+ *start = page + off; >+ >+ if(len > count) >+ len = count; >+ >+ if(len < 0) >+ len = 0; >+ >+ return len; >+} >+ >+static int __ipipe_common_info_show(struct seq_file *p, void *data) >+{ >+ struct ipipe_domain *ipd = (struct ipipe_domain *)p->private; >+ char handling, stickiness, lockbit, exclusive, virtuality; >+ >+ unsigned long ctlbits; >+ unsigned irq; >+ >+ seq_printf(p, " +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n"); >+ seq_printf(p, " |+---- Sticky\n"); >+ seq_printf(p, " ||+--- Locked\n"); >+ seq_printf(p, " |||+-- Exclusive\n"); >+ seq_printf(p, " ||||+- Virtual\n"); >+ seq_printf(p, "[IRQ] |||||\n"); >+ >+ mutex_lock(&ipd->mutex); >+ >+ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { >+ /* Remember to protect against >+ * ipipe_virtual_irq/ipipe_control_irq if more fields >+ * get involved. */ >+ ctlbits = ipd->irqs[irq].control; >+ >+ if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) >+ /* >+ * There might be a hole between the last external >+ * IRQ and the first virtual one; skip it. >+ */ >+ continue; >+ >+ if (ipipe_virtual_irq_p(irq) >+ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) >+ /* Non-allocated virtual IRQ; skip it. */ >+ continue; >+ >+ /* >+ * Statuses are as follows: >+ * o "accepted" means handled _and_ passed down the pipeline. >+ * o "grabbed" means handled, but the interrupt might be >+ * terminated _or_ passed down the pipeline depending on >+ * what the domain handler asks for to the I-pipe. >+ * o "wired" is basically the same as "grabbed", except that >+ * the interrupt is unconditionally delivered to an invariant >+ * pipeline head domain. >+ * o "passed" means unhandled by the domain but passed >+ * down the pipeline. >+ * o "discarded" means unhandled and _not_ passed down the >+ * pipeline. 
The interrupt merely disappears from the >+ * current domain down to the end of the pipeline. >+ */ >+ if (ctlbits & IPIPE_HANDLE_MASK) { >+ if (ctlbits & IPIPE_PASS_MASK) >+ handling = 'A'; >+ else if (ctlbits & IPIPE_WIRED_MASK) >+ handling = 'W'; >+ else >+ handling = 'G'; >+ } else if (ctlbits & IPIPE_PASS_MASK) >+ /* Do not output if no major action is taken. */ >+ continue; >+ else >+ handling = 'D'; >+ >+ if (ctlbits & IPIPE_STICKY_MASK) >+ stickiness = 'S'; >+ else >+ stickiness = '.'; >+ >+ if (ctlbits & IPIPE_LOCK_MASK) >+ lockbit = 'L'; >+ else >+ lockbit = '.'; >+ >+ if (ctlbits & IPIPE_EXCLUSIVE_MASK) >+ exclusive = 'X'; >+ else >+ exclusive = '.'; >+ >+ if (ipipe_virtual_irq_p(irq)) >+ virtuality = 'V'; >+ else >+ virtuality = '.'; >+ >+ seq_printf(p, " %3u: %c%c%c%c%c\n", >+ irq, handling, stickiness, lockbit, exclusive, virtuality); >+ } >+ >+ seq_printf(p, "[Domain info]\n"); >+ >+ seq_printf(p, "id=0x%.8x\n", ipd->domid); >+ >+ if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags)) >+ seq_printf(p, "priority=topmost\n"); >+ else >+ seq_printf(p, "priority=%d\n", ipd->priority); >+ >+ mutex_unlock(&ipd->mutex); >+ >+ return 0; >+} >+ >+static int __ipipe_common_info_open(struct inode *inode, struct file *file) >+{ >+ return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data); >+} >+ >+static struct file_operations __ipipe_info_proc_ops = { >+ .owner = THIS_MODULE, >+ .open = __ipipe_common_info_open, >+ .read = seq_read, >+ .llseek = seq_lseek, >+ .release = single_release, >+}; >+ >+void __ipipe_add_domain_proc(struct ipipe_domain *ipd) >+{ >+ struct proc_dir_entry *e = create_proc_entry(ipd->name, 0444, ipipe_proc_root); >+ if (e) { >+ e->proc_fops = &__ipipe_info_proc_ops; >+ e->data = (void*) ipd; >+ } >+} >+ >+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd) >+{ >+ remove_proc_entry(ipd->name,ipipe_proc_root); >+} >+ >+void __init ipipe_init_proc(void) >+{ >+ ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0); >+ create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL); >+ __ipipe_add_domain_proc(ipipe_root_domain); >+ >+ __ipipe_init_tracer(); >+} >+ >+#endif /* CONFIG_PROC_FS */ >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+ >+DEFINE_PER_CPU(int, ipipe_percpu_context_check) = { 1 }; >+DEFINE_PER_CPU(int, ipipe_saved_context_check_state); >+ >+void ipipe_check_context(struct ipipe_domain *border_domain) >+{ >+ struct ipipe_percpu_domain_data *p; >+ struct ipipe_domain *this_domain; >+ unsigned long flags; >+ int cpu; >+ >+ local_irq_save_hw_smp(flags); >+ >+ this_domain = __ipipe_current_domain; >+ p = ipipe_head_cpudom_ptr(); >+ if (likely(this_domain->priority <= border_domain->priority && >+ !test_bit(IPIPE_STALL_FLAG, &p->status))) { >+ local_irq_restore_hw_smp(flags); >+ return; >+ } >+ >+ cpu = ipipe_processor_id(); >+ if (!per_cpu(ipipe_percpu_context_check, cpu)) { >+ local_irq_restore_hw_smp(flags); >+ return; >+ } >+ >+ local_irq_restore_hw_smp(flags); >+ >+ ipipe_context_check_off(); >+ ipipe_trace_panic_freeze(); >+ ipipe_set_printk_sync(__ipipe_current_domain); >+ >+ if (this_domain->priority > border_domain->priority) >+ printk(KERN_ERR "I-pipe: Detected illicit call from domain " >+ "'%s'\n" >+ KERN_ERR " into a service reserved for domain " >+ "'%s' and below.\n", >+ this_domain->name, border_domain->name); >+ else >+ printk(KERN_ERR "I-pipe: Detected stalled topmost domain, " >+ "probably caused by a bug.\n" >+ " A critical section may have been " >+ "left unterminated.\n"); >+ dump_stack(); >+ 
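Editor's note: the snippet below is an illustrative sketch only, not part of the patch. It shows how code that must run over the root stage can assert its calling context when CONFIG_IPIPE_DEBUG_CONTEXT is enabled; my_root_only_service() is hypothetical, while ipipe_check_context() and ipipe_root_domain are provided by this patch.

#include <linux/ipipe.h>

static void my_root_only_service(void)
{
#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
	/* Complain if we are called from a domain above the root
	 * stage, or while the topmost stage is stalled. */
	ipipe_check_context(ipipe_root_domain);
#endif
	/* ... Linux-only work goes here ... */
}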
ipipe_trace_panic_dump(); >+} >+ >+EXPORT_SYMBOL(ipipe_check_context); >+ >+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) >+ >+int notrace __ipipe_check_percpu_access(void) >+{ >+ struct ipipe_percpu_domain_data *p; >+ struct ipipe_domain *this_domain; >+ unsigned long flags; >+ int ret = 0; >+ >+ local_irq_save_hw_notrace(flags); >+ >+ this_domain = __raw_get_cpu_var(ipipe_percpu_domain); >+ >+ /* >+ * Only the root domain may implement preemptive CPU migration >+ * of tasks, so anything above in the pipeline should be fine. >+ */ >+ if (this_domain->priority > IPIPE_ROOT_PRIO) >+ goto out; >+ >+ if (raw_irqs_disabled_flags(flags)) >+ goto out; >+ >+ /* >+ * Last chance: hw interrupts were enabled on entry while >+ * running over the root domain, but the root stage might be >+ * currently stalled, in which case preemption would be >+ * disabled, and no migration could occur. >+ */ >+ if (this_domain == ipipe_root_domain) { >+ p = ipipe_root_cpudom_ptr(); >+ if (test_bit(IPIPE_STALL_FLAG, &p->status)) >+ goto out; >+ } >+ /* >+ * Our caller may end up accessing the wrong per-cpu variable >+ * instance due to CPU migration; tell it to complain about >+ * this. >+ */ >+ ret = 1; >+out: >+ local_irq_restore_hw_notrace(flags); >+ >+ return ret; >+} >+ >+#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */ >+ >+EXPORT_SYMBOL(ipipe_virtualize_irq); >+EXPORT_SYMBOL(ipipe_control_irq); >+EXPORT_SYMBOL(ipipe_suspend_domain); >+EXPORT_SYMBOL(ipipe_alloc_virq); >+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain); >+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray); >+EXPORT_SYMBOL(ipipe_root); >+EXPORT_SYMBOL(ipipe_stall_pipeline_from); >+EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from); >+EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from); >+EXPORT_SYMBOL(ipipe_restore_pipeline_from); >+EXPORT_SYMBOL(ipipe_unstall_pipeline_head); >+EXPORT_SYMBOL(__ipipe_restore_pipeline_head); >+EXPORT_SYMBOL(__ipipe_unstall_root); >+EXPORT_SYMBOL(__ipipe_restore_root); >+EXPORT_SYMBOL(__ipipe_spin_lock_irq); >+EXPORT_SYMBOL(__ipipe_spin_unlock_irq); >+EXPORT_SYMBOL(__ipipe_spin_lock_irqsave); >+EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore); >+EXPORT_SYMBOL(__ipipe_pipeline); >+EXPORT_SYMBOL(__ipipe_lock_irq); >+EXPORT_SYMBOL(__ipipe_unlock_irq); >+EXPORT_SYMBOL(ipipe_register_domain); >+EXPORT_SYMBOL(ipipe_unregister_domain); >+EXPORT_SYMBOL(ipipe_free_virq); >+EXPORT_SYMBOL(ipipe_init_attr); >+EXPORT_SYMBOL(ipipe_catch_event); >+EXPORT_SYMBOL(ipipe_alloc_ptdkey); >+EXPORT_SYMBOL(ipipe_free_ptdkey); >+EXPORT_SYMBOL(ipipe_set_ptd); >+EXPORT_SYMBOL(ipipe_get_ptd); >+EXPORT_SYMBOL(ipipe_set_irq_affinity); >+EXPORT_SYMBOL(ipipe_send_ipi); >+EXPORT_SYMBOL(__ipipe_pend_irq); >+EXPORT_SYMBOL(__ipipe_set_irq_pending); >+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) >+EXPORT_SYMBOL(__ipipe_check_percpu_access); >+#endif >+#ifdef CONFIG_GENERIC_CLOCKEVENTS >+EXPORT_SYMBOL(ipipe_request_tickdev); >+EXPORT_SYMBOL(ipipe_release_tickdev); >+#endif >+ >+EXPORT_SYMBOL(ipipe_critical_enter); >+EXPORT_SYMBOL(ipipe_critical_exit); >+EXPORT_SYMBOL(ipipe_trigger_irq); >+EXPORT_SYMBOL(ipipe_get_sysinfo); >diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c >new file mode 100644 >index 0000000..d3c1866 >--- /dev/null >+++ b/kernel/ipipe/tracer.c >@@ -0,0 +1,1441 @@ >+/* -*- linux-c -*- >+ * kernel/ipipe/tracer.c >+ * >+ * Copyright (C) 2005 Luotao Fu. >+ * 2005-2008 Jan Kiszka. 
>+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#include <linux/kernel.h> >+#include <linux/module.h> >+#include <linux/version.h> >+#include <linux/kallsyms.h> >+#include <linux/seq_file.h> >+#include <linux/proc_fs.h> >+#include <linux/ctype.h> >+#include <linux/vmalloc.h> >+#include <linux/pid.h> >+#include <linux/utsrelease.h> >+#include <linux/sched.h> >+#include <linux/ipipe.h> >+#include <linux/ftrace.h> >+#include <asm/uaccess.h> >+ >+#define IPIPE_TRACE_PATHS 4 /* <!> Do not lower below 3 */ >+#define IPIPE_DEFAULT_ACTIVE 0 >+#define IPIPE_DEFAULT_MAX 1 >+#define IPIPE_DEFAULT_FROZEN 2 >+ >+#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) >+#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) >+ >+#define IPIPE_DEFAULT_PRE_TRACE 10 >+#define IPIPE_DEFAULT_POST_TRACE 10 >+#define IPIPE_DEFAULT_BACK_TRACE 100 >+ >+#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ >+#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ >+ >+#define IPIPE_TFLG_NMI_LOCK 0x0001 >+#define IPIPE_TFLG_NMI_HIT 0x0002 >+#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 >+ >+#define IPIPE_TFLG_HWIRQ_OFF 0x0100 >+#define IPIPE_TFLG_FREEZING 0x0200 >+#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */ >+#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 >+#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? 
*/ >+#define IPIPE_TFLG_DOMSTATE_BITS 3 >+ >+#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ >+ (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) >+#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ >+ ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) >+ >+struct ipipe_trace_point { >+ short type; >+ short flags; >+ unsigned long eip; >+ unsigned long parent_eip; >+ unsigned long v; >+ unsigned long long timestamp; >+}; >+ >+struct ipipe_trace_path { >+ volatile int flags; >+ int dump_lock; /* separated from flags due to cross-cpu access */ >+ int trace_pos; /* next point to fill */ >+ int begin, end; /* finalised path begin and end */ >+ int post_trace; /* non-zero when in post-trace phase */ >+ unsigned long long length; /* max path length in cycles */ >+ unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ >+ unsigned long nmi_saved_parent_eip; >+ unsigned long nmi_saved_v; >+ struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; >+} ____cacheline_aligned_in_smp; >+ >+enum ipipe_trace_type >+{ >+ IPIPE_TRACE_FUNC = 0, >+ IPIPE_TRACE_BEGIN, >+ IPIPE_TRACE_END, >+ IPIPE_TRACE_FREEZE, >+ IPIPE_TRACE_SPECIAL, >+ IPIPE_TRACE_PID, >+ IPIPE_TRACE_EVENT, >+}; >+ >+#define IPIPE_TYPE_MASK 0x0007 >+#define IPIPE_TYPE_BITS 3 >+ >+#ifdef CONFIG_IPIPE_TRACE_VMALLOC >+static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path); >+#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ >+static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) = >+ { [0 ... IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } }; >+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ >+ >+int ipipe_trace_enable = 0; >+ >+static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE }; >+static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX }; >+static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN }; >+static IPIPE_DEFINE_SPINLOCK(global_path_lock); >+static int pre_trace = IPIPE_DEFAULT_PRE_TRACE; >+static int post_trace = IPIPE_DEFAULT_POST_TRACE; >+static int back_trace = IPIPE_DEFAULT_BACK_TRACE; >+static int verbose_trace = 1; >+static unsigned long trace_overhead; >+ >+static unsigned long trigger_begin; >+static unsigned long trigger_end; >+ >+static DEFINE_MUTEX(out_mutex); >+static struct ipipe_trace_path *print_path; >+#ifdef CONFIG_IPIPE_TRACE_PANIC >+static struct ipipe_trace_path *panic_path; >+#endif /* CONFIG_IPIPE_TRACE_PANIC */ >+static int print_pre_trace; >+static int print_post_trace; >+ >+ >+static long __ipipe_signed_tsc2us(long long tsc); >+static void >+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); >+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip); >+ >+ >+static notrace void >+__ipipe_store_domain_states(struct ipipe_trace_point *point) >+{ >+ struct ipipe_domain *ipd; >+ struct list_head *pos; >+ int i = 0; >+ >+ list_for_each_prev(pos, &__ipipe_pipeline) { >+ ipd = list_entry(pos, struct ipipe_domain, p_link); >+ >+ if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status))) >+ point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT); >+ >+ if (ipd == __ipipe_current_domain) >+ point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT; >+ >+ if (++i > IPIPE_TFLG_DOMSTATE_BITS) >+ break; >+ } >+} >+ >+static notrace int __ipipe_get_free_trace_path(int old, int cpu) >+{ >+ int new_active = old; >+ struct ipipe_trace_path *tp; >+ >+ do { >+ if (++new_active == IPIPE_TRACE_PATHS) >+ new_active = 0; >+ tp = &per_cpu(trace_path, cpu)[new_active]; >+ } while (new_active == per_cpu(max_path, cpu) || >+ new_active == 
per_cpu(frozen_path, cpu) || >+ tp->dump_lock); >+ >+ return new_active; >+} >+ >+static notrace void >+__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, >+ struct ipipe_trace_path *old_tp, int old_pos) >+{ >+ int i; >+ >+ new_tp->trace_pos = pre_trace+1; >+ >+ for (i = new_tp->trace_pos; i > 0; i--) >+ memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], >+ &old_tp->point[WRAP_POINT_NO(old_pos-i)], >+ sizeof(struct ipipe_trace_point)); >+ >+ /* mark the end (i.e. the point before point[0]) invalid */ >+ new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0; >+} >+ >+static notrace struct ipipe_trace_path * >+__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos) >+{ >+ struct ipipe_trace_path *old_tp = tp; >+ long active = per_cpu(active_path, cpu); >+ unsigned long long length; >+ >+ /* do we have a new worst case? */ >+ length = tp->point[tp->end].timestamp - >+ tp->point[tp->begin].timestamp; >+ if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) { >+ /* we need protection here against other cpus trying >+ to start a proc dump */ >+ spin_lock(&global_path_lock); >+ >+ /* active path holds new worst case */ >+ tp->length = length; >+ per_cpu(max_path, cpu) = active; >+ >+ /* find next unused trace path */ >+ active = __ipipe_get_free_trace_path(active, cpu); >+ >+ spin_unlock(&global_path_lock); >+ >+ tp = &per_cpu(trace_path, cpu)[active]; >+ >+ /* migrate last entries for pre-tracing */ >+ __ipipe_migrate_pre_trace(tp, old_tp, pos); >+ } >+ >+ return tp; >+} >+ >+static notrace struct ipipe_trace_path * >+__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos) >+{ >+ struct ipipe_trace_path *old_tp = tp; >+ long active = per_cpu(active_path, cpu); >+ int n; >+ >+ /* frozen paths have no core (begin=end) */ >+ tp->begin = tp->end; >+ >+ /* we need protection here against other cpus trying >+ * to set their frozen path or to start a proc dump */ >+ spin_lock(&global_path_lock); >+ >+ per_cpu(frozen_path, cpu) = active; >+ >+ /* find next unused trace path */ >+ active = __ipipe_get_free_trace_path(active, cpu); >+ >+ /* check if this is the first frozen path */ >+ for_each_possible_cpu(n) { >+ if (n != cpu && >+ per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0) >+ tp->end = -1; >+ } >+ >+ spin_unlock(&global_path_lock); >+ >+ tp = &per_cpu(trace_path, cpu)[active]; >+ >+ /* migrate last entries for pre-tracing */ >+ __ipipe_migrate_pre_trace(tp, old_tp, pos); >+ >+ return tp; >+} >+ >+void notrace >+__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, >+ unsigned long parent_eip, unsigned long v) >+{ >+ struct ipipe_trace_path *tp, *old_tp; >+ int pos, next_pos, begin; >+ struct ipipe_trace_point *point; >+ unsigned long flags; >+ int cpu; >+ >+ local_irq_save_hw_notrace(flags); >+ >+ cpu = ipipe_processor_id(); >+ restart: >+ tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; >+ >+ /* here starts a race window with NMIs - catched below */ >+ >+ /* check for NMI recursion */ >+ if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { >+ tp->flags |= IPIPE_TFLG_NMI_HIT; >+ >+ /* first freeze request from NMI context? 
*/ >+ if ((type == IPIPE_TRACE_FREEZE) && >+ !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { >+ /* save arguments and mark deferred freezing */ >+ tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ; >+ tp->nmi_saved_eip = eip; >+ tp->nmi_saved_parent_eip = parent_eip; >+ tp->nmi_saved_v = v; >+ } >+ return; /* no need for restoring flags inside IRQ */ >+ } >+ >+ /* clear NMI events and set lock (atomically per cpu) */ >+ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | >+ IPIPE_TFLG_NMI_FREEZE_REQ)) >+ | IPIPE_TFLG_NMI_LOCK; >+ >+ /* check active_path again - some nasty NMI may have switched >+ * it meanwhile */ >+ if (unlikely(tp != >+ &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) { >+ /* release lock on wrong path and restart */ >+ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; >+ >+ /* there is no chance that the NMI got deferred >+ * => no need to check for pending freeze requests */ >+ goto restart; >+ } >+ >+ /* get the point buffer */ >+ pos = tp->trace_pos; >+ point = &tp->point[pos]; >+ >+ /* store all trace point data */ >+ point->type = type; >+ point->flags = raw_irqs_disabled_flags(flags) ? IPIPE_TFLG_HWIRQ_OFF : 0; >+ point->eip = eip; >+ point->parent_eip = parent_eip; >+ point->v = v; >+ ipipe_read_tsc(point->timestamp); >+ >+ __ipipe_store_domain_states(point); >+ >+ /* forward to next point buffer */ >+ next_pos = WRAP_POINT_NO(pos+1); >+ tp->trace_pos = next_pos; >+ >+ /* only mark beginning if we haven't started yet */ >+ begin = tp->begin; >+ if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0)) >+ tp->begin = pos; >+ >+ /* end of critical path, start post-trace if not already started */ >+ if (unlikely(type == IPIPE_TRACE_END) && >+ (begin >= 0) && !tp->post_trace) >+ tp->post_trace = post_trace + 1; >+ >+ /* freeze only if the slot is free and we are not already freezing */ >+ if ((unlikely(type == IPIPE_TRACE_FREEZE) || >+ (unlikely(eip >= trigger_begin && eip <= trigger_end) && >+ type == IPIPE_TRACE_FUNC)) && >+ per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 && >+ !(tp->flags & IPIPE_TFLG_FREEZING)) { >+ tp->post_trace = post_trace + 1; >+ tp->flags |= IPIPE_TFLG_FREEZING; >+ } >+ >+ /* enforce end of trace in case of overflow */ >+ if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) { >+ tp->end = pos; >+ goto enforce_end; >+ } >+ >+ /* stop tracing this path if we are in post-trace and >+ * a) that phase is over now or >+ * b) a new TRACE_BEGIN came in but we are not freezing this path */ >+ if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) || >+ ((type == IPIPE_TRACE_BEGIN) && >+ !(tp->flags & IPIPE_TFLG_FREEZING))))) { >+ /* store the path's end (i.e. excluding post-trace) */ >+ tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace); >+ >+ enforce_end: >+ if (tp->flags & IPIPE_TFLG_FREEZING) >+ tp = __ipipe_trace_freeze(cpu, tp, pos); >+ else >+ tp = __ipipe_trace_end(cpu, tp, pos); >+ >+ /* reset the active path, maybe already start a new one */ >+ tp->begin = (type == IPIPE_TRACE_BEGIN) ? 
>+ WRAP_POINT_NO(tp->trace_pos - 1) : -1; >+ tp->end = -1; >+ tp->post_trace = 0; >+ tp->flags = 0; >+ >+ /* update active_path not earlier to avoid races with NMIs */ >+ per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu); >+ } >+ >+ /* we still have old_tp and point, >+ * let's reset NMI lock and check for catches */ >+ old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK; >+ if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { >+ /* well, this late tagging may not immediately be visible for >+ * other cpus already dumping this path - a minor issue */ >+ point->flags |= IPIPE_TFLG_NMI_HIT; >+ >+ /* handle deferred freezing from NMI context */ >+ if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) >+ __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, >+ old_tp->nmi_saved_parent_eip, >+ old_tp->nmi_saved_v); >+ } >+ >+ local_irq_restore_hw_notrace(flags); >+} >+ >+static unsigned long __ipipe_global_path_lock(void) >+{ >+ unsigned long flags; >+ int cpu; >+ struct ipipe_trace_path *tp; >+ >+ spin_lock_irqsave(&global_path_lock, flags); >+ >+ cpu = ipipe_processor_id(); >+ restart: >+ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; >+ >+ /* here is small race window with NMIs - catched below */ >+ >+ /* clear NMI events and set lock (atomically per cpu) */ >+ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | >+ IPIPE_TFLG_NMI_FREEZE_REQ)) >+ | IPIPE_TFLG_NMI_LOCK; >+ >+ /* check active_path again - some nasty NMI may have switched >+ * it meanwhile */ >+ if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) { >+ /* release lock on wrong path and restart */ >+ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; >+ >+ /* there is no chance that the NMI got deferred >+ * => no need to check for pending freeze requests */ >+ goto restart; >+ } >+ >+ return flags; >+} >+ >+static void __ipipe_global_path_unlock(unsigned long flags) >+{ >+ int cpu; >+ struct ipipe_trace_path *tp; >+ >+ /* release spinlock first - it's not involved in the NMI issue */ >+ __ipipe_spin_unlock_irqbegin(&global_path_lock); >+ >+ cpu = ipipe_processor_id(); >+ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; >+ >+ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; >+ >+ /* handle deferred freezing from NMI context */ >+ if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) >+ __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip, >+ tp->nmi_saved_parent_eip, tp->nmi_saved_v); >+ >+ /* See __ipipe_spin_lock_irqsave() and friends. 
*/ >+ __ipipe_spin_unlock_irqcomplete(flags); >+} >+ >+void notrace ipipe_trace_begin(unsigned long v) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, v); >+} >+EXPORT_SYMBOL(ipipe_trace_begin); >+ >+void notrace ipipe_trace_end(unsigned long v) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, v); >+} >+EXPORT_SYMBOL(ipipe_trace_end); >+ >+void notrace ipipe_trace_freeze(unsigned long v) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, v); >+} >+EXPORT_SYMBOL(ipipe_trace_freeze); >+ >+void notrace ipipe_trace_special(unsigned char id, unsigned long v) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS), >+ __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, v); >+} >+EXPORT_SYMBOL(ipipe_trace_special); >+ >+void notrace ipipe_trace_pid(pid_t pid, short prio) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS), >+ __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, pid); >+} >+EXPORT_SYMBOL(ipipe_trace_pid); >+ >+void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS), >+ __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, delay_tsc); >+} >+EXPORT_SYMBOL(ipipe_trace_event); >+ >+int ipipe_trace_max_reset(void) >+{ >+ int cpu; >+ unsigned long flags; >+ struct ipipe_trace_path *path; >+ int ret = 0; >+ >+ flags = __ipipe_global_path_lock(); >+ >+ for_each_possible_cpu(cpu) { >+ path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; >+ >+ if (path->dump_lock) { >+ ret = -EBUSY; >+ break; >+ } >+ >+ path->begin = -1; >+ path->end = -1; >+ path->trace_pos = 0; >+ path->length = 0; >+ } >+ >+ __ipipe_global_path_unlock(flags); >+ >+ return ret; >+} >+EXPORT_SYMBOL(ipipe_trace_max_reset); >+ >+int ipipe_trace_frozen_reset(void) >+{ >+ int cpu; >+ unsigned long flags; >+ struct ipipe_trace_path *path; >+ int ret = 0; >+ >+ flags = __ipipe_global_path_lock(); >+ >+ for_each_online_cpu(cpu) { >+ path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; >+ >+ if (path->dump_lock) { >+ ret = -EBUSY; >+ break; >+ } >+ >+ path->begin = -1; >+ path->end = -1; >+ path->trace_pos = 0; >+ path->length = 0; >+ } >+ >+ __ipipe_global_path_unlock(flags); >+ >+ return ret; >+} >+EXPORT_SYMBOL(ipipe_trace_frozen_reset); >+ >+static void >+__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, >+ int trylock) >+{ >+ struct task_struct *task = NULL; >+ char buf[8]; >+ int i; >+ int locked = 1; >+ >+ if (trylock) { >+ if (!read_trylock(&tasklist_lock)) >+ locked = 0; >+ } else >+ read_lock(&tasklist_lock); >+ >+ if (locked) >+ task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns); >+ >+ if (task) >+ strncpy(task_info, task->comm, 11); >+ else >+ strcpy(task_info, "-<?>-"); >+ >+ if (locked) >+ read_unlock(&tasklist_lock); >+ >+ for (i = strlen(task_info); i < 11; i++) >+ task_info[i] = ' '; >+ >+ sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); >+ strcpy(task_info + (11 - strlen(buf)), buf); >+} >+ >+static void >+__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path, >+ struct ipipe_trace_point *point) >+{ >+ long time; >+ int type; >+ >+ time = 
__ipipe_signed_tsc2us(point->timestamp - >+ path->point[path->begin].timestamp + point->v); >+ type = point->type >> IPIPE_TYPE_BITS; >+ >+ if (type == 0) >+ /* >+ * Event type #0 is predefined, stands for the next >+ * timer tick. >+ */ >+ sprintf(buf, "tick@%-6ld", time); >+ else >+ sprintf(buf, "%3d@%-7ld", type, time); >+} >+ >+#ifdef CONFIG_IPIPE_TRACE_PANIC >+void ipipe_trace_panic_freeze(void) >+{ >+ unsigned long flags; >+ int cpu; >+ >+ if (!ipipe_trace_enable) >+ return; >+ >+ ipipe_trace_enable = 0; >+ local_irq_save_hw_notrace(flags); >+ >+ cpu = ipipe_processor_id(); >+ >+ panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; >+ >+ local_irq_restore_hw(flags); >+} >+EXPORT_SYMBOL(ipipe_trace_panic_freeze); >+ >+void ipipe_trace_panic_dump(void) >+{ >+ int cnt = back_trace; >+ int start, pos; >+ char buf[16]; >+ >+ if (!panic_path) >+ return; >+ >+ ipipe_context_check_off(); >+ >+ printk("I-pipe tracer log (%d points):\n", cnt); >+ >+ start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); >+ >+ while (cnt-- > 0) { >+ struct ipipe_trace_point *point = &panic_path->point[pos]; >+ long time; >+ char info[16]; >+ int i; >+ >+ printk(" %c", >+ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); >+ >+ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) >+ printk("%c", >+ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? >+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? >+ '#' : '+') : >+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? >+ '*' : ' ')); >+ >+ if (!point->eip) >+ printk("-<invalid>-\n"); >+ else { >+ __ipipe_trace_point_type(buf, point); >+ printk("%s", buf); >+ >+ switch (point->type & IPIPE_TYPE_MASK) { >+ case IPIPE_TRACE_FUNC: >+ printk(" "); >+ break; >+ >+ case IPIPE_TRACE_PID: >+ __ipipe_get_task_info(info, >+ point, 1); >+ printk("%s", info); >+ break; >+ >+ case IPIPE_TRACE_EVENT: >+ __ipipe_get_event_date(info, >+ panic_path, point); >+ printk("%s", info); >+ break; >+ >+ default: >+ printk("0x%08lx ", point->v); >+ } >+ >+ time = __ipipe_signed_tsc2us(point->timestamp - >+ panic_path->point[start].timestamp); >+ printk(" %5ld ", time); >+ >+ __ipipe_print_symname(NULL, point->eip); >+ printk(" ("); >+ __ipipe_print_symname(NULL, point->parent_eip); >+ printk(")\n"); >+ } >+ pos = WRAP_POINT_NO(pos - 1); >+ } >+ >+ panic_path = NULL; >+} >+EXPORT_SYMBOL(ipipe_trace_panic_dump); >+#endif /* CONFIG_IPIPE_TRACE_PANIC */ >+ >+ >+/* --- /proc output --- */ >+ >+static notrace int __ipipe_in_critical_trpath(long point_no) >+{ >+ return ((WRAP_POINT_NO(point_no-print_path->begin) < >+ WRAP_POINT_NO(print_path->end-print_path->begin)) || >+ ((print_path->end == print_path->begin) && >+ (WRAP_POINT_NO(point_no-print_path->end) > >+ print_post_trace))); >+} >+ >+static long __ipipe_signed_tsc2us(long long tsc) >+{ >+ unsigned long long abs_tsc; >+ long us; >+ >+ /* ipipe_tsc2us works on unsigned => handle sign separately */ >+ abs_tsc = (tsc >= 0) ? 
tsc : -tsc; >+ us = ipipe_tsc2us(abs_tsc); >+ if (tsc < 0) >+ return -us; >+ else >+ return us; >+} >+ >+static void >+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) >+{ >+ switch (point->type & IPIPE_TYPE_MASK) { >+ case IPIPE_TRACE_FUNC: >+ strcpy(buf, "func "); >+ break; >+ >+ case IPIPE_TRACE_BEGIN: >+ strcpy(buf, "begin "); >+ break; >+ >+ case IPIPE_TRACE_END: >+ strcpy(buf, "end "); >+ break; >+ >+ case IPIPE_TRACE_FREEZE: >+ strcpy(buf, "freeze "); >+ break; >+ >+ case IPIPE_TRACE_SPECIAL: >+ sprintf(buf, "(0x%02x) ", >+ point->type >> IPIPE_TYPE_BITS); >+ break; >+ >+ case IPIPE_TRACE_PID: >+ sprintf(buf, "[%5d] ", (pid_t)point->v); >+ break; >+ >+ case IPIPE_TRACE_EVENT: >+ sprintf(buf, "event "); >+ break; >+ } >+} >+ >+static void >+__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) >+{ >+ char mark = ' '; >+ int point_no = point - print_path->point; >+ int i; >+ >+ if (print_path->end == point_no) >+ mark = '<'; >+ else if (print_path->begin == point_no) >+ mark = '>'; >+ else if (__ipipe_in_critical_trpath(point_no)) >+ mark = ':'; >+ seq_printf(m, "%c%c", mark, >+ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); >+ >+ if (!verbose_trace) >+ return; >+ >+ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) >+ seq_printf(m, "%c", >+ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? >+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? >+ '#' : '+') : >+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); >+} >+ >+static void >+__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) >+{ >+ unsigned long delay = 0; >+ int next; >+ char *mark = " "; >+ >+ next = WRAP_POINT_NO(point+1 - print_path->point); >+ >+ if (next != print_path->trace_pos) >+ delay = ipipe_tsc2ns(print_path->point[next].timestamp - >+ point->timestamp); >+ >+ if (__ipipe_in_critical_trpath(point - print_path->point)) { >+ if (delay > IPIPE_DELAY_WARN) >+ mark = "! "; >+ else if (delay > IPIPE_DELAY_NOTE) >+ mark = "+ "; >+ } >+ seq_puts(m, mark); >+ >+ if (verbose_trace) >+ seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, >+ (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); >+ else >+ seq_puts(m, " "); >+} >+ >+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) >+{ >+ char namebuf[KSYM_NAME_LEN+1]; >+ unsigned long size, offset; >+ const char *sym_name; >+ char *modname; >+ >+ sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); >+ >+#ifdef CONFIG_IPIPE_TRACE_PANIC >+ if (!m) { >+ /* panic dump */ >+ if (sym_name) { >+ printk("%s+0x%lx", sym_name, offset); >+ if (modname) >+ printk(" [%s]", modname); >+ } >+ } else >+#endif /* CONFIG_IPIPE_TRACE_PANIC */ >+ { >+ if (sym_name) { >+ if (verbose_trace) { >+ seq_printf(m, "%s+0x%lx", sym_name, offset); >+ if (modname) >+ seq_printf(m, " [%s]", modname); >+ } else >+ seq_puts(m, sym_name); >+ } else >+ seq_printf(m, "<%08lx>", eip); >+ } >+} >+ >+static void __ipipe_print_headline(struct seq_file *m) >+{ >+ seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " >+ "us\n\n", trace_overhead/1000, trace_overhead%1000); >+ >+ if (verbose_trace) { >+ const char *name[4] = { [0 ... 
3] = "<unused>" }; >+ struct list_head *pos; >+ int i = 0; >+ >+ list_for_each_prev(pos, &__ipipe_pipeline) { >+ struct ipipe_domain *ipd = >+ list_entry(pos, struct ipipe_domain, p_link); >+ >+ name[i] = ipd->name; >+ if (++i > 3) >+ break; >+ } >+ >+ seq_printf(m, >+ " +----- Hard IRQs ('|': locked)\n" >+ " |+---- %s\n" >+ " ||+--- %s\n" >+ " |||+-- %s\n" >+ " ||||+- %s%s\n" >+ " ||||| +---------- " >+ "Delay flag ('+': > %d us, '!': > %d us)\n" >+ " ||||| | +- " >+ "NMI noise ('N')\n" >+ " ||||| | |\n" >+ " Type User Val. Time Delay Function " >+ "(Parent)\n", >+ name[3], name[2], name[1], name[0], >+ name[0] ? " ('*': domain stalled, '+': current, " >+ "'#': current+stalled)" : "", >+ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); >+ } else >+ seq_printf(m, >+ " +--------------- Hard IRQs ('|': locked)\n" >+ " | +- Delay flag " >+ "('+': > %d us, '!': > %d us)\n" >+ " | |\n" >+ " Type Time Function (Parent)\n", >+ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); >+} >+ >+static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) >+{ >+ loff_t n = *pos; >+ >+ mutex_lock(&out_mutex); >+ >+ if (!n) { >+ struct ipipe_trace_path *tp; >+ unsigned long length_usecs; >+ int points, cpu; >+ unsigned long flags; >+ >+ /* protect against max_path/frozen_path updates while we >+ * haven't locked our target path, also avoid recursively >+ * taking global_path_lock from NMI context */ >+ flags = __ipipe_global_path_lock(); >+ >+ /* find the longest of all per-cpu paths */ >+ print_path = NULL; >+ for_each_online_cpu(cpu) { >+ tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; >+ if ((print_path == NULL) || >+ (tp->length > print_path->length)) { >+ print_path = tp; >+ break; >+ } >+ } >+ print_path->dump_lock = 1; >+ >+ __ipipe_global_path_unlock(flags); >+ >+ /* does this path actually contain data? 
*/ >+ if (print_path->end == print_path->begin) >+ return NULL; >+ >+ /* number of points inside the critical path */ >+ points = WRAP_POINT_NO(print_path->end-print_path->begin+1); >+ >+ /* pre- and post-tracing length, post-trace length was frozen >+ in __ipipe_trace, pre-trace may have to be reduced due to >+ buffer overrun */ >+ print_pre_trace = pre_trace; >+ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - >+ print_path->end - 1); >+ if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) >+ print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - >+ print_post_trace; >+ >+ length_usecs = ipipe_tsc2us(print_path->length); >+ seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n" >+ "------------------------------------------------------------\n", >+ UTS_RELEASE, IPIPE_ARCH_STRING); >+ seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: " >+ "%d (-%d/+%d), Length: %lu us\n", >+ cpu, print_path->point[print_path->begin].timestamp, >+ points, print_pre_trace, print_post_trace, length_usecs); >+ __ipipe_print_headline(m); >+ } >+ >+ /* check if we are inside the trace range */ >+ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + >+ print_pre_trace + print_post_trace)) >+ return NULL; >+ >+ /* return the next point to be shown */ >+ return &print_path->point[WRAP_POINT_NO(print_path->begin - >+ print_pre_trace + n)]; >+} >+ >+static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) >+{ >+ loff_t n = ++*pos; >+ >+ /* check if we are inside the trace range with the next entry */ >+ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + >+ print_pre_trace + print_post_trace)) >+ return NULL; >+ >+ /* return the next point to be shown */ >+ return &print_path->point[WRAP_POINT_NO(print_path->begin - >+ print_pre_trace + *pos)]; >+} >+ >+static void __ipipe_prtrace_stop(struct seq_file *m, void *p) >+{ >+ if (print_path) >+ print_path->dump_lock = 0; >+ mutex_unlock(&out_mutex); >+} >+ >+static int __ipipe_prtrace_show(struct seq_file *m, void *p) >+{ >+ long time; >+ struct ipipe_trace_point *point = p; >+ char buf[16]; >+ >+ if (!point->eip) { >+ seq_puts(m, "-<invalid>-\n"); >+ return 0; >+ } >+ >+ __ipipe_print_pathmark(m, point); >+ __ipipe_trace_point_type(buf, point); >+ seq_puts(m, buf); >+ if (verbose_trace) >+ switch (point->type & IPIPE_TYPE_MASK) { >+ case IPIPE_TRACE_FUNC: >+ seq_puts(m, " "); >+ break; >+ >+ case IPIPE_TRACE_PID: >+ __ipipe_get_task_info(buf, point, 0); >+ seq_puts(m, buf); >+ break; >+ >+ case IPIPE_TRACE_EVENT: >+ __ipipe_get_event_date(buf, print_path, point); >+ seq_puts(m, buf); >+ break; >+ >+ default: >+ seq_printf(m, "0x%08lx ", point->v); >+ } >+ >+ time = __ipipe_signed_tsc2us(point->timestamp - >+ print_path->point[print_path->begin].timestamp); >+ seq_printf(m, "%5ld", time); >+ >+ __ipipe_print_delay(m, point); >+ __ipipe_print_symname(m, point->eip); >+ seq_puts(m, " ("); >+ __ipipe_print_symname(m, point->parent_eip); >+ seq_puts(m, ")\n"); >+ >+ return 0; >+} >+ >+static struct seq_operations __ipipe_max_ptrace_ops = { >+ .start = __ipipe_max_prtrace_start, >+ .next = __ipipe_prtrace_next, >+ .stop = __ipipe_prtrace_stop, >+ .show = __ipipe_prtrace_show >+}; >+ >+static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) >+{ >+ return seq_open(file, &__ipipe_max_ptrace_ops); >+} >+ >+static ssize_t >+__ipipe_max_reset(struct file *file, const char __user *pbuffer, >+ size_t count, loff_t *data) >+{ >+ mutex_lock(&out_mutex); >+ ipipe_trace_max_reset(); >+ 
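Editor's note: the sketch below is illustrative only and not part of the patch. It shows the typical way kernel code drives the tracer whose /proc plumbing is implemented here: bracket a code path with ipipe_trace_begin()/ipipe_trace_end() so its worst case shows up under the "max" entry, or call ipipe_trace_freeze() to capture a back-trace under "frozen". my_traced_path(), my_capture_snapshot() and the marker values are hypothetical, and the header name is assumed to follow the usual I-pipe layout; the three trace entry points themselves are exported by this file.

#include <linux/ipipe_trace.h>

static void my_traced_path(void)
{
	/* The argument is an arbitrary user value; it shows up in the
	 * "User Val." column of the trace output. */
	ipipe_trace_begin(0x11);

	/* ... code path whose latency is being measured ... */

	ipipe_trace_end(0x11);
}

static void my_capture_snapshot(void)
{
	/* Freeze the last back_trace points into the "frozen" path. */
	ipipe_trace_freeze(0x22);
}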
mutex_unlock(&out_mutex); >+ >+ return count; >+} >+ >+struct file_operations __ipipe_max_prtrace_fops = { >+ .open = __ipipe_max_prtrace_open, >+ .read = seq_read, >+ .write = __ipipe_max_reset, >+ .llseek = seq_lseek, >+ .release = seq_release, >+}; >+ >+static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) >+{ >+ loff_t n = *pos; >+ >+ mutex_lock(&out_mutex); >+ >+ if (!n) { >+ struct ipipe_trace_path *tp; >+ int cpu; >+ unsigned long flags; >+ >+ /* protect against max_path/frozen_path updates while we >+ * haven't locked our target path, also avoid recursively >+ * taking global_path_lock from NMI context */ >+ flags = __ipipe_global_path_lock(); >+ >+ /* find the first of all per-cpu frozen paths */ >+ print_path = NULL; >+ for_each_online_cpu(cpu) { >+ tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; >+ if (tp->end >= 0) { >+ print_path = tp; >+ break; >+ } >+ } >+ if (print_path) >+ print_path->dump_lock = 1; >+ >+ __ipipe_global_path_unlock(flags); >+ >+ if (!print_path) >+ return NULL; >+ >+ /* back- and post-tracing length, post-trace length was frozen >+ in __ipipe_trace, back-trace may have to be reduced due to >+ buffer overrun */ >+ print_pre_trace = back_trace-1; /* substract freeze point */ >+ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - >+ print_path->end - 1); >+ if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) >+ print_pre_trace = IPIPE_TRACE_POINTS - 2 - >+ print_post_trace; >+ >+ seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n" >+ "------------------------------------------------------" >+ "------\n", >+ UTS_RELEASE, IPIPE_ARCH_STRING); >+ seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n", >+ cpu, print_path->point[print_path->begin].timestamp, >+ print_pre_trace+1, print_post_trace); >+ __ipipe_print_headline(m); >+ } >+ >+ /* check if we are inside the trace range */ >+ if (n >= print_pre_trace + 1 + print_post_trace) >+ return NULL; >+ >+ /* return the next point to be shown */ >+ return &print_path->point[WRAP_POINT_NO(print_path->begin- >+ print_pre_trace+n)]; >+} >+ >+static struct seq_operations __ipipe_frozen_ptrace_ops = { >+ .start = __ipipe_frozen_prtrace_start, >+ .next = __ipipe_prtrace_next, >+ .stop = __ipipe_prtrace_stop, >+ .show = __ipipe_prtrace_show >+}; >+ >+static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) >+{ >+ return seq_open(file, &__ipipe_frozen_ptrace_ops); >+} >+ >+static ssize_t >+__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, >+ size_t count, loff_t *data) >+{ >+ char *end, buf[16]; >+ int val; >+ int n; >+ >+ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; >+ >+ if (copy_from_user(buf, pbuffer, n)) >+ return -EFAULT; >+ >+ buf[n] = '\0'; >+ val = simple_strtol(buf, &end, 0); >+ >+ if (((*end != '\0') && !isspace(*end)) || (val < 0)) >+ return -EINVAL; >+ >+ mutex_lock(&out_mutex); >+ ipipe_trace_frozen_reset(); >+ if (val > 0) >+ ipipe_trace_freeze(-1); >+ mutex_unlock(&out_mutex); >+ >+ return count; >+} >+ >+struct file_operations __ipipe_frozen_prtrace_fops = { >+ .open = __ipipe_frozen_prtrace_open, >+ .read = seq_read, >+ .write = __ipipe_frozen_ctrl, >+ .llseek = seq_lseek, >+ .release = seq_release, >+}; >+ >+static int __ipipe_rd_proc_val(char *page, char **start, off_t off, >+ int count, int *eof, void *data) >+{ >+ int len; >+ >+ len = sprintf(page, "%u\n", *(int *)data); >+ len -= off; >+ if (len <= off + count) >+ *eof = 1; >+ *start = page + off; >+ if (len > count) >+ len = count; >+ if (len < 0) >+ len = 0; >+ >+ return len; >+} >+ >+static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer, >+ unsigned long count, void *data) >+{ >+ char *end, buf[16]; >+ int val; >+ int n; >+ >+ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; >+ >+ if (copy_from_user(buf, buffer, n)) >+ return -EFAULT; >+ >+ buf[n] = '\0'; >+ val = simple_strtol(buf, &end, 0); >+ >+ if (((*end != '\0') && !isspace(*end)) || (val < 0)) >+ return -EINVAL; >+ >+ mutex_lock(&out_mutex); >+ *(int *)data = val; >+ mutex_unlock(&out_mutex); >+ >+ return count; >+} >+ >+static int __ipipe_rd_trigger(char *page, char **start, off_t off, int count, >+ int *eof, void *data) >+{ >+ int len; >+ >+ if (!trigger_begin) >+ return 0; >+ >+ len = sprint_symbol(page, trigger_begin); >+ page[len++] = '\n'; >+ >+ len -= off; >+ if (len <= off + count) >+ *eof = 1; >+ *start = page + off; >+ if (len > count) >+ len = count; >+ if (len < 0) >+ len = 0; >+ >+ return len; >+} >+ >+static int __ipipe_wr_trigger(struct file *file, const char __user *buffer, >+ unsigned long count, void *data) >+{ >+ char buf[KSYM_SYMBOL_LEN]; >+ unsigned long begin, end; >+ >+ if (count > sizeof(buf) - 1) >+ count = sizeof(buf) - 1; >+ if (copy_from_user(buf, buffer, count)) >+ return -EFAULT; >+ buf[count] = 0; >+ if (buf[count-1] == '\n') >+ buf[count-1] = 0; >+ >+ begin = kallsyms_lookup_name(buf); >+ if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL)) >+ return -ENOENT; >+ end += begin - 1; >+ >+ mutex_lock(&out_mutex); >+ /* invalidate the current range before setting a new one */ >+ trigger_end = 0; >+ wmb(); >+ ipipe_trace_frozen_reset(); >+ >+ /* set new range */ >+ trigger_begin = begin; >+ wmb(); >+ trigger_end = end; >+ mutex_unlock(&out_mutex); >+ >+ return count; >+} >+ >+#ifdef CONFIG_IPIPE_TRACE_MCOUNT >+static void notrace >+ipipe_trace_function(unsigned long ip, unsigned long parent_ip) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0); >+} >+ >+static struct ftrace_ops ipipe_trace_ops = { >+ .func = ipipe_trace_function >+}; >+ >+static int __ipipe_wr_enable(struct file *file, const char __user *buffer, >+ unsigned long count, void *data) >+{ >+ char *end, buf[16]; >+ int val; >+ int n; >+ >+ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; >+ >+ if (copy_from_user(buf, buffer, n)) >+ return -EFAULT; >+ >+ buf[n] = '\0'; >+ val = simple_strtol(buf, &end, 0); >+ >+ if (((*end != '\0') && !isspace(*end)) || (val < 0)) >+ return -EINVAL; >+ >+ mutex_lock(&out_mutex); >+ >+ if (ipipe_trace_enable) { >+ if (!val) >+ unregister_ftrace_function(&ipipe_trace_ops); >+ } else if (val) >+ register_ftrace_function(&ipipe_trace_ops); >+ >+ ipipe_trace_enable = val; >+ >+ mutex_unlock(&out_mutex); >+ >+ return count; >+} >+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ >+ >+extern struct proc_dir_entry *ipipe_proc_root; >+ >+static struct proc_dir_entry * __init >+__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, >+ const char *name, int *value_ptr) >+{ >+ struct proc_dir_entry *entry; >+ >+ entry = create_proc_entry(name, 0644, trace_dir); >+ if (entry) { >+ entry->data = value_ptr; >+ entry->read_proc = __ipipe_rd_proc_val; >+ entry->write_proc = __ipipe_wr_proc_val; >+ } >+ return entry; >+} >+ >+void __init __ipipe_init_tracer(void) >+{ >+ struct proc_dir_entry *trace_dir; >+ struct proc_dir_entry *entry; >+ unsigned long long start, end, min = ULLONG_MAX; >+ int i; >+#ifdef CONFIG_IPIPE_TRACE_VMALLOC >+ int cpu, path; >+ >+ for_each_possible_cpu(cpu) { >+ struct ipipe_trace_path *tp_buf; >+ >+ tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) * >+ IPIPE_TRACE_PATHS, cpu_to_node(cpu)); >+ if (!tp_buf) { >+ printk(KERN_ERR "I-pipe: " >+ "insufficient memory for trace buffer.\n"); >+ return; >+ } >+ memset(tp_buf, 0, >+ sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); >+ for (path = 0; path < IPIPE_TRACE_PATHS; path++) { >+ tp_buf[path].begin = -1; >+ tp_buf[path].end = -1; >+ } >+ per_cpu(trace_path, cpu) = tp_buf; >+ } >+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ >+ >+ /* Calculate minimum overhead of __ipipe_trace() */ >+ local_irq_disable_hw(); >+ for (i = 0; i < 100; i++) { >+ ipipe_read_tsc(start); >+ __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, 0); >+ ipipe_read_tsc(end); >+ >+ end -= start; >+ if (end < min) >+ min = end; >+ } >+ local_irq_enable_hw(); >+ trace_overhead = ipipe_tsc2ns(min); >+ >+#ifdef CONFIG_IPIPE_TRACE_ENABLE >+ ipipe_trace_enable = 1; >+#ifdef CONFIG_IPIPE_TRACE_MCOUNT >+ register_ftrace_function(&ipipe_trace_ops); >+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ >+#endif /* CONFIG_IPIPE_TRACE_ENABLE */ >+ >+ trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root); >+ >+ entry = create_proc_entry("max", 0644, trace_dir); >+ if (entry) >+ entry->proc_fops = &__ipipe_max_prtrace_fops; >+ >+ entry = create_proc_entry("frozen", 0644, trace_dir); >+ if (entry) >+ entry->proc_fops = &__ipipe_frozen_prtrace_fops; >+ >+ entry = create_proc_entry("trigger", 0644, trace_dir); >+ if (entry) { >+ entry->read_proc = __ipipe_rd_trigger; >+ entry->write_proc = __ipipe_wr_trigger; >+ } >+ >+ __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", >+ &pre_trace); >+ __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", >+ &post_trace); >+ __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", >+ &back_trace); >+ __ipipe_create_trace_proc_val(trace_dir, "verbose", >+ &verbose_trace); >+ entry = __ipipe_create_trace_proc_val(trace_dir, "enable", >+ &ipipe_trace_enable); >+#ifdef CONFIG_IPIPE_TRACE_MCOUNT >+ if (entry) >+ entry->write_proc = __ipipe_wr_enable; >+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ >+} >diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c >index e570d19..7cebb6f 100644 >--- a/kernel/irq/chip.c >+++ 
b/kernel/irq/chip.c >@@ -15,6 +15,7 @@ > #include <linux/module.h> > #include <linux/interrupt.h> > #include <linux/kernel_stat.h> >+#include <linux/ipipe.h> > > #include "internals.h" > >@@ -459,7 +460,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) > irqreturn_t action_ret; > > spin_lock(&desc->lock); >+#ifndef CONFIG_IPIPE > mask_ack_irq(desc, irq); >+#endif > > if (unlikely(desc->status & IRQ_INPROGRESS)) > goto out_unlock; >@@ -539,8 +542,13 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) > > spin_lock(&desc->lock); > desc->status &= ~IRQ_INPROGRESS; >+#ifdef CONFIG_IPIPE >+ desc->chip->unmask(irq); >+out: >+#else > out: > desc->chip->eoi(irq); >+#endif > > spin_unlock(&desc->lock); > } >@@ -582,8 +590,10 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) > kstat_incr_irqs_this_cpu(irq, desc); > > /* Start handling the irq */ >+#ifndef CONFIG_IPIPE > if (desc->chip->ack) > desc->chip->ack(irq); >+#endif > > /* Mark the IRQ currently in progress.*/ > desc->status |= IRQ_INPROGRESS; >@@ -637,8 +647,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) > > kstat_incr_irqs_this_cpu(irq, desc); > >+#ifndef CONFIG_IPIPE > if (desc->chip->ack) > desc->chip->ack(irq); >+#endif /* CONFIG_IPIPE */ > > action_ret = handle_IRQ_event(irq, desc->action); > if (!noirqdebug) >@@ -648,6 +660,134 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) > desc->chip->eoi(irq); > } > >+#ifdef CONFIG_IPIPE >+ >+void __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+void __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+void __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc) >+{ >+ mask_ack_irq(desc, irq); >+} >+ >+void __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc) >+{ >+ if (desc->chip->unmask) >+ desc->chip->unmask(irq); >+} >+ >+void __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc) >+{ >+ desc->chip->eoi(irq); >+} >+ >+void __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc) >+{ >+ /* >+ * Non-requestable IRQs should not be masked in EOI handler. 
>+ */ >+ if (!(desc->status & IRQ_NOREQUEST)) >+ desc->chip->unmask(irq); >+} >+ >+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc) >+{ >+ desc->chip->ack(irq); >+} >+ >+void __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc) >+{ >+ if (desc->chip->ack) >+ desc->chip->ack(irq); >+} >+ >+void __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc) >+{ >+ if (desc->chip->eoi) >+ desc->chip->eoi(irq); >+} >+ >+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+void __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc) >+{ >+ static int done; >+ >+ handle_bad_irq(irq, desc); >+ >+ if (!done) { >+ printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n", >+ __FUNCTION__, irq); >+ done = 1; >+ } >+} >+ >+void __ipipe_noack_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+void __ipipe_noend_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+irq_flow_handler_t >+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) >+{ >+ if (unlikely(handle == NULL)) { >+ desc->ipipe_ack = &__ipipe_ack_bad_irq; >+ desc->ipipe_end = &__ipipe_noend_irq; >+ } else { >+ if (is_chained) { >+ desc->ipipe_ack = handle; >+ desc->ipipe_end = &__ipipe_noend_irq; >+ handle = __ipipe_noack_irq; >+ } else if (handle == &handle_simple_irq) { >+ desc->ipipe_ack = &__ipipe_ack_simple_irq; >+ desc->ipipe_end = &__ipipe_end_simple_irq; >+ } else if (handle == &handle_level_irq) { >+ desc->ipipe_ack = &__ipipe_ack_level_irq; >+ desc->ipipe_end = &__ipipe_end_level_irq; >+ } else if (handle == &handle_edge_irq) { >+ desc->ipipe_ack = &__ipipe_ack_edge_irq; >+ desc->ipipe_end = &__ipipe_end_edge_irq; >+ } else if (handle == &handle_fasteoi_irq) { >+ desc->ipipe_ack = &__ipipe_ack_fasteoi_irq; >+ desc->ipipe_end = &__ipipe_end_fasteoi_irq; >+ } else if (handle == &handle_percpu_irq) { >+ desc->ipipe_ack = &__ipipe_ack_percpu_irq; >+ desc->ipipe_end = &__ipipe_end_percpu_irq; >+ } else if (desc->chip == &no_irq_chip) { >+ desc->ipipe_ack = &__ipipe_noack_irq; >+ desc->ipipe_end = &__ipipe_noend_irq; >+ } else { >+ desc->ipipe_ack = &__ipipe_ack_bad_irq; >+ desc->ipipe_end = &__ipipe_noend_irq; >+ } >+ } >+ >+ /* Suppress intermediate trampoline routine. */ >+ ipipe_root_domain->irqs[desc->irq].acknowledge = desc->ipipe_ack; >+ >+ return handle; >+} >+ >+#else /* !CONFIG_IPIPE */ >+ >+irq_flow_handler_t >+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) >+{ >+ return handle; >+} >+ >+#endif /* !CONFIG_IPIPE */ >+ > void > __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, > const char *name) >@@ -679,6 +819,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, > chip_bus_lock(irq, desc); > spin_lock_irqsave(&desc->lock, flags); > >+ handle = __fixup_irq_handler(desc, handle, is_chained); >+ > /* Uninstall? 
*/ > if (handle == handle_bad_irq) { > if (desc->chip != &no_irq_chip) >diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c >index 17c71bb..406f375 100644 >--- a/kernel/irq/handle.c >+++ b/kernel/irq/handle.c >@@ -462,8 +462,10 @@ unsigned int __do_IRQ(unsigned int irq) > /* > * No locking required for CPU-local interrupts: > */ >+#ifndef CONFIG_IPIPE > if (desc->chip->ack) > desc->chip->ack(irq); >+#endif > if (likely(!(desc->status & IRQ_DISABLED))) { > action_ret = handle_IRQ_event(irq, desc->action); > if (!noirqdebug) >@@ -474,8 +476,10 @@ unsigned int __do_IRQ(unsigned int irq) > } > > spin_lock(&desc->lock); >+#ifndef CONFIG_IPIPE > if (desc->chip->ack) > desc->chip->ack(irq); >+#endif > /* > * REPLAY is when Linux resends an IRQ that was dropped earlier > * WAITING is used by probe to mark irqs that are being tested >diff --git a/kernel/lockdep.c b/kernel/lockdep.c >index 9af5672..fa84d6d 100644 >--- a/kernel/lockdep.c >+++ b/kernel/lockdep.c >@@ -2318,7 +2318,7 @@ void trace_hardirqs_on_caller(unsigned long ip) > /* we'll do an OFF -> ON transition: */ > curr->hardirqs_enabled = 1; > >- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) >+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) > return; > if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) > return; >@@ -2361,7 +2361,7 @@ void trace_hardirqs_off_caller(unsigned long ip) > if (unlikely(!debug_locks || current->lockdep_recursion)) > return; > >- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) >+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) > return; > > if (curr->hardirqs_enabled) { >@@ -2393,7 +2393,7 @@ void trace_softirqs_on(unsigned long ip) > if (unlikely(!debug_locks)) > return; > >- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) >+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) > return; > > if (curr->softirqs_enabled) { >@@ -2427,7 +2427,7 @@ void trace_softirqs_off(unsigned long ip) > if (unlikely(!debug_locks)) > return; > >- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) >+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) > return; > > if (curr->softirqs_enabled) { >diff --git a/kernel/panic.c b/kernel/panic.c >index 96b45d0..63f5b9e 100644 >--- a/kernel/panic.c >+++ b/kernel/panic.c >@@ -22,6 +22,7 @@ > #include <linux/init.h> > #include <linux/nmi.h> > #include <linux/dmi.h> >+#include <linux/ipipe_trace.h> > > int panic_on_oops; > static unsigned long tainted_mask; >@@ -304,6 +305,8 @@ void oops_enter(void) > { > tracing_off(); > /* can't trust the integrity of the kernel anymore: */ >+ ipipe_trace_panic_freeze(); >+ ipipe_disable_context_check(ipipe_processor_id()); > debug_locks_off(); > do_oops_enter_exit(); > } >diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c >index 04a9e90..49bc6cd 100644 >--- a/kernel/power/hibernate.c >+++ b/kernel/power/hibernate.c >@@ -238,6 +238,7 @@ static int create_image(int platform_mode) > goto Enable_cpus; > > local_irq_disable(); >+ local_irq_disable_hw_cond(); > > error = sysdev_suspend(PMSG_FREEZE); > if (error) { >@@ -267,6 +268,7 @@ static int create_image(int platform_mode) > */ > > Enable_irqs: >+ local_irq_enable_hw_cond(); > local_irq_enable(); > > Enable_cpus: >@@ -359,6 +361,7 @@ static int resume_target_kernel(bool platform_mode) > goto Enable_cpus; > > local_irq_disable(); >+ local_irq_disable_hw_cond(); > > error = sysdev_suspend(PMSG_QUIESCE); > if (error) >@@ -390,6 +393,7 @@ static int resume_target_kernel(bool platform_mode) > sysdev_resume(); > > Enable_irqs: >+ 
local_irq_enable_hw_cond(); > local_irq_enable(); > > Enable_cpus: >@@ -471,6 +475,7 @@ int hibernation_platform_enter(void) > goto Platform_finish; > > local_irq_disable(); >+ local_irq_disable_hw_cond(); > sysdev_suspend(PMSG_HIBERNATE); > hibernation_ops->enter(); > /* We should never get here */ >diff --git a/kernel/printk.c b/kernel/printk.c >index f38b07f..f3f0057 100644 >--- a/kernel/printk.c >+++ b/kernel/printk.c >@@ -564,6 +564,41 @@ static int have_callable_console(void) > return 0; > } > >+#ifdef CONFIG_IPIPE >+ >+static ipipe_spinlock_t __ipipe_printk_lock = IPIPE_SPIN_LOCK_UNLOCKED; >+ >+static int __ipipe_printk_fill; >+ >+static char __ipipe_printk_buf[__LOG_BUF_LEN]; >+ >+void __ipipe_flush_printk (unsigned virq, void *cookie) >+{ >+ char *p = __ipipe_printk_buf; >+ int len, lmax, out = 0; >+ unsigned long flags; >+ >+ goto start; >+ >+ do { >+ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); >+ start: >+ lmax = __ipipe_printk_fill; >+ while (out < lmax) { >+ len = strlen(p) + 1; >+ printk("%s",p); >+ p += len; >+ out += len; >+ } >+ spin_lock_irqsave(&__ipipe_printk_lock, flags); >+ } >+ while (__ipipe_printk_fill != lmax); >+ >+ __ipipe_printk_fill = 0; >+ >+ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); >+} >+ > /** > * printk - print a kernel message > * @fmt: format string >@@ -588,6 +623,65 @@ static int have_callable_console(void) > > asmlinkage int printk(const char *fmt, ...) > { >+ int r, fbytes, oldcount; >+ unsigned long flags; >+ int sprintk = 1; >+ int cs = -1; >+ va_list args; >+ >+ va_start(args, fmt); >+ >+ local_irq_save_hw(flags); >+ >+ if (test_bit(IPIPE_SPRINTK_FLAG, &__ipipe_current_domain->flags) || >+ oops_in_progress) >+ cs = ipipe_disable_context_check(ipipe_processor_id()); >+ else if (__ipipe_current_domain == ipipe_root_domain) { >+ struct ipipe_domain *dom; >+ >+ list_for_each_entry(dom, &__ipipe_pipeline, p_link) { >+ if (dom == ipipe_root_domain) >+ break; >+ if (test_bit(IPIPE_STALL_FLAG, >+ &ipipe_cpudom_var(dom, status))) >+ sprintk = 0; >+ } >+ } else >+ sprintk = 0; >+ >+ local_irq_restore_hw(flags); >+ >+ if (sprintk) { >+ r = vprintk(fmt, args); >+ if (cs != -1) >+ ipipe_restore_context_check(ipipe_processor_id(), cs); >+ goto out; >+ } >+ >+ spin_lock_irqsave(&__ipipe_printk_lock, flags); >+ >+ oldcount = __ipipe_printk_fill; >+ fbytes = __LOG_BUF_LEN - oldcount; >+ >+ if (fbytes > 1) { >+ r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, >+ fbytes, fmt, args) + 1; /* account for the null byte */ >+ __ipipe_printk_fill += r; >+ } else >+ r = 0; >+ >+ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); >+ >+ if (oldcount == 0) >+ ipipe_trigger_irq(__ipipe_printk_virq); >+out: >+ va_end(args); >+ >+ return r; >+} >+#else /* !CONFIG_IPIPE */ >+asmlinkage int printk(const char *fmt, ...) >+{ > va_list args; > int r; > >@@ -597,6 +691,7 @@ asmlinkage int printk(const char *fmt, ...) 
> > return r; > } >+#endif /* CONFIG_IPIPE */ > > /* cpu currently holding logbuf_lock */ > static volatile unsigned int printk_cpu = UINT_MAX; >diff --git a/kernel/sched.c b/kernel/sched.c >index ed61192..83937d6 100644 >--- a/kernel/sched.c >+++ b/kernel/sched.c >@@ -2344,6 +2344,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, > int wake_flags) > { > int cpu, orig_cpu, this_cpu, success = 0; >+ unsigned int old_state; > unsigned long flags; > struct rq *rq, *orig_rq; > >@@ -2355,7 +2356,9 @@ static int try_to_wake_up(struct task_st > smp_wmb(); > rq = orig_rq = task_rq_lock(p, &flags); > update_rq_clock(rq); >- if (!(p->state & state)) >+ old_state = p->state; >+ if (!(old_state & state) || >+ (old_state & (TASK_NOWAKEUP|TASK_ATOMICSWITCH))) > goto out; > > if (p->se.on_rq) >@@ -2840,22 +2843,29 @@ asmlinkage void schedule_tail(struct tas > #endif > if (current->set_child_tid) > put_user(task_pid_vnr(current), current->set_child_tid); >+ >+ ipipe_init_notify(current); > } > > /* > * context_switch - switch to the new MM and the new > * thread's register state. > */ >-static inline void >+int > context_switch(struct rq *rq, struct task_struct *prev, > struct task_struct *next) > { > struct mm_struct *mm, *oldmm; > >- prepare_task_switch(rq, prev, next); >- trace_sched_switch(rq, prev, next); > mm = next->mm; > oldmm = prev->active_mm; >+ >+if (!rq) { >+ switch_mm(oldmm, next->active_mm, next); >+ if (!mm) enter_lazy_tlb(oldmm, next); >+} else { >+ prepare_task_switch(rq, prev, next); >+ trace_sched_switch(rq, prev, next); > /* > * For paravirt, this is coupled with an exit in switch_to to > * combine the page table reload and the switch backend into >@@ -2883,11 +2893,24 @@ context_switch(struct rq *rq, struct tas > #ifndef __ARCH_WANT_UNLOCKED_CTXSW > spin_release(&rq->lock.dep_map, 1, _THIS_IP_); > #endif >- >+} >+#ifdef CONFIG_IPIPE >+ next->ptd[IPIPE_ROOT_NPTDKEYS - 1] = prev; >+#endif /* CONFIG_IPIPE */ > /* Here we just switch the register state and the stack. */ > switch_to(prev, next, prev); > > barrier(); >+ >+if (unlikely(rq)) { >+#if 1 // def CONFIG_IPIPE_DELAYED_ATOMICSW >+ current->state &= ~TASK_ATOMICSWITCH; >+#else >+ prev->state &= ~TASK_ATOMICSWITCH; >+#endif >+ if (task_hijacked(prev)) >+ return 1; __ipipe_dispatch_event(IPIPE_FIRST_EVENT - 2, 0); >+ > /* > * this_rq must be evaluated again because prev may have moved > * CPUs since it called schedule(), thus the 'rq' on its stack >@@ -2895,6 +2918,10 @@ context_switch(struct rq *rq, struct tas > */ > finish_task_switch(this_rq(), prev); > } >+ return 0; >+} >+ >+EXPORT_SYMBOL(context_switch); > > /* > * nr_running, nr_uninterruptible and nr_context_switches: >@@ -5300,6 +5327,7 @@ notrace unsigned long get_parent_ip(unsi > > void __kprobes add_preempt_count(int val) > { >+ ipipe_check_context(ipipe_root_domain); > #ifdef CONFIG_DEBUG_PREEMPT > /* > * Underflow? >@@ -5322,6 +5350,7 @@ EXPORT_SYMBOL(add_preempt_count); > > void __kprobes sub_preempt_count(int val) > { >+ ipipe_check_context(ipipe_root_domain); > #ifdef CONFIG_DEBUG_PREEMPT > /* > * Underflow? >@@ -5370,6 +5399,7 @@ static noinline void __schedule_bug(stru > */ > static inline void schedule_debug(struct task_struct *prev) > { >+ ipipe_check_context(ipipe_root_domain); > /* > * Test if we are atomic. Since do_exit() needs to call into > * schedule() atomically, we ignore that path for now. >@@ -5448,7 +5478,7 @@ pick_next_task(struct rq *rq) > /* > * schedule() is the main scheduler function. 
> */ >-asmlinkage void __sched schedule(void) >+asmlinkage int __sched schedule(void) > { > struct task_struct *prev, *next; > unsigned long *switch_count; >@@ -5462,6 +5492,9 @@ need_resched: > rcu_sched_qs(cpu); > prev = rq->curr; > switch_count = &prev->nivcsw; >+ if (unlikely(prev->state & TASK_ATOMICSWITCH)) >+ /* Pop one disable level -- one still remains. */ >+ preempt_enable(); > > release_kernel_lock(prev); > need_resched_nonpreemptible: >@@ -5499,15 +5532,18 @@ need_resched_nonpreemptible: > rq->curr = next; > ++*switch_count; > >- context_switch(rq, prev, next); /* unlocks the rq */ >+ if (context_switch(rq, prev, next)) /* unlocks the rq */ >+ return 1; /* task hijacked by higher domain */ > /* > * the context switch might have flipped the stack from under > * us, hence refresh the local variables. > */ > cpu = smp_processor_id(); > rq = cpu_rq(cpu); >- } else >+ } else { >+ prev->state &= ~TASK_ATOMICSWITCH; > spin_unlock_irq(&rq->lock); >+ } > > post_schedule(rq); > >@@ -5517,6 +5553,8 @@ need_resched_nonpreemptible: > preempt_enable_no_resched(); > if (need_resched()) > goto need_resched; >+ >+ return 0; > } > EXPORT_SYMBOL(schedule); > >@@ -5600,7 +5638,8 @@ asmlinkage void __sched preempt_schedule > > do { > add_preempt_count(PREEMPT_ACTIVE); >- schedule(); >+ if (schedule()) >+ return; > sub_preempt_count(PREEMPT_ACTIVE); > > /* >@@ -6371,6 +6410,7 @@ recheck: > oldprio = p->prio; > prev_class = p->sched_class; > __setscheduler(rq, p, policy, param->sched_priority); >+ ipipe_setsched_notify(p); > > if (running) > p->sched_class->set_curr_task(rq); >@@ -7018,6 +7058,7 @@ void __cpuinit init_idle(struct task_str > #else > task_thread_info(idle)->preempt_count = 0; > #endif >+ ipipe_check_context(ipipe_root_domain); > /* > * The idle tasks have their own, simple scheduling class: > */ >@@ -10958,3 +10999,64 @@ void synchronize_sched_expedited(void) > EXPORT_SYMBOL_GPL(synchronize_sched_expedited); > > #endif /* #else #ifndef CONFIG_SMP */ >+ >+#ifdef CONFIG_IPIPE >+ >+int ipipe_setscheduler_root(struct task_struct *p, int policy, int prio) >+{ >+ const struct sched_class *prev_class = p->sched_class; >+ int oldprio, on_rq, running; >+ unsigned long flags; >+ struct rq *rq; >+ >+ spin_lock_irqsave(&p->pi_lock, flags); >+ rq = __task_rq_lock(p); >+ update_rq_clock(rq); >+ on_rq = p->se.on_rq; >+ running = task_current(rq, p); >+ if (on_rq) >+ deactivate_task(rq, p, 0); >+ if (running) >+ p->sched_class->put_prev_task(rq, p); >+ >+ p->sched_reset_on_fork = 0; >+ >+ oldprio = p->prio; >+ __setscheduler(rq, p, policy, prio); >+ ipipe_setsched_notify(p); >+ >+ if (running) >+ p->sched_class->set_curr_task(rq); >+ if (on_rq) { >+ activate_task(rq, p, 0); >+ >+ check_class_changed(rq, p, prev_class, oldprio, running); >+ } >+ __task_rq_unlock(rq); >+ spin_unlock_irqrestore(&p->pi_lock, flags); >+ >+ rt_mutex_adjust_pi(p); >+ >+ return 0; >+} >+EXPORT_SYMBOL_GPL(ipipe_setscheduler_root); >+ >+int ipipe_reenter_root(struct task_struct *prev, int policy, int prio) >+{ >+ struct rq *rq = this_rq(); >+ >+ finish_task_switch(rq, prev); >+ >+ post_schedule(rq); >+ >+ (void)reacquire_kernel_lock(current); >+ preempt_enable_no_resched(); >+ >+ if (current->policy != policy || current->rt_priority != prio) >+ return ipipe_setscheduler_root(current, policy, prio); >+ >+ return 0; >+} >+EXPORT_SYMBOL_GPL(ipipe_reenter_root); >+ >+#endif /* CONFIG_IPIPE */ >diff --git a/kernel/signal.c b/kernel/signal.c >index 4d0658d..a7eac5f 100644 >--- a/kernel/signal.c >+++ b/kernel/signal.c >@@ -518,6 
+518,7 @@ void signal_wake_up(struct task_struct *t, int resume) > unsigned int mask; > > set_tsk_thread_flag(t, TIF_SIGPENDING); >+ ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */ > > /* > * For SIGKILL, we want to wake it up in the stopped/traced/killable >diff --git a/kernel/spinlock.c b/kernel/spinlock.c >index 5ddab73..97cf064 100644 >--- a/kernel/spinlock.c >+++ b/kernel/spinlock.c >@@ -50,7 +50,9 @@ EXPORT_SYMBOL(_write_trylock); > * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are > * not re-enabled during lock-acquire (which the preempt-spin-ops do): > */ >-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) >+#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ >+ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ >+ defined(CONFIG_IPIPE) > > #ifndef _read_lock > void __lockfunc _read_lock(rwlock_t *lock) >diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c >index 83c4417..782a209 100644 >--- a/kernel/time/tick-common.c >+++ b/kernel/time/tick-common.c >@@ -69,7 +69,7 @@ static void tick_periodic(int cpu) > write_sequnlock(&xtime_lock); > } > >- update_process_times(user_mode(get_irq_regs())); >+ update_root_process_times(get_irq_regs()); > profile_tick(CPU_PROFILING); > } > >@@ -177,6 +177,10 @@ static void tick_setup_device(struct tick_device *td, > > td->evtdev = newdev; > >+ /* I-pipe: derive global tick IRQ from CPU 0 */ >+ if (cpu == 0) >+ ipipe_update_tick_evtdev(newdev); >+ > /* > * When the device is not per cpu, pin the interrupt to the > * current cpu: >diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c >index 44320b1..45ec05a 100644 >--- a/kernel/time/tick-sched.c >+++ b/kernel/time/tick-sched.c >@@ -549,7 +549,7 @@ static void tick_nohz_handler(struct clock_event_device *dev) > ts->idle_jiffies++; > } > >- update_process_times(user_mode(regs)); >+ update_root_process_times(regs); > profile_tick(CPU_PROFILING); > > while (tick_nohz_reprogram(ts, now)) { >@@ -700,7 +700,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) > touch_softlockup_watchdog(); > ts->idle_jiffies++; > } >- update_process_times(user_mode(regs)); >+ update_root_process_times(regs); > profile_tick(CPU_PROFILING); > } > >diff --git a/kernel/timer.c b/kernel/timer.c >index 5db5a8d..1b45eb9 100644 >--- a/kernel/timer.c >+++ b/kernel/timer.c >@@ -1204,6 +1204,25 @@ void update_process_times(int user_tick) > run_posix_cpu_timers(p); > } > >+#ifdef CONFIG_IPIPE >+ >+void update_root_process_times(struct pt_regs *regs) >+{ >+ int cpu, user_tick = user_mode(regs); >+ >+ if (__ipipe_root_tick_p(regs)) { >+ update_process_times(user_tick); >+ return; >+ } >+ >+ run_local_timers(); >+ cpu = smp_processor_id(); >+ rcu_check_callbacks(cpu, user_tick); >+ run_posix_cpu_timers(current); >+} >+ >+#endif >+ > /* > * This function runs timers and the timer-tq in bottom half context. 
> */ >diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c >index 0cccb6c..eaba13e 100644 >--- a/kernel/trace/ftrace.c >+++ b/kernel/trace/ftrace.c >@@ -28,6 +28,7 @@ > #include <linux/ctype.h> > #include <linux/list.h> > #include <linux/hash.h> >+#include <linux/ipipe.h> > > #include <trace/events/sched.h> > >@@ -1142,6 +1143,9 @@ static int __ftrace_modify_code(void *data) > > static void ftrace_run_update_code(int command) > { >+#ifdef CONFIG_IPIPE >+ unsigned long flags; >+#endif /* CONFIG_IPIPE */ > int ret; > > ret = ftrace_arch_code_modify_prepare(); >@@ -1149,7 +1153,13 @@ static void ftrace_run_update_code(int command) > if (ret) > return; > >+#ifdef CONFIG_IPIPE >+ flags = ipipe_critical_enter(NULL); >+ __ftrace_modify_code(&command); >+ ipipe_critical_exit(flags); >+#else /* !CONFIG_IPIPE */ > stop_machine(__ftrace_modify_code, &command, NULL); >+#endif /* !CONFIG_IPIPE */ > > ret = ftrace_arch_code_modify_post_process(); > FTRACE_WARN_ON(ret); >@@ -2648,9 +2658,9 @@ static int ftrace_convert_nops(struct module *mod, > } > > /* disable interrupts to prevent kstop machine */ >- local_irq_save(flags); >+ local_irq_save_hw_notrace(flags); > ftrace_update_code(mod); >- local_irq_restore(flags); >+ local_irq_restore_hw_notrace(flags); > mutex_unlock(&ftrace_lock); > > return 0; >@@ -2729,9 +2739,9 @@ void __init ftrace_init(void) > /* Keep the ftrace pointer to the stub */ > addr = (unsigned long)ftrace_stub; > >- local_irq_save(flags); >+ local_irq_save_hw_notrace(flags); > ftrace_dyn_arch_init(&addr); >- local_irq_restore(flags); >+ local_irq_restore_hw_notrace(flags); > > /* ftrace_dyn_arch_init places the return code in addr */ > if (addr) >diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug >index 234ceb1..faffad9 100644 >--- a/lib/Kconfig.debug >+++ b/lib/Kconfig.debug >@@ -136,6 +136,8 @@ config DEBUG_SECTION_MISMATCH > - Enable verbose reporting from modpost to help solving > the section mismatches reported. 
> >+source "kernel/ipipe/Kconfig.debug" >+ > config DEBUG_KERNEL > bool "Kernel debugging" > help >diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c >index 9681d54..2dba50c 100644 >--- a/lib/bust_spinlocks.c >+++ b/lib/bust_spinlocks.c >@@ -13,6 +13,7 @@ > #include <linux/wait.h> > #include <linux/vt_kern.h> > #include <linux/console.h> >+#include <linux/ipipe_trace.h> > > > void __attribute__((weak)) bust_spinlocks(int yes) >@@ -24,6 +25,7 @@ void __attribute__((weak)) bust_spinlocks(int yes) > unblank_screen(); > #endif > console_unblank(); >+ ipipe_trace_panic_dump(); > if (--oops_in_progress == 0) > wake_up_klogd(); > } >diff --git a/lib/ioremap.c b/lib/ioremap.c >index 14c6078..a275469 100644 >--- a/lib/ioremap.c >+++ b/lib/ioremap.c >@@ -85,8 +85,8 @@ int ioremap_page_range(unsigned long addr, > if (err) > break; > } while (pgd++, addr = next, addr != end); >- >- flush_cache_vmap(start, end); >+ __ipipe_pin_range_globally(start, end); >+ flush_cache_vmap(start, end); > > return err; > } >diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c >index 4689cb0..3d12764 100644 >--- a/lib/smp_processor_id.c >+++ b/lib/smp_processor_id.c >@@ -12,10 +12,13 @@ notrace unsigned int debug_smp_processor_id(void) > unsigned long preempt_count = preempt_count(); > int this_cpu = raw_smp_processor_id(); > >+ if (!ipipe_root_domain_p) >+ goto out; >+ > if (likely(preempt_count)) > goto out; > >- if (irqs_disabled()) >+ if (irqs_disabled() || irqs_disabled_hw()) > goto out; > > /* >diff --git a/mm/memory.c b/mm/memory.c >index 4e59455..b8d365d 100644 >--- a/mm/memory.c >+++ b/mm/memory.c >@@ -56,6 +56,7 @@ > #include <linux/kallsyms.h> > #include <linux/swapops.h> > #include <linux/elf.h> >+#include <linux/vmalloc.h> > > #include <asm/io.h> > #include <asm/pgalloc.h> >@@ -566,6 +567,32 @@ out: > return pfn_to_page(pfn); > } > >+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) >+{ >+ /* >+ * If the source page was a PFN mapping, we don't have >+ * a "struct page" for it. We do a best-effort copy by >+ * just copying from the original user address. If that >+ * fails, we just zero-fill it. Live with it. >+ */ >+ if (unlikely(!src)) { >+ void *kaddr = kmap_atomic(dst, KM_USER0); >+ void __user *uaddr = (void __user *)(va & PAGE_MASK); >+ >+ /* >+ * This really shouldn't fail, because the page is there >+ * in the page tables. But it might just be unreadable, >+ * in which case we just give up and fill the result with >+ * zeroes. >+ */ >+ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) >+ memset(kaddr, 0, PAGE_SIZE); >+ kunmap_atomic(kaddr, KM_USER0); >+ flush_dcache_page(dst); >+ } else >+ copy_user_highpage(dst, src, va, vma); >+} >+ > /* > * copy one vm_area from one task to the other. 
Assumes the page tables > * already present in the new task to be cleared in the whole range >@@ -574,8 +601,8 @@ out: > > static inline void > copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, >- pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, >- unsigned long addr, int *rss) >+ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, >+ unsigned long addr, int *rss, struct page *uncow_page) > { > unsigned long vm_flags = vma->vm_flags; > pte_t pte = *src_pte; >@@ -614,6 +641,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, > * in the parent and the child > */ > if (is_cow_mapping(vm_flags)) { >+#ifdef CONFIG_IPIPE >+ if (uncow_page) { >+ struct page *old_page = vm_normal_page(vma, addr, pte); >+ cow_user_page(uncow_page, old_page, addr, vma); >+ pte = mk_pte(uncow_page, vma->vm_page_prot); >+ >+ if (vm_flags & VM_SHARED) >+ pte = pte_mkclean(pte); >+ pte = pte_mkold(pte); >+ >+ page_add_new_anon_rmap(uncow_page, vma, addr); >+ rss[!!PageAnon(uncow_page)]++; >+ goto out_set_pte; >+ } >+#endif /* CONFIG_IPIPE */ > ptep_set_wrprotect(src_mm, addr, src_pte); > pte = pte_wrprotect(pte); > } >@@ -645,13 +687,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, > pte_t *src_pte, *dst_pte; > spinlock_t *src_ptl, *dst_ptl; > int progress = 0; >+ struct page *uncow_page = NULL; > int rss[2]; >- >+#ifdef CONFIG_IPIPE >+ int do_cow_break = 0; >+again: >+ if (do_cow_break) { >+ uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); >+ if (!uncow_page) >+ return -ENOMEM; >+ do_cow_break = 0; >+ } >+#else > again: >+#endif > rss[1] = rss[0] = 0; > dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); >- if (!dst_pte) >+ if (!dst_pte) { >+ if (uncow_page) >+ page_cache_release(uncow_page); > return -ENOMEM; >+ } > src_pte = pte_offset_map_nested(src_pmd, addr); > src_ptl = pte_lockptr(src_mm, src_pmd); > spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); >@@ -674,7 +730,25 @@ again: > progress++; > continue; > } >- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss); >+#ifdef CONFIG_IPIPE >+ if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) { >+ if (is_cow_mapping(vma->vm_flags) && >+ test_bit(MMF_VM_PINNED, &src_mm->flags) && >+ ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) { >+ arch_leave_lazy_mmu_mode(); >+ spin_unlock(src_ptl); >+ pte_unmap_nested(src_pte); >+ add_mm_rss(dst_mm, rss[0], rss[1]); >+ pte_unmap_unlock(dst_pte, dst_ptl); >+ cond_resched(); >+ do_cow_break = 1; >+ goto again; >+ } >+ } >+#endif >+ copy_one_pte(dst_mm, src_mm, dst_pte, >+ src_pte, vma, addr, rss, uncow_page); >+ uncow_page = NULL; > progress += 8; > } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); > >@@ -1941,32 +2015,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) > return pte; > } > >-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) >-{ >- /* >- * If the source page was a PFN mapping, we don't have >- * a "struct page" for it. We do a best-effort copy by >- * just copying from the original user address. If that >- * fails, we just zero-fill it. Live with it. >- */ >- if (unlikely(!src)) { >- void *kaddr = kmap_atomic(dst, KM_USER0); >- void __user *uaddr = (void __user *)(va & PAGE_MASK); >- >- /* >- * This really shouldn't fail, because the page is there >- * in the page tables. But it might just be unreadable, >- * in which case we just give up and fill the result with >- * zeroes. 
>- */ >- if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) >- memset(kaddr, 0, PAGE_SIZE); >- kunmap_atomic(kaddr, KM_USER0); >- flush_dcache_page(dst); >- } else >- copy_user_highpage(dst, src, va, vma); >-} >- > /* > * This routine handles present pages, when users try to write > * to a shared page. It is done by copying the page to a new address >@@ -3377,3 +3425,111 @@ void might_fault(void) > } > EXPORT_SYMBOL(might_fault); > #endif >+ >+#ifdef CONFIG_IPIPE >+ >+static inline int ipipe_pin_