Gentoo's Bugzilla – Attachment 283033 Details for Bug 377831
sys-kernel/rtai-sources - Real Time Application Interface kernel sources
hal-linux-2.6.37.-x86-2.9-00.patch

Description: hal-linux-2.6.37.-x86-2.9-00.patch
Filename: hal-linux-2.6.37.-x86-2.9-00.patch.txt
MIME Type: text/plain
Creator: Chris Brown
Created: 2011-08-12 14:51:57 UTC
Size: 342.82 KB
Flags: patch, obsolete
>diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig >index e330da2..cef6fde 100644 >--- a/arch/x86/Kconfig >+++ b/arch/x86/Kconfig >@@ -15,6 +15,7 @@ config X86_64 > ### Arch settings > config X86 > def_bool y >+ select HAVE_IPIPE_HOSTRT if IPIPE > select HAVE_AOUT if X86_32 > select HAVE_READQ > select HAVE_WRITEQ >@@ -480,6 +481,7 @@ config SCHED_OMIT_FRAME_POINTER > > menuconfig PARAVIRT_GUEST > bool "Paravirtualized guest support" >+ depends on !IPIPE > ---help--- > Say Y here to get to see options related to running Linux under > various hypervisors. This option alone does not add any kernel code. >@@ -512,6 +514,7 @@ source "arch/x86/lguest/Kconfig" > > config PARAVIRT > bool "Enable paravirtualization code" >+ depends on !IPIPE > ---help--- > This changes the kernel so it can modify itself when it is run > under a hypervisor, potentially improving performance significantly >@@ -753,6 +756,8 @@ config IRQ_TIME_ACCOUNTING > > source "kernel/Kconfig.preempt" > >+source "kernel/ipipe/Kconfig" >+ > config X86_UP_APIC > bool "Local APIC support on uniprocessors" > depends on X86_32 && !SMP && !X86_32_NON_STANDARD >diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h >index f6ce0bd..9ffe6de 100644 >--- a/arch/x86/include/asm/apic.h >+++ b/arch/x86/include/asm/apic.h >@@ -413,7 +413,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; } > > #endif /* CONFIG_X86_LOCAL_APIC */ > >+#ifdef CONFIG_IPIPE >+#define ack_APIC_irq() do { } while(0) >+static inline void __ack_APIC_irq(void) >+#else /* !CONFIG_IPIPE */ >+#define __ack_APIC_irq() ack_APIC_irq() > static inline void ack_APIC_irq(void) >+#endif /* CONFIG_IPIPE */ > { > /* > * ack_APIC_irq() actually gets compiled as a single instruction >diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h >index a859ca4..adc4712 100644 >--- a/arch/x86/include/asm/apicdef.h >+++ b/arch/x86/include/asm/apicdef.h >@@ -150,6 +150,7 @@ > # define MAX_LOCAL_APIC 32768 > #endif > >+#ifndef __ASSEMBLY__ > /* > * All x86-64 systems are xAPIC compatible. > * In the following, "apicid" is a physical APIC ID. 
>@@ -425,4 +426,7 @@ struct local_apic { > #else > #define BAD_APICID 0xFFFFu > #endif >+ >+#endif /* !__ASSEMBLY__ */ >+ > #endif /* _ASM_X86_APICDEF_H */ >diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h >index 57650ab..f2292de 100644 >--- a/arch/x86/include/asm/entry_arch.h >+++ b/arch/x86/include/asm/entry_arch.h >@@ -16,7 +16,11 @@ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) > BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) > BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) > >+#ifdef CONFIG_IPIPE >+.irpc idx, "012" >+#else > .irpc idx, "01234567" >+#endif > BUILD_INTERRUPT3(invalidate_interrupt\idx, > (INVALIDATE_TLB_VECTOR_START)+\idx, > smp_invalidate_interrupt) >diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h >index 0274ec5..7b1216f 100644 >--- a/arch/x86/include/asm/hw_irq.h >+++ b/arch/x86/include/asm/hw_irq.h >@@ -35,6 +35,13 @@ extern void spurious_interrupt(void); > extern void thermal_interrupt(void); > extern void reschedule_interrupt(void); > extern void mce_self_interrupt(void); >+#ifdef CONFIG_IPIPE >+void ipipe_ipi0(void); >+void ipipe_ipi1(void); >+void ipipe_ipi2(void); >+void ipipe_ipi3(void); >+void ipipe_ipiX(void); >+#endif > > extern void invalidate_interrupt(void); > extern void invalidate_interrupt0(void); >@@ -124,6 +131,7 @@ extern void smp_apic_timer_interrupt(struct pt_regs *); > extern void smp_spurious_interrupt(struct pt_regs *); > extern void smp_x86_platform_ipi(struct pt_regs *); > extern void smp_error_interrupt(struct pt_regs *); >+extern void smp_perf_pending_interrupt(struct pt_regs *); > #ifdef CONFIG_X86_IO_APIC > extern asmlinkage void smp_irq_move_cleanup_interrupt(void); > #endif >@@ -136,6 +144,7 @@ extern void smp_invalidate_interrupt(struct pt_regs *); > #else > extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); > #endif >+extern asmlinkage void smp_reboot_interrupt(void); > #endif > > extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); >diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h >index 4aa2bb3..28dd6bb 100644 >--- a/arch/x86/include/asm/i387.h >+++ b/arch/x86/include/asm/i387.h >@@ -286,11 +286,14 @@ static inline void __clear_fpu(struct task_struct *tsk) > static inline void kernel_fpu_begin(void) > { > struct thread_info *me = current_thread_info(); >+ unsigned long flags; > preempt_disable(); >+ local_irq_save_hw_cond(flags); > if (me->status & TS_USEDFPU) > __save_init_fpu(me->task); > else > clts(); >+ local_irq_restore_hw_cond(flags); > } > > static inline void kernel_fpu_end(void) >diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h >index a203659..637db3a 100644 >--- a/arch/x86/include/asm/i8259.h >+++ b/arch/x86/include/asm/i8259.h >@@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask; > #define SLAVE_ICW4_DEFAULT 0x01 > #define PIC_ICW4_AEOI 2 > >-extern raw_spinlock_t i8259A_lock; >+IPIPE_DECLARE_RAW_SPINLOCK(i8259A_lock); > > /* the PIC may need a careful delay on some platforms, hence specific calls */ > static inline unsigned char inb_pic(unsigned int port) >diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h >index 0b72282..6574056 100644 >--- a/arch/x86/include/asm/ipi.h >+++ b/arch/x86/include/asm/ipi.h >@@ -68,6 +68,9 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest > * to the APIC. 
> */ > unsigned int cfg; >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); > > /* > * Wait for idle. >@@ -83,6 +86,8 @@ __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest > * Send the IPI. The write to APIC_ICR fires this off. > */ > native_apic_mem_write(APIC_ICR, cfg); >+ >+ local_irq_restore_hw(flags); > } > > /* >diff --git a/arch/x86/include/asm/ipipe.h b/arch/x86/include/asm/ipipe.h >new file mode 100644 >index 0000000..bb0cfcc >--- /dev/null >+++ b/arch/x86/include/asm/ipipe.h >@@ -0,0 +1,158 @@ >+/* -*- linux-c -*- >+ * arch/x86/include/asm/ipipe.h >+ * >+ * Copyright (C) 2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __X86_IPIPE_H >+#define __X86_IPIPE_H >+ >+#ifdef CONFIG_IPIPE >+ >+#ifndef IPIPE_ARCH_STRING >+#define IPIPE_ARCH_STRING "2.9-00" >+#define IPIPE_MAJOR_NUMBER 2 >+#define IPIPE_MINOR_NUMBER 9 >+#define IPIPE_PATCH_NUMBER 0 >+#endif >+ >+DECLARE_PER_CPU(struct pt_regs, __ipipe_tick_regs); >+ >+DECLARE_PER_CPU(unsigned long, __ipipe_cr2); >+ >+static inline unsigned __ipipe_get_irq_vector(int irq) >+{ >+#ifdef CONFIG_X86_IO_APIC >+ unsigned __ipipe_get_ioapic_irq_vector(int irq); >+ return __ipipe_get_ioapic_irq_vector(irq); >+#elif defined(CONFIG_X86_LOCAL_APIC) >+ return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ? >+ ipipe_apic_irq_vector(irq) : irq + IRQ0_VECTOR; >+#else >+ return irq + IRQ0_VECTOR; >+#endif >+} >+ >+#ifdef CONFIG_X86_32 >+# include "ipipe_32.h" >+#else >+# include "ipipe_64.h" >+#endif >+ >+/* >+ * The logical processor id and the current Linux task are read from the PDA, >+ * so this is always safe, regardless of the underlying stack. 
>+ */ >+#define ipipe_processor_id() raw_smp_processor_id() >+#define ipipe_safe_current() current >+ >+#define prepare_arch_switch(next) \ >+do { \ >+ ipipe_schedule_notify(current, next); \ >+ local_irq_disable_hw(); \ >+} while(0) >+ >+#define task_hijacked(p) \ >+ ({ int x = __ipipe_root_domain_p; \ >+ if (x) local_irq_enable_hw(); !x; }) >+ >+struct ipipe_domain; >+ >+struct ipipe_sysinfo { >+ >+ int ncpus; /* Number of CPUs on board */ >+ u64 cpufreq; /* CPU frequency (in Hz) */ >+ >+ /* Arch-dependent block */ >+ >+ struct { >+ unsigned tmirq; /* Timer tick IRQ */ >+ u64 tmfreq; /* Timer frequency */ >+ } archdep; >+}; >+ >+/* Private interface -- Internal use only */ >+ >+#define __ipipe_check_platform() do { } while(0) >+#define __ipipe_init_platform() do { } while(0) >+#define __ipipe_enable_irq(irq) irq_to_desc(irq)->chip->enable(irq) >+#define __ipipe_disable_irq(irq) irq_to_desc(irq)->chip->disable(irq) >+ >+#ifdef CONFIG_SMP >+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd); >+#else >+#define __ipipe_hook_critical_ipi(ipd) do { } while(0) >+#endif >+ >+#define __ipipe_disable_irqdesc(ipd, irq) do { } while(0) >+ >+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq); >+ >+void __ipipe_enable_pipeline(void); >+ >+void __ipipe_do_critical_sync(unsigned irq, void *cookie); >+ >+void __ipipe_serial_debug(const char *fmt, ...); >+ >+extern int __ipipe_tick_irq; >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+#define ipipe_update_tick_evtdev(evtdev) \ >+ do { \ >+ if (strcmp((evtdev)->name, "lapic") == 0) \ >+ __ipipe_tick_irq = \ >+ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR); \ >+ else \ >+ __ipipe_tick_irq = 0; \ >+ } while (0) >+#else >+#define ipipe_update_tick_evtdev(evtdev) \ >+ __ipipe_tick_irq = 0 >+#endif >+ >+int __ipipe_check_lapic(void); >+ >+int __ipipe_check_tickdev(const char *devname); >+ >+#define __ipipe_syscall_watched_p(p, sc) \ >+ (ipipe_notifier_enabled_p(p) || (unsigned long)sc >= NR_syscalls) >+ >+#define __ipipe_root_tick_p(regs) ((regs)->flags & X86_EFLAGS_IF) >+ >+#else /* !CONFIG_IPIPE */ >+ >+#define ipipe_update_tick_evtdev(evtdev) do { } while (0) >+#define task_hijacked(p) 0 >+ >+#endif /* CONFIG_IPIPE */ >+ >+#if defined(CONFIG_SMP) && defined(CONFIG_IPIPE) >+#define __ipipe_move_root_irq(irq) \ >+ do { \ >+ if (irq < NR_IRQS) { \ >+ struct irq_desc *desc = irq_to_desc(irq); \ >+ struct irq_chip *chip = desc->irq_data.chip; \ >+ if (chip->irq_move) \ >+ chip->irq_move(&desc->irq_data); \ >+ } \ >+ } while (0) >+#else /* !(CONFIG_SMP && CONFIG_IPIPE) */ >+#define __ipipe_move_root_irq(irq) do { } while (0) >+#endif /* !(CONFIG_SMP && CONFIG_IPIPE) */ >+ >+#endif /* !__X86_IPIPE_H */ >diff --git a/arch/x86/include/asm/ipipe_32.h b/arch/x86/include/asm/ipipe_32.h >new file mode 100644 >index 0000000..08bc268 >--- /dev/null >+++ b/arch/x86/include/asm/ipipe_32.h >@@ -0,0 +1,100 @@ >+/* -*- linux-c -*- >+ * arch/x86/include/asm/ipipe_32.h >+ * >+ * Copyright (C) 2002-2005 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __X86_IPIPE_32_H >+#define __X86_IPIPE_32_H >+ >+#include <linux/cpumask.h> >+#include <linux/list.h> >+#include <linux/threads.h> >+#include <linux/ipipe_percpu.h> >+#include <asm/ptrace.h> >+ >+#define ipipe_read_tsc(t) __asm__ __volatile__("rdtsc" : "=A" (t)) >+#define ipipe_cpu_freq() ({ unsigned long long __freq = cpu_has_tsc?(1000LL * cpu_khz):CLOCK_TICK_RATE; __freq; }) >+ >+#define ipipe_tsc2ns(t) \ >+({ \ >+ unsigned long long delta = (t)*1000; \ >+ do_div(delta, cpu_khz/1000+1); \ >+ (unsigned long)delta; \ >+}) >+ >+#define ipipe_tsc2us(t) \ >+({ \ >+ unsigned long long delta = (t); \ >+ do_div(delta, cpu_khz/1000+1); \ >+ (unsigned long)delta; \ >+}) >+ >+/* Private interface -- Internal use only */ >+ >+int __ipipe_handle_irq(struct pt_regs *regs); >+ >+static inline unsigned long __ipipe_ffnz(unsigned long ul) >+{ >+ __asm__("bsrl %1, %0":"=r"(ul) >+ : "r"(ul)); >+ return ul; >+} >+ >+struct irq_desc; >+ >+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc); >+ >+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc); >+ >+static inline void __do_root_xirq(ipipe_irq_handler_t handler, >+ unsigned int irq) >+{ >+ struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs); >+ >+ regs->orig_ax = ~__ipipe_get_irq_vector(irq); >+ >+ __asm__ __volatile__("pushfl\n\t" >+ "orl %[x86if],(%%esp)\n\t" >+ "pushl %%cs\n\t" >+ "pushl $__xirq_end\n\t" >+ "pushl %%eax\n\t" >+ "pushl %%gs\n\t" >+ "pushl %%fs\n\t" >+ "pushl %%es\n\t" >+ "pushl %%ds\n\t" >+ "pushl %%eax\n\t" >+ "pushl %%ebp\n\t" >+ "pushl %%edi\n\t" >+ "pushl %%esi\n\t" >+ "pushl %%edx\n\t" >+ "pushl %%ecx\n\t" >+ "pushl %%ebx\n\t" >+ "movl %2,%%eax\n\t" >+ "call *%1\n\t" >+ "jmp ret_from_intr\n\t" >+ "__xirq_end: cli\n" >+ : /* no output */ >+ : "a" (~irq), "r" (handler), "rm" (regs), >+ [x86if] "i" (X86_EFLAGS_IF)); >+} >+ >+#define __ipipe_do_root_xirq(ipd, irq) \ >+ __do_root_xirq((ipd)->irqs[irq].handler, irq) >+ >+#endif /* !__X86_IPIPE_32_H */ >diff --git a/arch/x86/include/asm/ipipe_64.h b/arch/x86/include/asm/ipipe_64.h >new file mode 100644 >index 0000000..7dce1e1 >--- /dev/null >+++ b/arch/x86/include/asm/ipipe_64.h >@@ -0,0 +1,112 @@ >+/* -*- linux-c -*- >+ * arch/x86/include/asm/ipipe_64.h >+ * >+ * Copyright (C) 2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
>+ */ >+ >+#ifndef __X86_IPIPE_64_H >+#define __X86_IPIPE_64_H >+ >+#include <asm/ptrace.h> >+#include <asm/irq.h> >+#include <asm/processor.h> >+#include <linux/cpumask.h> >+#include <linux/list.h> >+#include <linux/ipipe_percpu.h> >+#ifdef CONFIG_SMP >+#include <asm/mpspec.h> >+#include <linux/thread_info.h> >+#endif >+ >+#define ipipe_read_tsc(t) do { \ >+ unsigned int __a,__d; \ >+ asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ >+ (t) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ >+} while(0) >+ >+extern unsigned cpu_khz; >+#define ipipe_cpu_freq() ({ unsigned long __freq = (1000UL * cpu_khz); __freq; }) >+#define ipipe_tsc2ns(t) (((t) * 1000UL) / (ipipe_cpu_freq() / 1000000UL)) >+#define ipipe_tsc2us(t) ((t) / (ipipe_cpu_freq() / 1000000UL)) >+ >+/* Private interface -- Internal use only */ >+ >+int __ipipe_handle_irq(struct pt_regs *regs); >+ >+static inline unsigned long __ipipe_ffnz(unsigned long ul) >+{ >+ __asm__("bsrq %1, %0":"=r"(ul) >+ : "rm"(ul)); >+ return ul; >+} >+ >+struct irq_desc; >+ >+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc); >+ >+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc); >+ >+static inline void __do_root_xirq(ipipe_irq_handler_t handler, >+ unsigned int irq) >+{ >+ struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs); >+ >+ regs->orig_ax = ~__ipipe_get_irq_vector(irq); >+ >+ __asm__ __volatile__("movq %%rsp, %%rax\n\t" >+ "pushq $0\n\t" >+ "pushq %%rax\n\t" >+ "pushfq\n\t" >+ "orq %[x86if],(%%rsp)\n\t" >+ "pushq %[kernel_cs]\n\t" >+ "pushq $__xirq_end\n\t" >+ "pushq %[vector]\n\t" >+ "subq $9*8,%%rsp\n\t" >+ "movq %%rdi,8*8(%%rsp)\n\t" >+ "movq %%rsi,7*8(%%rsp)\n\t" >+ "movq %%rdx,6*8(%%rsp)\n\t" >+ "movq %%rcx,5*8(%%rsp)\n\t" >+ "movq %%rax,4*8(%%rsp)\n\t" >+ "movq %%r8,3*8(%%rsp)\n\t" >+ "movq %%r9,2*8(%%rsp)\n\t" >+ "movq %%r10,1*8(%%rsp)\n\t" >+ "movq %%r11,(%%rsp)\n\t" >+ "call *%[handler]\n\t" >+ "cli\n\t" >+ "jmp exit_intr\n\t" >+ "__xirq_end: cli\n" >+ : /* no output */ >+ : [kernel_cs] "i" (__KERNEL_CS), >+ [vector] "rm" (regs->orig_ax), >+ [handler] "r" (handler), "D" (regs), >+ [x86if] "i" (X86_EFLAGS_IF) >+ : "rax"); >+} >+ >+#define __ipipe_do_root_xirq(ipd, irq) \ >+ __do_root_xirq((ipd)->irqs[irq].handler, irq) >+ >+#ifdef CONFIG_PREEMPT >+#define __ipipe_check_root_resched() \ >+ (preempt_count() == 0 && need_resched() && \ >+ per_cpu(irq_count, ipipe_processor_id()) < 0) >+#else >+#define __ipipe_check_root_resched() 0 >+#endif >+ >+#endif /* !__X86_IPIPE_64_H */ >diff --git a/arch/x86/include/asm/ipipe_base.h b/arch/x86/include/asm/ipipe_base.h >new file mode 100644 >index 0000000..44c8c73 >--- /dev/null >+++ b/arch/x86/include/asm/ipipe_base.h >@@ -0,0 +1,216 @@ >+/* -*- linux-c -*- >+ * arch/x86/include/asm/ipipe_base.h >+ * >+ * Copyright (C) 2007-2009 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. 
>+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __X86_IPIPE_BASE_H >+#define __X86_IPIPE_BASE_H >+ >+#include <linux/threads.h> >+#include <asm/apicdef.h> >+#include <asm/irq_vectors.h> >+#include <asm/bitsperlong.h> >+ >+#ifdef CONFIG_X86_32 >+#define IPIPE_NR_FAULTS 33 /* 32 from IDT + iret_error */ >+#else >+#define IPIPE_NR_FAULTS 32 >+#endif >+ >+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) >+/* >+ * System interrupts are mapped beyond the last defined external IRQ >+ * number. >+ */ >+#define IPIPE_NR_XIRQS (NR_IRQS + 32) >+#define IPIPE_FIRST_APIC_IRQ NR_IRQS >+#define IPIPE_SERVICE_VECTOR0 (INVALIDATE_TLB_VECTOR_END + 1) >+#define IPIPE_SERVICE_IPI0 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0) >+#define IPIPE_SERVICE_VECTOR1 (INVALIDATE_TLB_VECTOR_END + 2) >+#define IPIPE_SERVICE_IPI1 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1) >+#define IPIPE_SERVICE_VECTOR2 (INVALIDATE_TLB_VECTOR_END + 3) >+#define IPIPE_SERVICE_IPI2 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2) >+#define IPIPE_SERVICE_VECTOR3 (INVALIDATE_TLB_VECTOR_END + 4) >+#define IPIPE_SERVICE_IPI3 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3) >+#ifdef CONFIG_SMP >+#define IPIPE_CRITICAL_VECTOR (INVALIDATE_TLB_VECTOR_END + 5) >+#define IPIPE_CRITICAL_IPI ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR) >+#endif >+#define ipipe_apic_irq_vector(irq) ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR) >+#define ipipe_apic_vector_irq(vec) ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ) >+#else /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */ >+#define IPIPE_NR_XIRQS NR_IRQS >+#endif /* !(CONFIG_X86_64 || CONFIG_X86_LOCAL_APIC) */ >+ >+#define ipipe_ipi_p(ipi) \ >+ (ipi >= IPIPE_SERVICE_IPI0 && ipi <= IPIPE_SERVICE_IPI3) >+ >+/* Pseudo-vectors used for kernel events */ >+#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS >+#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT) >+#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1) >+#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2) >+#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3) >+#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4) >+#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5) >+#define IPIPE_EVENT_CLEANUP (IPIPE_FIRST_EVENT + 6) >+#define IPIPE_EVENT_RETURN (IPIPE_FIRST_EVENT + 7) >+#define IPIPE_EVENT_HOSTRT (IPIPE_FIRST_EVENT + 8) >+#define IPIPE_LAST_EVENT IPIPE_EVENT_HOSTRT >+#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1) >+ >+#define ex_do_divide_error 0 >+#define ex_do_debug 1 >+/* NMI not pipelined. */ >+#define ex_do_int3 3 >+#define ex_do_overflow 4 >+#define ex_do_bounds 5 >+#define ex_do_invalid_op 6 >+#define ex_do_device_not_available 7 >+/* Double fault not pipelined. 
*/ >+#define ex_do_coprocessor_segment_overrun 9 >+#define ex_do_invalid_TSS 10 >+#define ex_do_segment_not_present 11 >+#define ex_do_stack_segment 12 >+#define ex_do_general_protection 13 >+#define ex_do_page_fault 14 >+#define ex_do_spurious_interrupt_bug 15 >+#define ex_do_coprocessor_error 16 >+#define ex_do_alignment_check 17 >+#define ex_machine_check_vector 18 >+#define ex_reserved ex_machine_check_vector >+#define ex_do_simd_coprocessor_error 19 >+#define ex_do_iret_error 32 >+ >+#ifndef __ASSEMBLY__ >+ >+#ifdef CONFIG_SMP >+ >+#include <asm/alternative.h> >+ >+#ifdef CONFIG_X86_32 >+#define GET_ROOT_STATUS_ADDR \ >+ "pushfl; cli;" \ >+ "movl %%fs:this_cpu_off, %%eax;" \ >+ "lea ipipe_percpu_darray(%%eax), %%eax;" >+#define PUT_ROOT_STATUS_ADDR "popfl;" >+#define TEST_AND_SET_ROOT_STATUS \ >+ "btsl $0,(%%eax);" >+#define TEST_ROOT_STATUS \ >+ "btl $0,(%%eax);" >+#define ROOT_TEST_CLOBBER_LIST "eax" >+#else /* CONFIG_X86_64 */ >+#define GET_ROOT_STATUS_ADDR \ >+ "pushfq; cli;" \ >+ "movq %%gs:this_cpu_off, %%rax;" \ >+ "lea ipipe_percpu_darray(%%rax), %%rax;" >+#define PUT_ROOT_STATUS_ADDR "popfq;" >+#define TEST_AND_SET_ROOT_STATUS \ >+ "btsl $0,(%%rax);" >+#define TEST_ROOT_STATUS \ >+ "btl $0,(%%rax);" >+#define ROOT_TEST_CLOBBER_LIST "rax" >+#endif /* CONFIG_X86_64 */ >+ >+static inline void __ipipe_stall_root(void) >+{ >+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR >+ LOCK_PREFIX >+ TEST_AND_SET_ROOT_STATUS >+ PUT_ROOT_STATUS_ADDR >+ : : : ROOT_TEST_CLOBBER_LIST, "memory"); >+} >+ >+static inline unsigned long __ipipe_test_and_stall_root(void) >+{ >+ int oldbit; >+ >+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR >+ LOCK_PREFIX >+ TEST_AND_SET_ROOT_STATUS >+ "sbbl %0,%0;" >+ PUT_ROOT_STATUS_ADDR >+ :"=r" (oldbit) >+ : : ROOT_TEST_CLOBBER_LIST, "memory"); >+ return oldbit; >+} >+ >+static inline unsigned long __ipipe_test_root(void) >+{ >+ int oldbit; >+ >+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR >+ TEST_ROOT_STATUS >+ "sbbl %0,%0;" >+ PUT_ROOT_STATUS_ADDR >+ :"=r" (oldbit) >+ : : ROOT_TEST_CLOBBER_LIST); >+ return oldbit; >+} >+ >+#else /* !CONFIG_SMP */ >+ >+#if __GNUC__ >= 4 >+/* Alias to ipipe_root_cpudom_var(status) */ >+extern unsigned long __ipipe_root_status; >+#else >+extern unsigned long *const __ipipe_root_status_addr; >+#define __ipipe_root_status (*__ipipe_root_status_addr) >+#endif >+ >+static inline void __ipipe_stall_root(void) >+{ >+ volatile unsigned long *p = &__ipipe_root_status; >+ __asm__ __volatile__("btsl $0,%0;" >+ :"+m" (*p) : : "memory"); >+} >+ >+static inline unsigned long __ipipe_test_and_stall_root(void) >+{ >+ volatile unsigned long *p = &__ipipe_root_status; >+ int oldbit; >+ >+ __asm__ __volatile__("btsl $0,%1;" >+ "sbbl %0,%0;" >+ :"=r" (oldbit), "+m" (*p) >+ : : "memory"); >+ return oldbit; >+} >+ >+static inline unsigned long __ipipe_test_root(void) >+{ >+ volatile unsigned long *p = &__ipipe_root_status; >+ int oldbit; >+ >+ __asm__ __volatile__("btl $0,%1;" >+ "sbbl %0,%0;" >+ :"=r" (oldbit) >+ :"m" (*p)); >+ return oldbit; >+} >+ >+#endif /* !CONFIG_SMP */ >+ >+void __ipipe_halt_root(void); >+ >+void __ipipe_serial_debug(const char *fmt, ...); >+ >+#endif /* !__ASSEMBLY__ */ >+ >+#endif /* !__X86_IPIPE_BASE_H */ >diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h >index 6af0894..9c9f5f3 100644 >--- a/arch/x86/include/asm/irq_vectors.h >+++ b/arch/x86/include/asm/irq_vectors.h >@@ -96,10 +96,17 @@ > #define THRESHOLD_APIC_VECTOR 0xf9 > #define REBOOT_VECTOR 0xf8 > >+#ifdef CONFIG_IPIPE >+/* f0-f2 used for TLB 
flush, f3-f7 reserved for the I-pipe */ >+#define INVALIDATE_TLB_VECTOR_END 0xf2 >+#define INVALIDATE_TLB_VECTOR_START 0xf0 >+#define NUM_INVALIDATE_TLB_VECTORS 3 >+#else /* !CONFIG_IPIPE */ > /* f0-f7 used for spreading out TLB flushes: */ > #define INVALIDATE_TLB_VECTOR_END 0xf7 > #define INVALIDATE_TLB_VECTOR_START 0xf0 > #define NUM_INVALIDATE_TLB_VECTORS 8 >+#endif > > /* > * Local APIC timer IRQ vector is on a different priority level, >@@ -120,6 +127,9 @@ > > #define UV_BAU_MESSAGE 0xea > >+/* I-pipe: Lowest number of vectors above */ >+#define FIRST_SYSTEM_VECTOR 0xea >+ > /* > * Self IPI vector for machine checks > */ >diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h >index 5745ce8..ddfd8cc 100644 >--- a/arch/x86/include/asm/irqflags.h >+++ b/arch/x86/include/asm/irqflags.h >@@ -4,6 +4,11 @@ > #include <asm/processor-flags.h> > > #ifndef __ASSEMBLY__ >+ >+#include <linux/ipipe_base.h> >+#include <linux/ipipe_trace.h> >+#include <linux/compiler.h> >+ > /* > * Interrupt control: > */ >@@ -54,6 +59,13 @@ static inline void native_halt(void) > asm volatile("hlt": : :"memory"); > } > >+static inline int native_irqs_disabled(void) >+{ >+ unsigned long flags = native_save_fl(); >+ >+ return !(flags & X86_EFLAGS_IF); >+} >+ > #endif > > #ifdef CONFIG_PARAVIRT >@@ -63,22 +75,46 @@ static inline void native_halt(void) > > static inline unsigned long arch_local_save_flags(void) > { >+#ifdef CONFIG_IPIPE >+ unsigned long flags; >+ >+ flags = (!__ipipe_test_root()) << 9; >+ barrier(); >+ return flags; >+#else > return native_save_fl(); >+#endif > } > > static inline void arch_local_irq_restore(unsigned long flags) > { >+#ifdef CONFIG_IPIPE >+ barrier(); >+ __ipipe_restore_root(!(flags & X86_EFLAGS_IF)); >+#else > native_restore_fl(flags); >+#endif > } > > static inline void arch_local_irq_disable(void) > { >+#ifdef CONFIG_IPIPE >+ ipipe_check_context(ipipe_root_domain); >+ __ipipe_stall_root(); >+ barrier(); >+#else > native_irq_disable(); >+#endif > } > > static inline void arch_local_irq_enable(void) > { >+#ifdef CONFIG_IPIPE >+ barrier(); >+ __ipipe_unstall_root(); >+#else > native_irq_enable(); >+#endif > } > > /* >@@ -87,7 +123,12 @@ static inline void arch_local_irq_enable(void) > */ > static inline void arch_safe_halt(void) > { >+#ifdef CONFIG_IPIPE >+ barrier(); >+ __ipipe_halt_root(); >+#else > native_safe_halt(); >+#endif > } > > /* >@@ -99,6 +140,20 @@ static inline void halt(void) > native_halt(); > } > >+/* Merge virtual+real interrupt mask bits into a single word. 
*/ >+static inline unsigned long arch_mangle_irq_bits(int virt, unsigned long real) >+{ >+ return (real & ~(1L << 31)) | ((virt != 0) << 31); >+} >+ >+/* Converse operation of arch_mangle_irq_bits() */ >+static inline int arch_demangle_irq_bits(unsigned long *x) >+{ >+ int virt = (*x & (1L << 31)) != 0; >+ *x &= ~(1L << 31); >+ return virt; >+} >+ > /* > * For spinlocks, etc: > */ >@@ -113,6 +168,14 @@ static inline unsigned long arch_local_irq_save(void) > #define ENABLE_INTERRUPTS(x) sti > #define DISABLE_INTERRUPTS(x) cli > >+#ifdef CONFIG_IPIPE >+#define ENABLE_INTERRUPTS_HW_COND sti >+#define DISABLE_INTERRUPTS_HW_COND cli >+#else /* !CONFIG_IPIPE */ >+#define ENABLE_INTERRUPTS_HW_COND >+#define DISABLE_INTERRUPTS_HW_COND >+#endif /* !CONFIG_IPIPE */ >+ > #ifdef CONFIG_X86_64 > #define SWAPGS swapgs > /* >@@ -163,6 +226,80 @@ static inline int arch_irqs_disabled(void) > return arch_irqs_disabled_flags(flags); > } > >+/* >+ * FIXME: we should really align on native_* at some point, instead of >+ * introducing yet another layer (i.e. *_hw()). >+ */ >+#define local_irq_save_hw_notrace(flags) \ >+ do { \ >+ (flags) = native_save_fl(); \ >+ native_irq_disable(); \ >+ } while (0) >+ >+static inline void local_irq_restore_hw_notrace(unsigned long flags) >+{ >+ native_restore_fl(flags); >+} >+ >+static inline void local_irq_disable_hw_notrace(void) >+{ >+ native_irq_disable(); >+} >+ >+static inline void local_irq_enable_hw_notrace(void) >+{ >+ native_irq_enable(); >+} >+ >+static inline int irqs_disabled_hw(void) >+{ >+ return native_irqs_disabled(); >+} >+ >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+ >+#define local_irq_disable_hw() do { \ >+ if (!native_irqs_disabled()) { \ >+ native_irq_disable(); \ >+ ipipe_trace_begin(0x80000000); \ >+ } \ >+ } while (0) >+ >+#define local_irq_enable_hw() do { \ >+ if (native_irqs_disabled()) { \ >+ ipipe_trace_end(0x80000000); \ >+ native_irq_enable(); \ >+ } \ >+ } while (0) >+ >+#define local_irq_save_hw(flags) do { \ >+ (flags) = native_save_fl(); \ >+ if ((flags) & X86_EFLAGS_IF) { \ >+ native_irq_disable(); \ >+ ipipe_trace_begin(0x80000001); \ >+ } \ >+ } while (0) >+ >+#define local_irq_restore_hw(flags) do { \ >+ if ((flags) & X86_EFLAGS_IF) \ >+ ipipe_trace_end(0x80000001); \ >+ native_irq_disable(); \ >+ } while (0) >+ >+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ >+ >+#define local_irq_save_hw(flags) local_irq_save_hw_notrace(flags) >+#define local_irq_restore_hw(flags) local_irq_restore_hw_notrace(flags) >+#define local_irq_enable_hw() local_irq_enable_hw_notrace() >+#define local_irq_disable_hw() local_irq_disable_hw_notrace() >+ >+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ >+ >+#define local_save_flags_hw(flags) \ >+ do { \ >+ (flags) = native_save_fl(); \ >+ } while (0) >+ > #else > > #ifdef CONFIG_X86_64 >@@ -181,7 +318,10 @@ static inline int arch_irqs_disabled(void) > pushl %eax; \ > pushl %ecx; \ > pushl %edx; \ >+ pushfl; \ >+ sti; \ > call lockdep_sys_exit; \ >+ popfl; \ > popl %edx; \ > popl %ecx; \ > popl %eax; >@@ -190,8 +330,38 @@ static inline int arch_irqs_disabled(void) > #endif > > #ifdef CONFIG_TRACE_IRQFLAGS >+# ifdef CONFIG_IPIPE >+# ifdef CONFIG_X86_64 >+# define TRACE_IRQS_ON \ >+ call trace_hardirqs_on_thunk; \ >+ pushq %rax; \ >+ PER_CPU(ipipe_percpu_darray, %rax); \ >+ btrl $0,(%rax); \ >+ popq %rax >+# define TRACE_IRQS_OFF \ >+ pushq %rax; \ >+ PER_CPU(ipipe_percpu_darray, %rax); \ >+ btsl $0,(%rax); \ >+ popq %rax; \ >+ call trace_hardirqs_off_thunk >+# else /* CONFIG_X86_32 */ >+# define TRACE_IRQS_ON \ >+ call 
trace_hardirqs_on_thunk; \ >+ pushl %eax; \ >+ PER_CPU(ipipe_percpu_darray, %eax); \ >+ btrl $0,(%eax); \ >+ popl %eax >+# define TRACE_IRQS_OFF \ >+ pushl %eax; \ >+ PER_CPU(ipipe_percpu_darray, %eax); \ >+ btsl $0,(%eax); \ >+ popl %eax; \ >+ call trace_hardirqs_off_thunk >+# endif /* CONFIG_X86_32 */ >+# else /* !CONFIG_IPIPE */ > # define TRACE_IRQS_ON call trace_hardirqs_on_thunk; > # define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; >+# endif /* !CONFIG_IPIPE */ > #else > # define TRACE_IRQS_ON > # define TRACE_IRQS_OFF >diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h >index 4a2d4e0..1ee45d4 100644 >--- a/arch/x86/include/asm/mmu_context.h >+++ b/arch/x86/include/asm/mmu_context.h >@@ -30,11 +30,14 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) > #endif > } > >-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, >- struct task_struct *tsk) >+static inline void __switch_mm(struct mm_struct *prev, struct mm_struct *next, >+ struct task_struct *tsk) > { > unsigned cpu = smp_processor_id(); > >+#ifdef CONFIG_IPIPE_DEBUG_INTERNAL >+ WARN_ON_ONCE(!irqs_disabled_hw()); >+#endif > if (likely(prev != next)) { > /* stop flush ipis for the previous mm */ > cpumask_clear_cpu(cpu, mm_cpumask(prev)); >@@ -70,10 +73,23 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, > #endif > } > >+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, >+ struct task_struct *tsk) >+{ >+ unsigned long flags; >+ local_irq_save_hw_cond(flags); >+ __switch_mm(prev, next, tsk); >+ local_irq_restore_hw_cond(flags); >+} >+ >+#define ipipe_mm_switch_protect(flags) local_irq_save_hw_cond(flags) >+#define ipipe_mm_switch_unprotect(flags) \ >+ local_irq_restore_hw_cond(flags) >+ > #define activate_mm(prev, next) \ > do { \ > paravirt_activate_mm((prev), (next)); \ >- switch_mm((prev), (next), NULL); \ >+ __switch_mm((prev), (next), NULL); \ > } while (0); > > #ifdef CONFIG_X86_32 >diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h >index 932f0f8..d8edb28 100644 >--- a/arch/x86/include/asm/nmi.h >+++ b/arch/x86/include/asm/nmi.h >@@ -30,7 +30,7 @@ extern void setup_apic_nmi_watchdog(void *); > extern void stop_apic_nmi_watchdog(void *); > extern void disable_timer_nmi_watchdog(void); > extern void enable_timer_nmi_watchdog(void); >-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); >+extern int (*nmi_watchdog_tick)(struct pt_regs *regs, unsigned reason); > extern void cpu_nmi_set_wd_enabled(void); > > extern atomic_t nmi_active; >diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h >index cae9c3c..6c65960 100644 >--- a/arch/x86/include/asm/processor.h >+++ b/arch/x86/include/asm/processor.h >@@ -440,6 +440,7 @@ struct thread_struct { > unsigned short ds; > unsigned short fsindex; > unsigned short gsindex; >+ unsigned long rip; > #endif > #ifdef CONFIG_X86_32 > unsigned long ip; >diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h >index 33ecc3e..ee98176 100644 >--- a/arch/x86/include/asm/system.h >+++ b/arch/x86/include/asm/system.h >@@ -127,8 +127,12 @@ do { \ > #define switch_to(prev, next, last) \ > asm volatile(SAVE_CONTEXT \ > "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ >+ "movq $thread_return,%P[threadrip](%[prev])\n\t" /* save RIP */ \ > "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ >- "call __switch_to\n\t" \ >+ "pushq %P[threadrip](%[next])\n\t" /* 
restore RIP */ \ >+ "jmp __switch_to\n\t" \ >+ ".globl thread_return\n\t" \ >+ "thread_return:\n\t" \ > "movq "__percpu_arg([current_task])",%%rsi\n\t" \ > __switch_canary \ > "movq %P[thread_info](%%rsi),%%r8\n\t" \ >@@ -140,6 +144,7 @@ do { \ > __switch_canary_oparam \ > : [next] "S" (next), [prev] "D" (prev), \ > [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ >+ [threadrip] "i" (offsetof(struct task_struct, thread.rip)), \ > [ti_flags] "i" (offsetof(struct thread_info, flags)), \ > [_tif_fork] "i" (_TIF_FORK), \ > [thread_info] "i" (offsetof(struct task_struct, stack)), \ >@@ -307,8 +312,13 @@ static inline void native_wbinvd(void) > #else > #define read_cr0() (native_read_cr0()) > #define write_cr0(x) (native_write_cr0(x)) >+#ifdef CONFIG_IPIPE >+#define read_cr2() __raw_get_cpu_var(__ipipe_cr2) >+#define write_cr2(x) __raw_get_cpu_var(__ipipe_cr2) = (x) >+#else /* !CONFIG_IPIPE */ > #define read_cr2() (native_read_cr2()) > #define write_cr2(x) (native_write_cr2(x)) >+#endif /* !CONFIG_IPIPE */ > #define read_cr3() (native_read_cr3()) > #define write_cr3(x) (native_write_cr3(x)) > #define read_cr4() (native_read_cr4()) >diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h >index f66cda5..021a149 100644 >--- a/arch/x86/include/asm/traps.h >+++ b/arch/x86/include/asm/traps.h >@@ -82,8 +82,8 @@ extern int panic_on_unrecovered_nmi; > void math_error(struct pt_regs *, int, int); > void math_emulate(struct math_emu_info *); > #ifndef CONFIG_X86_32 >-asmlinkage void smp_thermal_interrupt(void); > asmlinkage void mce_threshold_interrupt(void); > #endif >+asmlinkage void smp_thermal_interrupt(void); > > #endif /* _ASM_X86_TRAPS_H */ >diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h >index 1ca132f..f8b9a98 100644 >--- a/arch/x86/include/asm/tsc.h >+++ b/arch/x86/include/asm/tsc.h >@@ -14,6 +14,7 @@ > */ > typedef unsigned long long cycles_t; > >+extern struct clocksource clocksource_tsc; > extern unsigned int cpu_khz; > extern unsigned int tsc_khz; > >diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile >index 1e99475..f21aa27 100644 >--- a/arch/x86/kernel/Makefile >+++ b/arch/x86/kernel/Makefile >@@ -63,6 +63,7 @@ obj-$(CONFIG_MCA) += mca_32.o > obj-$(CONFIG_X86_MSR) += msr.o > obj-$(CONFIG_X86_CPUID) += cpuid.o > obj-$(CONFIG_PCI) += early-quirks.o >+obj-$(CONFIG_IPIPE) += ipipe.o > apm-y := apm_32.o > obj-$(CONFIG_APM) += apm.o > obj-$(CONFIG_SMP) += smp.o >diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c >index 7821813..303ed13 100644 >--- a/arch/x86/kernel/apic/apic.c >+++ b/arch/x86/kernel/apic/apic.c >@@ -475,7 +475,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, > if (evt->features & CLOCK_EVT_FEAT_DUMMY) > return; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > > switch (mode) { > case CLOCK_EVT_MODE_PERIODIC: >@@ -495,7 +495,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, > break; > } > >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > /* >@@ -1011,7 +1011,7 @@ void lapic_shutdown(void) > if (!cpu_has_apic && !apic_from_smp_config()) > return; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > > #ifdef CONFIG_X86_32 > if (!enabled_via_apicbase) >@@ -1021,7 +1021,7 @@ void lapic_shutdown(void) > disable_local_APIC(); > > >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > /* >@@ -1195,6 +1195,10 @@ static void __cpuinit lapic_setup_esr(void) > oldvalue, value); > } > >+int __ipipe_check_lapic(void) >+{ >+ return 
!(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY); >+} > > /** > * setup_local_APIC - setup the local APIC >@@ -1268,7 +1272,7 @@ void __cpuinit setup_local_APIC(void) > value = apic_read(APIC_ISR + i*0x10); > for (j = 31; j >= 0; j--) { > if (value & (1<<j)) { >- ack_APIC_irq(); >+ __ack_APIC_irq(); > acked++; > } > } >@@ -1792,7 +1796,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) > */ > v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); > if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) >- ack_APIC_irq(); >+ __ack_APIC_irq(); > > inc_irq_stat(irq_spurious_count); > >@@ -2061,13 +2065,13 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) > apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); > #endif > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > disable_local_APIC(); > > if (intr_remapping_enabled) > disable_intr_remapping(); > >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > return 0; > } > >@@ -2082,7 +2086,7 @@ static int lapic_resume(struct sys_device *dev) > if (!apic_pm_state.active) > return 0; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > if (intr_remapping_enabled) { > ioapic_entries = alloc_ioapic_entries(); > if (!ioapic_entries) { >@@ -2148,7 +2152,7 @@ static int lapic_resume(struct sys_device *dev) > free_ioapic_entries(ioapic_entries); > } > restore: >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > return ret; > } >diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c >index 09d3b17..0820b62 100644 >--- a/arch/x86/kernel/apic/apic_flat_64.c >+++ b/arch/x86/kernel/apic/apic_flat_64.c >@@ -72,9 +72,9 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector) > { > unsigned long flags; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > __default_send_IPI_dest_field(mask, vector, apic->dest_logical); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) >diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c >index fadcd74..fa599db 100644 >--- a/arch/x86/kernel/apic/io_apic.c >+++ b/arch/x86/kernel/apic/io_apic.c >@@ -74,8 +74,8 @@ > */ > int sis_apic_bug = -1; > >-static DEFINE_RAW_SPINLOCK(ioapic_lock); >-static DEFINE_RAW_SPINLOCK(vector_lock); >+static IPIPE_DEFINE_RAW_SPINLOCK(ioapic_lock); >+static IPIPE_DEFINE_RAW_SPINLOCK(vector_lock); > > /* > * # of IRQ routing registers >@@ -309,6 +309,8 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned > writel(value, &io_apic->data); > } > >+#if !defined(CONFIG_IPIPE) || defined(CONFIG_SMP) >+ > static bool io_apic_level_ack_pending(struct irq_cfg *cfg) > { > struct irq_pin_list *entry; >@@ -332,6 +334,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) > return false; > } > >+#endif /* !CONFIG_IPIPE || CONFIG_SMP */ >+ > union entry_union { > struct { u32 w1, w2; }; > struct IO_APIC_route_entry entry; >@@ -495,18 +499,24 @@ static void io_apic_sync(struct irq_pin_list *entry) > readl(&io_apic->data); > } > >-static void mask_ioapic(struct irq_cfg *cfg) >+static inline void __mask_ioapic(struct irq_cfg *cfg) >+{ >+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); >+} >+ >+static void mask_ioapic(unsigned int irq, struct irq_cfg *cfg) > { > unsigned long flags; > > raw_spin_lock_irqsave(&ioapic_lock, flags); >- io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); >+ ipipe_irq_lock(irq); >+ 
__mask_ioapic(cfg); > raw_spin_unlock_irqrestore(&ioapic_lock, flags); > } > > static void mask_ioapic_irq(struct irq_data *data) > { >- mask_ioapic(data->chip_data); >+ mask_ioapic(data->irq, data->chip_data); > } > > static void __unmask_ioapic(struct irq_cfg *cfg) >@@ -514,18 +524,19 @@ static void __unmask_ioapic(struct irq_cfg *cfg) > io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); > } > >-static void unmask_ioapic(struct irq_cfg *cfg) >+static void unmask_ioapic(unsigned int irq, struct irq_cfg *cfg) > { > unsigned long flags; > > raw_spin_lock_irqsave(&ioapic_lock, flags); > __unmask_ioapic(cfg); >+ ipipe_irq_unlock(irq); > raw_spin_unlock_irqrestore(&ioapic_lock, flags); > } > > static void unmask_ioapic_irq(struct irq_data *data) > { >- unmask_ioapic(data->chip_data); >+ unmask_ioapic(data->irq, data->chip_data); > } > > static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) >@@ -2124,6 +2135,7 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) > was_pending = 1; > } > __unmask_ioapic(data->chip_data); >+ ipipe_irq_unlock(irq); > raw_spin_unlock_irqrestore(&ioapic_lock, flags); > > return was_pending; >@@ -2377,35 +2389,19 @@ static inline void irq_complete_move(struct irq_cfg *cfg) { } > > static void ack_apic_edge(struct irq_data *data) > { >+#ifndef CONFIG_IPIPE > irq_complete_move(data->chip_data); > move_native_irq(data->irq); >- ack_APIC_irq(); >+#endif /* CONFIG_IPIPE */ >+ __ack_APIC_irq(); > } > > atomic_t irq_mis_count; > >-/* >- * IO-APIC versions below 0x20 don't support EOI register. >- * For the record, here is the information about various versions: >- * 0Xh 82489DX >- * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant >- * 2Xh I/O(x)APIC which is PCI 2.2 Compliant >- * 30h-FFh Reserved >- * >- * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic >- * version as 0x2. This is an error with documentation and these ICH chips >- * use io-apic's of version 0x20. >- * >- * For IO-APIC's with EOI register, we use that to do an explicit EOI. >- * Otherwise, we simulate the EOI message manually by changing the trigger >- * mode to edge and then back to level, with RTE being masked during this. >-*/ >-static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) >+static void __eoi_ioapic_irq(struct irq_cfg *cfg) > { > struct irq_pin_list *entry; >- unsigned long flags; > >- raw_spin_lock_irqsave(&ioapic_lock, flags); > for_each_irq_pin(entry, cfg->irq_2_pin) { > if (mp_ioapics[entry->apic].apicver >= 0x20) { > /* >@@ -2423,21 +2419,82 @@ static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) > __unmask_and_level_IO_APIC_irq(entry); > } > } >+} >+ >+#if !defined(CONFIG_IPIPE) || defined(CONFIG_SMP) >+ >+/* >+ * IO-APIC versions below 0x20 don't support EOI register. >+ * For the record, here is the information about various versions: >+ * 0Xh 82489DX >+ * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant >+ * 2Xh I/O(x)APIC which is PCI 2.2 Compliant >+ * 30h-FFh Reserved >+ * >+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic >+ * version as 0x2. This is an error with documentation and these ICH chips >+ * use io-apic's of version 0x20. >+ * >+ * For IO-APIC's with EOI register, we use that to do an explicit EOI. >+ * Otherwise, we simulate the EOI message manually by changing the trigger >+ * mode to edge and then back to level, with RTE being masked during this. 
>+*/ >+ >+static inline void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) >+{ >+ unsigned long flags; >+ >+ raw_spin_lock_irqsave(&ioapic_lock, flags); >+ __eoi_ioapic_irq(cfg); > raw_spin_unlock_irqrestore(&ioapic_lock, flags); > } > >+#endif /* !IPIPE || SMP */ >+ >+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) >+ >+static void move_xxapic_irq(struct irq_data *data) >+{ >+ unsigned int irq = data->irq; >+ struct irq_desc *desc = irq_to_desc(irq); >+ struct irq_cfg *cfg = data->chip_data; >+ >+ if (desc->handle_irq == &handle_edge_irq) { >+ raw_spin_lock(&desc->lock); >+ irq_complete_move(cfg); >+ move_native_irq(irq); >+ raw_spin_unlock(&desc->lock); >+ } else if (desc->handle_irq == &handle_fasteoi_irq) { >+ raw_spin_lock(&desc->lock); >+ irq_complete_move(cfg); >+ if (irq_remapped(cfg)) >+ eoi_ioapic_irq(irq, cfg); >+ if (unlikely(desc->status & IRQ_MOVE_PENDING)) { >+ if (!io_apic_level_ack_pending(cfg)) >+ move_masked_irq(irq); >+ unmask_ioapic(irq, cfg); >+ } >+ raw_spin_unlock(&desc->lock); >+ } else >+ WARN_ON_ONCE(1); >+} >+ >+#endif /* CONFIG_IPIPE && CONFIG_SMP */ >+ > static void ack_apic_level(struct irq_data *data) > { > struct irq_cfg *cfg = data->chip_data; >- int i, do_unmask_irq = 0, irq = data->irq; > unsigned long v; >+ int i; >+#ifndef CONFIG_IPIPE >+ int do_unmask_irq = 0, irq = data->irq; > > irq_complete_move(cfg); > #ifdef CONFIG_GENERIC_PENDING_IRQ > /* If we are moving the irq we need to mask it */ > if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { > do_unmask_irq = 1; >- mask_ioapic(cfg); >+ mask_ioapic(irq, cfg); > } > #endif > >@@ -2525,19 +2582,38 @@ static void ack_apic_level(struct irq_data *data) > */ > if (!io_apic_level_ack_pending(cfg)) > move_masked_irq(irq); >- unmask_ioapic(cfg); >+ unmask_ioapic(irq, cfg); > } >+#else /* CONFIG_IPIPE */ >+ /* >+ * Prevent low priority IRQs grabbed by high priority domains >+ * from being delayed, waiting for a high priority interrupt >+ * handler running in a low priority domain to complete. >+ * This code assumes hw interrupts off. >+ */ >+ i = cfg->vector; >+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); >+ raw_spin_lock(&ioapic_lock); >+ if (unlikely(!(v & (1 << (i & 0x1f))))) { >+ /* IO-APIC erratum: see comment above. 
*/ >+ atomic_inc(&irq_mis_count); >+ __eoi_ioapic_irq(cfg); >+ } >+ __mask_ioapic(cfg); >+ raw_spin_unlock(&ioapic_lock); >+ __ack_APIC_irq(); >+#endif /* CONFIG_IPIPE */ > } > > #ifdef CONFIG_INTR_REMAP > static void ir_ack_apic_edge(struct irq_data *data) > { >- ack_APIC_irq(); >+ __ack_APIC_irq(); > } > > static void ir_ack_apic_level(struct irq_data *data) > { >- ack_APIC_irq(); >+ __ack_APIC_irq(); > eoi_ioapic_irq(data->irq, data->chip_data); > } > #endif /* CONFIG_INTR_REMAP */ >@@ -2551,6 +2627,9 @@ static struct irq_chip ioapic_chip __read_mostly = { > .irq_eoi = ack_apic_level, > #ifdef CONFIG_SMP > .irq_set_affinity = ioapic_set_affinity, >+#ifdef CONFIG_IPIPE >+ .irq_move = move_xxapic_irq, >+#endif > #endif > .irq_retrigger = ioapic_retrigger_irq, > }; >@@ -2565,6 +2644,9 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { > .irq_eoi = ir_ack_apic_level, > #ifdef CONFIG_SMP > .irq_set_affinity = ir_ioapic_set_affinity, >+#ifdef CONFIG_IPIPE >+ .irq_move = move_xxapic_irq, >+#endif > #endif > #endif > .irq_retrigger = ioapic_retrigger_irq, >@@ -2609,23 +2691,29 @@ static inline void init_IO_APIC_traps(void) > > static void mask_lapic_irq(struct irq_data *data) > { >- unsigned long v; >+ unsigned long v, flags; > >+ local_irq_save_hw_cond(flags); >+ ipipe_irq_lock(data->irq); > v = apic_read(APIC_LVT0); > apic_write(APIC_LVT0, v | APIC_LVT_MASKED); >+ local_irq_restore_hw_cond(flags); > } > > static void unmask_lapic_irq(struct irq_data *data) > { >- unsigned long v; >+ unsigned long v, flags; > >+ local_irq_save_hw_cond(flags); > v = apic_read(APIC_LVT0); > apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); >+ ipipe_irq_unlock(data->irq); >+ local_irq_restore_hw_cond(flags); > } > > static void ack_lapic_irq(struct irq_data *data) > { >- ack_APIC_irq(); >+ __ack_APIC_irq(); > } > > static struct irq_chip lapic_chip __read_mostly = { >@@ -2633,6 +2721,9 @@ static struct irq_chip lapic_chip __read_mostly = { > .irq_mask = mask_lapic_irq, > .irq_unmask = unmask_lapic_irq, > .irq_ack = ack_lapic_irq, >+#if defined(CONFIG_IPIPE) && defined(CONFIG_SMP) >+ .irq_move = move_xxapic_irq, >+#endif > }; > > static void lapic_register_intr(int irq) >@@ -2818,7 +2909,7 @@ static inline void __init check_timer(void) > int idx; > idx = find_irq_entry(apic1, pin1, mp_INT); > if (idx != -1 && irq_trigger(idx)) >- unmask_ioapic(cfg); >+ unmask_ioapic(0, cfg); > } > if (timer_irq_works()) { > if (nmi_watchdog == NMI_IO_APIC) { >@@ -2879,6 +2970,10 @@ static inline void __init check_timer(void) > "...trying to set up timer as Virtual Wire IRQ...\n"); > > lapic_register_intr(0); >+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_64) >+ irq_to_desc(0)->ipipe_ack = __ipipe_ack_edge_irq; >+ irq_to_desc(0)->ipipe_end = __ipipe_end_edge_irq; >+#endif > apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ > legacy_pic->unmask(0); > >@@ -3263,6 +3358,9 @@ static struct irq_chip msi_chip = { > .irq_ack = ack_apic_edge, > #ifdef CONFIG_SMP > .irq_set_affinity = msi_set_affinity, >+#ifdef CONFIG_IPIPE >+ .irq_move = move_xxapic_irq, >+#endif > #endif > .irq_retrigger = ioapic_retrigger_irq, > }; >@@ -3275,6 +3373,9 @@ static struct irq_chip msi_ir_chip = { > .irq_ack = ir_ack_apic_edge, > #ifdef CONFIG_SMP > .irq_set_affinity = ir_msi_set_affinity, >+#ifdef CONFIG_IPIPE >+ .irq_move = move_xxapic_irq, >+#endif > #endif > #endif > .irq_retrigger = ioapic_retrigger_irq, >@@ -3576,6 +3677,9 @@ static struct irq_chip ht_irq_chip = { > .irq_ack = ack_apic_edge, > #ifdef CONFIG_SMP > 
.irq_set_affinity = ht_set_affinity, >+#ifdef CONFIG_IPIPE >+ .irq_move = move_xxapic_irq, >+#endif > #endif > .irq_retrigger = ioapic_retrigger_irq, > }; >@@ -3877,6 +3981,14 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) > return 0; > } > >+#ifdef CONFIG_IPIPE >+unsigned __ipipe_get_ioapic_irq_vector(int irq) >+{ >+ return irq >= IPIPE_FIRST_APIC_IRQ && irq < IPIPE_NR_XIRQS ? >+ ipipe_apic_irq_vector(irq) : irq_cfg(irq)->vector; >+} >+#endif /* CONFIG_IPIPE */ >+ > /* > * This function currently is only a helper for the i386 smp boot process where > * we need to reprogram the ioredtbls to cater for the cpus which have come online >diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c >index 08385e0..f5ad117 100644 >--- a/arch/x86/kernel/apic/ipi.c >+++ b/arch/x86/kernel/apic/ipi.c >@@ -29,12 +29,12 @@ void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) > * to an arbitrary mask, so I do a unicast to each CPU instead. > * - mbligh > */ >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, > query_cpu), vector, APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, >@@ -46,14 +46,14 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, > > /* See Hack comment above */ > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > if (query_cpu == this_cpu) > continue; > __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, > query_cpu), vector, APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, >@@ -68,12 +68,12 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, > * should be modified to do 1 message per cluster ID - mbligh > */ > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) > __default_send_IPI_dest_field( > apic->cpu_to_logical_apicid(query_cpu), vector, > apic->dest_logical); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, >@@ -85,7 +85,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, > > /* See Hack comment above */ > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > if (query_cpu == this_cpu) > continue; >@@ -93,7 +93,7 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, > apic->cpu_to_logical_apicid(query_cpu), vector, > apic->dest_logical); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > #ifdef CONFIG_X86_32 >@@ -109,10 +109,10 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) > if (WARN_ONCE(!mask, "empty IPI mask")) > return; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); > __default_send_IPI_dest_field(mask, vector, apic->dest_logical); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > void default_send_IPI_allbutself(int vector) >diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c >index c90041c..ea61b7a 100644 >--- a/arch/x86/kernel/apic/nmi.c >+++ b/arch/x86/kernel/apic/nmi.c >@@ -61,6 +61,10 @@ static unsigned int nmi_hz = HZ; > static 
DEFINE_PER_CPU(short, wd_enabled); > static int endflag __initdata; > >+static int default_nmi_watchdog_tick(struct pt_regs * regs, unsigned reason); >+int (*nmi_watchdog_tick) (struct pt_regs * regs, unsigned reason) = &default_nmi_watchdog_tick; >+EXPORT_SYMBOL(nmi_watchdog_tick); >+ > static inline unsigned int get_nmi_count(int cpu) > { > return per_cpu(irq_stat, cpu).__nmi_count; >@@ -389,7 +393,7 @@ void touch_nmi_watchdog(void) > EXPORT_SYMBOL(touch_nmi_watchdog); > > notrace __kprobes int >-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) >+default_nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) > { > /* > * Since current_thread_info()-> is always on the stack, and we >diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c >index cf69c59..23baa6d 100644 >--- a/arch/x86/kernel/apic/x2apic_cluster.c >+++ b/arch/x86/kernel/apic/x2apic_cluster.c >@@ -61,13 +61,13 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > __x2apic_send_IPI_dest( > per_cpu(x86_cpu_to_logical_apicid, query_cpu), > vector, apic->dest_logical); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void >@@ -79,7 +79,7 @@ static void > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > if (query_cpu == this_cpu) > continue; >@@ -87,7 +87,7 @@ static void > per_cpu(x86_cpu_to_logical_apicid, query_cpu), > vector, apic->dest_logical); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void x2apic_send_IPI_allbutself(int vector) >@@ -98,7 +98,7 @@ static void x2apic_send_IPI_allbutself(int vector) > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_online_cpu(query_cpu) { > if (query_cpu == this_cpu) > continue; >@@ -106,7 +106,7 @@ static void x2apic_send_IPI_allbutself(int vector) > per_cpu(x86_cpu_to_logical_apicid, query_cpu), > vector, apic->dest_logical); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void x2apic_send_IPI_all(int vector) >diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c >index 8972f38..19bccee 100644 >--- a/arch/x86/kernel/apic/x2apic_phys.c >+++ b/arch/x86/kernel/apic/x2apic_phys.c >@@ -62,12 +62,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), > vector, APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void >@@ -79,14 +79,14 @@ static void > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_cpu(query_cpu, mask) { > if (query_cpu != this_cpu) > __x2apic_send_IPI_dest( > per_cpu(x86_cpu_to_apicid, query_cpu), > vector, APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void x2apic_send_IPI_allbutself(int vector) >@@ -97,14 +97,14 @@ static void x2apic_send_IPI_allbutself(int vector) > > x2apic_wrmsr_fence(); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > for_each_online_cpu(query_cpu) { > if (query_cpu == this_cpu) > continue; > __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), > vector, 
APIC_DEST_PHYSICAL); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > static void x2apic_send_IPI_all(int vector) >diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c >index 68a3343..f7e0927 100644 >--- a/arch/x86/kernel/cpu/mtrr/cyrix.c >+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c >@@ -18,7 +18,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, > > arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > > ccr3 = getCx86(CX86_CCR3); > setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ >@@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, > rcr = getCx86(CX86_RCR_BASE + reg); > setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ > >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > shift = ((unsigned char *) base)[1] & 0x0f; > *base >>= PAGE_SHIFT; >@@ -178,6 +178,7 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, > unsigned long size, mtrr_type type) > { > unsigned char arr, arr_type, arr_size; >+ unsigned long flags; > > arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ > >@@ -221,6 +222,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, > } > } > >+ local_irq_save_hw(flags); >+ > prepare_set(); > > base <<= PAGE_SHIFT; >@@ -230,6 +233,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, > setCx86(CX86_RCR_BASE + reg, arr_type); > > post_set(); >+ >+ local_irq_restore_hw(flags); > } > > typedef struct { >@@ -247,8 +252,10 @@ static unsigned char ccr_state[7] = { 0, 0, 0, 0, 0, 0, 0 }; > > static void cyrix_set_all(void) > { >+ unsigned long flags; > int i; > >+ local_irq_save_hw(flags); > prepare_set(); > > /* the CCRs are not contiguous */ >@@ -263,6 +270,7 @@ static void cyrix_set_all(void) > } > > post_set(); >+ local_irq_restore_hw(flags); > } > > static const struct mtrr_ops cyrix_mtrr_ops = { >diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c >index 9f27228..caef22b 100644 >--- a/arch/x86/kernel/cpu/mtrr/generic.c >+++ b/arch/x86/kernel/cpu/mtrr/generic.c >@@ -717,7 +717,7 @@ static void generic_set_all(void) > unsigned long mask, count; > unsigned long flags; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > prepare_set(); > > /* Actually set the state */ >@@ -727,7 +727,7 @@ static void generic_set_all(void) > pat_init(); > > post_set(); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > /* Use the atomic bitops to update the global mask */ > for (count = 0; count < sizeof mask * 8; ++count) { >@@ -751,12 +751,12 @@ static void generic_set_all(void) > static void generic_set_mtrr(unsigned int reg, unsigned long base, > unsigned long size, mtrr_type type) > { >- unsigned long flags; >+ unsigned long flags, _flags; > struct mtrr_var_range *vr; > > vr = &mtrr_state.var_ranges[reg]; > >- local_irq_save(flags); >+ local_irq_save_full(flags, _flags); > prepare_set(); > > if (size == 0) { >@@ -777,7 +777,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, > } > > post_set(); >- local_irq_restore(flags); >+ local_irq_restore_full(flags, _flags); > } > > int generic_validate_add_page(unsigned long base, unsigned long size, >diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c >index 6e8752c..18a3f2a 100644 >--- a/arch/x86/kernel/dumpstack.c >+++ b/arch/x86/kernel/dumpstack.c >@@ -349,6 +349,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) > 
local_irq_enable(); > do_exit(SIGBUS); > } >+EXPORT_SYMBOL_GPL(die_nmi); > > static int __init oops_setup(char *s) > { >diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c >index 1bc7f75..8d99aef 100644 >--- a/arch/x86/kernel/dumpstack_32.c >+++ b/arch/x86/kernel/dumpstack_32.c >@@ -101,6 +101,9 @@ void show_registers(struct pt_regs *regs) > printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", > TASK_COMM_LEN, current->comm, task_pid_nr(current), > current_thread_info(), current, task_thread_info(current)); >+#ifdef CONFIG_IPIPE >+ printk(KERN_EMERG "I-pipe domain %s\n", ipipe_current_domain->name); >+#endif /* CONFIG_IPIPE */ > /* > * When in-kernel, we also print out the stack and code at the > * time of the fault.. >diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c >index 6a34048..4b5e381 100644 >--- a/arch/x86/kernel/dumpstack_64.c >+++ b/arch/x86/kernel/dumpstack_64.c >@@ -293,6 +293,11 @@ void show_registers(struct pt_regs *regs) > printk("CPU %d ", cpu); > print_modules(); > __show_regs(regs, 1); >+#ifdef CONFIG_IPIPE >+ if (ipipe_current_domain != ipipe_root_domain) >+ printk("I-pipe domain %s\n", ipipe_current_domain->name); >+ else >+#endif /* CONFIG_IPIPE */ > printk("Process %s (pid: %d, threadinfo %p, task %p)\n", > cur->comm, cur->pid, task_thread_info(cur), cur); > >diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S >index 591e601..efb5dcf 100644 >--- a/arch/x86/kernel/entry_32.S >+++ b/arch/x86/kernel/entry_32.S >@@ -44,6 +44,7 @@ > #include <linux/linkage.h> > #include <asm/thread_info.h> > #include <asm/irqflags.h> >+#include <asm/ipipe_base.h> > #include <asm/errno.h> > #include <asm/segment.h> > #include <asm/smp.h> >@@ -80,6 +81,58 @@ > > #define nr_syscalls ((syscall_table_size)/4) > >+#ifdef CONFIG_IPIPE >+#define CATCH_ROOT_SYSCALL(bypass_check,bypass_nocheck) \ >+ movl %esp,%eax ; \ >+ call __ipipe_syscall_root ; \ >+ testl %eax,%eax ; \ >+ movl PT_EAX(%esp),%eax ; \ >+ js bypass_check ; \ >+ jne bypass_nocheck ; \ >+ movl PT_ORIG_EAX(%esp),%eax >+#define PUSH_XCODE(v) pushl $ ex_ ## v >+#define PUSH_XCODE_CFI(v) pushl $ ex_ ## v ; CFI_ADJUST_CFA_OFFSET 4 >+#define PUSH_XVEC(v) pushl $ ex_ ## v >+#define PUSH_XVEC_CFI(v) pushl $ ex_ ## v ; CFI_ADJUST_CFA_OFFSET 4 >+#define HANDLE_EXCEPTION(code) movl %code,%ecx ; \ >+ call __ipipe_handle_exception ; \ >+ testl %eax,%eax ; \ >+ jnz restore_nocheck >+#define DIVERT_EXCEPTION(code) movl $(__USER_DS), %ecx ; \ >+ movl %ecx, %ds ; \ >+ movl %ecx, %es ; \ >+ movl %esp, %eax ; \ >+ movl $ex_ ## code,%edx ; \ >+ call __ipipe_divert_exception ; \ >+ testl %eax,%eax ; \ >+ jnz restore_nocheck >+#define PREEMPT_SCHEDULE_IRQ call __ipipe_preempt_schedule_irq >+ >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+# define IPIPE_TRACE_IRQ_ENTER \ >+ lea PT_EIP-4(%esp), %ebp; \ >+ movl PT_ORIG_EAX(%esp), %eax; \ >+ call ipipe_trace_begin >+# define IPIPE_TRACE_IRQ_EXIT \ >+ pushl %eax; \ >+ movl PT_ORIG_EAX+4(%esp), %eax; \ >+ call ipipe_trace_end; \ >+ popl %eax >+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ >+#define IPIPE_TRACE_IRQ_ENTER >+#define IPIPE_TRACE_IRQ_EXIT >+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ >+#else /* !CONFIG_IPIPE */ >+#define CATCH_ROOT_SYSCALL(bypass_check,bypass_nocheck) >+#define PUSH_XCODE(v) pushl $v >+#define PUSH_XCODE_CFI(v) pushl $v ; CFI_ADJUST_CFA_OFFSET 4 >+#define PUSH_XVEC(v) pushl v >+#define PUSH_XVEC_CFI(v) pushl v ; CFI_ADJUST_CFA_OFFSET 4 >+#define HANDLE_EXCEPTION(code) call *%code >+#define DIVERT_EXCEPTION(code) 
>+#define PREEMPT_SCHEDULE_IRQ call preempt_schedule_irq >+#endif /* CONFIG_IPIPE */ >+ > #ifdef CONFIG_PREEMPT > #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF > #else >@@ -296,6 +349,7 @@ > .endm > > ENTRY(ret_from_fork) >+ ENABLE_INTERRUPTS_HW_COND > CFI_STARTPROC > pushl_cfi %eax > call schedule_tail >@@ -323,7 +377,7 @@ END(ret_from_fork) > RING0_PTREGS_FRAME > ret_from_exception: > preempt_stop(CLBR_ANY) >-ret_from_intr: >+ENTRY(ret_from_intr) > GET_THREAD_INFO(%ebp) > check_userspace: > movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS >@@ -356,7 +410,7 @@ need_resched: > jz restore_all > testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? > jz restore_all >- call preempt_schedule_irq >+ PREEMPT_SCHEDULE_IRQ > jmp need_resched > END(resume_kernel) > #endif >@@ -416,6 +470,7 @@ sysenter_past_esp: > .previous > > GET_THREAD_INFO(%ebp) >+ CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_exit) > > testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) > jnz sysenter_audit >@@ -424,6 +479,7 @@ sysenter_do_call: > jae syscall_badsys > call *sys_call_table(,%eax,4) > movl %eax,PT_EAX(%esp) >+sysenter_tail: > LOCKDEP_SYS_EXIT > DISABLE_INTERRUPTS(CLBR_ANY) > TRACE_IRQS_OFF >@@ -498,6 +554,7 @@ ENTRY(system_call) > pushl_cfi %eax # save orig_eax > SAVE_ALL > GET_THREAD_INFO(%ebp) >+ CATCH_ROOT_SYSCALL(syscall_exit,restore_nocheck) > # system call tracing in operation / emulation > testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) > jnz syscall_trace_entry >@@ -536,7 +593,7 @@ irq_return: > .section .fixup,"ax" > ENTRY(iret_exc) > pushl $0 # no error code >- pushl $do_iret_error >+ PUSH_XCODE(do_iret_error) > jmp error_code > .previous > .section __ex_table,"a" >@@ -602,6 +659,7 @@ work_pending: > testb $_TIF_NEED_RESCHED, %cl > jz work_notifysig > work_resched: >+ ENABLE_INTERRUPTS_HW_COND > call schedule > LOCKDEP_SYS_EXIT > DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt >@@ -617,6 +675,7 @@ work_resched: > > work_notifysig: # deal with pending signals and > # notify-resume requests >+ ENABLE_INTERRUPTS_HW_COND > #ifdef CONFIG_VM86 > testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) > movl %esp, %eax >@@ -819,6 +878,49 @@ END(irq_entries_start) > END(interrupt) > .previous > >+#ifdef CONFIG_IPIPE >+ .p2align CONFIG_X86_L1_CACHE_SHIFT >+common_interrupt: >+ addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ >+ SAVE_ALL >+ IPIPE_TRACE_IRQ_ENTER >+ movl %esp, %eax >+ call *ipipe_irq_handler >+ IPIPE_TRACE_IRQ_EXIT >+ testl %eax,%eax >+ jnz ret_from_intr >+ jmp restore_nocheck >+ CFI_ENDPROC >+ >+ .pushsection .kprobes.text, "ax" >+#define BUILD_INTERRUPT3(name, nr, fn) \ >+ENTRY(name) \ >+ RING0_INT_FRAME; \ >+ pushl $~(nr); \ >+ CFI_ADJUST_CFA_OFFSET 4; \ >+ SAVE_ALL; \ >+ IPIPE_TRACE_IRQ_ENTER; \ >+ movl %esp, %eax; \ >+ call *ipipe_irq_handler; \ >+ IPIPE_TRACE_IRQ_EXIT; \ >+ testl %eax,%eax; \ >+ jnz ret_from_intr; \ >+ jmp restore_nocheck; \ >+ CFI_ENDPROC >+ >+#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+ BUILD_INTERRUPT(ipipe_ipi0,IPIPE_SERVICE_VECTOR0) >+ BUILD_INTERRUPT(ipipe_ipi1,IPIPE_SERVICE_VECTOR1) >+ BUILD_INTERRUPT(ipipe_ipi2,IPIPE_SERVICE_VECTOR2) >+ BUILD_INTERRUPT(ipipe_ipi3,IPIPE_SERVICE_VECTOR3) >+#ifdef CONFIG_SMP >+ BUILD_INTERRUPT(ipipe_ipiX,IPIPE_CRITICAL_VECTOR) >+#endif >+#endif >+ >+#else /* !CONFIG_IPIPE */ > /* > * the CPU automatically disables interrupts when executing an IRQ vector, > * so IRQ-flags tracing has to follow that: >@@ -852,13 +954,15 @@ 
ENDPROC(name) > > #define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) > >+#endif /* !CONFIG_IPIPE */ >+ > /* The include is where all of the SMP etc. interrupts come from */ > #include <asm/entry_arch.h> > > ENTRY(coprocessor_error) > RING0_INT_FRAME > pushl_cfi $0 >- pushl_cfi $do_coprocessor_error >+ PUSH_XCODE_CFI(do_coprocessor_error) > jmp error_code > CFI_ENDPROC > END(coprocessor_error) >@@ -868,7 +972,7 @@ ENTRY(simd_coprocessor_error) > pushl_cfi $0 > #ifdef CONFIG_X86_INVD_BUG > /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ >-661: pushl_cfi $do_general_protection >+661: PUSH_XCODE_CFI(do_general_protection) > 662: > .section .altinstructions,"a" > .balign 4 >@@ -879,11 +983,11 @@ ENTRY(simd_coprocessor_error) > .byte 664f-663f > .previous > .section .altinstr_replacement,"ax" >-663: pushl $do_simd_coprocessor_error >+663: PUSH_XCODE(do_simd_coprocessor_error) > 664: > .previous > #else >- pushl_cfi $do_simd_coprocessor_error >+ PUSH_XCODE_CFI(do_simd_coprocessor_error) > #endif > jmp error_code > CFI_ENDPROC >@@ -892,7 +996,7 @@ END(simd_coprocessor_error) > ENTRY(device_not_available) > RING0_INT_FRAME > pushl_cfi $-1 # mark this as an int >- pushl_cfi $do_device_not_available >+ PUSH_XCODE_CFI(do_device_not_available) > jmp error_code > CFI_ENDPROC > END(device_not_available) >@@ -915,7 +1019,7 @@ END(native_irq_enable_sysexit) > ENTRY(overflow) > RING0_INT_FRAME > pushl_cfi $0 >- pushl_cfi $do_overflow >+ PUSH_XCODE_CFI(do_overflow) > jmp error_code > CFI_ENDPROC > END(overflow) >@@ -923,7 +1027,7 @@ END(overflow) > ENTRY(bounds) > RING0_INT_FRAME > pushl_cfi $0 >- pushl_cfi $do_bounds >+ PUSH_XCODE_CFI(do_bounds) > jmp error_code > CFI_ENDPROC > END(bounds) >@@ -931,7 +1035,7 @@ END(bounds) > ENTRY(invalid_op) > RING0_INT_FRAME > pushl_cfi $0 >- pushl_cfi $do_invalid_op >+ PUSH_XCODE_CFI(do_invalid_op) > jmp error_code > CFI_ENDPROC > END(invalid_op) >@@ -939,35 +1043,35 @@ END(invalid_op) > ENTRY(coprocessor_segment_overrun) > RING0_INT_FRAME > pushl_cfi $0 >- pushl_cfi $do_coprocessor_segment_overrun >+ PUSH_XCODE_CFI(do_coprocessor_segment_overrun) > jmp error_code > CFI_ENDPROC > END(coprocessor_segment_overrun) > > ENTRY(invalid_TSS) > RING0_EC_FRAME >- pushl_cfi $do_invalid_TSS >+ PUSH_XCODE_CFI(do_invalid_TSS) > jmp error_code > CFI_ENDPROC > END(invalid_TSS) > > ENTRY(segment_not_present) > RING0_EC_FRAME >- pushl_cfi $do_segment_not_present >+ PUSH_XCODE_CFI(do_segment_not_present) > jmp error_code > CFI_ENDPROC > END(segment_not_present) > > ENTRY(stack_segment) > RING0_EC_FRAME >- pushl_cfi $do_stack_segment >+ PUSH_XCODE_CFI(do_stack_segment) > jmp error_code > CFI_ENDPROC > END(stack_segment) > > ENTRY(alignment_check) > RING0_EC_FRAME >- pushl_cfi $do_alignment_check >+ PUSH_XCODE_CFI(do_alignment_check) > jmp error_code > CFI_ENDPROC > END(alignment_check) >@@ -975,7 +1079,7 @@ END(alignment_check) > ENTRY(divide_error) > RING0_INT_FRAME > pushl_cfi $0 # no error code >- pushl_cfi $do_divide_error >+ PUSH_XCODE_CFI(do_divide_error) > jmp error_code > CFI_ENDPROC > END(divide_error) >@@ -984,7 +1088,7 @@ END(divide_error) > ENTRY(machine_check) > RING0_INT_FRAME > pushl_cfi $0 >- pushl_cfi machine_check_vector >+ PUSH_XVEC_CFI(machine_check_vector) > jmp error_code > CFI_ENDPROC > END(machine_check) >@@ -993,7 +1097,7 @@ END(machine_check) > ENTRY(spurious_interrupt_bug) > RING0_INT_FRAME > pushl_cfi $0 >- pushl_cfi $do_spurious_interrupt_bug >+ PUSH_XCODE_CFI(do_spurious_interrupt_bug) > jmp error_code > 
CFI_ENDPROC > END(spurious_interrupt_bug) >@@ -1223,7 +1327,7 @@ syscall_table_size=(.-sys_call_table) > > ENTRY(page_fault) > RING0_EC_FRAME >- pushl_cfi $do_page_fault >+ PUSH_XCODE_CFI(do_page_fault) > ALIGN > error_code: > /* the function address is in %gs's slot on the stack */ >@@ -1260,9 +1364,11 @@ error_code: > movl $(__USER_DS), %ecx > movl %ecx, %ds > movl %ecx, %es >+#ifndef CONFIG_IPIPE > TRACE_IRQS_OFF >+#endif > movl %esp,%eax # pt_regs pointer >- call *%edi >+ HANDLE_EXCEPTION(edi) > jmp ret_from_exception > CFI_ENDPROC > END(page_fault) >@@ -1302,6 +1408,7 @@ debug_stack_correct: > pushl_cfi $-1 # mark this as an int > SAVE_ALL > TRACE_IRQS_OFF >+ DIVERT_EXCEPTION(do_debug) > xorl %edx,%edx # error code 0 > movl %esp,%eax # pt_regs pointer > call do_debug >@@ -1392,6 +1499,7 @@ ENTRY(int3) > pushl_cfi $-1 # mark this as an int > SAVE_ALL > TRACE_IRQS_OFF >+ DIVERT_EXCEPTION(do_int3) > xorl %edx,%edx # zero error code > movl %esp,%eax # pt_regs pointer > call do_int3 >@@ -1401,7 +1509,7 @@ END(int3) > > ENTRY(general_protection) > RING0_EC_FRAME >- pushl_cfi $do_general_protection >+ PUSH_XCODE_CFI(do_general_protection) > jmp error_code > CFI_ENDPROC > END(general_protection) >diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S >index e3ba417..e088911 100644 >--- a/arch/x86/kernel/entry_64.S >+++ b/arch/x86/kernel/entry_64.S >@@ -48,6 +48,7 @@ > #include <asm/unistd.h> > #include <asm/thread_info.h> > #include <asm/hw_irq.h> >+#include <asm/ipipe_base.h> > #include <asm/page_types.h> > #include <asm/irqflags.h> > #include <asm/paravirt.h> >@@ -61,6 +62,13 @@ > #define __AUDIT_ARCH_LE 0x40000000 > > .code64 >+ >+#ifdef CONFIG_IPIPE >+#define PREEMPT_SCHEDULE_IRQ call __ipipe_preempt_schedule_irq >+#else /* !CONFIG_IPIPE */ >+#define PREEMPT_SCHEDULE_IRQ call preempt_schedule_irq >+#endif /* !CONFIG_IPIPE */ >+ > #ifdef CONFIG_FUNCTION_TRACER > #ifdef CONFIG_DYNAMIC_FTRACE > ENTRY(mcount) >@@ -331,7 +339,10 @@ ENTRY(save_args) > /* > * We entered an interrupt context - irqs are off: > */ >-2: TRACE_IRQS_OFF >+2: >+#ifndef CONFIG_IPIPE >+ TRACE_IRQS_OFF >+#endif > ret > CFI_ENDPROC > END(save_args) >@@ -396,6 +407,7 @@ ENTRY(ret_from_fork) > > pushq_cfi kernel_eflags(%rip) > popfq_cfi # reset kernel eflags >+ ENABLE_INTERRUPTS_HW_COND > > call schedule_tail # rdi: 'prev' task parameter > >@@ -471,6 +483,17 @@ ENTRY(system_call_after_swapgs) > movq %rax,ORIG_RAX-ARGOFFSET(%rsp) > movq %rcx,RIP-ARGOFFSET(%rsp) > CFI_REL_OFFSET rip,RIP-ARGOFFSET >+#ifdef CONFIG_IPIPE >+ pushq %rdi >+ pushq %rax >+ leaq -(ARGOFFSET-16)(%rsp),%rdi # regs for handler >+ call __ipipe_syscall_root_thunk >+ testl %eax, %eax >+ popq %rax >+ popq %rdi >+ js ret_from_sys_call >+ jnz sysret_fastexit >+#endif > GET_THREAD_INFO(%rcx) > testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) > jnz tracesys >@@ -500,6 +523,7 @@ sysret_check: > * sysretq will re-enable interrupts: > */ > TRACE_IRQS_ON >+sysret_fastexit: > movq RIP-ARGOFFSET(%rsp),%rcx > CFI_REGISTER rip,rcx > RESTORE_ARGS 0,-ARG_SKIP,1 >@@ -511,6 +535,8 @@ sysret_check: > /* Handle reschedules */ > /* edx: work, edi: workmask */ > sysret_careful: >+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),%edx >+ jnz ret_from_sys_call_trace > bt $TIF_NEED_RESCHED,%edx > jnc sysret_signal > TRACE_IRQS_ON >@@ -520,6 +546,16 @@ sysret_careful: > popq_cfi %rdi > jmp sysret_check > >+ret_from_sys_call_trace: >+ TRACE_IRQS_ON >+ sti >+ SAVE_REST >+ FIXUP_TOP_OF_STACK %rdi >+ movq %rsp,%rdi >+ LOAD_ARGS ARGOFFSET /* reload args from stack 
in case ptrace changed it */ >+ RESTORE_REST >+ jmp int_ret_from_sys_call >+ > /* Handle a signal */ > sysret_signal: > TRACE_IRQS_ON >@@ -786,7 +822,29 @@ END(interrupt) > CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 > call save_args > PARTIAL_FRAME 0 >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+ pushq %rbp >+ leaq RIP-8(%rdi), %rbp # make interrupted address show up in trace >+ pushq %rdi >+ movq ORIG_RAX(%rdi), %rdi # IRQ number >+ notq %rdi # ...is inverted, fix up >+ call ipipe_trace_begin >+ popq %rdi >+ popq %rbp >+ >+ call \func >+ >+ pushq %rbp >+ pushq %rax >+ movq 8-ARGOFFSET+ORIG_RAX(%rbp), %rdi >+ leaq 8-ARGOFFSET+RIP-8(%rbp), %rbp >+ notq %rdi >+ call ipipe_trace_end >+ popq %rax >+ popq %rbp >+#else > call \func >+#endif > .endm > > /* >@@ -799,9 +857,24 @@ END(interrupt) > */ > .p2align CONFIG_X86_L1_CACHE_SHIFT > common_interrupt: >+#ifdef CONFIG_IPIPE >+ XCPT_FRAME >+ addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ >+ interrupt *ipipe_irq_handler >+ testl %eax, %eax >+ jnz ret_from_intr >+ decl PER_CPU_VAR(irq_count) >+ leaveq >+ CFI_DEF_CFA_REGISTER rsp >+ CFI_ADJUST_CFA_OFFSET -8 >+ testl $3,CS-ARGOFFSET(%rsp) >+ jz restore_args >+ jmp retint_swapgs_notrace >+#else /* !CONFIG_IPIPE */ > XCPT_FRAME > addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ > interrupt do_IRQ >+#endif /* !CONFIG_IPIPE */ > /* 0(%rsp): old_rsp-ARGOFFSET */ > ret_from_intr: > DISABLE_INTERRUPTS(CLBR_NONE) >@@ -811,7 +884,7 @@ ret_from_intr: > CFI_RESTORE rbp > CFI_DEF_CFA_REGISTER rsp > CFI_ADJUST_CFA_OFFSET -8 >-exit_intr: >+ENTRY(exit_intr) > GET_THREAD_INFO(%rcx) > testl $3,CS-ARGOFFSET(%rsp) > je retint_kernel >@@ -831,20 +904,20 @@ retint_check: > jnz retint_careful > > retint_swapgs: /* return to user-space */ >+ TRACE_IRQS_IRETQ > /* > * The iretq could re-enable interrupts: > */ >- DISABLE_INTERRUPTS(CLBR_ANY) >- TRACE_IRQS_IRETQ >+retint_swapgs_notrace: > SWAPGS >+retint_noswapgs: > jmp restore_args > > retint_restore_args: /* return to kernel space */ >- DISABLE_INTERRUPTS(CLBR_ANY) >+ TRACE_IRQS_IRETQ > /* > * The iretq could re-enable interrupts: > */ >- TRACE_IRQS_IRETQ > restore_args: > RESTORE_ARGS 0,8,0 > >@@ -924,7 +997,15 @@ ENTRY(retint_kernel) > jnc retint_restore_args > bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ > jnc retint_restore_args >- call preempt_schedule_irq >+#ifdef CONFIG_IPIPE >+ /* >+ * We may have preempted call_softirq before __do_softirq raised or >+ * after it lowered the preemption counter. 
>+ */ >+ cmpl $0,PER_CPU_VAR(irq_count) >+ jge retint_restore_args >+#endif >+ PREEMPT_SCHEDULE_IRQ > jmp exit_intr > #endif > >@@ -942,11 +1023,26 @@ END(common_interrupt) > ENTRY(\sym) > INTR_FRAME > pushq_cfi $~(\num) >+#ifdef CONFIG_IPIPE >+ interrupt *ipipe_irq_handler >+ testl %eax, %eax >+ jnz ret_from_intr >+ decl PER_CPU_VAR(irq_count) >+ leaveq >+ CFI_DEF_CFA_REGISTER rsp >+ CFI_ADJUST_CFA_OFFSET -8 >+ testl $3,CS-ARGOFFSET(%rsp) >+ jz restore_args >+ jmp retint_swapgs_notrace >+ CFI_ENDPROC >+ .endm >+#else /* !CONFIG_IPIPE */ > interrupt \do_sym > jmp ret_from_intr > CFI_ENDPROC > END(\sym) > .endm >+#endif /* !CONFIG_IPIPE */ > > #ifdef CONFIG_SMP > apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ >@@ -965,7 +1061,11 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ > x86_platform_ipi smp_x86_platform_ipi > > #ifdef CONFIG_SMP >+#ifdef CONFIG_IPIPE >+.irpc idx, "012" >+#else > .irpc idx, "01234567" >+#endif > apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ > invalidate_interrupt\idx smp_invalidate_interrupt > .endr >@@ -1003,7 +1103,7 @@ apicinterrupt IRQ_WORK_VECTOR \ > /* > * Exception entry points. > */ >-.macro zeroentry sym do_sym >+.macro zeroentry sym do_sym ex_code > ENTRY(\sym) > INTR_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1014,13 +1114,28 @@ ENTRY(\sym) > DEFAULT_FRAME 0 > movq %rsp,%rdi /* pt_regs pointer */ > xorl %esi,%esi /* no error code */ >+#ifdef CONFIG_IPIPE >+ movq $\ex_code,%rdx >+ call __ipipe_handle_exception /* handle(regs, error_code, ex_code) */ >+ TRACE_IRQS_OFF >+ testl %eax, %eax >+ jz error_exit >+ movl %ebx,%eax >+ RESTORE_REST >+ DISABLE_INTERRUPTS(CLBR_NONE) >+ testl %eax,%eax >+ jne retint_noswapgs >+ jmp retint_swapgs_notrace >+#else /* !CONFIG_IPIPE */ >+ TRACE_IRQS_OFF > call \do_sym >+#endif /* !CONFIG_IPIPE */ > jmp error_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > >-.macro paranoidzeroentry sym do_sym >+.macro paranoidzeroentry sym do_sym ex_code=0 > ENTRY(\sym) > INTR_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1028,17 +1143,32 @@ ENTRY(\sym) > subq $ORIG_RAX-R15, %rsp > CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 > call save_paranoid >- TRACE_IRQS_OFF > movq %rsp,%rdi /* pt_regs pointer */ >+#ifdef CONFIG_IPIPE >+ .if \ex_code >+ movq $\ex_code,%rsi >+ call __ipipe_divert_exception /* handle(regs, ex_code) */ >+ TRACE_IRQS_OFF >+ testl %eax,%eax >+ jnz 1f >+ movq %rsp,%rdi >+ .endif >+#else >+ TRACE_IRQS_OFF >+#endif > xorl %esi,%esi /* no error code */ > call \do_sym >+#ifdef CONFIG_IPIPE >+ xorl %eax,%eax /* tell paranoid_exit to propagate the exception */ >+1: >+#endif > jmp paranoid_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > > #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) >-.macro paranoidzeroentry_ist sym do_sym ist >+.macro paranoidzeroentry_ist sym do_sym ist ex_code=0 > ENTRY(\sym) > INTR_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1048,16 +1178,29 @@ ENTRY(\sym) > call save_paranoid > TRACE_IRQS_OFF > movq %rsp,%rdi /* pt_regs pointer */ >+#ifdef CONFIG_IPIPE >+ .if \ex_code >+ movq $\ex_code,%rsi >+ call __ipipe_divert_exception /* handle(regs, ex_code) */ >+ testl %eax,%eax >+ jnz 1f >+ movq %rsp,%rdi >+ .endif >+#endif > xorl %esi,%esi /* no error code */ > subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) > call \do_sym > addq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) >+#ifdef CONFIG_IPIPE >+ xorl %eax,%eax /* tell paranoid_exit to propagate the exception */ >+1: >+#endif > jmp paranoid_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > >-.macro errorentry 
sym do_sym >+.macro errorentry sym do_sym ex_code > ENTRY(\sym) > XCPT_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1068,14 +1211,29 @@ ENTRY(\sym) > movq %rsp,%rdi /* pt_regs pointer */ > movq ORIG_RAX(%rsp),%rsi /* get error code */ > movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ >+#ifdef CONFIG_IPIPE >+ movq $\ex_code,%rdx >+ call __ipipe_handle_exception /* handle(regs, error_code, ex_code) */ >+ TRACE_IRQS_OFF >+ testl %eax, %eax >+ jz error_exit >+ movl %ebx,%eax >+ RESTORE_REST >+ DISABLE_INTERRUPTS(CLBR_NONE) >+ testl %eax,%eax >+ jne retint_noswapgs >+ jmp retint_swapgs_notrace >+#else /* !CONFIG_IPIPE */ >+ TRACE_IRQS_OFF > call \do_sym >+#endif /* !CONFIG_IPIPE */ > jmp error_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > > /* error code is on the stack already */ >-.macro paranoiderrorentry sym do_sym >+.macro paranoiderrorentry sym do_sym ex_code=0 > ENTRY(\sym) > XCPT_FRAME > PARAVIRT_ADJUST_EXCEPTION_FRAME >@@ -1085,27 +1243,40 @@ ENTRY(\sym) > DEFAULT_FRAME 0 > TRACE_IRQS_OFF > movq %rsp,%rdi /* pt_regs pointer */ >+#ifdef CONFIG_IPIPE >+ .if \ex_code >+ movq $\ex_code,%rsi >+ call __ipipe_divert_exception /* handle(regs, ex_code) */ >+ testl %eax,%eax >+ jnz 1f >+ movq %rsp,%rdi >+ .endif >+#endif > movq ORIG_RAX(%rsp),%rsi /* get error code */ > movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ > call \do_sym >+#ifdef CONFIG_IPIPE >+ xorl %eax,%eax /* tell paranoid_exit to propagate the exception */ >+1: >+#endif > jmp paranoid_exit /* %ebx: no swapgs flag */ > CFI_ENDPROC > END(\sym) > .endm > >-zeroentry divide_error do_divide_error >-zeroentry overflow do_overflow >-zeroentry bounds do_bounds >-zeroentry invalid_op do_invalid_op >-zeroentry device_not_available do_device_not_available >+zeroentry divide_error do_divide_error ex_do_divide_error >+zeroentry overflow do_overflow ex_do_overflow >+zeroentry bounds do_bounds ex_do_bounds >+zeroentry invalid_op do_invalid_op ex_do_invalid_op >+zeroentry device_not_available do_device_not_available ex_do_device_not_available > paranoiderrorentry double_fault do_double_fault >-zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun >-errorentry invalid_TSS do_invalid_TSS >-errorentry segment_not_present do_segment_not_present >-zeroentry spurious_interrupt_bug do_spurious_interrupt_bug >-zeroentry coprocessor_error do_coprocessor_error >-errorentry alignment_check do_alignment_check >-zeroentry simd_coprocessor_error do_simd_coprocessor_error >+zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun ex_do_coprocessor_segment_overrun >+errorentry invalid_TSS do_invalid_TSS ex_do_invalid_TSS >+errorentry segment_not_present do_segment_not_present ex_do_segment_not_present >+zeroentry spurious_interrupt_bug do_spurious_interrupt_bug ex_do_spurious_interrupt_bug >+zeroentry coprocessor_error do_coprocessor_error ex_do_coprocessor_error >+errorentry alignment_check do_alignment_check ex_do_alignment_check >+zeroentry simd_coprocessor_error do_simd_coprocessor_error ex_do_simd_coprocessor_error > > /* Reload gs selector with exception handling */ > /* edi: new selector */ >@@ -1189,15 +1360,19 @@ ENTRY(call_softirq) > CFI_REL_OFFSET rbp,0 > mov %rsp,%rbp > CFI_DEF_CFA_REGISTER rbp >+ DISABLE_INTERRUPTS_HW_COND > incl PER_CPU_VAR(irq_count) > cmove PER_CPU_VAR(irq_stack_ptr),%rsp >+ ENABLE_INTERRUPTS_HW_COND > push %rbp # backlink for old unwinder > call __do_softirq >+ DISABLE_INTERRUPTS_HW_COND > leaveq > CFI_RESTORE rbp > CFI_DEF_CFA_REGISTER rsp > CFI_ADJUST_CFA_OFFSET -8 > 
decl PER_CPU_VAR(irq_count) >+ ENABLE_INTERRUPTS_HW_COND > ret > CFI_ENDPROC > END(call_softirq) >@@ -1309,16 +1484,16 @@ apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ > */ > .pushsection .kprobes.text, "ax" > >-paranoidzeroentry_ist debug do_debug DEBUG_STACK >-paranoidzeroentry_ist int3 do_int3 DEBUG_STACK >+paranoidzeroentry_ist debug do_debug DEBUG_STACK ex_do_debug >+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK ex_do_int3 > paranoiderrorentry stack_segment do_stack_segment > #ifdef CONFIG_XEN > zeroentry xen_debug do_debug > zeroentry xen_int3 do_int3 > errorentry xen_stack_segment do_stack_segment > #endif >-errorentry general_protection do_general_protection >-errorentry page_fault do_page_fault >+errorentry general_protection do_general_protection ex_do_general_protection >+errorentry page_fault do_page_fault ex_do_page_fault > #ifdef CONFIG_X86_MCE > paranoidzeroentry machine_check *machine_check_vector(%rip) > #endif >@@ -1341,8 +1516,13 @@ ENTRY(paranoid_exit) > DEFAULT_FRAME > DISABLE_INTERRUPTS(CLBR_NONE) > TRACE_IRQS_OFF >+paranoid_notrace: > testl %ebx,%ebx /* swapgs needed? */ > jnz paranoid_restore >+#ifdef CONFIG_IPIPE >+ testl %eax,%eax >+ jnz paranoid_swapgs >+#endif > testl $3,CS(%rsp) > jnz paranoid_userspace > paranoid_swapgs: >@@ -1413,7 +1593,6 @@ ENTRY(error_entry) > error_swapgs: > SWAPGS > error_sti: >- TRACE_IRQS_OFF > ret > > /* >diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c >index 2dfd315..3253978 100644 >--- a/arch/x86/kernel/i8253.c >+++ b/arch/x86/kernel/i8253.c >@@ -11,6 +11,7 @@ > #include <linux/delay.h> > #include <linux/init.h> > #include <linux/io.h> >+#include <linux/ipipe.h> > > #include <asm/i8253.h> > #include <asm/hpet.h> >@@ -130,6 +131,12 @@ static cycle_t pit_read(struct clocksource *cs) > int count; > u32 jifs; > >+#ifdef CONFIG_IPIPE >+ if (!__ipipe_pipeline_head_p(ipipe_root_domain)) >+ /* We don't really own the PIT. 
*/ >+ return (cycle_t)(jiffies * LATCH) + (LATCH - 1) - old_count; >+#endif /* CONFIG_IPIPE */ >+ > raw_spin_lock_irqsave(&i8253_lock, flags); > /* > * Although our caller may have the read side of xtime_lock, >diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c >index 20757cb..269c884 100644 >--- a/arch/x86/kernel/i8259.c >+++ b/arch/x86/kernel/i8259.c >@@ -32,7 +32,7 @@ > static void init_8259A(int auto_eoi); > > static int i8259A_auto_eoi; >-DEFINE_RAW_SPINLOCK(i8259A_lock); >+IPIPE_DEFINE_RAW_SPINLOCK(i8259A_lock); > > /* > * 8259A PIC functions to handle ISA devices: >@@ -60,6 +60,7 @@ static void mask_8259A_irq(unsigned int irq) > unsigned long flags; > > raw_spin_lock_irqsave(&i8259A_lock, flags); >+ ipipe_irq_lock(irq); > cached_irq_mask |= mask; > if (irq & 8) > outb(cached_slave_mask, PIC_SLAVE_IMR); >@@ -75,15 +76,18 @@ static void disable_8259A_irq(struct irq_data *data) > > static void unmask_8259A_irq(unsigned int irq) > { >- unsigned int mask = ~(1 << irq); >+ unsigned int mask = (1 << irq); > unsigned long flags; > > raw_spin_lock_irqsave(&i8259A_lock, flags); >- cached_irq_mask &= mask; >- if (irq & 8) >- outb(cached_slave_mask, PIC_SLAVE_IMR); >- else >- outb(cached_master_mask, PIC_MASTER_IMR); >+ if (cached_irq_mask & mask) { >+ cached_irq_mask &= ~mask; >+ if (irq & 8) >+ outb(cached_slave_mask, PIC_SLAVE_IMR); >+ else >+ outb(cached_master_mask, PIC_MASTER_IMR); >+ ipipe_irq_unlock(irq); >+ } > raw_spin_unlock_irqrestore(&i8259A_lock, flags); > } > >@@ -170,6 +174,18 @@ static void mask_and_ack_8259A(struct irq_data *data) > */ > if (cached_irq_mask & irqmask) > goto spurious_8259A_irq; >+#ifdef CONFIG_IPIPE >+ if (irq == 0) { >+ /* >+ * Fast timer ack -- don't mask (unless supposedly >+ * spurious). We trace outb's in order to detect >+ * broken hardware inducing large delays. >+ */ >+ outb(0x60, PIC_MASTER_CMD); /* Specific EOI to master. */ >+ raw_spin_unlock_irqrestore(&i8259A_lock, flags); >+ return; >+ } >+#endif /* CONFIG_IPIPE */ > cached_irq_mask |= irqmask; > > handle_real_irq: >diff --git a/arch/x86/kernel/ipipe.c b/arch/x86/kernel/ipipe.c >new file mode 100644 >index 0000000..d1d4544 >--- /dev/null >+++ b/arch/x86/kernel/ipipe.c >@@ -0,0 +1,862 @@ >+/* -*- linux-c -*- >+ * linux/arch/x86/kernel/ipipe.c >+ * >+ * Copyright (C) 2002-2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ * >+ * Architecture-dependent I-PIPE support for x86. 
>+ */ >+ >+#include <linux/kernel.h> >+#include <linux/smp.h> >+#include <linux/module.h> >+#include <linux/sched.h> >+#include <linux/interrupt.h> >+#include <linux/slab.h> >+#include <linux/irq.h> >+#include <linux/clockchips.h> >+#include <linux/kprobes.h> >+#include <asm/unistd.h> >+#include <asm/processor.h> >+#include <asm/system.h> >+#include <asm/atomic.h> >+#include <asm/hw_irq.h> >+#include <asm/irq.h> >+#include <asm/desc.h> >+#include <asm/io.h> >+#ifdef CONFIG_X86_LOCAL_APIC >+#include <asm/tlbflush.h> >+#include <asm/fixmap.h> >+#include <asm/bitops.h> >+#include <asm/mpspec.h> >+#ifdef CONFIG_X86_IO_APIC >+#include <asm/io_apic.h> >+#endif /* CONFIG_X86_IO_APIC */ >+#include <asm/apic.h> >+#endif /* CONFIG_X86_LOCAL_APIC */ >+#include <asm/traps.h> >+ >+void *ipipe_irq_handler = __ipipe_handle_irq; >+EXPORT_SYMBOL(ipipe_irq_handler); >+EXPORT_SYMBOL(io_apic_irqs); >+EXPORT_PER_CPU_SYMBOL(__ipipe_tick_regs); >+__attribute__((regparm(3))) void do_notify_resume(struct pt_regs *, void *, __u32); >+EXPORT_SYMBOL(do_notify_resume); >+extern void *sys_call_table; >+EXPORT_SYMBOL(sys_call_table); >+#ifdef CONFIG_X86_32 >+extern void ret_from_intr(void); >+EXPORT_SYMBOL(ret_from_intr); >+extern spinlock_t i8259A_lock; >+extern struct desc_struct idt_table[]; >+#else >+extern ipipe_spinlock_t i8259A_lock; >+extern gate_desc idt_table[]; >+#endif >+EXPORT_PER_CPU_SYMBOL(vector_irq); >+EXPORT_SYMBOL(idt_table); >+EXPORT_SYMBOL(i8259A_lock); >+EXPORT_SYMBOL(__ipipe_sync_stage); >+EXPORT_SYMBOL(kill_proc_info); >+EXPORT_SYMBOL(find_task_by_pid_ns); >+ >+int __ipipe_tick_irq = 0; /* Legacy timer */ >+ >+DEFINE_PER_CPU(struct pt_regs, __ipipe_tick_regs); >+ >+DEFINE_PER_CPU(unsigned long, __ipipe_cr2); >+EXPORT_PER_CPU_SYMBOL_GPL(__ipipe_cr2); >+ >+/* >+ * ipipe_trigger_irq() -- Push the interrupt at front of the pipeline >+ * just like if it has been actually received from a hw source. Also >+ * works for virtual interrupts. 
>+ */ >+int ipipe_trigger_irq(unsigned int irq) >+{ >+ struct pt_regs regs; >+ unsigned long flags; >+ >+#ifdef CONFIG_IPIPE_DEBUG >+ if (irq >= IPIPE_NR_IRQS) >+ return -EINVAL; >+ if (ipipe_virtual_irq_p(irq)) { >+ if (!test_bit(irq - IPIPE_VIRQ_BASE, >+ &__ipipe_virtual_irq_map)) >+ return -EINVAL; >+ } else if (irq_to_desc(irq) == NULL) >+ return -EINVAL; >+#endif >+ local_irq_save_hw(flags); >+ regs.flags = flags; >+ regs.orig_ax = irq; /* Positive value - IRQ won't be acked */ >+ regs.cs = __KERNEL_CS; >+ __ipipe_handle_irq(®s); >+ local_irq_restore_hw(flags); >+ >+ return 1; >+} >+ >+int ipipe_get_sysinfo(struct ipipe_sysinfo *info) >+{ >+ info->ncpus = num_online_cpus(); >+ info->cpufreq = ipipe_cpu_freq(); >+ info->archdep.tmirq = __ipipe_tick_irq; >+#ifdef CONFIG_X86_TSC >+ info->archdep.tmfreq = ipipe_cpu_freq(); >+#else /* !CONFIG_X86_TSC */ >+ info->archdep.tmfreq = CLOCK_TICK_RATE; >+#endif /* CONFIG_X86_TSC */ >+ >+ return 0; >+} >+ >+#ifdef CONFIG_X86_UV >+asmlinkage void uv_bau_message_interrupt(struct pt_regs *regs); >+#endif >+#ifdef CONFIG_X86_MCE_THRESHOLD >+asmlinkage void smp_threshold_interrupt(void); >+#endif >+#ifdef CONFIG_X86_NEW_MCE >+asmlinkage void smp_mce_self_interrupt(void); >+#endif >+ >+static void __ipipe_ack_irq(unsigned irq, struct irq_desc *desc) >+{ >+ desc->ipipe_ack(irq, desc); >+} >+ >+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq) >+{ >+ irq_to_desc(irq)->status &= ~IRQ_DISABLED; >+} >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+ >+static void __ipipe_noack_apic(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+static void __ipipe_ack_apic(unsigned irq, struct irq_desc *desc) >+{ >+ __ack_APIC_irq(); >+} >+ >+static void __ipipe_null_handler(unsigned irq, void *cookie) >+{ >+} >+ >+#endif /* CONFIG_X86_LOCAL_APIC */ >+ >+/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw >+ interrupts are off, and secondary CPUs are still lost in space. */ >+ >+void __init __ipipe_enable_pipeline(void) >+{ >+ unsigned int vector, irq; >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+ >+ /* Map the APIC system vectors. 
*/ >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR), >+ (ipipe_irq_handler_t)&smp_apic_timer_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR), >+ (ipipe_irq_handler_t)&smp_spurious_interrupt, >+ NULL, >+ &__ipipe_noack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(ERROR_APIC_VECTOR), >+ (ipipe_irq_handler_t)&smp_error_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0), >+ &__ipipe_null_handler, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1), >+ &__ipipe_null_handler, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2), >+ &__ipipe_null_handler, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3), >+ &__ipipe_null_handler, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+#ifdef CONFIG_X86_THERMAL_VECTOR >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(THERMAL_APIC_VECTOR), >+ (ipipe_irq_handler_t)&smp_thermal_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_X86_THERMAL_VECTOR */ >+ >+#ifdef CONFIG_X86_MCE_THRESHOLD >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(THRESHOLD_APIC_VECTOR), >+ (ipipe_irq_handler_t)&smp_threshold_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_X86_MCE_THRESHOLD */ >+ >+#ifdef CONFIG_X86_NEW_MCE >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(MCE_SELF_VECTOR), >+ (ipipe_irq_handler_t)&smp_mce_self_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_X86_MCE_THRESHOLD */ >+ >+#ifdef CONFIG_X86_UV >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(UV_BAU_MESSAGE), >+ (ipipe_irq_handler_t)&uv_bau_message_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_X86_UV */ >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(X86_PLATFORM_IPI_VECTOR), >+ (ipipe_irq_handler_t)&smp_x86_platform_ipi, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+#ifdef CONFIG_IRQ_WORK >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(IRQ_WORK_VECTOR), >+ (ipipe_irq_handler_t)&irq_work_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#endif /* CONFIG_IRQ_WORK */ >+ >+#endif /* CONFIG_X86_LOCAL_APIC */ >+ >+#ifdef CONFIG_SMP >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(RESCHEDULE_VECTOR), >+ (ipipe_irq_handler_t)&smp_reschedule_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ for (vector = INVALIDATE_TLB_VECTOR_START; >+ vector <= INVALIDATE_TLB_VECTOR_END; ++vector) >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(vector), >+ (ipipe_irq_handler_t)&smp_invalidate_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR), >+ (ipipe_irq_handler_t)&smp_call_function_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ 
ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(CALL_FUNCTION_SINGLE_VECTOR), >+ (ipipe_irq_handler_t)&smp_call_function_single_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ IRQ_MOVE_CLEANUP_VECTOR, >+ (ipipe_irq_handler_t)&smp_irq_move_cleanup_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ ipipe_apic_vector_irq(REBOOT_VECTOR), >+ (ipipe_irq_handler_t)&smp_reboot_interrupt, >+ NULL, >+ &__ipipe_ack_apic, >+ IPIPE_STDROOT_MASK); >+#else >+ (void)vector; >+#endif /* CONFIG_SMP */ >+ >+ /* Finally, virtualize the remaining ISA and IO-APIC >+ * interrupts. Interrupts which have already been virtualized >+ * will just beget a silent -EPERM error since >+ * IPIPE_SYSTEM_MASK has been passed for them, that's ok. */ >+ >+ for (irq = 0; irq < NR_IRQS; irq++) >+ /* >+ * Fails for IPIPE_CRITICAL_IPI and IRQ_MOVE_CLEANUP_VECTOR, >+ * but that's ok. >+ */ >+ ipipe_virtualize_irq(ipipe_root_domain, >+ irq, >+ (ipipe_irq_handler_t)&do_IRQ, >+ NULL, >+ &__ipipe_ack_irq, >+ IPIPE_STDROOT_MASK); >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+ /* Eventually allow these vectors to be reprogrammed. */ >+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &= ~IPIPE_SYSTEM_MASK; >+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &= ~IPIPE_SYSTEM_MASK; >+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &= ~IPIPE_SYSTEM_MASK; >+ ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &= ~IPIPE_SYSTEM_MASK; >+#endif /* CONFIG_X86_LOCAL_APIC */ >+} >+ >+#ifdef CONFIG_SMP >+ >+cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask) >+{ >+ cpumask_t oldmask; >+ >+ if (irq_to_desc(irq)->chip->set_affinity == NULL) >+ return CPU_MASK_NONE; >+ >+ if (cpus_empty(cpumask)) >+ return CPU_MASK_NONE; /* Return mask value -- no change. */ >+ >+ cpus_and(cpumask, cpumask, cpu_online_map); >+ if (cpus_empty(cpumask)) >+ return CPU_MASK_NONE; /* Error -- bad mask value or non-routable IRQ. */ >+ >+ cpumask_copy(&oldmask, irq_to_desc(irq)->affinity); >+ irq_to_desc(irq)->chip->set_affinity(irq, &cpumask); >+ >+ return oldmask; >+} >+ >+int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask) >+{ >+ unsigned long flags; >+ int self; >+ >+ local_irq_save_hw(flags); >+ >+ self = cpu_isset(ipipe_processor_id(),cpumask); >+ cpu_clear(ipipe_processor_id(), cpumask); >+ >+ if (!cpus_empty(cpumask)) >+ apic->send_IPI_mask(&cpumask, ipipe_apic_irq_vector(ipi)); >+ >+ if (self) >+ ipipe_trigger_irq(ipi); >+ >+ local_irq_restore_hw(flags); >+ >+ return 0; >+} >+ >+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd) >+{ >+ ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = &__ipipe_ack_apic; >+ ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync; >+ ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL; >+ /* Immediately handle in the current domain but *never* pass */ >+ ipd->irqs[IPIPE_CRITICAL_IPI].control = >+ IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK; >+} >+ >+#endif /* CONFIG_SMP */ >+ >+static inline void __fixup_if(int s, struct pt_regs *regs) >+{ >+ /* >+ * Have the saved hw state look like the domain stall bit, so >+ * that __ipipe_unstall_iret_root() restores the proper >+ * pipeline state for the root stage upon exit. >+ */ >+ if (s) >+ regs->flags &= ~X86_EFLAGS_IF; >+ else >+ regs->flags |= X86_EFLAGS_IF; >+} >+ >+void __ipipe_halt_root(void) >+{ >+ struct ipipe_percpu_domain_data *p; >+ >+ /* Emulate sti+hlt sequence over the root domain. 
*/ >+ >+ local_irq_disable_hw(); >+ >+ p = ipipe_root_cpudom_ptr(); >+ >+ trace_hardirqs_on(); >+ clear_bit(IPIPE_STALL_FLAG, &p->status); >+ >+ if (unlikely(__ipipe_ipending_p(p))) { >+ __ipipe_sync_pipeline(); >+ local_irq_enable_hw(); >+ } else { >+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF >+ ipipe_trace_end(0x8000000E); >+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ >+ asm volatile("sti; hlt": : :"memory"); >+ } >+} >+ >+static void do_machine_check_vector(struct pt_regs *regs, long error_code) >+{ >+#ifdef CONFIG_X86_MCE >+#ifdef CONFIG_X86_32 >+ extern void (*machine_check_vector)(struct pt_regs *, long error_code); >+ machine_check_vector(regs, error_code); >+#else >+ do_machine_check(regs, error_code); >+#endif >+#endif /* CONFIG_X86_MCE */ >+} >+ >+/* Work around genksyms's issue with over-qualification in decls. */ >+ >+typedef void dotraplinkage __ipipe_exhandler(struct pt_regs *, long); >+ >+typedef __ipipe_exhandler *__ipipe_exptr; >+ >+static __ipipe_exptr __ipipe_std_extable[] = { >+ >+ [ex_do_divide_error] = &do_divide_error, >+ [ex_do_overflow] = &do_overflow, >+ [ex_do_bounds] = &do_bounds, >+ [ex_do_invalid_op] = &do_invalid_op, >+ [ex_do_coprocessor_segment_overrun] = &do_coprocessor_segment_overrun, >+ [ex_do_invalid_TSS] = &do_invalid_TSS, >+ [ex_do_segment_not_present] = &do_segment_not_present, >+ [ex_do_stack_segment] = &do_stack_segment, >+ [ex_do_general_protection] = do_general_protection, >+ [ex_do_page_fault] = (__ipipe_exptr)&do_page_fault, >+ [ex_do_spurious_interrupt_bug] = &do_spurious_interrupt_bug, >+ [ex_do_coprocessor_error] = &do_coprocessor_error, >+ [ex_do_alignment_check] = &do_alignment_check, >+ [ex_machine_check_vector] = &do_machine_check_vector, >+ [ex_do_simd_coprocessor_error] = &do_simd_coprocessor_error, >+ [ex_do_device_not_available] = &do_device_not_available, >+#ifdef CONFIG_X86_32 >+ [ex_do_iret_error] = &do_iret_error, >+#endif >+}; >+ >+#ifdef CONFIG_KGDB >+#include <linux/kgdb.h> >+ >+static int __ipipe_xlate_signo[] = { >+ >+ [ex_do_divide_error] = SIGFPE, >+ [ex_do_debug] = SIGTRAP, >+ [2] = -1, >+ [ex_do_int3] = SIGTRAP, >+ [ex_do_overflow] = SIGSEGV, >+ [ex_do_bounds] = SIGSEGV, >+ [ex_do_invalid_op] = SIGILL, >+ [ex_do_device_not_available] = -1, >+ [8] = -1, >+ [ex_do_coprocessor_segment_overrun] = SIGFPE, >+ [ex_do_invalid_TSS] = SIGSEGV, >+ [ex_do_segment_not_present] = SIGBUS, >+ [ex_do_stack_segment] = SIGBUS, >+ [ex_do_general_protection] = SIGSEGV, >+ [ex_do_page_fault] = SIGSEGV, >+ [ex_do_spurious_interrupt_bug] = -1, >+ [ex_do_coprocessor_error] = -1, >+ [ex_do_alignment_check] = SIGBUS, >+ [ex_machine_check_vector] = -1, >+ [ex_do_simd_coprocessor_error] = -1, >+ [20 ... 31] = -1, >+#ifdef CONFIG_X86_32 >+ [ex_do_iret_error] = SIGSEGV, >+#endif >+}; >+#endif /* CONFIG_KGDB */ >+ >+int __ipipe_handle_exception(struct pt_regs *regs, long error_code, int vector) >+{ >+ bool root_entry = false; >+ unsigned long flags = 0; >+ unsigned long cr2 = 0; >+ >+ if (ipipe_root_domain_p) { >+ root_entry = true; >+ >+ local_save_flags(flags); >+ /* >+ * Replicate hw interrupt state into the virtual mask >+ * before calling the I-pipe event handler over the >+ * root domain. Also required later when calling the >+ * Linux exception handler. 
>+ */ >+ if (irqs_disabled_hw()) >+ local_irq_disable(); >+ } >+#ifdef CONFIG_KGDB >+ /* catch exception KGDB is interested in over non-root domains */ >+ else if (__ipipe_xlate_signo[vector] >= 0 && >+ !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector], >+ error_code, regs)) >+ return 1; >+#endif /* CONFIG_KGDB */ >+ >+ if (vector == ex_do_page_fault) >+ cr2 = native_read_cr2(); >+ >+ if (unlikely(ipipe_trap_notify(vector, regs))) { >+ if (root_entry) >+ local_irq_restore_nosync(flags); >+ return 1; >+ } >+ >+ if (likely(ipipe_root_domain_p)) { >+ /* >+ * If root is not the topmost domain or in case we faulted in >+ * the iret path of x86-32, regs.flags does not match the root >+ * domain state. The fault handler or the low-level return >+ * code may evaluate it. So fix this up, either by the root >+ * state sampled on entry or, if we migrated to root, with the >+ * current state. >+ */ >+ __fixup_if(root_entry ? raw_irqs_disabled_flags(flags) : >+ raw_irqs_disabled(), regs); >+ } else { >+ /* Detect unhandled faults over non-root domains. */ >+ struct ipipe_domain *ipd = ipipe_current_domain; >+ >+ /* Switch to root so that Linux can handle the fault cleanly. */ >+ __ipipe_current_domain = ipipe_root_domain; >+ >+ ipipe_trace_panic_freeze(); >+ >+ /* Always warn about user land and unfixable faults. */ >+ if ((error_code & 4) || !search_exception_tables(instruction_pointer(regs))) { >+ printk(KERN_ERR "BUG: Unhandled exception over domain" >+ " %s at 0x%lx - switching to ROOT\n", >+ ipd->name, instruction_pointer(regs)); >+ dump_stack(); >+ ipipe_trace_panic_dump(); >+#ifdef CONFIG_IPIPE_DEBUG >+ /* Also report fixable ones when debugging is enabled. */ >+ } else { >+ printk(KERN_WARNING "WARNING: Fixable exception over " >+ "domain %s at 0x%lx - switching to ROOT\n", >+ ipd->name, instruction_pointer(regs)); >+ dump_stack(); >+ ipipe_trace_panic_dump(); >+#endif /* CONFIG_IPIPE_DEBUG */ >+ } >+ } >+ >+ if (vector == ex_do_page_fault) >+ write_cr2(cr2); >+ >+ __ipipe_std_extable[vector](regs, error_code); >+ >+ /* >+ * Relevant for 64-bit: Restore root domain state as the low-level >+ * return code will not align it to regs.flags. >+ */ >+ if (root_entry) >+ local_irq_restore_nosync(flags); >+ >+ return 0; >+} >+ >+int __ipipe_divert_exception(struct pt_regs *regs, int vector) >+{ >+ bool root_entry = false; >+ unsigned long flags = 0; >+ >+ if (ipipe_root_domain_p) { >+ root_entry = true; >+ >+ local_save_flags(flags); >+ >+ if (irqs_disabled_hw()) { >+ /* >+ * Same root state handling as in >+ * __ipipe_handle_exception. >+ */ >+ local_irq_disable(); >+ } >+ } >+#ifdef CONFIG_KGDB >+ /* catch int1 and int3 over non-root domains */ >+ else { >+#ifdef CONFIG_X86_32 >+ if (vector != ex_do_device_not_available) >+#endif >+ { >+ unsigned int condition = 0; >+ >+ if (vector == 1) >+ get_debugreg(condition, 6); >+ if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs)) >+ return 1; >+ } >+ } >+#endif /* CONFIG_KGDB */ >+ >+ if (unlikely(ipipe_trap_notify(vector, regs))) { >+ if (root_entry) >+ local_irq_restore_nosync(flags); >+ return 1; >+ } >+ >+ /* see __ipipe_handle_exception */ >+ if (likely(ipipe_root_domain_p)) >+ __fixup_if(root_entry ? raw_irqs_disabled_flags(flags) : >+ raw_irqs_disabled(), regs); >+ /* >+ * No need to restore root state in the 64-bit case, the Linux handler >+ * and the return code will take care of it. 
>+ */ >+ >+ return 0; >+} >+ >+int __ipipe_syscall_root(struct pt_regs *regs) >+{ >+ unsigned long flags; >+ int ret; >+ >+ /* >+ * This routine either returns: >+ * 0 -- if the syscall is to be passed to Linux; >+ * >0 -- if the syscall should not be passed to Linux, and no >+ * tail work should be performed; >+ * <0 -- if the syscall should not be passed to Linux but the >+ * tail work has to be performed (for handling signals etc). >+ */ >+ >+ if (!__ipipe_syscall_watched_p(current, regs->orig_ax) || >+ !__ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL)) >+ return 0; >+ >+ ret = __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL, regs); >+ >+ local_irq_save_hw(flags); >+ >+ if (current->ipipe_flags & PF_EVTRET) { >+ current->ipipe_flags &= ~PF_EVTRET; >+ __ipipe_dispatch_event(IPIPE_EVENT_RETURN, regs); >+ } >+ >+ if (!ipipe_root_domain_p) >+ return 1; >+ >+ /* >+ * If allowed, sync pending VIRQs before _TIF_NEED_RESCHED is >+ * tested. >+ */ >+ if (__ipipe_ipending_p(ipipe_root_cpudom_ptr())) >+ __ipipe_sync_pipeline(); >+ >+ if (!ret) >+ local_irq_restore_hw(flags); >+ >+ return -ret; >+} >+ >+/* >+ * __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic >+ * interrupt protection log is maintained here for each domain. Hw >+ * interrupts are off on entry. >+ */ >+int __ipipe_handle_irq(struct pt_regs *regs) >+{ >+ struct ipipe_domain *this_domain, *next_domain; >+ unsigned int vector = regs->orig_ax, irq; >+ struct list_head *head, *pos; >+ struct pt_regs *tick_regs; >+ int m_ack; >+ >+ if ((long)regs->orig_ax < 0) { >+ vector = ~vector; >+#ifdef CONFIG_X86_LOCAL_APIC >+ if (vector >= FIRST_SYSTEM_VECTOR) >+ irq = ipipe_apic_vector_irq(vector); >+#ifdef CONFIG_SMP >+ else if (vector == IRQ_MOVE_CLEANUP_VECTOR) >+ irq = vector; >+#endif /* CONFIG_SMP */ >+ else >+#endif /* CONFIG_X86_LOCAL_APIC */ >+ irq = __get_cpu_var(vector_irq)[vector]; >+ m_ack = 0; >+ } else { /* This is a self-triggered one. */ >+ irq = vector; >+ m_ack = 1; >+ } >+ >+ this_domain = ipipe_current_domain; >+ >+ if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control)) >+ head = &this_domain->p_link; >+ else { >+ head = __ipipe_pipeline.next; >+ next_domain = list_entry(head, struct ipipe_domain, p_link); >+ if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) { >+ if (!m_ack && next_domain->irqs[irq].acknowledge) >+ next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq)); >+ __ipipe_dispatch_wired(next_domain, irq); >+ goto finalize_nosync; >+ } >+ } >+ >+ /* Ack the interrupt. */ >+ >+ pos = head; >+ >+ while (pos != &__ipipe_pipeline) { >+ next_domain = list_entry(pos, struct ipipe_domain, p_link); >+ if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) { >+ __ipipe_set_irq_pending(next_domain, irq); >+ if (!m_ack && next_domain->irqs[irq].acknowledge) { >+ next_domain->irqs[irq].acknowledge(irq, irq_to_desc(irq)); >+ m_ack = 1; >+ } >+ } >+ if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control)) >+ break; >+ pos = next_domain->p_link.next; >+ } >+ >+ /* >+ * If the interrupt preempted the head domain, then do not >+ * even try to walk the pipeline, unless an interrupt is >+ * pending for it. >+ */ >+ if (test_bit(IPIPE_AHEAD_FLAG, &this_domain->flags) && >+ !__ipipe_ipending_p(ipipe_head_cpudom_ptr())) >+ goto finalize_nosync; >+ >+ /* >+ * Now walk the pipeline, yielding control to the highest >+ * priority domain that has pending interrupt(s) or >+ * immediately to the current domain if the interrupt has been >+ * marked as 'sticky'. 
This search does not go beyond the >+ * current domain in the pipeline. >+ */ >+ >+ __ipipe_walk_pipeline(head); >+ >+finalize_nosync: >+ >+ /* >+ * Given our deferred dispatching model for regular IRQs, we >+ * only record CPU regs for the last timer interrupt, so that >+ * the timer handler charges CPU times properly. It is assumed >+ * that other interrupt handlers don't actually care for such >+ * information. >+ */ >+ >+ if (irq == __ipipe_tick_irq) { >+ tick_regs = &__raw_get_cpu_var(__ipipe_tick_regs); >+ tick_regs->flags = regs->flags; >+ tick_regs->cs = regs->cs; >+ tick_regs->ip = regs->ip; >+ tick_regs->bp = regs->bp; >+#ifdef CONFIG_X86_64 >+ tick_regs->ss = regs->ss; >+ tick_regs->sp = regs->sp; >+#endif >+ if (!__ipipe_root_domain_p) >+ tick_regs->flags &= ~X86_EFLAGS_IF; >+ } >+ >+ if (user_mode(regs) && (current->ipipe_flags & PF_EVTRET) != 0) { >+ current->ipipe_flags &= ~PF_EVTRET; >+ __ipipe_dispatch_event(IPIPE_EVENT_RETURN, regs); >+ } >+ >+ if (!__ipipe_root_domain_p || >+ test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) >+ return 0; >+ >+ return 1; >+} >+ >+int __ipipe_check_tickdev(const char *devname) >+{ >+ int ret = 1; >+ >+#ifdef CONFIG_X86_LOCAL_APIC >+ if (!strcmp(devname, "lapic")) { >+ ret = __ipipe_check_lapic(); >+ if (ret) >+ return ret; >+ printk(KERN_INFO "I-pipe: cannot use LAPIC as a tick device\n"); >+ if (cpu_has_amd_erratum(amd_erratum_400)) >+ printk(KERN_INFO "I-pipe: disable C1E power state in your BIOS\n"); >+ } >+#endif >+ >+ return ret; >+} >+ >+EXPORT_SYMBOL(__ipipe_tick_irq); >+ >+EXPORT_SYMBOL_GPL(irq_to_desc); >+struct task_struct *__switch_to(struct task_struct *prev_p, >+ struct task_struct *next_p); >+EXPORT_SYMBOL_GPL(__switch_to); >+EXPORT_SYMBOL_GPL(show_stack); >+ >+EXPORT_PER_CPU_SYMBOL_GPL(init_tss); >+#ifdef CONFIG_SMP >+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate); >+#endif /* CONFIG_SMP */ >+ >+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) >+EXPORT_SYMBOL(tasklist_lock); >+#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */ >+ >+#if defined(CONFIG_CC_STACKPROTECTOR) && defined(CONFIG_X86_64) >+EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); >+#endif >+ >+EXPORT_SYMBOL(__ipipe_halt_root); >diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c >index 83ec017..f4339e2 100644 >--- a/arch/x86/kernel/irq.c >+++ b/arch/x86/kernel/irq.c >@@ -38,7 +38,7 @@ void ack_bad_irq(unsigned int irq) > * completely. 
> * But only ack when the APIC is enabled -AK > */ >- ack_APIC_irq(); >+ __ack_APIC_irq(); > } > > #define irq_stats(x) (&per_cpu(irq_stat, x)) >@@ -231,11 +231,12 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) > unsigned vector = ~regs->orig_ax; > unsigned irq; > >+ irq = __get_cpu_var(vector_irq)[vector]; >+ __ipipe_move_root_irq(irq); >+ > exit_idle(); > irq_enter(); > >- irq = __get_cpu_var(vector_irq)[vector]; >- > if (!handle_irq(irq, regs)) { > ack_APIC_irq(); > >diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c >index c752e97..699fb0e 100644 >--- a/arch/x86/kernel/irqinit.c >+++ b/arch/x86/kernel/irqinit.c >@@ -167,11 +167,13 @@ static void __init smp_intr_init(void) > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); >+#ifndef CONFIG_IPIPE > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); > alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); >+#endif > > /* IPI for generic function call */ > alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); >@@ -186,6 +188,10 @@ static void __init smp_intr_init(void) > > /* IPI used for rebooting/stopping */ > alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); >+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32) >+ /* IPI for critical lock */ >+ alloc_intr_gate(IPIPE_CRITICAL_VECTOR, ipipe_ipiX); >+#endif > #endif > #endif /* CONFIG_SMP */ > } >@@ -220,6 +226,12 @@ static void __init apic_intr_init(void) > alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt); > # endif > >+#if defined(CONFIG_IPIPE) && defined(CONFIG_X86_32) >+ alloc_intr_gate(IPIPE_SERVICE_VECTOR0, ipipe_ipi0); >+ alloc_intr_gate(IPIPE_SERVICE_VECTOR1, ipipe_ipi1); >+ alloc_intr_gate(IPIPE_SERVICE_VECTOR2, ipipe_ipi2); >+ alloc_intr_gate(IPIPE_SERVICE_VECTOR3, ipipe_ipi3); >+#endif > #endif > } > >diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c >index 57d1868..efd1e6d 100644 >--- a/arch/x86/kernel/process.c >+++ b/arch/x86/kernel/process.c >@@ -41,6 +41,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) > if (ret) > return ret; > fpu_copy(&dst->thread.fpu, &src->thread.fpu); >+ } else { >+#ifdef CONFIG_IPIPE >+ /* unconditionally allocate, RT domain may need it */ >+ memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); >+ ret = fpu_alloc(&dst->thread.fpu); >+ if (ret) >+ return ret; >+#endif > } > return 0; > } >@@ -62,6 +70,10 @@ void arch_task_cache_init(void) > kmem_cache_create("task_xstate", xstate_size, > __alignof__(union thread_xstate), > SLAB_PANIC | SLAB_NOTRACK, NULL); >+#ifdef CONFIG_IPIPE >+ memset(¤t->thread.fpu, 0, sizeof(current->thread.fpu)); >+ fpu_alloc(¤t->thread.fpu); >+#endif > } > > /* >@@ -398,7 +410,7 @@ EXPORT_SYMBOL(default_idle); > > void stop_this_cpu(void *dummy) > { >- local_irq_disable(); >+ local_irq_disable_hw(); > /* > * Remove this CPU: > */ >@@ -591,6 +603,11 @@ static void c1e_idle(void) > > void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) > { >+#ifdef CONFIG_IPIPE >+#define default_to_mwait force_mwait >+#else >+#define default_to_mwait 1 >+#endif > #ifdef CONFIG_SMP > if (pm_idle == poll_idle && smp_num_siblings > 1) 
{ > printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," >@@ -600,7 +617,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) > if (pm_idle) > return; > >- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { >+ if (default_to_mwait && cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { > /* > * One CPU supports mwait => All CPUs supports mwait > */ >diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c >index 96586c3..c5225d1 100644 >--- a/arch/x86/kernel/process_32.c >+++ b/arch/x86/kernel/process_32.c >@@ -256,10 +256,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) > regs->cs = __USER_CS; > regs->ip = new_ip; > regs->sp = new_sp; >+#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. */ > /* > * Free the old FP and other extended state > */ > free_thread_xstate(current); >+#endif > } > EXPORT_SYMBOL_GPL(start_thread); > >@@ -296,7 +298,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) > { > struct thread_struct *prev = &prev_p->thread, > *next = &next_p->thread; >- int cpu = smp_processor_id(); >+ int cpu = raw_smp_processor_id(); > struct tss_struct *tss = &per_cpu(init_tss, cpu); > bool preload_fpu; > >diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c >index b3d7a3a..079eae3 100644 >--- a/arch/x86/kernel/process_64.c >+++ b/arch/x86/kernel/process_64.c >@@ -58,6 +58,8 @@ asmlinkage extern void ret_from_fork(void); > DEFINE_PER_CPU(unsigned long, old_rsp); > static DEFINE_PER_CPU(unsigned char, is_idle); > >+asmlinkage extern void thread_return(void); >+ > static ATOMIC_NOTIFIER_HEAD(idle_notifier); > > void idle_notifier_register(struct notifier_block *n) >@@ -277,6 +279,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, > p->thread.sp = (unsigned long) childregs; > p->thread.sp0 = (unsigned long) (childregs+1); > p->thread.usersp = me->thread.usersp; >+ p->thread.rip = (unsigned long) thread_return; > > set_tsk_thread_flag(p, TIF_FORK); > >@@ -343,10 +346,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, > regs->ss = _ss; > regs->flags = X86_EFLAGS_IF; > set_fs(USER_DS); >+#ifndef CONFIG_IPIPE /* Lazily handled, init_fpu() will reset the state. 
*/ > /* > * Free the old FP and other extended state > */ > free_thread_xstate(current); >+#endif > } > > void >@@ -379,7 +384,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) > { > struct thread_struct *prev = &prev_p->thread; > struct thread_struct *next = &next_p->thread; >- int cpu = smp_processor_id(); >+ int cpu = raw_smp_processor_id(); > struct tss_struct *tss = &per_cpu(init_tss, cpu); > unsigned fsindex, gsindex; > bool preload_fpu; >diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c >index 45892dc..23ff994 100644 >--- a/arch/x86/kernel/ptrace.c >+++ b/arch/x86/kernel/ptrace.c >@@ -19,6 +19,7 @@ > #include <linux/audit.h> > #include <linux/seccomp.h> > #include <linux/signal.h> >+#include <linux/unistd.h> > #include <linux/perf_event.h> > #include <linux/hw_breakpoint.h> > >@@ -1396,6 +1397,10 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) > { > bool step; > >+#ifdef CONFIG_IPIPE >+ if (syscall_get_nr(current, regs) >= NR_syscalls) >+ return; >+#endif > if (unlikely(current->audit_context)) > audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); > >diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c >index 513deac..8021cf4 100644 >--- a/arch/x86/kernel/smp.c >+++ b/arch/x86/kernel/smp.c >@@ -188,9 +188,9 @@ static void native_stop_other_cpus(int wait) > udelay(1); > } > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > disable_local_APIC(); >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > /* >diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c >index 083e99d..12b612d 100644 >--- a/arch/x86/kernel/smpboot.c >+++ b/arch/x86/kernel/smpboot.c >@@ -292,7 +292,7 @@ static void __cpuinit smp_callin(void) > /* > * Activate a secondary processor. > */ >-notrace static void __cpuinit start_secondary(void *unused) >+static void __cpuinit start_secondary(void *unused) > { > /* > * Don't put *anything* before cpu_init(), SMP booting is too >@@ -897,7 +897,7 @@ do_rest: > int __cpuinit native_cpu_up(unsigned int cpu) > { > int apicid = apic->cpu_present_to_apicid(cpu); >- unsigned long flags; >+ unsigned long flags, _flags; > int err; > > WARN_ON(irqs_disabled()); >@@ -936,9 +936,9 @@ int __cpuinit native_cpu_up(unsigned int cpu) > * Check TSC synchronization with the AP (keep irqs disabled > * while doing so): > */ >- local_irq_save(flags); >+ local_irq_save_full(flags, _flags); > check_tsc_sync_source(cpu); >- local_irq_restore(flags); >+ local_irq_restore_full(flags, _flags); > > while (!cpu_online(cpu)) { > cpu_relax(); >diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c >index fb5cc5e..6491f29 100644 >--- a/arch/x86/kernel/time.c >+++ b/arch/x86/kernel/time.c >@@ -60,6 +60,8 @@ EXPORT_SYMBOL(profile_pc); > */ > static irqreturn_t timer_interrupt(int irq, void *dev_id) > { >+ unsigned long flags; >+ > /* Keep nmi watchdog up to date */ > inc_irq_stat(irq0_irqs); > >@@ -70,11 +72,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) > * manually to deassert NMI lines for the watchdog if run > * on an 82489DX-based system. > */ >- raw_spin_lock(&i8259A_lock); >+ raw_spin_lock_irqsave(&i8259A_lock, flags); > outb(0x0c, PIC_MASTER_OCW3); > /* Ack the IRQ; AEOI will end it automatically. 
*/
> inb(PIC_MASTER_POLL);
>- raw_spin_unlock(&i8259A_lock);
>+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
> }
>
> global_clock_event->event_handler(global_clock_event);
>diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
>index cb838ca..65d6a8c 100644
>--- a/arch/x86/kernel/traps.c
>+++ b/arch/x86/kernel/traps.c
>@@ -733,6 +733,7 @@ void __math_state_restore(void)
> */
> if (unlikely(restore_fpu_checking(tsk))) {
> stts();
>+ local_irq_enable_hw_cond();
> force_sig(SIGSEGV, tsk);
> return;
> }
>@@ -755,6 +756,7 @@ asmlinkage void math_state_restore(void)
> {
> struct thread_info *thread = current_thread_info();
> struct task_struct *tsk = thread->task;
>+ unsigned long flags;
>
> if (!tsk_used_math(tsk)) {
> local_irq_enable();
>@@ -771,9 +773,11 @@ asmlinkage void math_state_restore(void)
> local_irq_disable();
> }
>
>+ local_irq_save_hw_cond(flags);
> clts(); /* Allow maths ops (or we recurse) */
>
> __math_state_restore();
>+ local_irq_restore_hw_cond(flags);
> }
> EXPORT_SYMBOL_GPL(math_state_restore);
>
>diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
>index 0c40d8b..23f74eb 100644
>--- a/arch/x86/kernel/tsc.c
>+++ b/arch/x86/kernel/tsc.c
>@@ -741,7 +741,7 @@ core_initcall(cpufreq_tsc);
>
> /* clocksource code */
>
>-static struct clocksource clocksource_tsc;
>+struct clocksource clocksource_tsc;
>
> /*
> * We compare the TSC to the cycle_last value in the clocksource
>@@ -787,7 +787,7 @@ static void resume_tsc(struct clocksource *cs)
> clocksource_tsc.cycle_last = 0;
> }
>
>-static struct clocksource clocksource_tsc = {
>+struct clocksource clocksource_tsc = {
> .name = "tsc",
> .rating = 300,
> .read = read_tsc,
>diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
>index 61fb985..d3dfdcb 100644
>--- a/arch/x86/kernel/vm86_32.c
>+++ b/arch/x86/kernel/vm86_32.c
>@@ -148,12 +148,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
> do_exit(SIGSEGV);
> }
>
>+ local_irq_disable_hw_cond();
> tss = &per_cpu(init_tss, get_cpu());
> current->thread.sp0 = current->thread.saved_sp0;
> current->thread.sysenter_cs = __KERNEL_CS;
> load_sp0(tss, &current->thread);
> current->thread.saved_sp0 = 0;
> put_cpu();
>+ local_irq_enable_hw_cond();
>
> ret = KVM86->regs32;
>
>@@ -323,12 +325,14 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
> tsk->thread.saved_fs = info->regs32->fs;
> tsk->thread.saved_gs = get_user_gs(info->regs32);
>
>+ local_irq_disable_hw_cond();
> tss = &per_cpu(init_tss, get_cpu());
> tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
> if (cpu_has_sep)
> tsk->thread.sysenter_cs = 0;
> load_sp0(tss, &tsk->thread);
> put_cpu();
>+ local_irq_enable_hw_cond();
>
> tsk->thread.screen_bitmap = info->screen_bitmap;
> if (info->flags & VM86_SCREEN_BITMAP)
>diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
>index dcbb28c..2ca4253 100644
>--- a/arch/x86/kernel/vsyscall_64.c
>+++ b/arch/x86/kernel/vsyscall_64.c
>@@ -32,6 +32,7 @@
> #include <linux/cpu.h>
> #include <linux/smp.h>
> #include <linux/notifier.h>
>+#include <linux/ipipe_tickdev.h>
>
> #include <asm/vsyscall.h>
> #include <asm/pgtable.h>
>@@ -90,6 +91,9 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
> vsyscall_gtod_data.wall_to_monotonic = *wtm;
> vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
> write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
>+
>+ if (clock == &clocksource_tsc)
>+ ipipe_update_hostrt(wall_time, clock);
> }
>
> /* RED-PEN may want to readd
seq locking, but then the variable should be >diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c >index c9f2d9b..78d780a 100644 >--- a/arch/x86/lib/mmx_32.c >+++ b/arch/x86/lib/mmx_32.c >@@ -30,7 +30,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) > void *p; > int i; > >- if (unlikely(in_interrupt())) >+ if (unlikely(!ipipe_root_domain_p || in_interrupt())) > return __memcpy(to, from, len); > > p = to; >diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S >index bf9a7d5..98609ae 100644 >--- a/arch/x86/lib/thunk_64.S >+++ b/arch/x86/lib/thunk_64.S >@@ -65,6 +65,10 @@ > thunk lockdep_sys_exit_thunk,lockdep_sys_exit > #endif > >+#ifdef CONFIG_IPIPE >+ thunk_retrax __ipipe_syscall_root_thunk,__ipipe_syscall_root >+#endif >+ > /* SAVE_ARGS below is used only for the .cfi directives it contains. */ > CFI_STARTPROC > SAVE_ARGS >diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c >index 7d90ceb..b54d7ff 100644 >--- a/arch/x86/mm/fault.c >+++ b/arch/x86/mm/fault.c >@@ -353,9 +353,9 @@ void vmalloc_sync_all(void) > * > * This assumes no large pages in there. > */ >-static noinline __kprobes int vmalloc_fault(unsigned long address) >+static inline int vmalloc_sync_one(pgd_t *pgd, unsigned long address) > { >- pgd_t *pgd, *pgd_ref; >+ pgd_t *pgd_ref; > pud_t *pud, *pud_ref; > pmd_t *pmd, *pmd_ref; > pte_t *pte, *pte_ref; >@@ -371,7 +371,6 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) > * happen within a race in page table update. In the later > * case just flush: > */ >- pgd = pgd_offset(current->active_mm, address); > pgd_ref = pgd_offset_k(address); > if (pgd_none(*pgd_ref)) > return -1; >@@ -419,6 +418,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) > return 0; > } > >+static noinline __kprobes int vmalloc_fault(unsigned long address) >+{ >+ pgd_t *pgd = pgd_offset(current->active_mm, address); >+ return vmalloc_sync_one(pgd, address); >+} >+ > static const char errata93_warning[] = > KERN_ERR > "******* Your BIOS seems to not contain a fix for K8 errata #93\n" >@@ -967,6 +972,9 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) > /* Get the faulting address: */ > address = read_cr2(); > >+ if (!__ipipe_pipeline_head_p(ipipe_root_domain)) >+ local_irq_enable_hw_cond(); >+ > /* > * Detect and handle instructions that would cause a page fault for > * both a tracked kernel page and a userspace page. 
>@@ -1158,3 +1166,43 @@ good_area: > > up_read(&mm->mmap_sem); > } >+ >+#ifdef CONFIG_IPIPE >+void __ipipe_pin_range_globally(unsigned long start, unsigned long end) >+{ >+#ifdef CONFIG_X86_32 >+ unsigned long next, addr = start; >+ >+ do { >+ unsigned long flags; >+ struct page *page; >+ >+ next = pgd_addr_end(addr, end); >+ spin_lock_irqsave(&pgd_lock, flags); >+ list_for_each_entry(page, &pgd_list, lru) >+ vmalloc_sync_one(page_address(page), addr); >+ spin_unlock_irqrestore(&pgd_lock, flags); >+ >+ } while (addr = next, addr != end); >+#else >+ unsigned long next, addr = start; >+ int ret = 0; >+ >+ do { >+ struct page *page; >+ >+ next = pgd_addr_end(addr, end); >+ spin_lock(&pgd_lock); >+ list_for_each_entry(page, &pgd_list, lru) { >+ pgd_t *pgd; >+ pgd = (pgd_t *)page_address(page) + pgd_index(addr); >+ ret = vmalloc_sync_one(pgd, addr); >+ if (ret) >+ break; >+ } >+ spin_unlock(&pgd_lock); >+ addr = next; >+ } while (!ret && addr != end); >+#endif >+} >+#endif /* CONFIG_IPIPE */ >diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c >index 6acc724..072f1f2 100644 >--- a/arch/x86/mm/tlb.c >+++ b/arch/x86/mm/tlb.c >@@ -61,11 +61,15 @@ static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset); > */ > void leave_mm(int cpu) > { >+ unsigned long flags; >+ > if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) > BUG(); >+ local_irq_save_hw_cond(flags); > cpumask_clear_cpu(cpu, > mm_cpumask(percpu_read(cpu_tlbstate.active_mm))); > load_cr3(swapper_pg_dir); >+ local_irq_restore_hw_cond(flags); > } > EXPORT_SYMBOL_GPL(leave_mm); > >@@ -196,6 +200,9 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, > apic->send_IPI_mask(to_cpumask(f->flush_cpumask), > INVALIDATE_TLB_VECTOR_START + sender); > >+#ifdef CONFIG_IPIPE >+ WARN_ON_ONCE(irqs_disabled_hw()); >+#endif > while (!cpumask_empty(to_cpumask(f->flush_cpumask))) > cpu_relax(); > } >diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c >index 834842a..86f2406 100644 >--- a/drivers/pci/htirq.c >+++ b/drivers/pci/htirq.c >@@ -20,7 +20,7 @@ > * With multiple simultaneous hypertransport irq devices it might pay > * to make this more fine grained. But start with simple, stupid, and correct. 
> */ >-static DEFINE_SPINLOCK(ht_irq_lock); >+static IPIPE_DEFINE_SPINLOCK(ht_irq_lock); > > struct ht_irq_cfg { > struct pci_dev *dev; >diff --git a/fs/exec.c b/fs/exec.c >index c62efcb..a24406e 100644 >--- a/fs/exec.c >+++ b/fs/exec.c >@@ -771,6 +771,7 @@ static int exec_mmap(struct mm_struct *mm) > { > struct task_struct *tsk; > struct mm_struct * old_mm, *active_mm; >+ unsigned long flags; > > /* Notify parent that we're no longer interested in the old VM */ > tsk = current; >@@ -794,12 +795,14 @@ static int exec_mmap(struct mm_struct *mm) > task_lock(tsk); > active_mm = tsk->active_mm; > tsk->mm = mm; >+ ipipe_mm_switch_protect(flags); > tsk->active_mm = mm; > activate_mm(active_mm, mm); > if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { > atomic_dec(&old_mm->oom_disable_count); > atomic_inc(&tsk->mm->oom_disable_count); > } >+ ipipe_mm_switch_unprotect(flags); > task_unlock(tsk); > arch_pick_mmap_layout(mm); > if (old_mm) { >diff --git a/fs/proc/array.c b/fs/proc/array.c >index fff6572..fc6bd22 100644 >--- a/fs/proc/array.c >+++ b/fs/proc/array.c >@@ -142,6 +142,10 @@ static const char *task_state_array[] = { > "x (dead)", /* 64 */ > "K (wakekill)", /* 128 */ > "W (waking)", /* 256 */ >+#ifdef CONFIG_IPIPE >+ "A (atomic switch)", /* 512 */ >+ "N (wakeup disabled)", /* 1024 */ >+#endif > }; > > static inline const char *get_task_state(struct task_struct *tsk) >diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h >index e994197..4914a26 100644 >--- a/include/asm-generic/atomic.h >+++ b/include/asm-generic/atomic.h >@@ -58,11 +58,11 @@ static inline int atomic_add_return(int i, atomic_t *v) > unsigned long flags; > int temp; > >- raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */ >+ local_irq_save_hw(flags); /* Don't trace it in an irqsoff handler */ > temp = v->counter; > temp += i; > v->counter = temp; >- raw_local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > return temp; > } >@@ -79,11 +79,11 @@ static inline int atomic_sub_return(int i, atomic_t *v) > unsigned long flags; > int temp; > >- raw_local_irq_save(flags); /* Don't trace it in an irqsoff handler */ >+ local_irq_save_hw(flags); > temp = v->counter; > temp -= i; > v->counter = temp; >- raw_local_irq_restore(flags); >+ local_irq_restore_hw(flags); > > return temp; > } >@@ -145,9 +145,9 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr) > unsigned long flags; > > mask = ~mask; >- raw_local_irq_save(flags); /* Don't trace it in a irqsoff handler */ >+ local_irq_save_hw(flags); > *addr &= mask; >- raw_local_irq_restore(flags); >+ local_irq_restore_hw(flags); > } > > /* Assume that atomic operations are already serializing */ >diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h >index ecc44a8..5caf6e9 100644 >--- a/include/asm-generic/bitops/atomic.h >+++ b/include/asm-generic/bitops/atomic.h >@@ -21,20 +21,20 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; > * this is the substitute */ > #define _atomic_spin_lock_irqsave(l,f) do { \ > arch_spinlock_t *s = ATOMIC_HASH(l); \ >- local_irq_save(f); \ >+ local_irq_save_hw(f); \ > arch_spin_lock(s); \ > } while(0) > > #define _atomic_spin_unlock_irqrestore(l,f) do { \ > arch_spinlock_t *s = ATOMIC_HASH(l); \ > arch_spin_unlock(s); \ >- local_irq_restore(f); \ >+ local_irq_restore_hw(f); \ > } while(0) > > > #else >-# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0) >-# define 
_atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0) >+# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save_hw(f); } while (0) >+# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore_hw(f); } while (0) > #endif > > /* >diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h >index 2533fdd..41865db 100644 >--- a/include/asm-generic/cmpxchg-local.h >+++ b/include/asm-generic/cmpxchg-local.h >@@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, > if (size == 8 && sizeof(unsigned long) != 8) > wrong_size_cmpxchg(ptr); > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > switch (size) { > case 1: prev = *(u8 *)ptr; > if (prev == old) >@@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr, > default: > wrong_size_cmpxchg(ptr); > } >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > return prev; > } > >@@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_generic(volatile void *ptr, > u64 prev; > unsigned long flags; > >- local_irq_save(flags); >+ local_irq_save_hw(flags); > prev = *(u64 *)ptr; > if (prev == old) > *(u64 *)ptr = new; >- local_irq_restore(flags); >+ local_irq_restore_hw(flags); > return prev; > } > >diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h >index d17784e..d2a6f27 100644 >--- a/include/asm-generic/percpu.h >+++ b/include/asm-generic/percpu.h >@@ -67,6 +67,20 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; > > #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) > #define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var))) >+#ifdef CONFIG_IPIPE >+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) >+extern int __ipipe_check_percpu_access(void); >+#define __ipipe_local_cpu_offset \ >+ ({ \ >+ WARN_ON_ONCE(__ipipe_check_percpu_access()); \ >+ __my_cpu_offset; \ >+ }) >+#else >+#define __ipipe_local_cpu_offset __my_cpu_offset >+#endif >+#define __ipipe_get_cpu_var(var) \ >+ (*SHIFT_PERCPU_PTR(&(var), __ipipe_local_cpu_offset)) >+#endif /* CONFIG_IPIPE */ > > #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA > extern void setup_per_cpu_areas(void); >@@ -82,6 +96,7 @@ extern void setup_per_cpu_areas(void); > #define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) > #define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) > #define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) >+#define __ipipe_get_cpu_var(var) __raw_get_cpu_var(var) > #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) > #define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) > >diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h >index 32f9fd6..0fcf21b 100644 >--- a/include/linux/hardirq.h >+++ b/include/linux/hardirq.h >@@ -217,6 +217,7 @@ extern void irq_exit(void); > > #define nmi_enter() \ > do { \ >+ ipipe_nmi_enter(); \ > ftrace_nmi_enter(); \ > BUG_ON(in_nmi()); \ > add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ >@@ -233,6 +234,7 @@ extern void irq_exit(void); > BUG_ON(!in_nmi()); \ > sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ > ftrace_nmi_exit(); \ >+ ipipe_nmi_exit(); \ > } while (0) > > #endif /* LINUX_HARDIRQ_H */ >diff --git a/include/linux/ipipe.h b/include/linux/ipipe.h >new file mode 100644 >index 0000000..7c205c0 >--- /dev/null >+++ b/include/linux/ipipe.h >@@ -0,0 +1,744 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe.h >+ * >+ * Copyright (C) 2002-2007 Philippe Gerum. 
>+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __LINUX_IPIPE_H >+#define __LINUX_IPIPE_H >+ >+#include <linux/spinlock.h> >+#include <linux/cache.h> >+#include <linux/percpu.h> >+#include <linux/mutex.h> >+#include <linux/linkage.h> >+#include <linux/ipipe_base.h> >+#include <asm/ipipe.h> >+#include <asm/bug.h> >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+ >+#include <linux/cpumask.h> >+#include <asm/system.h> >+ >+static inline int ipipe_disable_context_check(int cpu) >+{ >+ return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0); >+} >+ >+static inline void ipipe_restore_context_check(int cpu, int old_state) >+{ >+ per_cpu(ipipe_percpu_context_check, cpu) = old_state; >+} >+ >+static inline void ipipe_context_check_off(void) >+{ >+ int cpu; >+ for_each_online_cpu(cpu) >+ per_cpu(ipipe_percpu_context_check, cpu) = 0; >+} >+ >+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+static inline int ipipe_disable_context_check(int cpu) >+{ >+ return 0; >+} >+ >+static inline void ipipe_restore_context_check(int cpu, int old_state) { } >+ >+static inline void ipipe_context_check_off(void) { } >+ >+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+#ifdef CONFIG_IPIPE_DEBUG_INTERNAL >+#define IPIPE_WARN(c) WARN_ON(c) >+#define IPIPE_WARN_ONCE(c) WARN_ON_ONCE(c) >+#else >+#define IPIPE_WARN(c) do { (void)(c); } while (0) >+#define IPIPE_WARN_ONCE(c) do { (void)(c); } while (0) >+#endif >+ >+#ifdef CONFIG_IPIPE >+ >+#define IPIPE_VERSION_STRING IPIPE_ARCH_STRING >+#define IPIPE_RELEASE_NUMBER ((IPIPE_MAJOR_NUMBER << 16) | \ >+ (IPIPE_MINOR_NUMBER << 8) | \ >+ (IPIPE_PATCH_NUMBER)) >+ >+#ifndef BROKEN_BUILTIN_RETURN_ADDRESS >+#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0)) >+#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1)) >+#endif /* !BUILTIN_RETURN_ADDRESS */ >+ >+#define IPIPE_ROOT_PRIO 100 >+#define IPIPE_ROOT_ID 0 >+#define IPIPE_ROOT_NPTDKEYS 4 /* Must be <= BITS_PER_LONG */ >+ >+#define IPIPE_RESET_TIMER 0x1 >+#define IPIPE_GRAB_TIMER 0x2 >+ >+/* Global domain flags */ >+#define IPIPE_SPRINTK_FLAG 0 /* Synchronous printk() allowed */ >+#define IPIPE_AHEAD_FLAG 1 /* Domain always heads the pipeline */ >+ >+/* Interrupt control bits */ >+#define IPIPE_HANDLE_FLAG 0 >+#define IPIPE_PASS_FLAG 1 >+#define IPIPE_ENABLE_FLAG 2 >+#define IPIPE_DYNAMIC_FLAG IPIPE_HANDLE_FLAG >+#define IPIPE_STICKY_FLAG 3 >+#define IPIPE_SYSTEM_FLAG 4 >+#define IPIPE_LOCK_FLAG 5 >+#define IPIPE_WIRED_FLAG 6 >+#define IPIPE_EXCLUSIVE_FLAG 7 >+ >+#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) >+#define IPIPE_PASS_MASK (1 << IPIPE_PASS_FLAG) >+#define IPIPE_ENABLE_MASK (1 << IPIPE_ENABLE_FLAG) >+#define IPIPE_DYNAMIC_MASK IPIPE_HANDLE_MASK >+#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) >+#define IPIPE_SYSTEM_MASK 
(1 << IPIPE_SYSTEM_FLAG) >+#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) >+#define IPIPE_WIRED_MASK (1 << IPIPE_WIRED_FLAG) >+#define IPIPE_EXCLUSIVE_MASK (1 << IPIPE_EXCLUSIVE_FLAG) >+ >+#define IPIPE_DEFAULT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK) >+#define IPIPE_STDROOT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK) >+ >+#define IPIPE_EVENT_SELF 0x80000000 >+ >+#define IPIPE_NR_CPUS NR_CPUS >+ >+/* This accessor assumes hw IRQs are off on SMP; allows assignment. */ >+#define __ipipe_current_domain __ipipe_get_cpu_var(ipipe_percpu_domain) >+/* This read-only accessor makes sure that hw IRQs are off on SMP. */ >+#define ipipe_current_domain \ >+ ({ \ >+ struct ipipe_domain *__ipd__; \ >+ unsigned long __flags__; \ >+ local_irq_save_hw_smp(__flags__); \ >+ __ipd__ = __ipipe_current_domain; \ >+ local_irq_restore_hw_smp(__flags__); \ >+ __ipd__; \ >+ }) >+ >+#define ipipe_virtual_irq_p(irq) ((irq) >= IPIPE_VIRQ_BASE && \ >+ (irq) < IPIPE_NR_IRQS) >+ >+#define IPIPE_SAME_HANDLER ((ipipe_irq_handler_t)(-1)) >+ >+struct irq_desc; >+ >+typedef void (*ipipe_irq_ackfn_t)(unsigned irq, struct irq_desc *desc); >+ >+typedef int (*ipipe_event_handler_t)(unsigned event, >+ struct ipipe_domain *from, >+ void *data); >+struct ipipe_domain { >+ >+ int slot; /* Slot number in percpu domain data array. */ >+ struct list_head p_link; /* Link in pipeline */ >+ ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */ >+ unsigned long long evself; /* Self-monitored event bits. */ >+ >+ struct ipipe_irqdesc { >+ unsigned long control; >+ ipipe_irq_ackfn_t acknowledge; >+ ipipe_irq_handler_t handler; >+ void *cookie; >+ } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; >+ >+ int priority; >+ void *pdd; >+ unsigned long flags; >+ unsigned domid; >+ const char *name; >+ struct mutex mutex; >+}; >+ >+#define IPIPE_HEAD_PRIORITY (-1) /* For domains always heading the pipeline */ >+ >+struct ipipe_domain_attr { >+ >+ unsigned domid; /* Domain identifier -- Magic value set by caller */ >+ const char *name; /* Domain name -- Warning: won't be dup'ed! 
*/ >+ int priority; /* Priority in interrupt pipeline */ >+ void (*entry) (void); /* Domain entry point */ >+ void *pdd; /* Per-domain (opaque) data pointer */ >+}; >+ >+#define __ipipe_irq_cookie(ipd, irq) (ipd)->irqs[irq].cookie >+#define __ipipe_irq_handler(ipd, irq) (ipd)->irqs[irq].handler >+#define __ipipe_cpudata_irq_hits(ipd, cpu, irq) ipipe_percpudom(ipd, irqall, cpu)[irq] >+ >+extern unsigned __ipipe_printk_virq; >+ >+extern unsigned long __ipipe_virtual_irq_map; >+ >+extern struct list_head __ipipe_pipeline; >+ >+extern int __ipipe_event_monitors[]; >+ >+/* Private interface */ >+ >+void ipipe_init_early(void); >+ >+void ipipe_init(void); >+ >+#ifdef CONFIG_PROC_FS >+void ipipe_init_proc(void); >+ >+#ifdef CONFIG_IPIPE_TRACE >+void __ipipe_init_tracer(void); >+#else /* !CONFIG_IPIPE_TRACE */ >+#define __ipipe_init_tracer() do { } while(0) >+#endif /* CONFIG_IPIPE_TRACE */ >+ >+#else /* !CONFIG_PROC_FS */ >+#define ipipe_init_proc() do { } while(0) >+#endif /* CONFIG_PROC_FS */ >+ >+void __ipipe_init_stage(struct ipipe_domain *ipd); >+ >+void __ipipe_cleanup_domain(struct ipipe_domain *ipd); >+ >+void __ipipe_add_domain_proc(struct ipipe_domain *ipd); >+ >+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd); >+ >+void __ipipe_flush_printk(unsigned irq, void *cookie); >+ >+void __ipipe_walk_pipeline(struct list_head *pos); >+ >+void __ipipe_pend_irq(unsigned irq, struct list_head *head); >+ >+int __ipipe_dispatch_event(unsigned event, void *data); >+ >+void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq); >+ >+void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq); >+ >+void __ipipe_sync_stage(void); >+ >+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq); >+ >+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq); >+ >+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq); >+ >+void __ipipe_pin_range_globally(unsigned long start, unsigned long end); >+ >+/* Must be called hw IRQs off. */ >+static inline void ipipe_irq_lock(unsigned irq) >+{ >+ __ipipe_lock_irq(__ipipe_current_domain, ipipe_processor_id(), irq); >+} >+ >+/* Must be called hw IRQs off. */ >+static inline void ipipe_irq_unlock(unsigned irq) >+{ >+ __ipipe_unlock_irq(__ipipe_current_domain, irq); >+} >+ >+#ifndef __ipipe_sync_pipeline >+#define __ipipe_sync_pipeline() __ipipe_sync_stage() >+#endif >+ >+#ifndef __ipipe_do_root_xirq >+#define __ipipe_do_root_xirq(ipd, irq) \ >+ (ipd)->irqs[irq].handler(irq, (ipd)->irqs[irq].cookie) >+#endif >+ >+#ifndef __ipipe_check_root_resched >+#ifdef CONFIG_PREEMPT >+#define __ipipe_check_root_resched() \ >+ (preempt_count() == 0 && need_resched()) >+#else >+#define __ipipe_check_root_resched() 0 >+#endif >+#endif >+ >+#ifndef __ipipe_run_irqtail >+#define __ipipe_run_irqtail(irq) do { } while(0) >+#endif >+ >+#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next) >+ >+#define __ipipe_ipending_p(p) ((p)->irqpend_himap != 0) >+ >+/* >+ * Keep the following as a macro, so that client code could check for >+ * the support of the invariant pipeline head optimization. 
>+ */ >+#define __ipipe_pipeline_head() \ >+ list_entry(__ipipe_pipeline.next, struct ipipe_domain, p_link) >+ >+#define local_irq_enable_hw_cond() local_irq_enable_hw() >+#define local_irq_disable_hw_cond() local_irq_disable_hw() >+#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags) >+#define local_irq_restore_hw_cond(flags) local_irq_restore_hw(flags) >+ >+#ifdef CONFIG_SMP >+cpumask_t __ipipe_set_irq_affinity(unsigned irq, cpumask_t cpumask); >+int __ipipe_send_ipi(unsigned ipi, cpumask_t cpumask); >+#define local_irq_save_hw_smp(flags) local_irq_save_hw(flags) >+#define local_irq_restore_hw_smp(flags) local_irq_restore_hw(flags) >+#else /* !CONFIG_SMP */ >+#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0) >+#define local_irq_restore_hw_smp(flags) do { } while(0) >+#endif /* CONFIG_SMP */ >+ >+#define local_irq_save_full(vflags, rflags) \ >+ do { \ >+ local_irq_save(vflags); \ >+ local_irq_save_hw(rflags); \ >+ } while(0) >+ >+#define local_irq_restore_full(vflags, rflags) \ >+ do { \ >+ local_irq_restore_hw(rflags); \ >+ local_irq_restore(vflags); \ >+ } while(0) >+ >+static inline void __local_irq_restore_nosync(unsigned long x) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_root_cpudom_ptr(); >+ >+ if (raw_irqs_disabled_flags(x)) { >+ set_bit(IPIPE_STALL_FLAG, &p->status); >+ trace_hardirqs_off(); >+ } else { >+ trace_hardirqs_on(); >+ clear_bit(IPIPE_STALL_FLAG, &p->status); >+ } >+} >+ >+static inline void local_irq_restore_nosync(unsigned long x) >+{ >+ unsigned long flags; >+ local_irq_save_hw_smp(flags); >+ __local_irq_restore_nosync(x); >+ local_irq_restore_hw_smp(flags); >+} >+ >+#define __ipipe_root_domain_p (__ipipe_current_domain == ipipe_root_domain) >+#define ipipe_root_domain_p (ipipe_current_domain == ipipe_root_domain) >+ >+/* This has to be called with hw IRQs off. 
*/ >+#define __ipipe_head_domain_p __ipipe_pipeline_head_p(__ipipe_current_domain) >+ >+static inline int __ipipe_event_monitored_p(int ev) >+{ >+ if (__ipipe_event_monitors[ev] > 0) >+ return 1; >+ >+ return (ipipe_current_domain->evself & (1LL << ev)) != 0; >+} >+ >+#define ipipe_notifier_enabled_p(p) \ >+ (((p)->ipipe_flags) & PF_EVNOTIFY) >+ >+#define ipipe_sigwake_notify(p) \ >+ do { \ >+ if (ipipe_notifier_enabled_p(p) && \ >+ __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE, p); \ >+ } while (0) >+ >+#define ipipe_exit_notify(p) \ >+ do { \ >+ if (ipipe_notifier_enabled_p(p) && \ >+ __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_EXIT, p); \ >+ } while (0) >+ >+#define ipipe_setsched_notify(p) \ >+ do { \ >+ if (ipipe_notifier_enabled_p(p) && \ >+ __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_SETSCHED, p); \ >+ } while (0) >+ >+#define ipipe_schedule_notify(prev, next) \ >+do { \ >+ if ((ipipe_notifier_enabled_p(next) || \ >+ ipipe_notifier_enabled_p(prev)) && \ >+ __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE, next); \ >+} while (0) >+ >+#define ipipe_trap_notify(ex, regs) \ >+ ({ \ >+ unsigned long __flags__; \ >+ int __ret__ = 0; \ >+ local_irq_save_hw_smp(__flags__); \ >+ if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \ >+ ipipe_notifier_enabled_p(current)) && \ >+ __ipipe_event_monitored_p(ex)) { \ >+ local_irq_restore_hw_smp(__flags__); \ >+ __ret__ = __ipipe_dispatch_event(ex, regs); \ >+ } else \ >+ local_irq_restore_hw_smp(__flags__); \ >+ __ret__; \ >+ }) >+ >+#define ipipe_init_notify(p) \ >+ do { \ >+ if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_INIT, p); \ >+ } while (0) >+ >+#define ipipe_cleanup_notify(mm) \ >+ do { \ >+ if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP)) \ >+ __ipipe_dispatch_event(IPIPE_EVENT_CLEANUP, mm); \ >+ } while (0) >+ >+/* Public interface */ >+ >+int ipipe_register_domain(struct ipipe_domain *ipd, >+ struct ipipe_domain_attr *attr); >+ >+int ipipe_unregister_domain(struct ipipe_domain *ipd); >+ >+void ipipe_suspend_domain(void); >+ >+int ipipe_virtualize_irq(struct ipipe_domain *ipd, >+ unsigned irq, >+ ipipe_irq_handler_t handler, >+ void *cookie, >+ ipipe_irq_ackfn_t acknowledge, >+ unsigned modemask); >+ >+int ipipe_control_irq(struct ipipe_domain *ipd, >+ unsigned int irq, >+ unsigned clrmask, >+ unsigned setmask); >+ >+unsigned ipipe_alloc_virq(void); >+ >+int ipipe_free_virq(unsigned virq); >+ >+int ipipe_trigger_irq(unsigned irq); >+ >+static inline void __ipipe_propagate_irq(unsigned irq) >+{ >+ struct list_head *next = __ipipe_current_domain->p_link.next; >+ if (next == &ipipe_root.p_link) { >+ /* Fast path: root must handle all interrupts. 
*/ >+ __ipipe_set_irq_pending(&ipipe_root, irq); >+ return; >+ } >+ __ipipe_pend_irq(irq, next); >+} >+ >+static inline void __ipipe_schedule_irq(unsigned irq) >+{ >+ __ipipe_pend_irq(irq, &__ipipe_current_domain->p_link); >+} >+ >+static inline void __ipipe_schedule_irq_head(unsigned irq) >+{ >+ __ipipe_set_irq_pending(__ipipe_pipeline_head(), irq); >+} >+ >+static inline void __ipipe_schedule_irq_root(unsigned irq) >+{ >+ __ipipe_set_irq_pending(&ipipe_root, irq); >+} >+ >+static inline void ipipe_propagate_irq(unsigned irq) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ __ipipe_propagate_irq(irq); >+ local_irq_restore_hw(flags); >+} >+ >+static inline void ipipe_schedule_irq(unsigned irq) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ __ipipe_schedule_irq(irq); >+ local_irq_restore_hw(flags); >+} >+ >+static inline void ipipe_schedule_irq_head(unsigned irq) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ __ipipe_schedule_irq_head(irq); >+ local_irq_restore_hw(flags); >+} >+ >+static inline void ipipe_schedule_irq_root(unsigned irq) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ __ipipe_schedule_irq_root(irq); >+ local_irq_restore_hw(flags); >+} >+ >+void ipipe_stall_pipeline_from(struct ipipe_domain *ipd); >+ >+unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd); >+ >+unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd); >+ >+static inline void ipipe_unstall_pipeline_from(struct ipipe_domain *ipd) >+{ >+ ipipe_test_and_unstall_pipeline_from(ipd); >+} >+ >+void ipipe_restore_pipeline_from(struct ipipe_domain *ipd, >+ unsigned long x); >+ >+static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd) >+{ >+ return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); >+} >+ >+static inline void ipipe_stall_pipeline_head(void) >+{ >+ local_irq_disable_hw(); >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status)); >+} >+ >+static inline unsigned long ipipe_test_and_stall_pipeline_head(void) >+{ >+ local_irq_disable_hw(); >+ return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status)); >+} >+ >+void ipipe_unstall_pipeline_head(void); >+ >+void __ipipe_restore_pipeline_head(unsigned long x); >+ >+static inline void ipipe_restore_pipeline_head(unsigned long x) >+{ >+#ifdef CONFIG_IPIPE_DEBUG >+ if (WARN_ON_ONCE(!irqs_disabled_hw())) >+ local_irq_disable_hw(); >+#endif >+ if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_head_cpudom_var(status))) & 1) >+ __ipipe_restore_pipeline_head(x); >+} >+ >+#define ipipe_unstall_pipeline() \ >+ ipipe_unstall_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_test_and_unstall_pipeline() \ >+ ipipe_test_and_unstall_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_test_pipeline() \ >+ ipipe_test_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_test_and_stall_pipeline() \ >+ ipipe_test_and_stall_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_stall_pipeline() \ >+ ipipe_stall_pipeline_from(ipipe_current_domain) >+ >+#define ipipe_restore_pipeline(x) \ >+ ipipe_restore_pipeline_from(ipipe_current_domain, (x)) >+ >+void ipipe_init_attr(struct ipipe_domain_attr *attr); >+ >+int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); >+ >+void __ipipe_do_critical_sync(unsigned irq, void *cookie); >+ >+unsigned long ipipe_critical_enter(void (*syncfn) (void)); >+ >+void ipipe_critical_exit(unsigned long flags); >+ >+void ipipe_prepare_panic(void); >+ >+static inline void ipipe_set_printk_sync(struct 
ipipe_domain *ipd) >+{ >+ set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); >+} >+ >+static inline void ipipe_set_printk_async(struct ipipe_domain *ipd) >+{ >+ clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); >+} >+ >+static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd) >+{ >+ /* Must be called hw interrupts off. */ >+ __set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); >+} >+ >+static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd) >+{ >+ /* Must be called hw interrupts off. */ >+ __clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); >+} >+ >+static inline int ipipe_test_foreign_stack(void) >+{ >+ /* Must be called hw interrupts off. */ >+ return test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)); >+} >+ >+#ifndef ipipe_safe_current >+#define ipipe_safe_current() \ >+({ \ >+ struct task_struct *p; \ >+ unsigned long flags; \ >+ local_irq_save_hw_smp(flags); \ >+ p = ipipe_test_foreign_stack() ? &init_task : current; \ >+ local_irq_restore_hw_smp(flags); \ >+ p; \ >+}) >+#endif >+ >+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, >+ unsigned event, >+ ipipe_event_handler_t handler); >+ >+cpumask_t ipipe_set_irq_affinity(unsigned irq, >+ cpumask_t cpumask); >+ >+int ipipe_send_ipi(unsigned ipi, >+ cpumask_t cpumask); >+ >+int ipipe_setscheduler_root(struct task_struct *p, >+ int policy, >+ int prio); >+ >+int ipipe_reenter_root(struct task_struct *prev, >+ int policy, >+ int prio); >+ >+int ipipe_alloc_ptdkey(void); >+ >+int ipipe_free_ptdkey(int key); >+ >+int ipipe_set_ptd(int key, >+ void *value); >+ >+void *ipipe_get_ptd(int key); >+ >+int ipipe_disable_ondemand_mappings(struct task_struct *tsk); >+ >+static inline void ipipe_nmi_enter(void) >+{ >+ int cpu = ipipe_processor_id(); >+ >+ per_cpu(ipipe_nmi_saved_root, cpu) = ipipe_root_cpudom_var(status); >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+ per_cpu(ipipe_saved_context_check_state, cpu) = >+ ipipe_disable_context_check(cpu); >+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ >+} >+ >+static inline void ipipe_nmi_exit(void) >+{ >+ int cpu = ipipe_processor_id(); >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+ ipipe_restore_context_check >+ (cpu, per_cpu(ipipe_saved_context_check_state, cpu)); >+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+ if (!test_bit(IPIPE_STALL_FLAG, &per_cpu(ipipe_nmi_saved_root, cpu))) >+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); >+} >+ >+#define ipipe_enable_notifier(p) \ >+ do { \ >+ (p)->ipipe_flags |= PF_EVNOTIFY; \ >+ } while (0) >+ >+#define ipipe_disable_notifier(p) \ >+ do { \ >+ (p)->ipipe_flags &= ~(PF_EVNOTIFY|PF_EVTRET); \ >+ } while (0) >+ >+/* hw IRQs off. 
*/ >+#define ipipe_return_notify(p) \ >+ do { \ >+ if (ipipe_notifier_enabled_p(p) && \ >+ __ipipe_event_monitored_p(IPIPE_EVENT_RETURN)) \ >+ (p)->ipipe_flags |= PF_EVTRET; \ >+ } while (0) >+ >+#define ipipe_clear_flags(p) do { (p)->ipipe_flags = 0; } while (0) >+ >+#else /* !CONFIG_IPIPE */ >+ >+#define ipipe_init_early() do { } while(0) >+#define ipipe_init() do { } while(0) >+#define ipipe_suspend_domain() do { } while(0) >+#define ipipe_sigwake_notify(p) do { } while(0) >+#define ipipe_setsched_notify(p) do { } while(0) >+#define ipipe_init_notify(p) do { } while(0) >+#define ipipe_exit_notify(p) do { } while(0) >+#define ipipe_cleanup_notify(mm) do { } while(0) >+#define ipipe_trap_notify(t,r) 0 >+#define ipipe_init_proc() do { } while(0) >+ >+static inline void __ipipe_pin_range_globally(unsigned long start, >+ unsigned long end) >+{ >+} >+ >+static inline int ipipe_test_foreign_stack(void) >+{ >+ return 0; >+} >+ >+#define local_irq_enable_hw_cond() do { } while(0) >+#define local_irq_disable_hw_cond() do { } while(0) >+#define local_irq_save_hw_cond(flags) do { (void)(flags); } while(0) >+#define local_irq_restore_hw_cond(flags) do { } while(0) >+#define local_irq_save_hw_smp(flags) do { (void)(flags); } while(0) >+#define local_irq_restore_hw_smp(flags) do { } while(0) >+ >+#define ipipe_irq_lock(irq) do { } while(0) >+#define ipipe_irq_unlock(irq) do { } while(0) >+ >+#define __ipipe_root_domain_p 1 >+#define ipipe_root_domain_p 1 >+#define ipipe_safe_current() current >+#define ipipe_processor_id() smp_processor_id() >+#define ipipe_clear_flags(p) do { } while (0) >+ >+#define ipipe_nmi_enter() do { } while (0) >+#define ipipe_nmi_exit() do { } while (0) >+ >+#define local_irq_disable_head() local_irq_disable() >+ >+#define local_irq_save_full(vflags, rflags) do { (void)(vflags); local_irq_save(rflags); } while(0) >+#define local_irq_restore_full(vflags, rflags) do { (void)(vflags); local_irq_restore(rflags); } while(0) >+#define local_irq_restore_nosync(vflags) local_irq_restore(vflags) >+ >+#define __ipipe_pipeline_head_p(ipd) 1 >+ >+#endif /* CONFIG_IPIPE */ >+ >+#endif /* !__LINUX_IPIPE_H */ >diff --git a/include/linux/ipipe_base.h b/include/linux/ipipe_base.h >new file mode 100644 >index 0000000..3f43ba5 >--- /dev/null >+++ b/include/linux/ipipe_base.h >@@ -0,0 +1,134 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_base.h >+ * >+ * Copyright (C) 2002-2007 Philippe Gerum. >+ * 2007 Jan Kiszka. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
>+ */ >+ >+#ifndef __LINUX_IPIPE_BASE_H >+#define __LINUX_IPIPE_BASE_H >+ >+#ifdef CONFIG_IPIPE >+ >+#include <asm/ipipe_base.h> >+ >+#define __bpl_up(x) (((x)+(BITS_PER_LONG-1)) & ~(BITS_PER_LONG-1)) >+/* Number of virtual IRQs (must be a multiple of BITS_PER_LONG) */ >+#define IPIPE_NR_VIRQS BITS_PER_LONG >+/* First virtual IRQ # (must be aligned on BITS_PER_LONG) */ >+#define IPIPE_VIRQ_BASE __bpl_up(IPIPE_NR_XIRQS) >+/* Total number of IRQ slots */ >+#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE+IPIPE_NR_VIRQS) >+ >+#define IPIPE_IRQ_LOMAPSZ (IPIPE_NR_IRQS / BITS_PER_LONG) >+#if IPIPE_IRQ_LOMAPSZ > BITS_PER_LONG >+/* >+ * We need a 3-level mapping. This allows us to handle up to 32k IRQ >+ * vectors on 32bit machines, 256k on 64bit ones. >+ */ >+#define __IPIPE_3LEVEL_IRQMAP 1 >+#define IPIPE_IRQ_MDMAPSZ (__bpl_up(IPIPE_IRQ_LOMAPSZ) / BITS_PER_LONG) >+#else >+/* >+ * 2-level mapping is enough. This allows us to handle up to 1024 IRQ >+ * vectors on 32bit machines, 4096 on 64bit ones. >+ */ >+#define __IPIPE_2LEVEL_IRQMAP 1 >+#endif >+ >+/* Per-cpu pipeline status */ >+#define IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at bit #0 */ >+#define IPIPE_NOSTACK_FLAG 1 /* Domain currently runs on a foreign stack */ >+ >+#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG) >+#define IPIPE_NOSTACK_MASK (1L << IPIPE_NOSTACK_FLAG) >+ >+typedef void (*ipipe_irq_handler_t)(unsigned int irq, >+ void *cookie); >+ >+extern struct ipipe_domain ipipe_root; >+ >+#define ipipe_root_domain (&ipipe_root) >+ >+void __ipipe_unstall_root(void); >+ >+void __ipipe_restore_root(unsigned long x); >+ >+#define ipipe_preempt_disable(flags) \ >+ do { \ >+ local_irq_save_hw(flags); \ >+ if (__ipipe_root_domain_p) \ >+ preempt_disable(); \ >+ } while (0) >+ >+#define ipipe_preempt_enable(flags) \ >+ do { \ >+ if (__ipipe_root_domain_p) { \ >+ preempt_enable_no_resched(); \ >+ local_irq_restore_hw(flags); \ >+ preempt_check_resched(); \ >+ } else \ >+ local_irq_restore_hw(flags); \ >+ } while (0) >+ >+#define ipipe_get_cpu(flags) ({ ipipe_preempt_disable(flags); ipipe_processor_id(); }) >+#define ipipe_put_cpu(flags) ipipe_preempt_enable(flags) >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+void ipipe_check_context(struct ipipe_domain *border_ipd); >+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ >+static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { } >+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+/* Generic features */ >+ >+#ifdef CONFIG_GENERIC_CLOCKEVENTS >+#define __IPIPE_FEATURE_REQUEST_TICKDEV 1 >+#endif >+#define __IPIPE_FEATURE_DELAYED_ATOMICSW 1 >+#define __IPIPE_FEATURE_FASTPEND_IRQ 1 >+#define __IPIPE_FEATURE_TRACE_EVENT 1 >+#define __IPIPE_FEATURE_ENABLE_NOTIFIER 1 >+#ifdef CONFIG_HAVE_IPIPE_HOSTRT >+#define __IPIPE_FEATURE_HOSTRT 1 >+#endif >+#define __IPIPE_FEATURE_PREPARE_PANIC 1 >+#define __IPIPE_FEATURE_ROOT_PREEMPT_NOTIFIER 1 >+#define __IPIPE_FEATURE_CONTROL_IRQ 1 >+ >+#else /* !CONFIG_IPIPE */ >+ >+#define ipipe_preempt_disable(flags) \ >+ do { \ >+ preempt_disable(); \ >+ (void)(flags); \ >+ } while (0) >+#define ipipe_preempt_enable(flags) preempt_enable() >+ >+#define ipipe_get_cpu(flags) ({ (void)(flags); get_cpu(); }) >+#define ipipe_put_cpu(flags) \ >+ do { \ >+ (void)(flags); \ >+ put_cpu(); \ >+ } while (0) >+ >+#define ipipe_check_context(ipd) do { } while(0) >+ >+#endif /* CONFIG_IPIPE */ >+ >+#endif /* !__LINUX_IPIPE_BASE_H */ >diff --git a/include/linux/ipipe_lock.h b/include/linux/ipipe_lock.h >new file mode 100644 >index 0000000..5382208 >--- /dev/null >+++ 
b/include/linux/ipipe_lock.h >@@ -0,0 +1,241 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_lock.h >+ * >+ * Copyright (C) 2009 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __LINUX_IPIPE_LOCK_H >+#define __LINUX_IPIPE_LOCK_H >+ >+typedef struct { >+ arch_spinlock_t arch_lock; >+} __ipipe_spinlock_t; >+ >+#define ipipe_spinlock_p(lock) \ >+ __builtin_types_compatible_p(typeof(lock), __ipipe_spinlock_t *) >+ >+#define std_spinlock_raw_p(lock) \ >+ __builtin_types_compatible_p(typeof(lock), raw_spinlock_t *) >+ >+#define std_spinlock_p(lock) \ >+ __builtin_types_compatible_p(typeof(lock), spinlock_t *) >+ >+#define ipipe_spinlock(lock) ((__ipipe_spinlock_t *)(lock)) >+#define std_spinlock_raw(lock) ((raw_spinlock_t *)(lock)) >+#define std_spinlock(lock) ((spinlock_t *)(lock)) >+ >+#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ >+ do { \ >+ if (ipipe_spinlock_p(lock)) \ >+ (flags) = __ipipe_spin_lock_irqsave(ipipe_spinlock(lock)); \ >+ else if (std_spinlock_raw_p(lock)) \ >+ __real_raw_spin_lock_irqsave(std_spinlock_raw(lock), flags); \ >+ else if (std_spinlock_p(lock)) \ >+ __real_raw_spin_lock_irqsave(&std_spinlock(lock)->rlock, flags); \ >+ else __bad_lock_type(); \ >+ } while (0) >+ >+#define PICK_SPINTRYLOCK_IRQSAVE(lock, flags) \ >+ ({ \ >+ int __ret__; \ >+ if (ipipe_spinlock_p(lock)) \ >+ __ret__ = __ipipe_spin_trylock_irqsave(ipipe_spinlock(lock), &(flags)); \ >+ else if (std_spinlock_raw_p(lock)) \ >+ __ret__ = __real_raw_spin_trylock_irqsave(std_spinlock_raw(lock), flags); \ >+ else if (std_spinlock_p(lock)) \ >+ __ret__ = __real_raw_spin_trylock_irqsave(&std_spinlock(lock)->rlock, flags); \ >+ else __bad_lock_type(); \ >+ __ret__; \ >+ }) >+ >+#define PICK_SPINTRYLOCK_IRQ(lock) \ >+ ({ \ >+ int __ret__; \ >+ if (ipipe_spinlock_p(lock)) \ >+ __ret__ = __ipipe_spin_trylock_irq(ipipe_spinlock(lock)); \ >+ else if (std_spinlock_raw_p(lock)) \ >+ __ret__ = __real_raw_spin_trylock_irq(std_spinlock_raw(lock)); \ >+ else if (std_spinlock_p(lock)) \ >+ __ret__ = __real_raw_spin_trylock_irq(&std_spinlock(lock)->rlock); \ >+ else __bad_lock_type(); \ >+ __ret__; \ >+ }) >+ >+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ >+ do { \ >+ if (ipipe_spinlock_p(lock)) \ >+ __ipipe_spin_unlock_irqrestore(ipipe_spinlock(lock), flags); \ >+ else { \ >+ __ipipe_spin_unlock_debug(flags); \ >+ if (std_spinlock_raw_p(lock)) \ >+ __real_raw_spin_unlock_irqrestore(std_spinlock_raw(lock), flags); \ >+ else if (std_spinlock_p(lock)) \ >+ __real_raw_spin_unlock_irqrestore(&std_spinlock(lock)->rlock, flags); \ >+ } \ >+ } while (0) >+ >+#define PICK_SPINOP(op, lock) \ >+ do { \ >+ if (ipipe_spinlock_p(lock)) \ >+ arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ >+ else if (std_spinlock_raw_p(lock)) \ >+ 
__real_raw_spin##op(std_spinlock_raw(lock)); \ >+ else if (std_spinlock_p(lock)) \ >+ __real_raw_spin##op(&std_spinlock(lock)->rlock); \ >+ else __bad_lock_type(); \ >+ } while (0) >+ >+#define PICK_SPINOP_RET(op, lock, type) \ >+ ({ \ >+ type __ret__; \ >+ if (ipipe_spinlock_p(lock)) \ >+ __ret__ = arch_spin##op(&ipipe_spinlock(lock)->arch_lock); \ >+ else if (std_spinlock_raw_p(lock)) \ >+ __ret__ = __real_raw_spin##op(std_spinlock_raw(lock)); \ >+ else if (std_spinlock_p(lock)) \ >+ __ret__ = __real_raw_spin##op(&std_spinlock(lock)->rlock); \ >+ else { __ret__ = -1; __bad_lock_type(); } \ >+ __ret__; \ >+ }) >+ >+#define arch_spin_lock_init(lock) \ >+ do { \ >+ IPIPE_DEFINE_SPINLOCK(__lock__); \ >+ *((ipipe_spinlock_t *)lock) = __lock__; \ >+ } while (0) >+ >+#define arch_spin_lock_irq(lock) \ >+ do { \ >+ local_irq_disable_hw(); \ >+ arch_spin_lock(lock); \ >+ } while (0) >+ >+#define arch_spin_unlock_irq(lock) \ >+ do { \ >+ arch_spin_unlock(lock); \ >+ local_irq_enable_hw(); \ >+ } while (0) >+ >+typedef struct { >+ arch_rwlock_t arch_lock; >+} __ipipe_rwlock_t; >+ >+#define ipipe_rwlock_p(lock) \ >+ __builtin_types_compatible_p(typeof(lock), __ipipe_rwlock_t *) >+ >+#define std_rwlock_p(lock) \ >+ __builtin_types_compatible_p(typeof(lock), rwlock_t *) >+ >+#define ipipe_rwlock(lock) ((__ipipe_rwlock_t *)(lock)) >+#define std_rwlock(lock) ((rwlock_t *)(lock)) >+ >+#define PICK_RWOP(op, lock) \ >+ do { \ >+ if (ipipe_rwlock_p(lock)) \ >+ arch##op(&ipipe_rwlock(lock)->arch_lock); \ >+ else if (std_rwlock_p(lock)) \ >+ _raw##op(std_rwlock(lock)); \ >+ else __bad_lock_type(); \ >+ } while (0) >+ >+extern int __bad_lock_type(void); >+ >+#ifdef CONFIG_IPIPE >+ >+#define ipipe_spinlock_t __ipipe_spinlock_t >+#define IPIPE_DEFINE_RAW_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED >+#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern ipipe_spinlock_t x >+#define IPIPE_DEFINE_SPINLOCK(x) IPIPE_DEFINE_RAW_SPINLOCK(x) >+#define IPIPE_DECLARE_SPINLOCK(x) IPIPE_DECLARE_RAW_SPINLOCK(x) >+ >+#define IPIPE_SPIN_LOCK_UNLOCKED \ >+ (__ipipe_spinlock_t) { .arch_lock = __ARCH_SPIN_LOCK_UNLOCKED } >+ >+#define spin_lock_irqsave_cond(lock, flags) \ >+ spin_lock_irqsave(lock, flags) >+ >+#define spin_unlock_irqrestore_cond(lock, flags) \ >+ spin_unlock_irqrestore(lock, flags) >+ >+void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock); >+ >+int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock); >+ >+void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock); >+ >+unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock); >+ >+int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, >+ unsigned long *x); >+ >+void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, >+ unsigned long x); >+ >+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); >+ >+void __ipipe_spin_unlock_irqcomplete(unsigned long x); >+ >+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) >+void __ipipe_spin_unlock_debug(unsigned long flags); >+#else >+#define __ipipe_spin_unlock_debug(flags) do { } while (0) >+#endif >+ >+#define ipipe_rwlock_t __ipipe_rwlock_t >+#define IPIPE_DEFINE_RWLOCK(x) ipipe_rwlock_t x = IPIPE_RW_LOCK_UNLOCKED >+#define IPIPE_DECLARE_RWLOCK(x) extern ipipe_rwlock_t x >+ >+#define IPIPE_RW_LOCK_UNLOCKED \ >+ (__ipipe_rwlock_t) { .arch_lock = __ARCH_RW_LOCK_UNLOCKED } >+ >+#else /* !CONFIG_IPIPE */ >+ >+#define ipipe_spinlock_t spinlock_t >+#define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) >+#define IPIPE_DECLARE_SPINLOCK(x) extern spinlock_t x >+#define IPIPE_SPIN_LOCK_UNLOCKED 
SPIN_LOCK_UNLOCKED >+#define IPIPE_DEFINE_RAW_SPINLOCK(x) DEFINE_RAW_SPINLOCK(x) >+#define IPIPE_DECLARE_RAW_SPINLOCK(x) extern raw_spinlock_t x >+ >+#define spin_lock_irqsave_cond(lock, flags) \ >+ do { \ >+ (void)(flags); \ >+ spin_lock(lock); \ >+ } while(0) >+ >+#define spin_unlock_irqrestore_cond(lock, flags) \ >+ spin_unlock(lock) >+ >+#define __ipipe_spin_lock_irq(lock) do { } while (0) >+#define __ipipe_spin_unlock_irq(lock) do { } while (0) >+#define __ipipe_spin_lock_irqsave(lock) 0 >+#define __ipipe_spin_trylock_irq(lock) 1 >+#define __ipipe_spin_trylock_irqsave(lock, x) ({ (void)(x); 1; }) >+#define __ipipe_spin_unlock_irqrestore(lock, x) do { (void)(x); } while (0) >+#define __ipipe_spin_unlock_irqbegin(lock) do { } while (0) >+#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while (0) >+#define __ipipe_spin_unlock_debug(flags) do { } while (0) >+ >+#define ipipe_rwlock_t rwlock_t >+#define IPIPE_DEFINE_RWLOCK(x) DEFINE_RWLOCK(x) >+#define IPIPE_DECLARE_RWLOCK(x) extern rwlock_t x >+#define IPIPE_RW_LOCK_UNLOCKED RW_LOCK_UNLOCKED >+ >+#endif /* !CONFIG_IPIPE */ >+ >+#endif /* !__LINUX_IPIPE_LOCK_H */ >diff --git a/include/linux/ipipe_percpu.h b/include/linux/ipipe_percpu.h >new file mode 100644 >index 0000000..0b42e8c >--- /dev/null >+++ b/include/linux/ipipe_percpu.h >@@ -0,0 +1,89 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_percpu.h >+ * >+ * Copyright (C) 2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ */ >+ >+#ifndef __LINUX_IPIPE_PERCPU_H >+#define __LINUX_IPIPE_PERCPU_H >+ >+#include <asm/percpu.h> >+#include <asm/ptrace.h> >+ >+struct ipipe_domain; >+ >+struct ipipe_percpu_domain_data { >+ unsigned long status; /* <= Must be first in struct. */ >+ unsigned long irqpend_himap; >+#ifdef __IPIPE_3LEVEL_IRQMAP >+ unsigned long irqpend_mdmap[IPIPE_IRQ_MDMAPSZ]; >+#endif >+ unsigned long irqpend_lomap[IPIPE_IRQ_LOMAPSZ]; >+ unsigned long irqheld_map[IPIPE_IRQ_LOMAPSZ]; >+ unsigned long irqall[IPIPE_NR_IRQS]; >+ u64 evsync; >+}; >+ >+/* >+ * CAREFUL: all accessors based on __raw_get_cpu_var() you may find in >+ * this file should be used only while hw interrupts are off, to >+ * prevent from CPU migration regardless of the running domain. 
>+ */ >+#ifdef CONFIG_SMP >+#define ipipe_percpudom_ptr(ipd, cpu) \ >+ (&per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot]) >+#define ipipe_cpudom_ptr(ipd) \ >+ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot]) >+#else >+DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]); >+#define ipipe_percpudom_ptr(ipd, cpu) \ >+ (per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot]) >+#define ipipe_cpudom_ptr(ipd) \ >+ (__ipipe_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot]) >+#endif >+#define ipipe_percpudom(ipd, var, cpu) (ipipe_percpudom_ptr(ipd, cpu)->var) >+#define ipipe_cpudom_var(ipd, var) (ipipe_cpudom_ptr(ipd)->var) >+ >+#define IPIPE_ROOT_SLOT 0 >+#define IPIPE_HEAD_SLOT (CONFIG_IPIPE_DOMAINS - 1) >+ >+DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]); >+ >+DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain); >+ >+DECLARE_PER_CPU(unsigned long, ipipe_nmi_saved_root); >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+DECLARE_PER_CPU(int, ipipe_percpu_context_check); >+DECLARE_PER_CPU(int, ipipe_saved_context_check_state); >+#endif >+ >+#define ipipe_root_cpudom_ptr() \ >+ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT]) >+ >+#define ipipe_root_cpudom_var(var) ipipe_root_cpudom_ptr()->var >+ >+#define ipipe_this_cpudom_var(var) \ >+ ipipe_cpudom_var(__ipipe_current_domain, var) >+ >+#define ipipe_head_cpudom_ptr() \ >+ (&__ipipe_get_cpu_var(ipipe_percpu_darray)[IPIPE_HEAD_SLOT]) >+ >+#define ipipe_head_cpudom_var(var) ipipe_head_cpudom_ptr()->var >+ >+#endif /* !__LINUX_IPIPE_PERCPU_H */ >diff --git a/include/linux/ipipe_tickdev.h b/include/linux/ipipe_tickdev.h >new file mode 100644 >index 0000000..d3be89b >--- /dev/null >+++ b/include/linux/ipipe_tickdev.h >@@ -0,0 +1,79 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_tickdev.h >+ * >+ * Copyright (C) 2007 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
>+ */ >+ >+#ifndef __LINUX_IPIPE_TICKDEV_H >+#define __LINUX_IPIPE_TICKDEV_H >+ >+#if defined(CONFIG_IPIPE) && defined(CONFIG_GENERIC_CLOCKEVENTS) >+ >+#include <linux/clockchips.h> >+#include <linux/clocksource.h> >+ >+struct tick_device; >+ >+struct ipipe_tick_device { >+ >+ void (*emul_set_mode)(enum clock_event_mode, >+ struct clock_event_device *cdev); >+ int (*emul_set_tick)(unsigned long delta, >+ struct clock_event_device *cdev); >+ void (*real_set_mode)(enum clock_event_mode mode, >+ struct clock_event_device *cdev); >+ int (*real_set_tick)(unsigned long delta, >+ struct clock_event_device *cdev); >+ struct tick_device *slave; >+ unsigned long real_max_delta_ns; >+ unsigned long real_mult; >+ int real_shift; >+}; >+ >+struct ipipe_hostrt_data { >+ short int live; >+ seqcount_t seqcount; >+ time_t wall_time_sec; >+ u32 wall_time_nsec; >+ struct timespec wall_to_monotonic; >+ cycle_t cycle_last; >+ cycle_t mask; >+ u32 mult; >+ u32 shift; >+}; >+ >+int ipipe_request_tickdev(const char *devname, >+ void (*emumode)(enum clock_event_mode mode, >+ struct clock_event_device *cdev), >+ int (*emutick)(unsigned long evt, >+ struct clock_event_device *cdev), >+ int cpu, unsigned long *tmfreq); >+ >+void ipipe_release_tickdev(int cpu); >+ >+#endif /* CONFIG_IPIPE && CONFIG_GENERIC_CLOCKEVENTS */ >+ >+#ifdef CONFIG_HAVE_IPIPE_HOSTRT >+void ipipe_update_hostrt(struct timespec *wall_time, >+ struct clocksource *clock); >+#else /* !CONFIG_IPIPE_HOSTRT */ >+static inline void >+ipipe_update_hostrt(struct timespec *wall_time, struct clocksource *clock) {} >+#endif >+ >+#endif /* !__LINUX_IPIPE_TICKDEV_H */ >diff --git a/include/linux/ipipe_trace.h b/include/linux/ipipe_trace.h >new file mode 100644 >index 0000000..627b354 >--- /dev/null >+++ b/include/linux/ipipe_trace.h >@@ -0,0 +1,72 @@ >+/* -*- linux-c -*- >+ * include/linux/ipipe_trace.h >+ * >+ * Copyright (C) 2005 Luotao Fu. >+ * 2005-2007 Jan Kiszka. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
>+ */
>+
>+#ifndef _LINUX_IPIPE_TRACE_H
>+#define _LINUX_IPIPE_TRACE_H
>+
>+#ifdef CONFIG_IPIPE_TRACE
>+
>+#include <linux/types.h>
>+
>+void ipipe_trace_begin(unsigned long v);
>+void ipipe_trace_end(unsigned long v);
>+void ipipe_trace_freeze(unsigned long v);
>+void ipipe_trace_special(unsigned char special_id, unsigned long v);
>+void ipipe_trace_pid(pid_t pid, short prio);
>+void ipipe_trace_event(unsigned char id, unsigned long delay_tsc);
>+int ipipe_trace_max_reset(void);
>+int ipipe_trace_frozen_reset(void);
>+
>+#else /* !CONFIG_IPIPE_TRACE */
>+
>+#define ipipe_trace_begin(v) do { (void)(v); } while(0)
>+#define ipipe_trace_end(v) do { (void)(v); } while(0)
>+#define ipipe_trace_freeze(v) do { (void)(v); } while(0)
>+#define ipipe_trace_special(id, v) do { (void)(id); (void)(v); } while(0)
>+#define ipipe_trace_pid(pid, prio) do { (void)(pid); (void)(prio); } while(0)
>+#define ipipe_trace_event(id, delay_tsc) do { (void)(id); (void)(delay_tsc); } while(0)
>+#define ipipe_trace_max_reset() do { } while(0)
>+#define ipipe_trace_frozen_reset() do { } while(0)
>+
>+#endif /* !CONFIG_IPIPE_TRACE */
>+
>+#ifdef CONFIG_IPIPE_TRACE_PANIC
>+void ipipe_trace_panic_freeze(void);
>+void ipipe_trace_panic_dump(void);
>+#else
>+static inline void ipipe_trace_panic_freeze(void) { }
>+static inline void ipipe_trace_panic_dump(void) { }
>+#endif
>+
>+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
>+#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq)
>+#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq)
>+#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL)
>+#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL)
>+#else
>+#define ipipe_trace_irq_entry(irq) do { (void)(irq); } while(0)
>+#define ipipe_trace_irq_exit(irq) do { (void)(irq); } while(0)
>+#define ipipe_trace_irqsoff() do { } while(0)
>+#define ipipe_trace_irqson() do { } while(0)
>+#endif
>+
>+#endif /* !_LINUX_IPIPE_TRACE_H */
>diff --git a/include/linux/irq.h b/include/linux/irq.h
>index abde252..bf0ab3e 100644
>--- a/include/linux/irq.h
>+++ b/include/linux/irq.h
>@@ -194,6 +194,9 @@ struct irq_chip {
>
> void (*irq_bus_lock)(struct irq_data *data);
> void (*irq_bus_sync_unlock)(struct irq_data *data);
>+#ifdef CONFIG_IPIPE
>+ void (*irq_move)(struct irq_data *data);
>+#endif /* CONFIG_IPIPE */
>
> /* Currently used only by UML, might disappear one day.*/
> #ifdef CONFIG_IRQ_RELEASE_METHOD
>diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
>index 979c68c..ee39a3e 100644
>--- a/include/linux/irqdesc.h
>+++ b/include/linux/irqdesc.h
>@@ -55,6 +55,12 @@ struct irq_desc {
> };
> };
> #endif
>+#ifdef CONFIG_IPIPE
>+ void (*ipipe_ack)(unsigned int irq,
>+ struct irq_desc *desc);
>+ void (*ipipe_end)(unsigned int irq,
>+ struct irq_desc *desc);
>+#endif /* CONFIG_IPIPE */
>
> struct timer_rand_state *timer_rand_state;
> unsigned int *kstat_irqs;
>@@ -145,6 +151,10 @@ static inline int irq_balancing_disabled(unsigned int irq)
> return desc->status & IRQ_NO_BALANCING_MASK;
> }
>
>+irq_flow_handler_t
>+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle,
>+ int is_chained);
>+
> /* caller has locked the irq_desc and both params are valid */
> static inline void __set_irq_handler_unlocked(int irq,
> irq_flow_handler_t handler)
>@@ -152,6 +162,7 @@ static inline void __set_irq_handler_unlocked(int irq,
> struct irq_desc *desc;
>
> desc = irq_to_desc(irq);
>+ handler = __fixup_irq_handler(desc, handler, 0);
> desc->handle_irq = handler;
> }
> #endif
>diff --git a/include/linux/kernel.h
b/include/linux/kernel.h >index b6de9a6..c7dde19 100644 >--- a/include/linux/kernel.h >+++ b/include/linux/kernel.h >@@ -16,6 +16,7 @@ > #include <linux/compiler.h> > #include <linux/bitops.h> > #include <linux/log2.h> >+#include <linux/ipipe_base.h> > #include <linux/typecheck.h> > #include <linux/printk.h> > #include <linux/dynamic_debug.h> >@@ -114,9 +115,12 @@ struct user; > > #ifdef CONFIG_PREEMPT_VOLUNTARY > extern int _cond_resched(void); >-# define might_resched() _cond_resched() >+# define might_resched() do { \ >+ ipipe_check_context(ipipe_root_domain); \ >+ _cond_resched(); \ >+ } while (0) > #else >-# define might_resched() do { } while (0) >+# define might_resched() ipipe_check_context(ipipe_root_domain) > #endif > > #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP >diff --git a/include/linux/preempt.h b/include/linux/preempt.h >index 2e681d9..130b7d5 100644 >--- a/include/linux/preempt.h >+++ b/include/linux/preempt.h >@@ -9,13 +9,20 @@ > #include <linux/thread_info.h> > #include <linux/linkage.h> > #include <linux/list.h> >+#include <linux/ipipe_base.h> > > #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) > extern void add_preempt_count(int val); > extern void sub_preempt_count(int val); > #else >-# define add_preempt_count(val) do { preempt_count() += (val); } while (0) >-# define sub_preempt_count(val) do { preempt_count() -= (val); } while (0) >+# define add_preempt_count(val) do { \ >+ ipipe_check_context(ipipe_root_domain); \ >+ preempt_count() += (val); \ >+ } while (0) >+# define sub_preempt_count(val) do { \ >+ ipipe_check_context(ipipe_root_domain); \ >+ preempt_count() -= (val); \ >+ } while (0) > #endif > > #define inc_preempt_count() add_preempt_count(1) >diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h >index bc2994e..5e2da8d 100644 >--- a/include/linux/rwlock.h >+++ b/include/linux/rwlock.h >@@ -61,8 +61,8 @@ do { \ > #define read_trylock(lock) __cond_lock(lock, _raw_read_trylock(lock)) > #define write_trylock(lock) __cond_lock(lock, _raw_write_trylock(lock)) > >-#define write_lock(lock) _raw_write_lock(lock) >-#define read_lock(lock) _raw_read_lock(lock) >+#define write_lock(lock) PICK_RWOP(_write_lock, lock) >+#define read_lock(lock) PICK_RWOP(_read_lock, lock) > > #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) > >@@ -96,8 +96,8 @@ do { \ > #define read_lock_bh(lock) _raw_read_lock_bh(lock) > #define write_lock_irq(lock) _raw_write_lock_irq(lock) > #define write_lock_bh(lock) _raw_write_lock_bh(lock) >-#define read_unlock(lock) _raw_read_unlock(lock) >-#define write_unlock(lock) _raw_write_unlock(lock) >+#define read_unlock(lock) PICK_RWOP(_read_unlock, lock) >+#define write_unlock(lock) PICK_RWOP(_write_unlock, lock) > #define read_unlock_irq(lock) _raw_read_unlock_irq(lock) > #define write_unlock_irq(lock) _raw_write_unlock_irq(lock) > >diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h >index 9c9f049..62c8941 100644 >--- a/include/linux/rwlock_api_smp.h >+++ b/include/linux/rwlock_api_smp.h >@@ -141,7 +141,9 @@ static inline int __raw_write_trylock(rwlock_t *lock) > * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are > * not re-enabled during lock-acquire (which the preempt-spin-ops do): > */ >-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) >+#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ >+ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ >+ defined(CONFIG_IPIPE) > > static inline void __raw_read_lock(rwlock_t *lock) > { >diff --git a/include/linux/sched.h 
b/include/linux/sched.h >index 2238745..78e42b3 100644 >--- a/include/linux/sched.h >+++ b/include/linux/sched.h >@@ -61,6 +61,7 @@ struct sched_param { > #include <linux/errno.h> > #include <linux/nodemask.h> > #include <linux/mm_types.h> >+#include <linux/ipipe.h> > > #include <asm/system.h> > #include <asm/page.h> >@@ -191,9 +192,17 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) > #define TASK_DEAD 64 > #define TASK_WAKEKILL 128 > #define TASK_WAKING 256 >+#ifdef CONFIG_IPIPE >+#define TASK_ATOMICSWITCH 512 >+#define TASK_NOWAKEUP 1024 >+#define TASK_STATE_MAX 2048 >+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWAN" >+#else /* !CONFIG_IPIPE */ >+#define TASK_ATOMICSWITCH 0 >+#define TASK_NOWAKEUP 0 > #define TASK_STATE_MAX 512 >- > #define TASK_STATE_TO_CHAR_STR "RSDTtZXxKW" >+#endif /* CONFIG_IPIPE */ > > extern char ___assert_task_state[1 - 2*!!( > sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; >@@ -305,6 +314,15 @@ extern void trap_init(void); > extern void update_process_times(int user); > extern void scheduler_tick(void); > >+#ifdef CONFIG_IPIPE >+void update_root_process_times(struct pt_regs *regs); >+#else /* !CONFIG_IPIPE */ >+static inline void update_root_process_times(struct pt_regs *regs) >+{ >+ update_process_times(user_mode(regs)); >+} >+#endif /* CONFIG_IPIPE */ >+ > extern void sched_show_task(struct task_struct *p); > > #ifdef CONFIG_LOCKUP_DETECTOR >@@ -355,7 +373,7 @@ extern signed long schedule_timeout(signed long timeout); > extern signed long schedule_timeout_interruptible(signed long timeout); > extern signed long schedule_timeout_killable(signed long timeout); > extern signed long schedule_timeout_uninterruptible(signed long timeout); >-asmlinkage void schedule(void); >+asmlinkage int schedule(void); > extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); > > struct nsproxy; >@@ -431,6 +449,9 @@ extern int get_dumpable(struct mm_struct *mm); > #endif > /* leave room for more dump flags */ > #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ >+#ifdef CONFIG_IPIPE >+#define MMF_VM_PINNED 31 /* ondemand load up and COW disabled */ >+#endif > > #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) > >@@ -1459,6 +1480,10 @@ struct task_struct { > #endif > atomic_t fs_excl; /* holding fs exclusive resources */ > struct rcu_head rcu; >+#ifdef CONFIG_IPIPE >+ unsigned int ipipe_flags; >+ void *ptd[IPIPE_ROOT_NPTDKEYS]; >+#endif > > /* > * cache last used pipe for splice >@@ -1731,6 +1756,10 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * > #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ > #define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ > >+/* p->ipipe_flags */ >+#define PF_EVTRET 0x1 /* EVENT_RETURN is pending */ >+#define PF_EVNOTIFY 0x2 /* Notify other domains about internal events */ >+ > /* > * Only the _current_ task can read/write to tsk->flags, but other > * tasks can access tsk->flags in readonly mode for example >diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h >index 80e5358..2e29b16 100644 >--- a/include/linux/spinlock.h >+++ b/include/linux/spinlock.h >@@ -89,10 +89,12 @@ > # include <linux/spinlock_up.h> > #endif > >+#include <linux/ipipe_lock.h> >+ > #ifdef CONFIG_DEBUG_SPINLOCK > extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, > struct lock_class_key *key); >-# define raw_spin_lock_init(lock) \ >+# define __real_raw_spin_lock_init(lock) \ 
> do { \ > static struct lock_class_key __key; \ > \ >@@ -100,9 +102,10 @@ do { \ > } while (0) > > #else >-# define raw_spin_lock_init(lock) \ >+# define __real_raw_spin_lock_init(lock) \ > do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0) > #endif >+#define raw_spin_lock_init(lock) PICK_SPINOP(_lock_init, lock) > > #define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock) > >@@ -165,9 +168,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) > * various methods are defined as nops in the case they are not > * required. > */ >-#define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) >+#define __real_raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) >+#define raw_spin_trylock(lock) PICK_SPINOP_RET(_trylock, lock, int) > >-#define raw_spin_lock(lock) _raw_spin_lock(lock) >+#define __real_raw_spin_lock(lock) _raw_spin_lock(lock) >+#define raw_spin_lock(lock) PICK_SPINOP(_lock, lock) > > #ifdef CONFIG_DEBUG_LOCK_ALLOC > # define raw_spin_lock_nested(lock, subclass) \ >@@ -185,7 +190,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) > > #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) > >-#define raw_spin_lock_irqsave(lock, flags) \ >+#define __real_raw_spin_lock_irqsave(lock, flags) \ > do { \ > typecheck(unsigned long, flags); \ > flags = _raw_spin_lock_irqsave(lock); \ >@@ -207,7 +212,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) > > #else > >-#define raw_spin_lock_irqsave(lock, flags) \ >+#define __real_raw_spin_lock_irqsave(lock, flags) \ > do { \ > typecheck(unsigned long, flags); \ > _raw_spin_lock_irqsave(lock, flags); \ >@@ -218,34 +223,46 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) > > #endif > >-#define raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) >+#define raw_spin_lock_irqsave(lock, flags) \ >+ PICK_SPINLOCK_IRQSAVE(lock, flags) >+ >+#define __real_raw_spin_lock_irq(lock) _raw_spin_lock_irq(lock) >+#define raw_spin_lock_irq(lock) PICK_SPINOP(_lock_irq, lock) > #define raw_spin_lock_bh(lock) _raw_spin_lock_bh(lock) >-#define raw_spin_unlock(lock) _raw_spin_unlock(lock) >-#define raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) >+#define __real_raw_spin_unlock(lock) _raw_spin_unlock(lock) >+#define raw_spin_unlock(lock) PICK_SPINOP(_unlock, lock) >+#define __real_raw_spin_unlock_irq(lock) _raw_spin_unlock_irq(lock) >+#define raw_spin_unlock_irq(lock) PICK_SPINOP(_unlock_irq, lock) > >-#define raw_spin_unlock_irqrestore(lock, flags) \ >+#define __real_raw_spin_unlock_irqrestore(lock, flags) \ > do { \ > typecheck(unsigned long, flags); \ > _raw_spin_unlock_irqrestore(lock, flags); \ > } while (0) >+#define raw_spin_unlock_irqrestore(lock, flags) \ >+ PICK_SPINUNLOCK_IRQRESTORE(lock, flags) >+ > #define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock) > > #define raw_spin_trylock_bh(lock) \ > __cond_lock(lock, _raw_spin_trylock_bh(lock)) > >-#define raw_spin_trylock_irq(lock) \ >+#define __real_raw_spin_trylock_irq(lock) \ > ({ \ > local_irq_disable(); \ >- raw_spin_trylock(lock) ? \ >+ __real_raw_spin_trylock(lock) ? \ > 1 : ({ local_irq_enable(); 0; }); \ > }) >+#define raw_spin_trylock_irq(lock) PICK_SPINTRYLOCK_IRQ(lock) > >-#define raw_spin_trylock_irqsave(lock, flags) \ >+#define __real_raw_spin_trylock_irqsave(lock, flags) \ > ({ \ > local_irq_save(flags); \ > raw_spin_trylock(lock) ? 
\ > 1 : ({ local_irq_restore(flags); 0; }); \ > }) >+#define raw_spin_trylock_irqsave(lock, flags) \ >+ PICK_SPINTRYLOCK_IRQSAVE(lock, flags) > > /** > * raw_spin_can_lock - would raw_spin_trylock() succeed? >@@ -276,24 +293,17 @@ static inline raw_spinlock_t *spinlock_check(spinlock_t *lock) > > #define spin_lock_init(_lock) \ > do { \ >- spinlock_check(_lock); \ >- raw_spin_lock_init(&(_lock)->rlock); \ >+ raw_spin_lock_init(_lock); \ > } while (0) > >-static inline void spin_lock(spinlock_t *lock) >-{ >- raw_spin_lock(&lock->rlock); >-} >+#define spin_lock(lock) raw_spin_lock(lock) > > static inline void spin_lock_bh(spinlock_t *lock) > { > raw_spin_lock_bh(&lock->rlock); > } > >-static inline int spin_trylock(spinlock_t *lock) >-{ >- return raw_spin_trylock(&lock->rlock); >-} >+#define spin_trylock(lock) raw_spin_trylock(lock) > > #define spin_lock_nested(lock, subclass) \ > do { \ >@@ -305,14 +315,11 @@ do { \ > raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ > } while (0) > >-static inline void spin_lock_irq(spinlock_t *lock) >-{ >- raw_spin_lock_irq(&lock->rlock); >-} >+#define spin_lock_irq(lock) raw_spin_lock_irq(lock) > > #define spin_lock_irqsave(lock, flags) \ > do { \ >- raw_spin_lock_irqsave(spinlock_check(lock), flags); \ >+ raw_spin_lock_irqsave(lock, flags); \ > } while (0) > > #define spin_lock_irqsave_nested(lock, flags, subclass) \ >@@ -320,39 +327,28 @@ do { \ > raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ > } while (0) > >-static inline void spin_unlock(spinlock_t *lock) >-{ >- raw_spin_unlock(&lock->rlock); >-} >+#define spin_unlock(lock) raw_spin_unlock(lock) > > static inline void spin_unlock_bh(spinlock_t *lock) > { > raw_spin_unlock_bh(&lock->rlock); > } > >-static inline void spin_unlock_irq(spinlock_t *lock) >-{ >- raw_spin_unlock_irq(&lock->rlock); >-} >+#define spin_unlock_irq(lock) raw_spin_unlock_irq(lock) > >-static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) >-{ >- raw_spin_unlock_irqrestore(&lock->rlock, flags); >-} >+#define spin_unlock_irqrestore(lock, flags) \ >+ raw_spin_unlock_irqrestore(lock, flags) > > static inline int spin_trylock_bh(spinlock_t *lock) > { > return raw_spin_trylock_bh(&lock->rlock); > } > >-static inline int spin_trylock_irq(spinlock_t *lock) >-{ >- return raw_spin_trylock_irq(&lock->rlock); >-} >+#define spin_trylock_irq(lock) raw_spin_trylock_irq(lock) > > #define spin_trylock_irqsave(lock, flags) \ > ({ \ >- raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ >+ raw_spin_trylock_irqsave(lock, flags); \ > }) > > static inline void spin_unlock_wait(spinlock_t *lock) >diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h >index e253ccd..378e01e 100644 >--- a/include/linux/spinlock_api_smp.h >+++ b/include/linux/spinlock_api_smp.h >@@ -99,7 +99,9 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) > * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are > * not re-enabled during lock-acquire (which the preempt-spin-ops do): > */ >-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) >+#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ >+ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ >+ defined(CONFIG_IPIPE) > > static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) > { >@@ -113,7 +115,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock) > * do_raw_spin_lock_flags() code, because lockdep assumes > * that interrupts are not re-enabled during lock-acquire: > 
*/ >-#ifdef CONFIG_LOCKDEP >+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE) > LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); > #else > do_raw_spin_lock_flags(lock, &flags); >diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h >index b14f6a9..e400972 100644 >--- a/include/linux/spinlock_up.h >+++ b/include/linux/spinlock_up.h >@@ -49,13 +49,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) > /* > * Read-write spinlocks. No debug version. > */ >-#define arch_read_lock(lock) do { (void)(lock); } while (0) >-#define arch_write_lock(lock) do { (void)(lock); } while (0) >-#define arch_read_trylock(lock) ({ (void)(lock); 1; }) >-#define arch_write_trylock(lock) ({ (void)(lock); 1; }) >-#define arch_read_unlock(lock) do { (void)(lock); } while (0) >-#define arch_write_unlock(lock) do { (void)(lock); } while (0) >- > #else /* DEBUG_SPINLOCK */ > #define arch_spin_is_locked(lock) ((void)(lock), 0) > /* for sched.c and kernel_lock.c: */ >@@ -65,6 +58,13 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) > # define arch_spin_trylock(lock) ({ (void)(lock); 1; }) > #endif /* DEBUG_SPINLOCK */ > >+#define arch_read_lock(lock) do { (void)(lock); } while (0) >+#define arch_write_lock(lock) do { (void)(lock); } while (0) >+#define arch_read_trylock(lock) ({ (void)(lock); 1; }) >+#define arch_write_trylock(lock) ({ (void)(lock); 1; }) >+#define arch_read_unlock(lock) do { (void)(lock); } while (0) >+#define arch_write_unlock(lock) do { (void)(lock); } while (0) >+ > #define arch_spin_is_contended(lock) (((void)(lock), 0)) > > #define arch_read_can_lock(lock) (((void)(lock), 1)) >diff --git a/init/Kconfig b/init/Kconfig >index c972899..6fc1103 100644 >--- a/init/Kconfig >+++ b/init/Kconfig >@@ -93,6 +93,7 @@ config CROSS_COMPILE > > config LOCALVERSION > string "Local version - append to kernel release" >+ default "-ipipe" > help > Append an extra string to the end of your kernel version. > This will show up when you type uname, for example. >diff --git a/init/main.c b/init/main.c >index 8646401..402850c 100644 >--- a/init/main.c >+++ b/init/main.c >@@ -552,7 +552,7 @@ asmlinkage void __init start_kernel(void) > > cgroup_init_early(); > >- local_irq_disable(); >+ local_irq_disable_hw(); > early_boot_irqs_off(); > > /* >@@ -585,6 +585,7 @@ asmlinkage void __init start_kernel(void) > pidhash_init(); > vfs_caches_init_early(); > sort_main_extable(); >+ ipipe_init_early(); > trap_init(); > mm_init(); > /* >@@ -614,6 +615,11 @@ asmlinkage void __init start_kernel(void) > softirq_init(); > timekeeping_init(); > time_init(); >+ /* >+ * We need to wait for the interrupt and time subsystems to be >+ * initialized before enabling the pipeline. 
>+ */ >+ ipipe_init(); > profile_init(); > if (!irqs_disabled()) > printk(KERN_CRIT "start_kernel(): bug: interrupts were " >@@ -794,6 +800,7 @@ static void __init do_basic_setup(void) > init_tmpfs(); > driver_init(); > init_irq_proc(); >+ ipipe_init_proc(); > do_ctors(); > do_initcalls(); > } >diff --git a/kernel/Makefile b/kernel/Makefile >index 0b5ff08..be86d65 100644 >--- a/kernel/Makefile >+++ b/kernel/Makefile >@@ -88,6 +88,7 @@ obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o > obj-$(CONFIG_TINY_RCU) += rcutiny.o > obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o > obj-$(CONFIG_RELAY) += relay.o >+obj-$(CONFIG_IPIPE) += ipipe/ > obj-$(CONFIG_SYSCTL) += utsname_sysctl.o > obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o > obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o >diff --git a/kernel/exit.c b/kernel/exit.c >index 676149a..c8f2f9d 100644 >--- a/kernel/exit.c >+++ b/kernel/exit.c >@@ -989,6 +989,7 @@ NORET_TYPE void do_exit(long code) > acct_process(); > trace_sched_process_exit(tsk); > >+ ipipe_exit_notify(tsk); > exit_sem(tsk); > exit_files(tsk); > exit_fs(tsk); >@@ -1796,3 +1797,37 @@ SYSCALL_DEFINE3(waitpid, pid_t, pid, int > } > > #endif >+ >+void rt_daemonize(void) >+{ >+ sigset_t blocked; >+ >+ /* >+ * We don't want to have TIF_FREEZE set if the system-wide hibernation >+ * or suspend transition begins right now. >+ */ >+ current->flags |= (PF_NOFREEZE | PF_KTHREAD); >+ >+ if (current->nsproxy != &init_nsproxy) { >+ get_nsproxy(&init_nsproxy); >+ switch_task_namespaces(current, &init_nsproxy); >+ } >+ set_special_pids(&init_struct_pid); >+ proc_clear_tty(current); >+ >+ /* Block and flush all signals */ >+ sigfillset(&blocked); >+ sigprocmask(SIG_BLOCK, &blocked, NULL); >+ flush_signals(current); >+ >+ /* Become as one with the init task */ >+ >+ daemonize_fs_struct(); >+ exit_files(current); >+ current->files = init_task.files; >+ atomic_inc(¤t->files->count); >+ >+ reparent_to_kthreadd(); >+} >+ >+EXPORT_SYMBOL(rt_daemonize); >diff --git a/kernel/fork.c b/kernel/fork.c >index 5447dc7..8348e59 100644 >--- a/kernel/fork.c >+++ b/kernel/fork.c >@@ -543,6 +543,7 @@ void mmput(struct mm_struct *mm) > exit_aio(mm); > ksm_exit(mm); > exit_mmap(mm); >+ ipipe_cleanup_notify(mm); > set_mm_exe_file(mm, NULL); > if (!list_empty(&mm->mmlist)) { > spin_lock(&mmlist_lock); >@@ -922,6 +923,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) > new_flags |= PF_FORKNOEXEC; > new_flags |= PF_STARTING; > p->flags = new_flags; >+ ipipe_clear_flags(p); > clear_freeze_flag(p); > } > >@@ -1294,6 +1296,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, > write_unlock_irq(&tasklist_lock); > proc_fork_connector(p); > cgroup_post_fork(p); >+#ifdef CONFIG_IPIPE >+ p->ipipe_flags = 0; >+ memset(p->ptd, 0, sizeof(p->ptd)); >+#endif /* CONFIG_IPIPE */ > perf_event_fork(p); > return p; > >@@ -1701,15 +1707,18 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) > } > > if (new_mm) { >+ unsigned long flags; > mm = current->mm; > active_mm = current->active_mm; > current->mm = new_mm; >+ ipipe_mm_switch_protect(flags); > current->active_mm = new_mm; > if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { > atomic_dec(&mm->oom_disable_count); > atomic_inc(&new_mm->oom_disable_count); > } > activate_mm(active_mm, new_mm); >+ ipipe_mm_switch_unprotect(flags); > new_mm = mm; > } > >diff --git a/kernel/ipipe/Kconfig b/kernel/ipipe/Kconfig >new file mode 100644 >index 0000000..693a7d2 >--- /dev/null >+++ b/kernel/ipipe/Kconfig >@@ -0,0 +1,26 @@ >+config IPIPE >+ 
bool "Interrupt pipeline" >+ default y >+ ---help--- >+ Activate this option if you want the interrupt pipeline to be >+ compiled in. >+ >+config IPIPE_DOMAINS >+ int "Max domains" >+ depends on IPIPE >+ default 4 >+ ---help--- >+ The maximum number of I-pipe domains to run concurrently. >+ >+config IPIPE_DELAYED_ATOMICSW >+ bool >+ depends on IPIPE >+ default n >+ >+config IPIPE_UNMASKED_CONTEXT_SWITCH >+ bool >+ depends on IPIPE >+ default n >+ >+config HAVE_IPIPE_HOSTRT >+ bool >diff --git a/kernel/ipipe/Kconfig.debug b/kernel/ipipe/Kconfig.debug >new file mode 100644 >index 0000000..ba87335 >--- /dev/null >+++ b/kernel/ipipe/Kconfig.debug >@@ -0,0 +1,95 @@ >+config IPIPE_DEBUG >+ bool "I-pipe debugging" >+ depends on IPIPE >+ >+config IPIPE_DEBUG_CONTEXT >+ bool "Check for illicit cross-domain calls" >+ depends on IPIPE_DEBUG >+ default y >+ ---help--- >+ Enable this feature to arm checkpoints in the kernel that >+ verify the correct invocation context. On entry of critical >+ Linux services a warning is issued if the caller is not >+ running over the root domain. >+ >+config IPIPE_DEBUG_INTERNAL >+ bool "Enable internal debug checks" >+ depends on IPIPE_DEBUG >+ default y >+ ---help--- >+ When this feature is enabled, I-pipe will perform internal >+ consistency checks of its subsystems, e.g. on per-cpu variable >+ access. >+ >+config IPIPE_TRACE >+ bool "Latency tracing" >+ depends on IPIPE_DEBUG >+ select ARCH_WANT_FRAME_POINTERS >+ select FRAME_POINTER >+ select KALLSYMS >+ select PROC_FS >+ ---help--- >+ Activate this option if you want to use per-function tracing of >+ the kernel. The tracer will collect data via instrumentation >+ features like the one below or with the help of explicite calls >+ of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the >+ in-kernel tracing API. The collected data and runtime control >+ is available via /proc/ipipe/trace/*. >+ >+if IPIPE_TRACE >+ >+config IPIPE_TRACE_ENABLE >+ bool "Enable tracing on boot" >+ default y >+ ---help--- >+ Disable this option if you want to arm the tracer after booting >+ manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce >+ boot time on slow embedded devices due to the tracer overhead. >+ >+config IPIPE_TRACE_MCOUNT >+ bool "Instrument function entries" >+ default y >+ select FTRACE >+ select FUNCTION_TRACER >+ ---help--- >+ When enabled, records every kernel function entry in the tracer >+ log. While this slows down the system noticeably, it provides >+ the highest level of information about the flow of events. >+ However, it can be switch off in order to record only explicit >+ I-pipe trace points. >+ >+config IPIPE_TRACE_IRQSOFF >+ bool "Trace IRQs-off times" >+ default y >+ ---help--- >+ Activate this option if I-pipe shall trace the longest path >+ with hard-IRQs switched off. >+ >+config IPIPE_TRACE_SHIFT >+ int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)" >+ range 10 18 >+ default 14 >+ ---help--- >+ The number of trace points to hold tracing data for each >+ trace path, as a power of 2. >+ >+config IPIPE_TRACE_VMALLOC >+ bool "Use vmalloc'ed trace buffer" >+ default y if EMBEDDED >+ ---help--- >+ Instead of reserving static kernel data, the required buffer >+ is allocated via vmalloc during boot-up when this option is >+ enabled. This can help to start systems that are low on memory, >+ but it slightly degrades overall performance. Try this option >+ when a traced kernel hangs unexpectedly at boot time. 
>+ >+config IPIPE_TRACE_PANIC >+ bool "Enable panic back traces" >+ default y >+ ---help--- >+ Provides services to freeze and dump a back trace on panic >+ situations. This is used on IPIPE_DEBUG_CONTEXT exceptions >+ as well as ordinary kernel oopses. You can control the number >+ of printed back trace points via /proc/ipipe/trace. >+ >+endif >diff --git a/kernel/ipipe/Makefile b/kernel/ipipe/Makefile >new file mode 100644 >index 0000000..6257dfa >--- /dev/null >+++ b/kernel/ipipe/Makefile >@@ -0,0 +1,3 @@ >+ >+obj-$(CONFIG_IPIPE) += core.o >+obj-$(CONFIG_IPIPE_TRACE) += tracer.o >diff --git a/kernel/ipipe/core.c b/kernel/ipipe/core.c >new file mode 100644 >index 0000000..9ad7757 >--- /dev/null >+++ b/kernel/ipipe/core.c >@@ -0,0 +1,2195 @@ >+/* -*- linux-c -*- >+ * linux/kernel/ipipe/core.c >+ * >+ * Copyright (C) 2002-2005 Philippe Gerum. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. >+ * >+ * Architecture-independent I-PIPE core support. >+ */ >+ >+#include <linux/version.h> >+#include <linux/module.h> >+#include <linux/init.h> >+#include <linux/kernel.h> >+#include <linux/sched.h> >+#include <linux/sched.h> >+#include <linux/kallsyms.h> >+#include <linux/interrupt.h> >+#include <linux/bitops.h> >+#include <linux/tick.h> >+#ifdef CONFIG_PROC_FS >+#include <linux/proc_fs.h> >+#include <linux/seq_file.h> >+#endif /* CONFIG_PROC_FS */ >+#include <linux/ipipe_trace.h> >+#include <linux/ipipe_tickdev.h> >+#include <linux/irq.h> >+ >+static int __ipipe_ptd_key_count; >+ >+static unsigned long __ipipe_ptd_key_map; >+ >+static unsigned long __ipipe_domain_slot_map; >+ >+struct ipipe_domain ipipe_root; >+ >+#ifdef CONFIG_SMP >+ >+#define IPIPE_CRITICAL_TIMEOUT 1000000 >+ >+static cpumask_t __ipipe_cpu_sync_map; >+ >+static cpumask_t __ipipe_cpu_lock_map; >+ >+static cpumask_t __ipipe_cpu_pass_map; >+ >+static unsigned long __ipipe_critical_lock; >+ >+static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier); >+ >+static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); >+ >+static void (*__ipipe_cpu_sync) (void); >+ >+#else /* !CONFIG_SMP */ >+ >+/* >+ * Create an alias to the unique root status, so that arch-dep code >+ * may get simple and easy access to this percpu variable. We also >+ * create an array of pointers to the percpu domain data; this tends >+ * to produce a better code when reaching non-root domains. We make >+ * sure that the early boot code would be able to dereference the >+ * pointer to the root domain data safely by statically initializing >+ * its value (local_irq*() routines depend on this). 
>+ */ >+#if __GNUC__ >= 4 >+extern unsigned long __ipipe_root_status >+__attribute__((alias(__stringify(ipipe_percpu_darray)))); >+EXPORT_SYMBOL(__ipipe_root_status); >+#else /* __GNUC__ < 4 */ >+/* >+ * Work around a GCC 3.x issue making alias symbols unusable as >+ * constant initializers. >+ */ >+unsigned long *const __ipipe_root_status_addr = >+ &__raw_get_cpu_var(ipipe_percpu_darray)[IPIPE_ROOT_SLOT].status; >+EXPORT_SYMBOL(__ipipe_root_status_addr); >+#endif /* __GNUC__ < 4 */ >+ >+DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) = >+{ [IPIPE_ROOT_SLOT] = (struct ipipe_percpu_domain_data *)ipipe_percpu_darray }; >+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr); >+#endif /* !CONFIG_SMP */ >+ >+DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) = >+{ [IPIPE_ROOT_SLOT] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. */ >+ >+DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root }; >+ >+DEFINE_PER_CPU(unsigned long, ipipe_nmi_saved_root); /* Copy of root status during NMI */ >+ >+static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock); >+ >+LIST_HEAD(__ipipe_pipeline); >+ >+unsigned long __ipipe_virtual_irq_map; >+ >+#ifdef CONFIG_PRINTK >+unsigned __ipipe_printk_virq; >+#endif /* CONFIG_PRINTK */ >+ >+int __ipipe_event_monitors[IPIPE_NR_EVENTS]; >+ >+#ifdef CONFIG_GENERIC_CLOCKEVENTS >+ >+DECLARE_PER_CPU(struct tick_device, tick_cpu_device); >+ >+static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device); >+ >+int ipipe_request_tickdev(const char *devname, >+ void (*emumode)(enum clock_event_mode mode, >+ struct clock_event_device *cdev), >+ int (*emutick)(unsigned long delta, >+ struct clock_event_device *cdev), >+ int cpu, unsigned long *tmfreq) >+{ >+ struct ipipe_tick_device *itd; >+ struct tick_device *slave; >+ struct clock_event_device *evtdev; >+ unsigned long long freq; >+ unsigned long flags; >+ int status; >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ itd = &per_cpu(ipipe_tick_cpu_device, cpu); >+ >+ if (itd->slave != NULL) { >+ status = -EBUSY; >+ goto out; >+ } >+ >+ slave = &per_cpu(tick_cpu_device, cpu); >+ >+ if (strcmp(slave->evtdev->name, devname)) { >+ /* >+ * No conflict so far with the current tick device, >+ * check whether the requested device is sane and has >+ * been blessed by the kernel. >+ */ >+ status = __ipipe_check_tickdev(devname) ? >+ CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN; >+ goto out; >+ } >+ >+ /* >+ * Our caller asks for using the same clock event device for >+ * ticking than we do, let's create a tick emulation device to >+ * interpose on the set_next_event() method, so that we may >+ * both manage the device in oneshot mode. Only the tick >+ * emulation code will actually program the clockchip hardware >+ * for the next shot, though. >+ * >+ * CAUTION: we still have to grab the tick device even when it >+ * current runs in periodic mode, since the kernel may switch >+ * to oneshot dynamically (highres/no_hz tick mode). 
>+ */ >+ >+ evtdev = slave->evtdev; >+ status = evtdev->mode; >+ >+ if (status == CLOCK_EVT_MODE_SHUTDOWN) >+ goto out; >+ >+ itd->slave = slave; >+ itd->emul_set_mode = emumode; >+ itd->emul_set_tick = emutick; >+ itd->real_set_mode = evtdev->set_mode; >+ itd->real_set_tick = evtdev->set_next_event; >+ itd->real_max_delta_ns = evtdev->max_delta_ns; >+ itd->real_mult = evtdev->mult; >+ itd->real_shift = evtdev->shift; >+ freq = (1000000000ULL * evtdev->mult) >> evtdev->shift; >+ *tmfreq = (unsigned long)freq; >+ evtdev->set_mode = emumode; >+ evtdev->set_next_event = emutick; >+ evtdev->max_delta_ns = ULONG_MAX; >+ evtdev->mult = 1; >+ evtdev->shift = 0; >+out: >+ ipipe_critical_exit(flags); >+ >+ return status; >+} >+ >+void ipipe_release_tickdev(int cpu) >+{ >+ struct ipipe_tick_device *itd; >+ struct tick_device *slave; >+ struct clock_event_device *evtdev; >+ unsigned long flags; >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ itd = &per_cpu(ipipe_tick_cpu_device, cpu); >+ >+ if (itd->slave != NULL) { >+ slave = &per_cpu(tick_cpu_device, cpu); >+ evtdev = slave->evtdev; >+ evtdev->set_mode = itd->real_set_mode; >+ evtdev->set_next_event = itd->real_set_tick; >+ evtdev->max_delta_ns = itd->real_max_delta_ns; >+ evtdev->mult = itd->real_mult; >+ evtdev->shift = itd->real_shift; >+ itd->slave = NULL; >+ } >+ >+ ipipe_critical_exit(flags); >+} >+ >+#endif /* CONFIG_GENERIC_CLOCKEVENTS */ >+ >+void __init ipipe_init_early(void) >+{ >+ struct ipipe_domain *ipd = &ipipe_root; >+ >+ /* >+ * Do the early init stuff. At this point, the kernel does not >+ * provide much services yet: be careful. >+ */ >+ __ipipe_check_platform(); /* Do platform dependent checks first. */ >+ >+ /* >+ * A lightweight registration code for the root domain. We are >+ * running on the boot CPU, hw interrupts are off, and >+ * secondary CPUs are still lost in space. >+ */ >+ >+ /* Reserve percpu data slot #0 for the root domain. */ >+ ipd->slot = 0; >+ set_bit(0, &__ipipe_domain_slot_map); >+ >+ ipd->name = "Linux"; >+ ipd->domid = IPIPE_ROOT_ID; >+ ipd->priority = IPIPE_ROOT_PRIO; >+ >+ __ipipe_init_stage(ipd); >+ >+ list_add_tail(&ipd->p_link, &__ipipe_pipeline); >+ >+ __ipipe_init_platform(); >+ >+#ifdef CONFIG_PRINTK >+ __ipipe_printk_virq = ipipe_alloc_virq(); /* Cannot fail here. */ >+ ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk; >+ ipd->irqs[__ipipe_printk_virq].cookie = NULL; >+ ipd->irqs[__ipipe_printk_virq].acknowledge = NULL; >+ ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; >+#endif /* CONFIG_PRINTK */ >+} >+ >+void __init ipipe_init(void) >+{ >+ /* Now we may engage the pipeline. 
*/ >+ __ipipe_enable_pipeline(); >+ >+ printk(KERN_INFO "I-pipe %s: pipeline enabled.\n", >+ IPIPE_VERSION_STRING); >+} >+ >+void __ipipe_init_stage(struct ipipe_domain *ipd) >+{ >+ struct ipipe_percpu_domain_data *p; >+ unsigned long status; >+ int cpu, n; >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ status = p->status; >+ memset(p, 0, sizeof(*p)); >+ p->status = status; >+ } >+ >+ for (n = 0; n < IPIPE_NR_IRQS; n++) { >+ ipd->irqs[n].acknowledge = NULL; >+ ipd->irqs[n].handler = NULL; >+ ipd->irqs[n].control = IPIPE_PASS_MASK; /* Pass but don't handle */ >+ } >+ >+ for (n = 0; n < IPIPE_NR_EVENTS; n++) >+ ipd->evhand[n] = NULL; >+ >+ ipd->evself = 0LL; >+ mutex_init(&ipd->mutex); >+ >+ __ipipe_hook_critical_ipi(ipd); >+} >+ >+void __ipipe_cleanup_domain(struct ipipe_domain *ipd) >+{ >+ ipipe_unstall_pipeline_from(ipd); >+ >+#ifdef CONFIG_SMP >+ { >+ struct ipipe_percpu_domain_data *p; >+ int cpu; >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ while (__ipipe_ipending_p(p)) >+ cpu_relax(); >+ } >+ } >+#else >+ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = NULL; >+#endif >+ >+ clear_bit(ipd->slot, &__ipipe_domain_slot_map); >+} >+ >+void __ipipe_unstall_root(void) >+{ >+ struct ipipe_percpu_domain_data *p; >+ >+ local_irq_disable_hw(); >+ >+ /* This helps catching bad usage from assembly call sites. */ >+ ipipe_check_context(ipipe_root_domain); >+ >+ p = ipipe_root_cpudom_ptr(); >+ >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ >+ if (unlikely(__ipipe_ipending_p(p))) >+ __ipipe_sync_pipeline(); >+ >+ local_irq_enable_hw(); >+} >+ >+void __ipipe_restore_root(unsigned long x) >+{ >+ ipipe_check_context(ipipe_root_domain); >+ >+ if (x) >+ __ipipe_stall_root(); >+ else >+ __ipipe_unstall_root(); >+} >+ >+void ipipe_stall_pipeline_from(struct ipipe_domain *ipd) >+{ >+ unsigned long flags; >+ /* >+ * We have to prevent against race on updating the status >+ * variable _and_ CPU migration at the same time, so disable >+ * hw IRQs here. 
>+ */ >+ local_irq_save_hw(flags); >+ >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); >+ >+ if (!__ipipe_pipeline_head_p(ipd)) >+ local_irq_restore_hw(flags); >+} >+ >+unsigned long ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd) >+{ >+ unsigned long flags, x; >+ >+ /* See ipipe_stall_pipeline_from() */ >+ local_irq_save_hw(flags); >+ >+ x = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); >+ >+ if (!__ipipe_pipeline_head_p(ipd)) >+ local_irq_restore_hw(flags); >+ >+ return x; >+} >+ >+unsigned long ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd) >+{ >+ unsigned long flags, x; >+ struct list_head *pos; >+ >+ local_irq_save_hw(flags); >+ >+ x = __test_and_clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); >+ >+ if (ipd == __ipipe_current_domain) >+ pos = &ipd->p_link; >+ else >+ pos = __ipipe_pipeline.next; >+ >+ __ipipe_walk_pipeline(pos); >+ >+ if (likely(__ipipe_pipeline_head_p(ipd))) >+ local_irq_enable_hw(); >+ else >+ local_irq_restore_hw(flags); >+ >+ return x; >+} >+ >+void ipipe_restore_pipeline_from(struct ipipe_domain *ipd, >+ unsigned long x) >+{ >+ if (x) >+ ipipe_stall_pipeline_from(ipd); >+ else >+ ipipe_unstall_pipeline_from(ipd); >+} >+ >+void ipipe_unstall_pipeline_head(void) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr(); >+ struct ipipe_domain *head_domain; >+ >+ local_irq_disable_hw(); >+ >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ >+ if (unlikely(__ipipe_ipending_p(p))) { >+ head_domain = __ipipe_pipeline_head(); >+ if (likely(head_domain == __ipipe_current_domain)) >+ __ipipe_sync_pipeline(); >+ else >+ __ipipe_walk_pipeline(&head_domain->p_link); >+ } >+ >+ local_irq_enable_hw(); >+} >+ >+void __ipipe_restore_pipeline_head(unsigned long x) /* hw interrupt off */ >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_head_cpudom_ptr(); >+ struct ipipe_domain *head_domain; >+ >+ if (x) { >+#ifdef CONFIG_DEBUG_KERNEL >+ static int warned; >+ if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &p->status)) { >+ /* >+ * Already stalled albeit ipipe_restore_pipeline_head() >+ * should have detected it? Send a warning once. 
>+ */ >+ local_irq_enable_hw(); >+ warned = 1; >+ printk(KERN_WARNING >+ "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n"); >+ dump_stack(); >+ local_irq_disable_hw(); >+ } >+#else /* !CONFIG_DEBUG_KERNEL */ >+ set_bit(IPIPE_STALL_FLAG, &p->status); >+#endif /* CONFIG_DEBUG_KERNEL */ >+ } >+ else { >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ if (unlikely(__ipipe_ipending_p(p))) { >+ head_domain = __ipipe_pipeline_head(); >+ if (likely(head_domain == __ipipe_current_domain)) >+ __ipipe_sync_pipeline(); >+ else >+ __ipipe_walk_pipeline(&head_domain->p_link); >+ } >+ local_irq_enable_hw(); >+ } >+} >+ >+void __ipipe_spin_lock_irq(ipipe_spinlock_t *lock) >+{ >+ local_irq_disable_hw(); >+ arch_spin_lock(&lock->arch_lock); >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+} >+ >+void __ipipe_spin_unlock_irq(ipipe_spinlock_t *lock) >+{ >+ arch_spin_unlock(&lock->arch_lock); >+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ local_irq_enable_hw(); >+} >+ >+unsigned long __ipipe_spin_lock_irqsave(ipipe_spinlock_t *lock) >+{ >+ unsigned long flags; >+ int s; >+ >+ local_irq_save_hw(flags); >+ arch_spin_lock(&lock->arch_lock); >+ s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ >+ return arch_mangle_irq_bits(s, flags); >+} >+ >+int __ipipe_spin_trylock_irqsave(ipipe_spinlock_t *lock, >+ unsigned long *x) >+{ >+ unsigned long flags; >+ int s; >+ >+ local_irq_save_hw(flags); >+ if (!arch_spin_trylock(&lock->arch_lock)) { >+ local_irq_restore_hw(flags); >+ return 0; >+ } >+ s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ *x = arch_mangle_irq_bits(s, flags); >+ >+ return 1; >+} >+ >+void __ipipe_spin_unlock_irqrestore(ipipe_spinlock_t *lock, >+ unsigned long x) >+{ >+ arch_spin_unlock(&lock->arch_lock); >+ if (!arch_demangle_irq_bits(&x)) >+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ local_irq_restore_hw(x); >+} >+ >+int __ipipe_spin_trylock_irq(ipipe_spinlock_t *lock) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ if (!arch_spin_trylock(&lock->arch_lock)) { >+ local_irq_restore_hw(flags); >+ return 0; >+ } >+ __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ >+ return 1; >+} >+ >+void __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) >+{ >+ arch_spin_unlock(&lock->arch_lock); >+} >+ >+void __ipipe_spin_unlock_irqcomplete(unsigned long x) >+{ >+ if (!arch_demangle_irq_bits(&x)) >+ __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); >+ local_irq_restore_hw(x); >+} >+ >+#ifdef __IPIPE_3LEVEL_IRQMAP >+ >+/* Must be called hw IRQs off. */ >+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, >+ unsigned int irq) >+{ >+ __set_bit(irq, p->irqheld_map); >+ p->irqall[irq]++; >+} >+ >+/* Must be called hw IRQs off. */ >+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned int irq) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd); >+ int l0b, l1b; >+ >+ IPIPE_WARN_ONCE(!irqs_disabled_hw()); >+ >+ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); >+ l1b = irq / BITS_PER_LONG; >+ >+ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { >+ __set_bit(irq, p->irqpend_lomap); >+ __set_bit(l1b, p->irqpend_mdmap); >+ __set_bit(l0b, &p->irqpend_himap); >+ } else >+ __set_bit(irq, p->irqheld_map); >+ >+ p->irqall[irq]++; >+} >+ >+/* Must be called hw IRQs off. 
*/ >+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned int irq) >+{ >+ struct ipipe_percpu_domain_data *p; >+ int l0b, l1b; >+ >+ IPIPE_WARN_ONCE(!irqs_disabled_hw()); >+ >+ /* Wired interrupts cannot be locked (it is useless). */ >+ if (test_bit(IPIPE_WIRED_FLAG, &ipd->irqs[irq].control) || >+ test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) >+ return; >+ >+ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); >+ l1b = irq / BITS_PER_LONG; >+ >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ if (__test_and_clear_bit(irq, p->irqpend_lomap)) { >+ __set_bit(irq, p->irqheld_map); >+ if (p->irqpend_lomap[l1b] == 0) { >+ __clear_bit(l1b, p->irqpend_mdmap); >+ if (p->irqpend_mdmap[l0b] == 0) >+ __clear_bit(l0b, &p->irqpend_himap); >+ } >+ } >+} >+ >+/* Must be called hw IRQs off. */ >+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned int irq) >+{ >+ struct ipipe_percpu_domain_data *p; >+ int l0b, l1b, cpu; >+ >+ IPIPE_WARN_ONCE(!irqs_disabled_hw()); >+ >+ if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG, >+ &ipd->irqs[irq].control))) >+ return; >+ >+ l0b = irq / (BITS_PER_LONG * BITS_PER_LONG); >+ l1b = irq / BITS_PER_LONG; >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ if (test_and_clear_bit(irq, p->irqheld_map)) { >+ /* We need atomic ops here: */ >+ set_bit(irq, p->irqpend_lomap); >+ set_bit(l1b, p->irqpend_mdmap); >+ set_bit(l0b, &p->irqpend_himap); >+ } >+ } >+} >+ >+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) >+{ >+ int l0b, l1b, l2b; >+ unsigned long l0m, l1m, l2m; >+ unsigned int irq; >+ >+ l0m = p->irqpend_himap; >+ if (unlikely(l0m == 0)) >+ return -1; >+ >+ l0b = __ipipe_ffnz(l0m); >+ l1m = p->irqpend_mdmap[l0b]; >+ if (unlikely(l1m == 0)) >+ return -1; >+ >+ l1b = __ipipe_ffnz(l1m) + l0b * BITS_PER_LONG; >+ l2m = p->irqpend_lomap[l1b]; >+ if (unlikely(l2m == 0)) >+ return -1; >+ >+ l2b = __ipipe_ffnz(l2m); >+ irq = l1b * BITS_PER_LONG + l2b; >+ >+ __clear_bit(irq, p->irqpend_lomap); >+ if (p->irqpend_lomap[l1b] == 0) { >+ __clear_bit(l1b, p->irqpend_mdmap); >+ if (p->irqpend_mdmap[l0b] == 0) >+ __clear_bit(l0b, &p->irqpend_himap); >+ } >+ >+ return irq; >+} >+ >+#else /* __IPIPE_2LEVEL_IRQMAP */ >+ >+/* Must be called hw IRQs off. */ >+static inline void __ipipe_set_irq_held(struct ipipe_percpu_domain_data *p, >+ unsigned int irq) >+{ >+ __set_bit(irq, p->irqheld_map); >+ p->irqall[irq]++; >+} >+ >+/* Must be called hw IRQs off. */ >+void __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(ipd); >+ int l0b = irq / BITS_PER_LONG; >+ >+ IPIPE_WARN_ONCE(!irqs_disabled_hw()); >+ >+ if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { >+ __set_bit(irq, p->irqpend_lomap); >+ __set_bit(l0b, &p->irqpend_himap); >+ } else >+ __set_bit(irq, p->irqheld_map); >+ >+ p->irqall[irq]++; >+} >+ >+/* Must be called hw IRQs off. */ >+void __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq) >+{ >+ struct ipipe_percpu_domain_data *p; >+ int l0b = irq / BITS_PER_LONG; >+ >+ IPIPE_WARN_ONCE(!irqs_disabled_hw()); >+ >+ /* Wired interrupts cannot be locked (it is useless). 
*/ >+ if (test_bit(IPIPE_WIRED_FLAG, &ipd->irqs[irq].control) || >+ test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) >+ return; >+ >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ if (__test_and_clear_bit(irq, p->irqpend_lomap)) { >+ __set_bit(irq, p->irqheld_map); >+ if (p->irqpend_lomap[l0b] == 0) >+ __clear_bit(l0b, &p->irqpend_himap); >+ } >+} >+ >+/* Must be called hw IRQs off. */ >+void __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq) >+{ >+ struct ipipe_percpu_domain_data *p; >+ int l0b = irq / BITS_PER_LONG, cpu; >+ >+ IPIPE_WARN_ONCE(!irqs_disabled_hw()); >+ >+ if (unlikely(!test_and_clear_bit(IPIPE_LOCK_FLAG, >+ &ipd->irqs[irq].control))) >+ return; >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ if (test_and_clear_bit(irq, p->irqheld_map)) { >+ /* We need atomic ops here: */ >+ set_bit(irq, p->irqpend_lomap); >+ set_bit(l0b, &p->irqpend_himap); >+ } >+ } >+} >+ >+static inline int __ipipe_next_irq(struct ipipe_percpu_domain_data *p) >+{ >+ unsigned long l0m, l1m; >+ int l0b, l1b; >+ >+ l0m = p->irqpend_himap; >+ if (unlikely(l0m == 0)) >+ return -1; >+ >+ l0b = __ipipe_ffnz(l0m); >+ l1m = p->irqpend_lomap[l0b]; >+ if (unlikely(l1m == 0)) >+ return -1; >+ >+ l1b = __ipipe_ffnz(l1m); >+ __clear_bit(l1b, &p->irqpend_lomap[l0b]); >+ if (p->irqpend_lomap[l0b] == 0) >+ __clear_bit(l0b, &p->irqpend_himap); >+ >+ return l0b * BITS_PER_LONG + l1b; >+} >+ >+#endif /* __IPIPE_2LEVEL_IRQMAP */ >+ >+/* >+ * __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must >+ * be called with local hw interrupts disabled. >+ */ >+void __ipipe_walk_pipeline(struct list_head *pos) >+{ >+ struct ipipe_domain *this_domain = __ipipe_current_domain, *next_domain; >+ struct ipipe_percpu_domain_data *p, *np; >+ >+ p = ipipe_cpudom_ptr(this_domain); >+ >+ while (pos != &__ipipe_pipeline) { >+ >+ next_domain = list_entry(pos, struct ipipe_domain, p_link); >+ np = ipipe_cpudom_ptr(next_domain); >+ >+ if (test_bit(IPIPE_STALL_FLAG, &np->status)) >+ break; /* Stalled stage -- do not go further. */ >+ >+ if (__ipipe_ipending_p(np)) { >+ if (next_domain == this_domain) >+ __ipipe_sync_pipeline(); >+ else { >+ >+ p->evsync = 0; >+ __ipipe_current_domain = next_domain; >+ ipipe_suspend_domain(); /* Sync stage and propagate interrupts. */ >+ >+ if (__ipipe_current_domain == next_domain) >+ __ipipe_current_domain = this_domain; >+ /* >+ * Otherwise, something changed the current domain under our >+ * feet recycling the register set; do not override the new >+ * domain. >+ */ >+ >+ if (__ipipe_ipending_p(p) && >+ !test_bit(IPIPE_STALL_FLAG, &p->status)) >+ __ipipe_sync_pipeline(); >+ } >+ break; >+ } else if (next_domain == this_domain) >+ break; >+ >+ pos = next_domain->p_link.next; >+ } >+} >+ >+/* >+ * ipipe_suspend_domain() -- Suspend the current domain, switching to >+ * the next one which has pending work down the pipeline. 
>+ */ >+void ipipe_suspend_domain(void) >+{ >+ struct ipipe_domain *this_domain, *next_domain; >+ struct ipipe_percpu_domain_data *p; >+ struct list_head *ln; >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ >+ this_domain = next_domain = __ipipe_current_domain; >+ p = ipipe_cpudom_ptr(this_domain); >+ p->status &= ~IPIPE_STALL_MASK; >+ >+ if (__ipipe_ipending_p(p)) >+ goto sync_stage; >+ >+ for (;;) { >+ ln = next_domain->p_link.next; >+ >+ if (ln == &__ipipe_pipeline) >+ break; >+ >+ next_domain = list_entry(ln, struct ipipe_domain, p_link); >+ p = ipipe_cpudom_ptr(next_domain); >+ >+ if (p->status & IPIPE_STALL_MASK) >+ break; >+ >+ if (!__ipipe_ipending_p(p)) >+ continue; >+ >+ __ipipe_current_domain = next_domain; >+sync_stage: >+ __ipipe_sync_pipeline(); >+ >+ if (__ipipe_current_domain != next_domain) >+ /* >+ * Something has changed the current domain under our >+ * feet, recycling the register set; take note. >+ */ >+ this_domain = __ipipe_current_domain; >+ } >+ >+ __ipipe_current_domain = this_domain; >+ >+ local_irq_restore_hw(flags); >+} >+ >+ >+/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt. >+ * Virtual interrupts are handled in exactly the same way than their >+ * hw-generated counterparts wrt pipelining. >+ */ >+unsigned ipipe_alloc_virq(void) >+{ >+ unsigned long flags, irq = 0; >+ int ipos; >+ >+ spin_lock_irqsave(&__ipipe_pipelock, flags); >+ >+ if (__ipipe_virtual_irq_map != ~0) { >+ ipos = ffz(__ipipe_virtual_irq_map); >+ set_bit(ipos, &__ipipe_virtual_irq_map); >+ irq = ipos + IPIPE_VIRQ_BASE; >+ } >+ >+ spin_unlock_irqrestore(&__ipipe_pipelock, flags); >+ >+ return irq; >+} >+ >+/* >+ * ipipe_virtualize_irq() -- Set a per-domain pipelined interrupt >+ * handler. >+ */ >+int ipipe_virtualize_irq(struct ipipe_domain *ipd, >+ unsigned int irq, >+ ipipe_irq_handler_t handler, >+ void *cookie, >+ ipipe_irq_ackfn_t acknowledge, >+ unsigned modemask) >+{ >+ ipipe_irq_handler_t old_handler; >+ struct irq_desc *desc; >+ unsigned long flags; >+ int ret = 0; >+ >+ if (irq >= IPIPE_NR_IRQS) >+ return -EINVAL; >+ >+ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) >+ return -EPERM; >+ >+ if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags)) >+ /* Silently unwire interrupts for non-heading domains. */ >+ modemask &= ~IPIPE_WIRED_MASK; >+ >+ spin_lock_irqsave(&__ipipe_pipelock, flags); >+ >+ old_handler = ipd->irqs[irq].handler; >+ >+ if (handler == NULL) { >+ modemask &= >+ ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK | >+ IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK); >+ >+ ipd->irqs[irq].handler = NULL; >+ ipd->irqs[irq].cookie = NULL; >+ ipd->irqs[irq].acknowledge = NULL; >+ ipd->irqs[irq].control = modemask; >+ >+ if (irq < NR_IRQS && !ipipe_virtual_irq_p(irq)) { >+ desc = irq_to_desc(irq); >+ if (old_handler && desc) >+ __ipipe_disable_irqdesc(ipd, irq); >+ } >+ >+ goto unlock_and_exit; >+ } >+ >+ if (handler == IPIPE_SAME_HANDLER) { >+ cookie = ipd->irqs[irq].cookie; >+ handler = old_handler; >+ if (handler == NULL) { >+ ret = -EINVAL; >+ goto unlock_and_exit; >+ } >+ } else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 && old_handler) { >+ ret = -EBUSY; >+ goto unlock_and_exit; >+ } >+ >+ /* >+ * Wired interrupts can only be delivered to domains always >+ * heading the pipeline, and using dynamic propagation. 
>+ */ >+ if ((modemask & IPIPE_WIRED_MASK) != 0) { >+ if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) { >+ ret = -EINVAL; >+ goto unlock_and_exit; >+ } >+ modemask |= IPIPE_HANDLE_MASK; >+ } >+ >+ if ((modemask & IPIPE_STICKY_MASK) != 0) >+ modemask |= IPIPE_HANDLE_MASK; >+ >+ if (acknowledge == NULL) >+ /* >+ * Acknowledge handler unspecified for a hw interrupt: >+ * use the Linux-defined handler instead. >+ */ >+ acknowledge = ipipe_root_domain->irqs[irq].acknowledge; >+ >+ ipd->irqs[irq].handler = handler; >+ ipd->irqs[irq].cookie = cookie; >+ ipd->irqs[irq].acknowledge = acknowledge; >+ ipd->irqs[irq].control = modemask; >+ >+ desc = irq_to_desc(irq); >+ if (desc == NULL) >+ goto unlock_and_exit; >+ >+ if (irq < NR_IRQS && !ipipe_virtual_irq_p(irq)) { >+ __ipipe_enable_irqdesc(ipd, irq); >+ /* >+ * IRQ enable/disable state is domain-sensitive, so we >+ * may not change it for another domain. What is >+ * allowed however is forcing some domain to handle an >+ * interrupt source, by passing the proper 'ipd' >+ * descriptor which thus may be different from >+ * __ipipe_current_domain. >+ */ >+ if ((modemask & IPIPE_ENABLE_MASK) != 0) { >+ if (ipd != __ipipe_current_domain) >+ ret = -EPERM; >+ else >+ __ipipe_enable_irq(irq); >+ } >+ } >+ >+unlock_and_exit: >+ >+ spin_unlock_irqrestore(&__ipipe_pipelock, flags); >+ >+ return ret; >+} >+ >+/* ipipe_control_irq() -- Change control mode of a pipelined interrupt. */ >+ >+int ipipe_control_irq(struct ipipe_domain *ipd, unsigned int irq, >+ unsigned clrmask, unsigned setmask) >+{ >+ unsigned long flags; >+ int ret = 0; >+ >+ if (irq >= IPIPE_NR_IRQS) >+ return -EINVAL; >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) { >+ ret = -EPERM; >+ goto out; >+ } >+ >+ if (ipd->irqs[irq].handler == NULL) >+ setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); >+ >+ if ((setmask & IPIPE_STICKY_MASK) != 0) >+ setmask |= IPIPE_HANDLE_MASK; >+ >+ if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0) /* If one goes, both go. */ >+ clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); >+ >+ ipd->irqs[irq].control &= ~clrmask; >+ ipd->irqs[irq].control |= setmask; >+ >+ if ((setmask & IPIPE_ENABLE_MASK) != 0) >+ __ipipe_enable_irq(irq); >+ else if ((clrmask & IPIPE_ENABLE_MASK) != 0) >+ __ipipe_disable_irq(irq); >+ >+out: >+ ipipe_critical_exit(flags); >+ >+ return ret; >+} >+ >+/* __ipipe_dispatch_event() -- Low-level event dispatcher. */ >+ >+int __ipipe_dispatch_event (unsigned event, void *data) >+{ >+extern void *ipipe_irq_handler; void *handler; if (ipipe_irq_handler != __ipipe_handle_irq && (handler = ipipe_root_domain->evhand[event])) { return ((int (*)(unsigned long, void *))handler)(event, data); } else { >+ struct ipipe_domain *start_domain, *this_domain, *next_domain; >+ struct ipipe_percpu_domain_data *np; >+ ipipe_event_handler_t evhand; >+ struct list_head *pos, *npos; >+ unsigned long flags; >+ int propagate = 1; >+ >+ local_irq_save_hw(flags); >+ >+ start_domain = this_domain = __ipipe_current_domain; >+ >+ list_for_each_safe(pos, npos, &__ipipe_pipeline) { >+ /* >+ * Note: Domain migration may occur while running >+ * event or interrupt handlers, in which case the >+ * current register set is going to be recycled for a >+ * different domain than the initiating one. We do >+ * care for that, always tracking the current domain >+ * descriptor upon return from those handlers. 
>+ */ >+ next_domain = list_entry(pos, struct ipipe_domain, p_link); >+ np = ipipe_cpudom_ptr(next_domain); >+ >+ /* >+ * Keep a cached copy of the handler's address since >+ * ipipe_catch_event() may clear it under our feet. >+ */ >+ evhand = next_domain->evhand[event]; >+ >+ if (evhand != NULL) { >+ __ipipe_current_domain = next_domain; >+ np->evsync |= (1LL << event); >+ local_irq_restore_hw(flags); >+ propagate = !evhand(event, start_domain, data); >+ local_irq_save_hw(flags); >+ /* >+ * We may have a migration issue here, if the >+ * current task is migrated to another CPU on >+ * behalf of the invoked handler, usually when >+ * a syscall event is processed. However, >+ * ipipe_catch_event() will make sure that a >+ * CPU that clears a handler for any given >+ * event will not attempt to wait for itself >+ * to clear the evsync bit for that event, >+ * which practically plugs the hole, without >+ * resorting to a much more complex strategy. >+ */ >+ np->evsync &= ~(1LL << event); >+ if (__ipipe_current_domain != next_domain) >+ this_domain = __ipipe_current_domain; >+ } >+ >+ /* NEVER sync the root stage here. */ >+ if (next_domain != ipipe_root_domain && >+ __ipipe_ipending_p(np) && >+ !test_bit(IPIPE_STALL_FLAG, &np->status)) { >+ __ipipe_current_domain = next_domain; >+ __ipipe_sync_pipeline(); >+ if (__ipipe_current_domain != next_domain) >+ this_domain = __ipipe_current_domain; >+ } >+ >+ __ipipe_current_domain = this_domain; >+ >+ if (next_domain == this_domain || !propagate) >+ break; >+ } >+ >+ local_irq_restore_hw(flags); >+ >+ return !propagate; >+} } >+ >+/* >+ * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired >+ * interrupts are immediately and unconditionally delivered to the >+ * domain heading the pipeline upon receipt, and such domain must have >+ * been registered as an invariant head for the system (priority == >+ * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is >+ * to get an extra-fast dispatching path for those IRQs, by relying on >+ * a straightforward logic based on assumptions that must always be >+ * true for invariant head domains. The following assumptions are >+ * made when dealing with such interrupts: >+ * >+ * 1- Wired interrupts are purely dynamic, i.e. the decision to >+ * propagate them down the pipeline must be done from the head domain >+ * ISR. >+ * 2- Wired interrupts cannot be shared or sticky. >+ * 3- The root domain cannot be an invariant pipeline head, in >+ * consequence of what the root domain cannot handle wired >+ * interrupts. >+ * 4- Wired interrupts must have a valid acknowledge handler for the >+ * head domain (if needed, see __ipipe_handle_irq). >+ * >+ * Called with hw interrupts off. >+ */ >+ >+void __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq) >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head); >+ >+ if (test_bit(IPIPE_STALL_FLAG, &p->status)) { >+ __ipipe_set_irq_pending(head, irq); >+ return; >+ } >+ >+ __ipipe_dispatch_wired_nocheck(head, irq); >+} >+ >+void __ipipe_dispatch_wired_nocheck(struct ipipe_domain *head, unsigned irq) /* hw interrupts off */ >+{ >+ struct ipipe_percpu_domain_data *p = ipipe_cpudom_ptr(head); >+ struct ipipe_domain *old; >+ >+ old = __ipipe_current_domain; >+ __ipipe_current_domain = head; /* Switch to the head domain. */ >+ >+ p->irqall[irq]++; >+ __set_bit(IPIPE_STALL_FLAG, &p->status); >+ barrier(); >+ head->irqs[irq].handler(irq, head->irqs[irq].cookie); /* Call the ISR. 
*/ >+ __ipipe_run_irqtail(irq); >+ barrier(); >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+ >+ if (__ipipe_current_domain == head) { >+ __ipipe_current_domain = old; >+ if (old == head) { >+ if (__ipipe_ipending_p(p)) >+ __ipipe_sync_pipeline(); >+ return; >+ } >+ } >+ >+ __ipipe_walk_pipeline(&head->p_link); >+} >+ >+#ifdef CONFIG_TRACE_IRQFLAGS >+#define root_stall_after_handler() local_irq_disable() >+#else >+#define root_stall_after_handler() do { } while (0) >+#endif >+ >+#ifdef CONFIG_PREEMPT >+ >+asmlinkage void preempt_schedule_irq(void); >+ >+void __ipipe_preempt_schedule_irq(void) >+{ >+ struct ipipe_percpu_domain_data *p; >+ unsigned long flags; >+ >+ BUG_ON(!irqs_disabled_hw()); >+ local_irq_save(flags); >+ local_irq_enable_hw(); >+ preempt_schedule_irq(); /* Ok, may reschedule now. */ >+ local_irq_disable_hw(); >+ >+ /* >+ * Flush any pending interrupt that may have been logged after >+ * preempt_schedule_irq() stalled the root stage before >+ * returning to us, and now. >+ */ >+ p = ipipe_root_cpudom_ptr(); >+ if (unlikely(__ipipe_ipending_p(p))) { >+ add_preempt_count(PREEMPT_ACTIVE); >+ trace_hardirqs_on(); >+ clear_bit(IPIPE_STALL_FLAG, &p->status); >+ __ipipe_sync_pipeline(); >+ sub_preempt_count(PREEMPT_ACTIVE); >+ } >+ >+ __local_irq_restore_nosync(flags); >+} >+ >+#else /* !CONFIG_PREEMPT */ >+ >+#define __ipipe_preempt_schedule_irq() do { } while (0) >+ >+#endif /* !CONFIG_PREEMPT */ >+ >+/* >+ * __ipipe_sync_stage() -- Flush the pending IRQs for the current >+ * domain (and processor). This routine flushes the interrupt log >+ * (see "Optimistic interrupt protection" from D. Stodolsky et al. for >+ * more on the deferred interrupt scheme). Every interrupt that >+ * occurred while the pipeline was stalled gets played. WARNING: >+ * callers on SMP boxen should always check for CPU migration on >+ * return of this routine. >+ * >+ * This routine must be called with hw interrupts off. >+ */ >+void __ipipe_sync_stage(void) >+{ >+ struct ipipe_percpu_domain_data *p; >+ struct ipipe_domain *ipd; >+ int irq; >+ >+ ipd = __ipipe_current_domain; >+ p = ipipe_cpudom_ptr(ipd); >+ >+ __set_bit(IPIPE_STALL_FLAG, &p->status); >+ smp_wmb(); >+ >+ if (ipd == ipipe_root_domain) >+ trace_hardirqs_off(); >+ >+ for (;;) { >+ irq = __ipipe_next_irq(p); >+ if (irq < 0) >+ break; >+ /* >+ * Make sure the compiler does not reorder wrongly, so >+ * that all updates to maps are done before the >+ * handler gets called. >+ */ >+ barrier(); >+ >+ if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) >+ continue; >+ >+ if (!__ipipe_pipeline_head_p(ipd)) >+ local_irq_enable_hw(); >+ >+ if (likely(ipd != ipipe_root_domain)) { >+ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); >+ __ipipe_run_irqtail(irq); >+ local_irq_disable_hw(); >+ } else if (ipipe_virtual_irq_p(irq)) { >+ irq_enter(); >+ ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); >+ irq_exit(); >+ local_irq_disable_hw(); >+ root_stall_after_handler(); >+ while (__ipipe_check_root_resched()) >+ __ipipe_preempt_schedule_irq(); >+ } else { >+ __ipipe_do_root_xirq(ipd, irq); >+ local_irq_disable_hw(); >+ root_stall_after_handler(); >+ } >+ >+ p = ipipe_cpudom_ptr(__ipipe_current_domain); >+ } >+ >+ if (ipd == ipipe_root_domain) >+ trace_hardirqs_on(); >+ >+ __clear_bit(IPIPE_STALL_FLAG, &p->status); >+} >+ >+/* ipipe_register_domain() -- Link a new domain to the pipeline. 
*/ >+ >+int ipipe_register_domain(struct ipipe_domain *ipd, >+ struct ipipe_domain_attr *attr) >+{ >+ struct ipipe_percpu_domain_data *p; >+ struct list_head *pos = NULL; >+ struct ipipe_domain *_ipd; >+ unsigned long flags; >+ >+ if (!ipipe_root_domain_p) { >+ printk(KERN_WARNING >+ "I-pipe: Only the root domain may register a new domain.\n"); >+ return -EPERM; >+ } >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ if (attr->priority == IPIPE_HEAD_PRIORITY) { >+ if (test_bit(IPIPE_HEAD_SLOT, &__ipipe_domain_slot_map)) { >+ ipipe_critical_exit(flags); >+ return -EAGAIN; /* Cannot override current head. */ >+ } >+ ipd->slot = IPIPE_HEAD_SLOT; >+ } else >+ ipd->slot = ffz(__ipipe_domain_slot_map); >+ >+ if (ipd->slot < CONFIG_IPIPE_DOMAINS) { >+ set_bit(ipd->slot, &__ipipe_domain_slot_map); >+ list_for_each(pos, &__ipipe_pipeline) { >+ _ipd = list_entry(pos, struct ipipe_domain, p_link); >+ if (_ipd->domid == attr->domid) >+ break; >+ } >+ } >+ >+ ipipe_critical_exit(flags); >+ >+ if (pos != &__ipipe_pipeline) { >+ if (ipd->slot < CONFIG_IPIPE_DOMAINS) >+ clear_bit(ipd->slot, &__ipipe_domain_slot_map); >+ return -EBUSY; >+ } >+ >+#ifndef CONFIG_SMP >+ /* >+ * Set up the perdomain pointers for direct access to the >+ * percpu domain data. This saves a costly multiply each time >+ * we need to refer to the contents of the percpu domain data >+ * array. >+ */ >+ __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = &__raw_get_cpu_var(ipipe_percpu_darray)[ipd->slot]; >+#endif >+ >+ ipd->name = attr->name; >+ ipd->domid = attr->domid; >+ ipd->pdd = attr->pdd; >+ ipd->flags = 0; >+ >+ if (attr->priority == IPIPE_HEAD_PRIORITY) { >+ ipd->priority = INT_MAX; >+ __set_bit(IPIPE_AHEAD_FLAG,&ipd->flags); >+ } >+ else >+ ipd->priority = attr->priority; >+ >+ __ipipe_init_stage(ipd); >+ >+ INIT_LIST_HEAD(&ipd->p_link); >+ >+#ifdef CONFIG_PROC_FS >+ __ipipe_add_domain_proc(ipd); >+#endif /* CONFIG_PROC_FS */ >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ list_for_each(pos, &__ipipe_pipeline) { >+ _ipd = list_entry(pos, struct ipipe_domain, p_link); >+ if (ipd->priority > _ipd->priority) >+ break; >+ } >+ >+ list_add_tail(&ipd->p_link, pos); >+ >+ ipipe_critical_exit(flags); >+ >+ printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name); >+ >+ if (attr->entry == NULL) >+ return 0; >+ >+ /* >+ * Finally, allow the new domain to perform its initialization >+ * duties. >+ */ >+ local_irq_save_hw_smp(flags); >+ __ipipe_current_domain = ipd; >+ local_irq_restore_hw_smp(flags); >+ attr->entry(); >+ local_irq_save_hw(flags); >+ __ipipe_current_domain = ipipe_root_domain; >+ p = ipipe_root_cpudom_ptr(); >+ >+ if (__ipipe_ipending_p(p) && >+ !test_bit(IPIPE_STALL_FLAG, &p->status)) >+ __ipipe_sync_pipeline(); >+ >+ local_irq_restore_hw(flags); >+ >+ return 0; >+} >+ >+/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */ >+ >+int ipipe_unregister_domain(struct ipipe_domain *ipd) >+{ >+ unsigned long flags; >+ >+ if (!ipipe_root_domain_p) { >+ printk(KERN_WARNING >+ "I-pipe: Only the root domain may unregister a domain.\n"); >+ return -EPERM; >+ } >+ >+ if (ipd == ipipe_root_domain) { >+ printk(KERN_WARNING >+ "I-pipe: Cannot unregister the root domain.\n"); >+ return -EPERM; >+ } >+#ifdef CONFIG_SMP >+ { >+ struct ipipe_percpu_domain_data *p; >+ unsigned int irq; >+ int cpu; >+ >+ /* >+ * In the SMP case, wait for the logged events to drain on >+ * other processors before eventually removing the domain >+ * from the pipeline. 
>+ */ >+ >+ ipipe_unstall_pipeline_from(ipd); >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { >+ clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control); >+ clear_bit(IPIPE_WIRED_FLAG, &ipd->irqs[irq].control); >+ clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control); >+ set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control); >+ } >+ >+ ipipe_critical_exit(flags); >+ >+ for_each_online_cpu(cpu) { >+ p = ipipe_percpudom_ptr(ipd, cpu); >+ while (__ipipe_ipending_p(p)) >+ cpu_relax(); >+ } >+ } >+#endif /* CONFIG_SMP */ >+ >+ mutex_lock(&ipd->mutex); >+ >+#ifdef CONFIG_PROC_FS >+ __ipipe_remove_domain_proc(ipd); >+#endif /* CONFIG_PROC_FS */ >+ >+ /* >+ * Simply remove the domain from the pipeline and we are almost done. >+ */ >+ >+ flags = ipipe_critical_enter(NULL); >+ list_del_init(&ipd->p_link); >+ ipipe_critical_exit(flags); >+ >+ __ipipe_cleanup_domain(ipd); >+ >+ mutex_unlock(&ipd->mutex); >+ >+ printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name); >+ >+ return 0; >+} >+ >+/* >+ * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of >+ * a running interrupt handler to the next domain down the pipeline. >+ * ipipe_schedule_irq() -- Does almost the same as above, but attempts >+ * to pend the interrupt for the current domain first. >+ * Must be called hw IRQs off. >+ */ >+void __ipipe_pend_irq(unsigned irq, struct list_head *head) >+{ >+ struct ipipe_domain *ipd; >+ struct list_head *ln; >+ >+#ifdef CONFIG_IPIPE_DEBUG >+ BUG_ON(irq >= IPIPE_NR_IRQS || >+ (ipipe_virtual_irq_p(irq) >+ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))); >+#endif >+ for (ln = head; ln != &__ipipe_pipeline; ln = ipd->p_link.next) { >+ ipd = list_entry(ln, struct ipipe_domain, p_link); >+ if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) { >+ __ipipe_set_irq_pending(ipd, irq); >+ return; >+ } >+ } >+} >+ >+/* ipipe_free_virq() -- Release a virtual/soft interrupt. */ >+ >+int ipipe_free_virq(unsigned virq) >+{ >+ if (!ipipe_virtual_irq_p(virq)) >+ return -EINVAL; >+ >+ clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); >+ >+ return 0; >+} >+ >+void ipipe_init_attr(struct ipipe_domain_attr *attr) >+{ >+ attr->name = "anon"; >+ attr->domid = 1; >+ attr->entry = NULL; >+ attr->priority = IPIPE_ROOT_PRIO; >+ attr->pdd = NULL; >+} >+ >+/* >+ * ipipe_catch_event() -- Interpose or remove an event handler for a >+ * given domain. 
>+ */ >+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, >+ unsigned event, >+ ipipe_event_handler_t handler) >+{ >+ ipipe_event_handler_t old_handler; >+ unsigned long flags; >+ int self = 0, cpu; >+ >+ if (event & IPIPE_EVENT_SELF) { >+ event &= ~IPIPE_EVENT_SELF; >+ self = 1; >+ } >+ >+ if (event >= IPIPE_NR_EVENTS) >+ return NULL; >+ >+ flags = ipipe_critical_enter(NULL); >+ >+ if (!(old_handler = xchg(&ipd->evhand[event],handler))) { >+ if (handler) { >+ if (self) >+ ipd->evself |= (1LL << event); >+ else >+ __ipipe_event_monitors[event]++; >+ } >+ } >+ else if (!handler) { >+ if (ipd->evself & (1LL << event)) >+ ipd->evself &= ~(1LL << event); >+ else >+ __ipipe_event_monitors[event]--; >+ } else if ((ipd->evself & (1LL << event)) && !self) { >+ __ipipe_event_monitors[event]++; >+ ipd->evself &= ~(1LL << event); >+ } else if (!(ipd->evself & (1LL << event)) && self) { >+ __ipipe_event_monitors[event]--; >+ ipd->evself |= (1LL << event); >+ } >+ >+ ipipe_critical_exit(flags); >+ >+ if (!handler && ipipe_root_domain_p) { >+ /* >+ * If we cleared a handler on behalf of the root >+ * domain, we have to wait for any current invocation >+ * to drain, since our caller might subsequently unmap >+ * the target domain. To this aim, this code >+ * synchronizes with __ipipe_dispatch_event(), >+ * guaranteeing that either the dispatcher sees a null >+ * handler in which case it discards the invocation >+ * (which also prevents from entering a livelock), or >+ * finds a valid handler and calls it. Symmetrically, >+ * ipipe_catch_event() ensures that the called code >+ * won't be unmapped under our feet until the event >+ * synchronization flag is cleared for the given event >+ * on all CPUs. >+ */ >+ preempt_disable(); >+ cpu = smp_processor_id(); >+ /* >+ * Hack: this solves the potential migration issue >+ * raised in __ipipe_dispatch_event(). This is a >+ * work-around which makes the assumption that other >+ * CPUs will subsequently, either process at least one >+ * interrupt for the target domain, or call >+ * __ipipe_dispatch_event() without going through a >+ * migration while running the handler at least once; >+ * practically, this is safe on any normally running >+ * system. >+ */ >+ ipipe_percpudom(ipd, evsync, cpu) &= ~(1LL << event); >+ preempt_enable(); >+ >+ for_each_online_cpu(cpu) { >+ while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event)) >+ schedule_timeout_interruptible(HZ / 50); >+ } >+ } >+ >+ return old_handler; >+} >+ >+cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask) >+{ >+#ifdef CONFIG_SMP >+ if (irq >= NR_IRQS) // if (irq >= IPIPE_NR_XIRQS) >+ /* Allow changing affinity of external IRQs only. */ >+ return CPU_MASK_NONE; >+ >+ if (num_online_cpus() > 1) >+ return __ipipe_set_irq_affinity(irq,cpumask); >+#endif /* CONFIG_SMP */ >+ >+ return CPU_MASK_NONE; >+} >+ >+int ipipe_send_ipi (unsigned ipi, cpumask_t cpumask) >+ >+{ >+#ifdef CONFIG_SMP >+ if (!ipipe_ipi_p(ipi)) >+ return -EINVAL; >+ return __ipipe_send_ipi(ipi,cpumask); >+#else /* !CONFIG_SMP */ >+ return -EINVAL; >+#endif /* CONFIG_SMP */ >+} >+ >+#ifdef CONFIG_SMP >+ >+/* Always called with hw interrupts off. */ >+void __ipipe_do_critical_sync(unsigned irq, void *cookie) >+{ >+ int cpu = ipipe_processor_id(); >+ >+ cpu_set(cpu, __ipipe_cpu_sync_map); >+ >+ /* Now we are in sync with the lock requestor running on another >+ CPU. Enter a spinning wait until he releases the global >+ lock. */ >+ spin_lock(&__ipipe_cpu_barrier); >+ >+ /* Got it. Now get out. 
*/ >+ >+ if (__ipipe_cpu_sync) >+ /* Call the sync routine if any. */ >+ __ipipe_cpu_sync(); >+ >+ cpu_set(cpu, __ipipe_cpu_pass_map); >+ >+ spin_unlock(&__ipipe_cpu_barrier); >+ >+ cpu_clear(cpu, __ipipe_cpu_sync_map); >+} >+#endif /* CONFIG_SMP */ >+ >+/* >+ * ipipe_critical_enter() -- Grab the superlock excluding all CPUs but >+ * the current one from a critical section. This lock is used when we >+ * must enforce a global critical section for a single CPU in a >+ * possibly SMP system whichever context the CPUs are running. >+ */ >+unsigned long ipipe_critical_enter(void (*syncfn)(void)) >+{ >+ unsigned long flags; >+ >+ local_irq_save_hw(flags); >+ >+#ifdef CONFIG_SMP >+ if (num_online_cpus() > 1) { >+ int cpu = ipipe_processor_id(); >+ cpumask_t allbutself; >+ unsigned long loops; >+ >+ if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) { >+ while (test_and_set_bit(0, &__ipipe_critical_lock)) { >+ int n = 0; >+ >+ local_irq_enable_hw(); >+ >+ do { >+ cpu_relax(); >+ } while (++n < cpu); >+ >+ local_irq_disable_hw(); >+ } >+ >+restart: >+ spin_lock(&__ipipe_cpu_barrier); >+ >+ __ipipe_cpu_sync = syncfn; >+ >+ cpus_clear(__ipipe_cpu_pass_map); >+ cpu_set(cpu, __ipipe_cpu_pass_map); >+ >+ /* >+ * Send the sync IPI to all processors but the current >+ * one. >+ */ >+ cpus_andnot(allbutself, cpu_online_map, >+ __ipipe_cpu_pass_map); >+ __ipipe_send_ipi(IPIPE_CRITICAL_IPI, allbutself); >+ >+ loops = IPIPE_CRITICAL_TIMEOUT; >+ >+ while (!cpus_equal(__ipipe_cpu_sync_map, allbutself)) { >+ cpu_relax(); >+ >+ if (--loops == 0) { >+ /* >+ * We ran into a deadlock due to a >+ * contended rwlock. Cancel this round >+ * and retry. >+ */ >+ __ipipe_cpu_sync = NULL; >+ >+ spin_unlock(&__ipipe_cpu_barrier); >+ >+ /* >+ * Ensure all CPUs consumed the IPI to >+ * avoid running __ipipe_cpu_sync >+ * prematurely. This usually resolves >+ * the deadlock reason too. >+ */ >+ while (!cpus_equal(cpu_online_map, >+ __ipipe_cpu_pass_map)) >+ cpu_relax(); >+ >+ goto restart; >+ } >+ } >+ } >+ >+ atomic_inc(&__ipipe_critical_count); >+ } >+#endif /* CONFIG_SMP */ >+ >+ return flags; >+} >+ >+/* ipipe_critical_exit() -- Release the superlock. */ >+ >+void ipipe_critical_exit(unsigned long flags) >+{ >+#ifdef CONFIG_SMP >+ if (num_online_cpus() > 1 && >+ atomic_dec_and_test(&__ipipe_critical_count)) { >+ spin_unlock(&__ipipe_cpu_barrier); >+ >+ while (!cpus_empty(__ipipe_cpu_sync_map)) >+ cpu_relax(); >+ >+ cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map); >+ clear_bit(0, &__ipipe_critical_lock); >+ smp_mb__after_clear_bit(); >+ } >+#endif /* CONFIG_SMP */ >+ >+ local_irq_restore_hw(flags); >+} >+ >+#ifdef CONFIG_HAVE_IPIPE_HOSTRT >+/* >+ * NOTE: The architecture specific code must only call this function >+ * when a clocksource suitable for CLOCK_HOST_REALTIME is enabled. 
>+ */ >+void ipipe_update_hostrt(struct timespec *wall_time, struct clocksource *clock) >+{ >+ struct ipipe_hostrt_data hostrt_data; >+ >+ hostrt_data.live = 1; >+ hostrt_data.cycle_last = clock->cycle_last; >+ hostrt_data.mask = clock->mask; >+ hostrt_data.mult = clock->mult; >+ hostrt_data.shift = clock->shift; >+ hostrt_data.wall_time_sec = wall_time->tv_sec; >+ hostrt_data.wall_time_nsec = wall_time->tv_nsec; >+ hostrt_data.wall_to_monotonic = __get_wall_to_monotonic(); >+ >+ /* Note: The event receiver is responsible for providing >+ proper locking */ >+ if (__ipipe_event_monitored_p(IPIPE_EVENT_HOSTRT)) >+ __ipipe_dispatch_event(IPIPE_EVENT_HOSTRT, &hostrt_data); >+} >+#endif /* CONFIG_HAVE_IPIPE_HOSTRT */ >+ >+int ipipe_alloc_ptdkey (void) >+{ >+ unsigned long flags; >+ int key = -1; >+ >+ spin_lock_irqsave(&__ipipe_pipelock,flags); >+ >+ if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) { >+ key = ffz(__ipipe_ptd_key_map); >+ set_bit(key,&__ipipe_ptd_key_map); >+ __ipipe_ptd_key_count++; >+ } >+ >+ spin_unlock_irqrestore(&__ipipe_pipelock,flags); >+ >+ return key; >+} >+ >+int ipipe_free_ptdkey (int key) >+{ >+ unsigned long flags; >+ >+ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) >+ return -EINVAL; >+ >+ spin_lock_irqsave(&__ipipe_pipelock,flags); >+ >+ if (test_and_clear_bit(key,&__ipipe_ptd_key_map)) >+ __ipipe_ptd_key_count--; >+ >+ spin_unlock_irqrestore(&__ipipe_pipelock,flags); >+ >+ return 0; >+} >+ >+int ipipe_set_ptd (int key, void *value) >+ >+{ >+ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) >+ return -EINVAL; >+ >+ current->ptd[key] = value; >+ >+ return 0; >+} >+ >+void *ipipe_get_ptd (int key) >+ >+{ >+ if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) >+ return NULL; >+ >+ return current->ptd[key]; >+} >+ >+#ifdef CONFIG_PROC_FS >+ >+struct proc_dir_entry *ipipe_proc_root; >+ >+static int __ipipe_version_info_proc(char *page, >+ char **start, >+ off_t off, int count, int *eof, void *data) >+{ >+ int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING); >+ >+ len -= off; >+ >+ if (len <= off + count) >+ *eof = 1; >+ >+ *start = page + off; >+ >+ if(len > count) >+ len = count; >+ >+ if(len < 0) >+ len = 0; >+ >+ return len; >+} >+ >+static int __ipipe_common_info_show(struct seq_file *p, void *data) >+{ >+ struct ipipe_domain *ipd = (struct ipipe_domain *)p->private; >+ char handling, stickiness, lockbit, exclusive, virtuality; >+ >+ unsigned long ctlbits; >+ unsigned irq; >+ >+ seq_printf(p, " +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n"); >+ seq_printf(p, " |+---- Sticky\n"); >+ seq_printf(p, " ||+--- Locked\n"); >+ seq_printf(p, " |||+-- Exclusive\n"); >+ seq_printf(p, " ||||+- Virtual\n"); >+ seq_printf(p, "[IRQ] |||||\n"); >+ >+ mutex_lock(&ipd->mutex); >+ >+ for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { >+ /* Remember to protect against >+ * ipipe_virtual_irq/ipipe_control_irq if more fields >+ * get involved. */ >+ ctlbits = ipd->irqs[irq].control; >+ >+ if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) >+ /* >+ * There might be a hole between the last external >+ * IRQ and the first virtual one; skip it. >+ */ >+ continue; >+ >+ if (ipipe_virtual_irq_p(irq) >+ && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) >+ /* Non-allocated virtual IRQ; skip it. */ >+ continue; >+ >+ /* >+ * Statuses are as follows: >+ * o "accepted" means handled _and_ passed down the pipeline. >+ * o "grabbed" means handled, but the interrupt might be >+ * terminated _or_ passed down the pipeline depending on >+ * what the domain handler asks for to the I-pipe. 
>+ * o "wired" is basically the same as "grabbed", except that >+ * the interrupt is unconditionally delivered to an invariant >+ * pipeline head domain. >+ * o "passed" means unhandled by the domain but passed >+ * down the pipeline. >+ * o "discarded" means unhandled and _not_ passed down the >+ * pipeline. The interrupt merely disappears from the >+ * current domain down to the end of the pipeline. >+ */ >+ if (ctlbits & IPIPE_HANDLE_MASK) { >+ if (ctlbits & IPIPE_PASS_MASK) >+ handling = 'A'; >+ else if (ctlbits & IPIPE_WIRED_MASK) >+ handling = 'W'; >+ else >+ handling = 'G'; >+ } else if (ctlbits & IPIPE_PASS_MASK) >+ /* Do not output if no major action is taken. */ >+ continue; >+ else >+ handling = 'D'; >+ >+ if (ctlbits & IPIPE_STICKY_MASK) >+ stickiness = 'S'; >+ else >+ stickiness = '.'; >+ >+ if (ctlbits & IPIPE_LOCK_MASK) >+ lockbit = 'L'; >+ else >+ lockbit = '.'; >+ >+ if (ctlbits & IPIPE_EXCLUSIVE_MASK) >+ exclusive = 'X'; >+ else >+ exclusive = '.'; >+ >+ if (ipipe_virtual_irq_p(irq)) >+ virtuality = 'V'; >+ else >+ virtuality = '.'; >+ >+ seq_printf(p, " %3u: %c%c%c%c%c\n", >+ irq, handling, stickiness, lockbit, exclusive, virtuality); >+ } >+ >+ seq_printf(p, "[Domain info]\n"); >+ >+ seq_printf(p, "id=0x%.8x\n", ipd->domid); >+ >+ if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags)) >+ seq_printf(p, "priority=topmost\n"); >+ else >+ seq_printf(p, "priority=%d\n", ipd->priority); >+ >+ mutex_unlock(&ipd->mutex); >+ >+ return 0; >+} >+ >+static int __ipipe_common_info_open(struct inode *inode, struct file *file) >+{ >+ return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data); >+} >+ >+static struct file_operations __ipipe_info_proc_ops = { >+ .owner = THIS_MODULE, >+ .open = __ipipe_common_info_open, >+ .read = seq_read, >+ .llseek = seq_lseek, >+ .release = single_release, >+}; >+ >+void __ipipe_add_domain_proc(struct ipipe_domain *ipd) >+{ >+ struct proc_dir_entry *e = create_proc_entry(ipd->name, 0444, ipipe_proc_root); >+ if (e) { >+ e->proc_fops = &__ipipe_info_proc_ops; >+ e->data = (void*) ipd; >+ } >+} >+ >+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd) >+{ >+ remove_proc_entry(ipd->name,ipipe_proc_root); >+} >+ >+void __init ipipe_init_proc(void) >+{ >+ ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0); >+ create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL); >+ __ipipe_add_domain_proc(ipipe_root_domain); >+ >+ __ipipe_init_tracer(); >+} >+ >+#endif /* CONFIG_PROC_FS */ >+ >+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT >+ >+DEFINE_PER_CPU(int, ipipe_percpu_context_check) = { 1 }; >+DEFINE_PER_CPU(int, ipipe_saved_context_check_state); >+ >+void ipipe_check_context(struct ipipe_domain *border_domain) >+{ >+ struct ipipe_percpu_domain_data *p; >+ struct ipipe_domain *this_domain; >+ unsigned long flags; >+ int cpu; >+ >+ local_irq_save_hw_smp(flags); >+ >+ this_domain = __ipipe_current_domain; >+ p = ipipe_head_cpudom_ptr(); >+ if (likely(this_domain->priority <= border_domain->priority && >+ !test_bit(IPIPE_STALL_FLAG, &p->status))) { >+ local_irq_restore_hw_smp(flags); >+ return; >+ } >+ >+ cpu = ipipe_processor_id(); >+ if (!per_cpu(ipipe_percpu_context_check, cpu)) { >+ local_irq_restore_hw_smp(flags); >+ return; >+ } >+ >+ local_irq_restore_hw_smp(flags); >+ >+ ipipe_context_check_off(); >+ ipipe_trace_panic_freeze(); >+ ipipe_set_printk_sync(__ipipe_current_domain); >+ >+ if (this_domain->priority > border_domain->priority) >+ printk(KERN_ERR "I-pipe: Detected illicit call from domain " >+ "'%s'\n" >+ 
KERN_ERR " into a service reserved for domain " >+ "'%s' and below.\n", >+ this_domain->name, border_domain->name); >+ else >+ printk(KERN_ERR "I-pipe: Detected stalled topmost domain, " >+ "probably caused by a bug.\n" >+ " A critical section may have been " >+ "left unterminated.\n"); >+ dump_stack(); >+ ipipe_trace_panic_dump(); >+} >+ >+EXPORT_SYMBOL(ipipe_check_context); >+ >+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ >+ >+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) >+ >+int notrace __ipipe_check_percpu_access(void) >+{ >+ struct ipipe_percpu_domain_data *p; >+ struct ipipe_domain *this_domain; >+ unsigned long flags; >+ int ret = 0; >+ >+ local_irq_save_hw_notrace(flags); >+ >+ this_domain = __raw_get_cpu_var(ipipe_percpu_domain); >+ >+ /* >+ * Only the root domain may implement preemptive CPU migration >+ * of tasks, so anything above in the pipeline should be fine. >+ */ >+ if (this_domain->priority > IPIPE_ROOT_PRIO) >+ goto out; >+ >+ if (raw_irqs_disabled_flags(flags)) >+ goto out; >+ >+ /* >+ * Last chance: hw interrupts were enabled on entry while >+ * running over the root domain, but the root stage might be >+ * currently stalled, in which case preemption would be >+ * disabled, and no migration could occur. >+ */ >+ if (this_domain == ipipe_root_domain) { >+ p = ipipe_root_cpudom_ptr(); >+ if (test_bit(IPIPE_STALL_FLAG, &p->status)) >+ goto out; >+ } >+ /* >+ * Our caller may end up accessing the wrong per-cpu variable >+ * instance due to CPU migration; tell it to complain about >+ * this. >+ */ >+ ret = 1; >+out: >+ local_irq_restore_hw_notrace(flags); >+ >+ return ret; >+} >+ >+void __ipipe_spin_unlock_debug(unsigned long flags) >+{ >+ /* >+ * We catch a nasty issue where spin_unlock_irqrestore() on a >+ * regular kernel spinlock is about to re-enable hw interrupts >+ * in a section entered with hw irqs off. This is clearly the >+ * sign of a massive breakage coming. Usual suspect is a >+ * regular spinlock which was overlooked, used within a >+ * section which must run with hw irqs disabled. 
>+ */ >+ WARN_ON_ONCE(!raw_irqs_disabled_flags(flags) && irqs_disabled_hw()); >+} >+EXPORT_SYMBOL(__ipipe_spin_unlock_debug); >+ >+#endif /* CONFIG_IPIPE_DEBUG_INTERNAL && CONFIG_SMP */ >+ >+ >+void ipipe_prepare_panic(void) >+{ >+ ipipe_set_printk_sync(ipipe_current_domain); >+ ipipe_context_check_off(); >+} >+ >+EXPORT_SYMBOL_GPL(ipipe_prepare_panic); >+ >+EXPORT_SYMBOL(ipipe_virtualize_irq); >+EXPORT_SYMBOL(ipipe_control_irq); >+EXPORT_SYMBOL(ipipe_suspend_domain); >+EXPORT_SYMBOL(ipipe_alloc_virq); >+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain); >+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray); >+EXPORT_SYMBOL(ipipe_root); >+EXPORT_SYMBOL(ipipe_stall_pipeline_from); >+EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from); >+EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from); >+EXPORT_SYMBOL(ipipe_restore_pipeline_from); >+EXPORT_SYMBOL(ipipe_unstall_pipeline_head); >+EXPORT_SYMBOL(__ipipe_restore_pipeline_head); >+EXPORT_SYMBOL(__ipipe_unstall_root); >+EXPORT_SYMBOL(__ipipe_restore_root); >+EXPORT_SYMBOL(__ipipe_spin_lock_irq); >+EXPORT_SYMBOL(__ipipe_spin_unlock_irq); >+EXPORT_SYMBOL(__ipipe_spin_lock_irqsave); >+EXPORT_SYMBOL(__ipipe_spin_trylock_irq); >+EXPORT_SYMBOL(__ipipe_spin_trylock_irqsave); >+EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore); >+EXPORT_SYMBOL(__ipipe_pipeline); >+EXPORT_SYMBOL(__ipipe_lock_irq); >+EXPORT_SYMBOL(__ipipe_unlock_irq); >+EXPORT_SYMBOL(ipipe_register_domain); >+EXPORT_SYMBOL(ipipe_unregister_domain); >+EXPORT_SYMBOL(ipipe_free_virq); >+EXPORT_SYMBOL(ipipe_init_attr); >+EXPORT_SYMBOL(ipipe_catch_event); >+EXPORT_SYMBOL(ipipe_alloc_ptdkey); >+EXPORT_SYMBOL(ipipe_free_ptdkey); >+EXPORT_SYMBOL(ipipe_set_ptd); >+EXPORT_SYMBOL(ipipe_get_ptd); >+EXPORT_SYMBOL(ipipe_set_irq_affinity); >+EXPORT_SYMBOL(ipipe_send_ipi); >+EXPORT_SYMBOL(__ipipe_pend_irq); >+EXPORT_SYMBOL(__ipipe_set_irq_pending); >+EXPORT_SYMBOL(__ipipe_event_monitors); >+#if defined(CONFIG_IPIPE_DEBUG_INTERNAL) && defined(CONFIG_SMP) >+EXPORT_SYMBOL(__ipipe_check_percpu_access); >+#endif >+#ifdef CONFIG_GENERIC_CLOCKEVENTS >+EXPORT_SYMBOL(ipipe_request_tickdev); >+EXPORT_SYMBOL(ipipe_release_tickdev); >+#endif >+ >+EXPORT_SYMBOL(ipipe_critical_enter); >+EXPORT_SYMBOL(ipipe_critical_exit); >+EXPORT_SYMBOL(ipipe_trigger_irq); >+EXPORT_SYMBOL(ipipe_get_sysinfo); >diff --git a/kernel/ipipe/tracer.c b/kernel/ipipe/tracer.c >new file mode 100644 >index 0000000..f013ef4 >--- /dev/null >+++ b/kernel/ipipe/tracer.c >@@ -0,0 +1,1442 @@ >+/* -*- linux-c -*- >+ * kernel/ipipe/tracer.c >+ * >+ * Copyright (C) 2005 Luotao Fu. >+ * 2005-2008 Jan Kiszka. >+ * >+ * This program is free software; you can redistribute it and/or modify >+ * it under the terms of the GNU General Public License as published by >+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, >+ * USA; either version 2 of the License, or (at your option) any later >+ * version. >+ * >+ * This program is distributed in the hope that it will be useful, >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+ * GNU General Public License for more details. >+ * >+ * You should have received a copy of the GNU General Public License >+ * along with this program; if not, write to the Free Software >+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
>+ */ >+ >+#include <linux/kernel.h> >+#include <linux/module.h> >+#include <linux/version.h> >+#include <linux/kallsyms.h> >+#include <linux/seq_file.h> >+#include <linux/proc_fs.h> >+#include <linux/ctype.h> >+#include <linux/vmalloc.h> >+#include <linux/pid.h> >+#include <linux/vermagic.h> >+#include <linux/sched.h> >+#include <linux/ipipe.h> >+#include <linux/ftrace.h> >+#include <asm/uaccess.h> >+ >+#define IPIPE_TRACE_PATHS 4 /* <!> Do not lower below 3 */ >+#define IPIPE_DEFAULT_ACTIVE 0 >+#define IPIPE_DEFAULT_MAX 1 >+#define IPIPE_DEFAULT_FROZEN 2 >+ >+#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) >+#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) >+ >+#define IPIPE_DEFAULT_PRE_TRACE 10 >+#define IPIPE_DEFAULT_POST_TRACE 10 >+#define IPIPE_DEFAULT_BACK_TRACE 100 >+ >+#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ >+#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ >+ >+#define IPIPE_TFLG_NMI_LOCK 0x0001 >+#define IPIPE_TFLG_NMI_HIT 0x0002 >+#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 >+ >+#define IPIPE_TFLG_HWIRQ_OFF 0x0100 >+#define IPIPE_TFLG_FREEZING 0x0200 >+#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */ >+#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 >+#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */ >+#define IPIPE_TFLG_DOMSTATE_BITS 3 >+ >+#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ >+ (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) >+#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ >+ ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) >+ >+struct ipipe_trace_point { >+ short type; >+ short flags; >+ unsigned long eip; >+ unsigned long parent_eip; >+ unsigned long v; >+ unsigned long long timestamp; >+}; >+ >+struct ipipe_trace_path { >+ volatile int flags; >+ int dump_lock; /* separated from flags due to cross-cpu access */ >+ int trace_pos; /* next point to fill */ >+ int begin, end; /* finalised path begin and end */ >+ int post_trace; /* non-zero when in post-trace phase */ >+ unsigned long long length; /* max path length in cycles */ >+ unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ >+ unsigned long nmi_saved_parent_eip; >+ unsigned long nmi_saved_v; >+ struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; >+} ____cacheline_aligned_in_smp; >+ >+enum ipipe_trace_type >+{ >+ IPIPE_TRACE_FUNC = 0, >+ IPIPE_TRACE_BEGIN, >+ IPIPE_TRACE_END, >+ IPIPE_TRACE_FREEZE, >+ IPIPE_TRACE_SPECIAL, >+ IPIPE_TRACE_PID, >+ IPIPE_TRACE_EVENT, >+}; >+ >+#define IPIPE_TYPE_MASK 0x0007 >+#define IPIPE_TYPE_BITS 3 >+ >+#ifdef CONFIG_IPIPE_TRACE_VMALLOC >+static DEFINE_PER_CPU(struct ipipe_trace_path *, trace_path); >+#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ >+static DEFINE_PER_CPU(struct ipipe_trace_path, trace_path[IPIPE_TRACE_PATHS]) = >+ { [0 ... 
IPIPE_TRACE_PATHS-1] = { .begin = -1, .end = -1 } }; >+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ >+ >+int ipipe_trace_enable = 0; >+ >+static DEFINE_PER_CPU(int, active_path) = { IPIPE_DEFAULT_ACTIVE }; >+static DEFINE_PER_CPU(int, max_path) = { IPIPE_DEFAULT_MAX }; >+static DEFINE_PER_CPU(int, frozen_path) = { IPIPE_DEFAULT_FROZEN }; >+static IPIPE_DEFINE_SPINLOCK(global_path_lock); >+static int pre_trace = IPIPE_DEFAULT_PRE_TRACE; >+static int post_trace = IPIPE_DEFAULT_POST_TRACE; >+static int back_trace = IPIPE_DEFAULT_BACK_TRACE; >+static int verbose_trace = 1; >+static unsigned long trace_overhead; >+ >+static unsigned long trigger_begin; >+static unsigned long trigger_end; >+ >+static DEFINE_MUTEX(out_mutex); >+static struct ipipe_trace_path *print_path; >+#ifdef CONFIG_IPIPE_TRACE_PANIC >+static struct ipipe_trace_path *panic_path; >+#endif /* CONFIG_IPIPE_TRACE_PANIC */ >+static int print_pre_trace; >+static int print_post_trace; >+ >+ >+static long __ipipe_signed_tsc2us(long long tsc); >+static void >+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); >+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip); >+ >+ >+static notrace void >+__ipipe_store_domain_states(struct ipipe_trace_point *point) >+{ >+ struct ipipe_domain *ipd; >+ struct list_head *pos; >+ int i = 0; >+ >+ list_for_each_prev(pos, &__ipipe_pipeline) { >+ ipd = list_entry(pos, struct ipipe_domain, p_link); >+ >+ if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status))) >+ point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT); >+ >+ if (ipd == __ipipe_current_domain) >+ point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT; >+ >+ if (++i > IPIPE_TFLG_DOMSTATE_BITS) >+ break; >+ } >+} >+ >+static notrace int __ipipe_get_free_trace_path(int old, int cpu) >+{ >+ int new_active = old; >+ struct ipipe_trace_path *tp; >+ >+ do { >+ if (++new_active == IPIPE_TRACE_PATHS) >+ new_active = 0; >+ tp = &per_cpu(trace_path, cpu)[new_active]; >+ } while (new_active == per_cpu(max_path, cpu) || >+ new_active == per_cpu(frozen_path, cpu) || >+ tp->dump_lock); >+ >+ return new_active; >+} >+ >+static notrace void >+__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, >+ struct ipipe_trace_path *old_tp, int old_pos) >+{ >+ int i; >+ >+ new_tp->trace_pos = pre_trace+1; >+ >+ for (i = new_tp->trace_pos; i > 0; i--) >+ memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], >+ &old_tp->point[WRAP_POINT_NO(old_pos-i)], >+ sizeof(struct ipipe_trace_point)); >+ >+ /* mark the end (i.e. the point before point[0]) invalid */ >+ new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0; >+} >+ >+static notrace struct ipipe_trace_path * >+__ipipe_trace_end(int cpu, struct ipipe_trace_path *tp, int pos) >+{ >+ struct ipipe_trace_path *old_tp = tp; >+ long active = per_cpu(active_path, cpu); >+ unsigned long long length; >+ >+ /* do we have a new worst case? 
*/ >+ length = tp->point[tp->end].timestamp - >+ tp->point[tp->begin].timestamp; >+ if (length > per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)].length) { >+ /* we need protection here against other cpus trying >+ to start a proc dump */ >+ spin_lock(&global_path_lock); >+ >+ /* active path holds new worst case */ >+ tp->length = length; >+ per_cpu(max_path, cpu) = active; >+ >+ /* find next unused trace path */ >+ active = __ipipe_get_free_trace_path(active, cpu); >+ >+ spin_unlock(&global_path_lock); >+ >+ tp = &per_cpu(trace_path, cpu)[active]; >+ >+ /* migrate last entries for pre-tracing */ >+ __ipipe_migrate_pre_trace(tp, old_tp, pos); >+ } >+ >+ return tp; >+} >+ >+static notrace struct ipipe_trace_path * >+__ipipe_trace_freeze(int cpu, struct ipipe_trace_path *tp, int pos) >+{ >+ struct ipipe_trace_path *old_tp = tp; >+ long active = per_cpu(active_path, cpu); >+ int n; >+ >+ /* frozen paths have no core (begin=end) */ >+ tp->begin = tp->end; >+ >+ /* we need protection here against other cpus trying >+ * to set their frozen path or to start a proc dump */ >+ spin_lock(&global_path_lock); >+ >+ per_cpu(frozen_path, cpu) = active; >+ >+ /* find next unused trace path */ >+ active = __ipipe_get_free_trace_path(active, cpu); >+ >+ /* check if this is the first frozen path */ >+ for_each_possible_cpu(n) { >+ if (n != cpu && >+ per_cpu(trace_path, n)[per_cpu(frozen_path, n)].end >= 0) >+ tp->end = -1; >+ } >+ >+ spin_unlock(&global_path_lock); >+ >+ tp = &per_cpu(trace_path, cpu)[active]; >+ >+ /* migrate last entries for pre-tracing */ >+ __ipipe_migrate_pre_trace(tp, old_tp, pos); >+ >+ return tp; >+} >+ >+void notrace >+__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, >+ unsigned long parent_eip, unsigned long v) >+{ >+ struct ipipe_trace_path *tp, *old_tp; >+ int pos, next_pos, begin; >+ struct ipipe_trace_point *point; >+ unsigned long flags; >+ int cpu; >+ >+ local_irq_save_hw_notrace(flags); >+ >+ cpu = ipipe_processor_id(); >+ restart: >+ tp = old_tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; >+ >+ /* here starts a race window with NMIs - catched below */ >+ >+ /* check for NMI recursion */ >+ if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { >+ tp->flags |= IPIPE_TFLG_NMI_HIT; >+ >+ /* first freeze request from NMI context? */ >+ if ((type == IPIPE_TRACE_FREEZE) && >+ !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { >+ /* save arguments and mark deferred freezing */ >+ tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ; >+ tp->nmi_saved_eip = eip; >+ tp->nmi_saved_parent_eip = parent_eip; >+ tp->nmi_saved_v = v; >+ } >+ return; /* no need for restoring flags inside IRQ */ >+ } >+ >+ /* clear NMI events and set lock (atomically per cpu) */ >+ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | >+ IPIPE_TFLG_NMI_FREEZE_REQ)) >+ | IPIPE_TFLG_NMI_LOCK; >+ >+ /* check active_path again - some nasty NMI may have switched >+ * it meanwhile */ >+ if (unlikely(tp != >+ &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)])) { >+ /* release lock on wrong path and restart */ >+ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; >+ >+ /* there is no chance that the NMI got deferred >+ * => no need to check for pending freeze requests */ >+ goto restart; >+ } >+ >+ /* get the point buffer */ >+ pos = tp->trace_pos; >+ point = &tp->point[pos]; >+ >+ /* store all trace point data */ >+ point->type = type; >+ point->flags = raw_irqs_disabled_flags(flags) ? 
IPIPE_TFLG_HWIRQ_OFF : 0; >+ point->eip = eip; >+ point->parent_eip = parent_eip; >+ point->v = v; >+ ipipe_read_tsc(point->timestamp); >+ >+ __ipipe_store_domain_states(point); >+ >+ /* forward to next point buffer */ >+ next_pos = WRAP_POINT_NO(pos+1); >+ tp->trace_pos = next_pos; >+ >+ /* only mark beginning if we haven't started yet */ >+ begin = tp->begin; >+ if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0)) >+ tp->begin = pos; >+ >+ /* end of critical path, start post-trace if not already started */ >+ if (unlikely(type == IPIPE_TRACE_END) && >+ (begin >= 0) && !tp->post_trace) >+ tp->post_trace = post_trace + 1; >+ >+ /* freeze only if the slot is free and we are not already freezing */ >+ if ((unlikely(type == IPIPE_TRACE_FREEZE) || >+ (unlikely(eip >= trigger_begin && eip <= trigger_end) && >+ type == IPIPE_TRACE_FUNC)) && >+ per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)].begin < 0 && >+ !(tp->flags & IPIPE_TFLG_FREEZING)) { >+ tp->post_trace = post_trace + 1; >+ tp->flags |= IPIPE_TFLG_FREEZING; >+ } >+ >+ /* enforce end of trace in case of overflow */ >+ if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) { >+ tp->end = pos; >+ goto enforce_end; >+ } >+ >+ /* stop tracing this path if we are in post-trace and >+ * a) that phase is over now or >+ * b) a new TRACE_BEGIN came in but we are not freezing this path */ >+ if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) || >+ ((type == IPIPE_TRACE_BEGIN) && >+ !(tp->flags & IPIPE_TFLG_FREEZING))))) { >+ /* store the path's end (i.e. excluding post-trace) */ >+ tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace); >+ >+ enforce_end: >+ if (tp->flags & IPIPE_TFLG_FREEZING) >+ tp = __ipipe_trace_freeze(cpu, tp, pos); >+ else >+ tp = __ipipe_trace_end(cpu, tp, pos); >+ >+ /* reset the active path, maybe already start a new one */ >+ tp->begin = (type == IPIPE_TRACE_BEGIN) ? 
>+ WRAP_POINT_NO(tp->trace_pos - 1) : -1; >+ tp->end = -1; >+ tp->post_trace = 0; >+ tp->flags = 0; >+ >+ /* update active_path not earlier to avoid races with NMIs */ >+ per_cpu(active_path, cpu) = tp - per_cpu(trace_path, cpu); >+ } >+ >+ /* we still have old_tp and point, >+ * let's reset NMI lock and check for catches */ >+ old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK; >+ if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { >+ /* well, this late tagging may not immediately be visible for >+ * other cpus already dumping this path - a minor issue */ >+ point->flags |= IPIPE_TFLG_NMI_HIT; >+ >+ /* handle deferred freezing from NMI context */ >+ if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) >+ __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, >+ old_tp->nmi_saved_parent_eip, >+ old_tp->nmi_saved_v); >+ } >+ >+ local_irq_restore_hw_notrace(flags); >+} >+ >+static unsigned long __ipipe_global_path_lock(void) >+{ >+ unsigned long flags; >+ int cpu; >+ struct ipipe_trace_path *tp; >+ >+ spin_lock_irqsave(&global_path_lock, flags); >+ >+ cpu = ipipe_processor_id(); >+ restart: >+ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; >+ >+ /* here is small race window with NMIs - catched below */ >+ >+ /* clear NMI events and set lock (atomically per cpu) */ >+ tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | >+ IPIPE_TFLG_NMI_FREEZE_REQ)) >+ | IPIPE_TFLG_NMI_LOCK; >+ >+ /* check active_path again - some nasty NMI may have switched >+ * it meanwhile */ >+ if (tp != &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]) { >+ /* release lock on wrong path and restart */ >+ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; >+ >+ /* there is no chance that the NMI got deferred >+ * => no need to check for pending freeze requests */ >+ goto restart; >+ } >+ >+ return flags; >+} >+ >+static void __ipipe_global_path_unlock(unsigned long flags) >+{ >+ int cpu; >+ struct ipipe_trace_path *tp; >+ >+ /* release spinlock first - it's not involved in the NMI issue */ >+ __ipipe_spin_unlock_irqbegin(&global_path_lock); >+ >+ cpu = ipipe_processor_id(); >+ tp = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; >+ >+ tp->flags &= ~IPIPE_TFLG_NMI_LOCK; >+ >+ /* handle deferred freezing from NMI context */ >+ if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) >+ __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip, >+ tp->nmi_saved_parent_eip, tp->nmi_saved_v); >+ >+ /* See __ipipe_spin_lock_irqsave() and friends. 
*/ >+ __ipipe_spin_unlock_irqcomplete(flags); >+} >+ >+void notrace ipipe_trace_begin(unsigned long v) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, v); >+} >+EXPORT_SYMBOL(ipipe_trace_begin); >+ >+void notrace ipipe_trace_end(unsigned long v) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, v); >+} >+EXPORT_SYMBOL(ipipe_trace_end); >+ >+void notrace ipipe_trace_freeze(unsigned long v) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, v); >+} >+EXPORT_SYMBOL(ipipe_trace_freeze); >+ >+void notrace ipipe_trace_special(unsigned char id, unsigned long v) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS), >+ __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, v); >+} >+EXPORT_SYMBOL(ipipe_trace_special); >+ >+void notrace ipipe_trace_pid(pid_t pid, short prio) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS), >+ __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, pid); >+} >+EXPORT_SYMBOL(ipipe_trace_pid); >+ >+void notrace ipipe_trace_event(unsigned char id, unsigned long delay_tsc) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_EVENT | (id << IPIPE_TYPE_BITS), >+ __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, delay_tsc); >+} >+EXPORT_SYMBOL(ipipe_trace_event); >+ >+int ipipe_trace_max_reset(void) >+{ >+ int cpu; >+ unsigned long flags; >+ struct ipipe_trace_path *path; >+ int ret = 0; >+ >+ flags = __ipipe_global_path_lock(); >+ >+ for_each_possible_cpu(cpu) { >+ path = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; >+ >+ if (path->dump_lock) { >+ ret = -EBUSY; >+ break; >+ } >+ >+ path->begin = -1; >+ path->end = -1; >+ path->trace_pos = 0; >+ path->length = 0; >+ } >+ >+ __ipipe_global_path_unlock(flags); >+ >+ return ret; >+} >+EXPORT_SYMBOL(ipipe_trace_max_reset); >+ >+int ipipe_trace_frozen_reset(void) >+{ >+ int cpu; >+ unsigned long flags; >+ struct ipipe_trace_path *path; >+ int ret = 0; >+ >+ flags = __ipipe_global_path_lock(); >+ >+ for_each_online_cpu(cpu) { >+ path = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; >+ >+ if (path->dump_lock) { >+ ret = -EBUSY; >+ break; >+ } >+ >+ path->begin = -1; >+ path->end = -1; >+ path->trace_pos = 0; >+ path->length = 0; >+ } >+ >+ __ipipe_global_path_unlock(flags); >+ >+ return ret; >+} >+EXPORT_SYMBOL(ipipe_trace_frozen_reset); >+ >+static void >+__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, >+ int trylock) >+{ >+ struct task_struct *task = NULL; >+ char buf[8]; >+ int i; >+ int locked = 1; >+ >+ if (trylock) { >+ if (!read_trylock(&tasklist_lock)) >+ locked = 0; >+ } else >+ read_lock(&tasklist_lock); >+ >+ if (locked) >+ task = find_task_by_pid_ns((pid_t)point->v, &init_pid_ns); >+ >+ if (task) >+ strncpy(task_info, task->comm, 11); >+ else >+ strcpy(task_info, "-<?>-"); >+ >+ if (locked) >+ read_unlock(&tasklist_lock); >+ >+ for (i = strlen(task_info); i < 11; i++) >+ task_info[i] = ' '; >+ >+ sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); >+ strcpy(task_info + (11 - strlen(buf)), buf); >+} >+ >+static void >+__ipipe_get_event_date(char *buf,struct ipipe_trace_path *path, >+ struct ipipe_trace_point *point) >+{ >+ long time; >+ int type; >+ >+ time = 
__ipipe_signed_tsc2us(point->timestamp - >+ path->point[path->begin].timestamp + point->v); >+ type = point->type >> IPIPE_TYPE_BITS; >+ >+ if (type == 0) >+ /* >+ * Event type #0 is predefined, stands for the next >+ * timer tick. >+ */ >+ sprintf(buf, "tick@%-6ld", time); >+ else >+ sprintf(buf, "%3d@%-7ld", type, time); >+} >+ >+#ifdef CONFIG_IPIPE_TRACE_PANIC >+void ipipe_trace_panic_freeze(void) >+{ >+ unsigned long flags; >+ int cpu; >+ >+ if (!ipipe_trace_enable) >+ return; >+ >+ ipipe_trace_enable = 0; >+ local_irq_save_hw_notrace(flags); >+ >+ cpu = ipipe_processor_id(); >+ >+ panic_path = &per_cpu(trace_path, cpu)[per_cpu(active_path, cpu)]; >+ >+ local_irq_restore_hw(flags); >+} >+EXPORT_SYMBOL(ipipe_trace_panic_freeze); >+ >+void ipipe_trace_panic_dump(void) >+{ >+ int cnt = back_trace; >+ int start, pos; >+ char buf[16]; >+ >+ if (!panic_path) >+ return; >+ >+ ipipe_context_check_off(); >+ >+ printk("I-pipe tracer log (%d points):\n", cnt); >+ >+ start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); >+ >+ while (cnt-- > 0) { >+ struct ipipe_trace_point *point = &panic_path->point[pos]; >+ long time; >+ char info[16]; >+ int i; >+ >+ printk(" %c", >+ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); >+ >+ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) >+ printk("%c", >+ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? >+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? >+ '#' : '+') : >+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? >+ '*' : ' ')); >+ >+ if (!point->eip) >+ printk("-<invalid>-\n"); >+ else { >+ __ipipe_trace_point_type(buf, point); >+ printk("%s", buf); >+ >+ switch (point->type & IPIPE_TYPE_MASK) { >+ case IPIPE_TRACE_FUNC: >+ printk(" "); >+ break; >+ >+ case IPIPE_TRACE_PID: >+ __ipipe_get_task_info(info, >+ point, 1); >+ printk("%s", info); >+ break; >+ >+ case IPIPE_TRACE_EVENT: >+ __ipipe_get_event_date(info, >+ panic_path, point); >+ printk("%s", info); >+ break; >+ >+ default: >+ printk("0x%08lx ", point->v); >+ } >+ >+ time = __ipipe_signed_tsc2us(point->timestamp - >+ panic_path->point[start].timestamp); >+ printk(" %5ld ", time); >+ >+ __ipipe_print_symname(NULL, point->eip); >+ printk(" ("); >+ __ipipe_print_symname(NULL, point->parent_eip); >+ printk(")\n"); >+ } >+ pos = WRAP_POINT_NO(pos - 1); >+ } >+ >+ panic_path = NULL; >+} >+EXPORT_SYMBOL(ipipe_trace_panic_dump); >+#endif /* CONFIG_IPIPE_TRACE_PANIC */ >+ >+ >+/* --- /proc output --- */ >+ >+static notrace int __ipipe_in_critical_trpath(long point_no) >+{ >+ return ((WRAP_POINT_NO(point_no-print_path->begin) < >+ WRAP_POINT_NO(print_path->end-print_path->begin)) || >+ ((print_path->end == print_path->begin) && >+ (WRAP_POINT_NO(point_no-print_path->end) > >+ print_post_trace))); >+} >+ >+static long __ipipe_signed_tsc2us(long long tsc) >+{ >+ unsigned long long abs_tsc; >+ long us; >+ >+ /* ipipe_tsc2us works on unsigned => handle sign separately */ >+ abs_tsc = (tsc >= 0) ? 
tsc : -tsc; >+ us = ipipe_tsc2us(abs_tsc); >+ if (tsc < 0) >+ return -us; >+ else >+ return us; >+} >+ >+static void >+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) >+{ >+ switch (point->type & IPIPE_TYPE_MASK) { >+ case IPIPE_TRACE_FUNC: >+ strcpy(buf, "func "); >+ break; >+ >+ case IPIPE_TRACE_BEGIN: >+ strcpy(buf, "begin "); >+ break; >+ >+ case IPIPE_TRACE_END: >+ strcpy(buf, "end "); >+ break; >+ >+ case IPIPE_TRACE_FREEZE: >+ strcpy(buf, "freeze "); >+ break; >+ >+ case IPIPE_TRACE_SPECIAL: >+ sprintf(buf, "(0x%02x) ", >+ point->type >> IPIPE_TYPE_BITS); >+ break; >+ >+ case IPIPE_TRACE_PID: >+ sprintf(buf, "[%5d] ", (pid_t)point->v); >+ break; >+ >+ case IPIPE_TRACE_EVENT: >+ sprintf(buf, "event "); >+ break; >+ } >+} >+ >+static void >+__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) >+{ >+ char mark = ' '; >+ int point_no = point - print_path->point; >+ int i; >+ >+ if (print_path->end == point_no) >+ mark = '<'; >+ else if (print_path->begin == point_no) >+ mark = '>'; >+ else if (__ipipe_in_critical_trpath(point_no)) >+ mark = ':'; >+ seq_printf(m, "%c%c", mark, >+ (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); >+ >+ if (!verbose_trace) >+ return; >+ >+ for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) >+ seq_printf(m, "%c", >+ (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? >+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? >+ '#' : '+') : >+ (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); >+} >+ >+static void >+__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) >+{ >+ unsigned long delay = 0; >+ int next; >+ char *mark = " "; >+ >+ next = WRAP_POINT_NO(point+1 - print_path->point); >+ >+ if (next != print_path->trace_pos) >+ delay = ipipe_tsc2ns(print_path->point[next].timestamp - >+ point->timestamp); >+ >+ if (__ipipe_in_critical_trpath(point - print_path->point)) { >+ if (delay > IPIPE_DELAY_WARN) >+ mark = "! "; >+ else if (delay > IPIPE_DELAY_NOTE) >+ mark = "+ "; >+ } >+ seq_puts(m, mark); >+ >+ if (verbose_trace) >+ seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, >+ (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); >+ else >+ seq_puts(m, " "); >+} >+ >+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) >+{ >+ char namebuf[KSYM_NAME_LEN+1]; >+ unsigned long size, offset; >+ const char *sym_name; >+ char *modname; >+ >+ sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); >+ >+#ifdef CONFIG_IPIPE_TRACE_PANIC >+ if (!m) { >+ /* panic dump */ >+ if (sym_name) { >+ printk("%s+0x%lx", sym_name, offset); >+ if (modname) >+ printk(" [%s]", modname); >+ } >+ } else >+#endif /* CONFIG_IPIPE_TRACE_PANIC */ >+ { >+ if (sym_name) { >+ if (verbose_trace) { >+ seq_printf(m, "%s+0x%lx", sym_name, offset); >+ if (modname) >+ seq_printf(m, " [%s]", modname); >+ } else >+ seq_puts(m, sym_name); >+ } else >+ seq_printf(m, "<%08lx>", eip); >+ } >+} >+ >+static void __ipipe_print_headline(struct seq_file *m) >+{ >+ seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " >+ "us\n\n", trace_overhead/1000, trace_overhead%1000); >+ >+ if (verbose_trace) { >+ const char *name[4] = { [0 ... 
3] = "<unused>" }; >+ struct list_head *pos; >+ int i = 0; >+ >+ list_for_each_prev(pos, &__ipipe_pipeline) { >+ struct ipipe_domain *ipd = >+ list_entry(pos, struct ipipe_domain, p_link); >+ >+ name[i] = ipd->name; >+ if (++i > 3) >+ break; >+ } >+ >+ seq_printf(m, >+ " +----- Hard IRQs ('|': locked)\n" >+ " |+---- %s\n" >+ " ||+--- %s\n" >+ " |||+-- %s\n" >+ " ||||+- %s%s\n" >+ " ||||| +---------- " >+ "Delay flag ('+': > %d us, '!': > %d us)\n" >+ " ||||| | +- " >+ "NMI noise ('N')\n" >+ " ||||| | |\n" >+ " Type User Val. Time Delay Function " >+ "(Parent)\n", >+ name[3], name[2], name[1], name[0], >+ name[0] ? " ('*': domain stalled, '+': current, " >+ "'#': current+stalled)" : "", >+ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); >+ } else >+ seq_printf(m, >+ " +--------------- Hard IRQs ('|': locked)\n" >+ " | +- Delay flag " >+ "('+': > %d us, '!': > %d us)\n" >+ " | |\n" >+ " Type Time Function (Parent)\n", >+ IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); >+} >+ >+static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) >+{ >+ loff_t n = *pos; >+ >+ mutex_lock(&out_mutex); >+ >+ if (!n) { >+ struct ipipe_trace_path *tp; >+ unsigned long length_usecs; >+ int points, cpu; >+ unsigned long flags; >+ >+ /* protect against max_path/frozen_path updates while we >+ * haven't locked our target path, also avoid recursively >+ * taking global_path_lock from NMI context */ >+ flags = __ipipe_global_path_lock(); >+ >+ /* find the longest of all per-cpu paths */ >+ print_path = NULL; >+ for_each_online_cpu(cpu) { >+ tp = &per_cpu(trace_path, cpu)[per_cpu(max_path, cpu)]; >+ if ((print_path == NULL) || >+ (tp->length > print_path->length)) { >+ print_path = tp; >+ break; >+ } >+ } >+ print_path->dump_lock = 1; >+ >+ __ipipe_global_path_unlock(flags); >+ >+ /* does this path actually contain data? 
*/ >+ if (print_path->end == print_path->begin) >+ return NULL; >+ >+ /* number of points inside the critical path */ >+ points = WRAP_POINT_NO(print_path->end-print_path->begin+1); >+ >+ /* pre- and post-tracing length, post-trace length was frozen >+ in __ipipe_trace, pre-trace may have to be reduced due to >+ buffer overrun */ >+ print_pre_trace = pre_trace; >+ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - >+ print_path->end - 1); >+ if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) >+ print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - >+ print_post_trace; >+ >+ length_usecs = ipipe_tsc2us(print_path->length); >+ seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n" >+ "------------------------------------------------------------\n", >+ UTS_RELEASE, IPIPE_ARCH_STRING); >+ seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: " >+ "%d (-%d/+%d), Length: %lu us\n", >+ cpu, print_path->point[print_path->begin].timestamp, >+ points, print_pre_trace, print_post_trace, length_usecs); >+ __ipipe_print_headline(m); >+ } >+ >+ /* check if we are inside the trace range */ >+ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + >+ print_pre_trace + print_post_trace)) >+ return NULL; >+ >+ /* return the next point to be shown */ >+ return &print_path->point[WRAP_POINT_NO(print_path->begin - >+ print_pre_trace + n)]; >+} >+ >+static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) >+{ >+ loff_t n = ++*pos; >+ >+ /* check if we are inside the trace range with the next entry */ >+ if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + >+ print_pre_trace + print_post_trace)) >+ return NULL; >+ >+ /* return the next point to be shown */ >+ return &print_path->point[WRAP_POINT_NO(print_path->begin - >+ print_pre_trace + *pos)]; >+} >+ >+static void __ipipe_prtrace_stop(struct seq_file *m, void *p) >+{ >+ if (print_path) >+ print_path->dump_lock = 0; >+ mutex_unlock(&out_mutex); >+} >+ >+static int __ipipe_prtrace_show(struct seq_file *m, void *p) >+{ >+ long time; >+ struct ipipe_trace_point *point = p; >+ char buf[16]; >+ >+ if (!point->eip) { >+ seq_puts(m, "-<invalid>-\n"); >+ return 0; >+ } >+ >+ __ipipe_print_pathmark(m, point); >+ __ipipe_trace_point_type(buf, point); >+ seq_puts(m, buf); >+ if (verbose_trace) >+ switch (point->type & IPIPE_TYPE_MASK) { >+ case IPIPE_TRACE_FUNC: >+ seq_puts(m, " "); >+ break; >+ >+ case IPIPE_TRACE_PID: >+ __ipipe_get_task_info(buf, point, 0); >+ seq_puts(m, buf); >+ break; >+ >+ case IPIPE_TRACE_EVENT: >+ __ipipe_get_event_date(buf, print_path, point); >+ seq_puts(m, buf); >+ break; >+ >+ default: >+ seq_printf(m, "0x%08lx ", point->v); >+ } >+ >+ time = __ipipe_signed_tsc2us(point->timestamp - >+ print_path->point[print_path->begin].timestamp); >+ seq_printf(m, "%5ld", time); >+ >+ __ipipe_print_delay(m, point); >+ __ipipe_print_symname(m, point->eip); >+ seq_puts(m, " ("); >+ __ipipe_print_symname(m, point->parent_eip); >+ seq_puts(m, ")\n"); >+ >+ return 0; >+} >+ >+static struct seq_operations __ipipe_max_ptrace_ops = { >+ .start = __ipipe_max_prtrace_start, >+ .next = __ipipe_prtrace_next, >+ .stop = __ipipe_prtrace_stop, >+ .show = __ipipe_prtrace_show >+}; >+ >+static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) >+{ >+ return seq_open(file, &__ipipe_max_ptrace_ops); >+} >+ >+static ssize_t >+__ipipe_max_reset(struct file *file, const char __user *pbuffer, >+ size_t count, loff_t *data) >+{ >+ mutex_lock(&out_mutex); >+ ipipe_trace_max_reset(); >+ 
mutex_unlock(&out_mutex); >+ >+ return count; >+} >+ >+struct file_operations __ipipe_max_prtrace_fops = { >+ .open = __ipipe_max_prtrace_open, >+ .read = seq_read, >+ .write = __ipipe_max_reset, >+ .llseek = seq_lseek, >+ .release = seq_release, >+}; >+ >+static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) >+{ >+ loff_t n = *pos; >+ >+ mutex_lock(&out_mutex); >+ >+ if (!n) { >+ struct ipipe_trace_path *tp; >+ int cpu; >+ unsigned long flags; >+ >+ /* protect against max_path/frozen_path updates while we >+ * haven't locked our target path, also avoid recursively >+ * taking global_path_lock from NMI context */ >+ flags = __ipipe_global_path_lock(); >+ >+ /* find the first of all per-cpu frozen paths */ >+ print_path = NULL; >+ for_each_online_cpu(cpu) { >+ tp = &per_cpu(trace_path, cpu)[per_cpu(frozen_path, cpu)]; >+ if (tp->end >= 0) { >+ print_path = tp; >+ break; >+ } >+ } >+ if (print_path) >+ print_path->dump_lock = 1; >+ >+ __ipipe_global_path_unlock(flags); >+ >+ if (!print_path) >+ return NULL; >+ >+ /* back- and post-tracing length, post-trace length was frozen >+ in __ipipe_trace, back-trace may have to be reduced due to >+ buffer overrun */ >+ print_pre_trace = back_trace-1; /* substract freeze point */ >+ print_post_trace = WRAP_POINT_NO(print_path->trace_pos - >+ print_path->end - 1); >+ if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) >+ print_pre_trace = IPIPE_TRACE_POINTS - 2 - >+ print_post_trace; >+ >+ seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n" >+ "------------------------------------------------------" >+ "------\n", >+ UTS_RELEASE, IPIPE_ARCH_STRING); >+ seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n", >+ cpu, print_path->point[print_path->begin].timestamp, >+ print_pre_trace+1, print_post_trace); >+ __ipipe_print_headline(m); >+ } >+ >+ /* check if we are inside the trace range */ >+ if (n >= print_pre_trace + 1 + print_post_trace) >+ return NULL; >+ >+ /* return the next point to be shown */ >+ return &print_path->point[WRAP_POINT_NO(print_path->begin- >+ print_pre_trace+n)]; >+} >+ >+static struct seq_operations __ipipe_frozen_ptrace_ops = { >+ .start = __ipipe_frozen_prtrace_start, >+ .next = __ipipe_prtrace_next, >+ .stop = __ipipe_prtrace_stop, >+ .show = __ipipe_prtrace_show >+}; >+ >+static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) >+{ >+ return seq_open(file, &__ipipe_frozen_ptrace_ops); >+} >+ >+static ssize_t >+__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, >+ size_t count, loff_t *data) >+{ >+ char *end, buf[16]; >+ int val; >+ int n; >+ >+ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; >+ >+ if (copy_from_user(buf, pbuffer, n)) >+ return -EFAULT; >+ >+ buf[n] = '\0'; >+ val = simple_strtol(buf, &end, 0); >+ >+ if (((*end != '\0') && !isspace(*end)) || (val < 0)) >+ return -EINVAL; >+ >+ mutex_lock(&out_mutex); >+ ipipe_trace_frozen_reset(); >+ if (val > 0) >+ ipipe_trace_freeze(-1); >+ mutex_unlock(&out_mutex); >+ >+ return count; >+} >+ >+struct file_operations __ipipe_frozen_prtrace_fops = { >+ .open = __ipipe_frozen_prtrace_open, >+ .read = seq_read, >+ .write = __ipipe_frozen_ctrl, >+ .llseek = seq_lseek, >+ .release = seq_release, >+}; >+ >+static int __ipipe_rd_proc_val(char *page, char **start, off_t off, >+ int count, int *eof, void *data) >+{ >+ int len; >+ >+ len = sprintf(page, "%u\n", *(int *)data); >+ len -= off; >+ if (len <= off + count) >+ *eof = 1; >+ *start = page + off; >+ if (len > count) >+ len = count; >+ if (len < 0) >+ len = 0; >+ >+ return len; >+} >+ >+static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer, >+ unsigned long count, void *data) >+{ >+ char *end, buf[16]; >+ int val; >+ int n; >+ >+ n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count; >+ >+ if (copy_from_user(buf, buffer, n)) >+ return -EFAULT; >+ >+ buf[n] = '\0'; >+ val = simple_strtol(buf, &end, 0); >+ >+ if (((*end != '\0') && !isspace(*end)) || (val < 0)) >+ return -EINVAL; >+ >+ mutex_lock(&out_mutex); >+ *(int *)data = val; >+ mutex_unlock(&out_mutex); >+ >+ return count; >+} >+ >+static int __ipipe_rd_trigger(char *page, char **start, off_t off, int count, >+ int *eof, void *data) >+{ >+ int len; >+ >+ if (!trigger_begin) >+ return 0; >+ >+ len = sprint_symbol(page, trigger_begin); >+ page[len++] = '\n'; >+ >+ len -= off; >+ if (len <= off + count) >+ *eof = 1; >+ *start = page + off; >+ if (len > count) >+ len = count; >+ if (len < 0) >+ len = 0; >+ >+ return len; >+} >+ >+static int __ipipe_wr_trigger(struct file *file, const char __user *buffer, >+ unsigned long count, void *data) >+{ >+ char buf[KSYM_SYMBOL_LEN]; >+ unsigned long begin, end; >+ >+ if (count > sizeof(buf) - 1) >+ count = sizeof(buf) - 1; >+ if (copy_from_user(buf, buffer, count)) >+ return -EFAULT; >+ buf[count] = 0; >+ if (buf[count-1] == '\n') >+ buf[count-1] = 0; >+ >+ begin = kallsyms_lookup_name(buf); >+ if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL)) >+ return -ENOENT; >+ end += begin - 1; >+ >+ mutex_lock(&out_mutex); >+ /* invalidate the current range before setting a new one */ >+ trigger_end = 0; >+ wmb(); >+ ipipe_trace_frozen_reset(); >+ >+ /* set new range */ >+ trigger_begin = begin; >+ wmb(); >+ trigger_end = end; >+ mutex_unlock(&out_mutex); >+ >+ return count; >+} >+ >+#ifdef CONFIG_IPIPE_TRACE_MCOUNT >+static void notrace >+ipipe_trace_function(unsigned long ip, unsigned long parent_ip) >+{ >+ if (!ipipe_trace_enable) >+ return; >+ __ipipe_trace(IPIPE_TRACE_FUNC, ip, parent_ip, 0); >+} >+ >+static struct ftrace_ops ipipe_trace_ops = { >+ .func = ipipe_trace_function >+}; >+ >+static int __ipipe_wr_enable(struct file *file, const char __user *buffer, >+ unsigned long count, void *data) >+{ >+ char *end, buf[16]; >+ int val; >+ int n; >+ >+ n = (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count; >+ >+ if (copy_from_user(buf, buffer, n)) >+ return -EFAULT; >+ >+ buf[n] = '\0'; >+ val = simple_strtol(buf, &end, 0); >+ >+ if (((*end != '\0') && !isspace(*end)) || (val < 0)) >+ return -EINVAL; >+ >+ mutex_lock(&out_mutex); >+ >+ if (ipipe_trace_enable) { >+ if (!val) >+ unregister_ftrace_function(&ipipe_trace_ops); >+ } else if (val) >+ register_ftrace_function(&ipipe_trace_ops); >+ >+ ipipe_trace_enable = val; >+ >+ mutex_unlock(&out_mutex); >+ >+ return count; >+} >+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ >+ >+extern struct proc_dir_entry *ipipe_proc_root; >+ >+static struct proc_dir_entry * __init >+__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, >+ const char *name, int *value_ptr) >+{ >+ struct proc_dir_entry *entry; >+ >+ entry = create_proc_entry(name, 0644, trace_dir); >+ if (entry) { >+ entry->data = value_ptr; >+ entry->read_proc = __ipipe_rd_proc_val; >+ entry->write_proc = __ipipe_wr_proc_val; >+ } >+ return entry; >+} >+ >+void __init __ipipe_init_tracer(void) >+{ >+ struct proc_dir_entry *trace_dir; >+ struct proc_dir_entry *entry; >+ unsigned long long start, end, min = ULLONG_MAX; >+ int i; >+#ifdef CONFIG_IPIPE_TRACE_VMALLOC >+ int cpu, path; >+ >+ for_each_possible_cpu(cpu) { >+ struct ipipe_trace_path *tp_buf; >+ >+ tp_buf = vmalloc_node(sizeof(struct ipipe_trace_path) * >+ IPIPE_TRACE_PATHS, cpu_to_node(cpu)); >+ if (!tp_buf) { >+ printk(KERN_ERR "I-pipe: " >+ "insufficient memory for trace buffer.\n"); >+ return; >+ } >+ memset(tp_buf, 0, >+ sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); >+ for (path = 0; path < IPIPE_TRACE_PATHS; path++) { >+ tp_buf[path].begin = -1; >+ tp_buf[path].end = -1; >+ } >+ per_cpu(trace_path, cpu) = tp_buf; >+ } >+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ >+ >+ /* Calculate minimum overhead of __ipipe_trace() */ >+ local_irq_disable_hw(); >+ for (i = 0; i < 100; i++) { >+ ipipe_read_tsc(start); >+ __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0, >+ __BUILTIN_RETURN_ADDRESS1, 0); >+ ipipe_read_tsc(end); >+ >+ end -= start; >+ if (end < min) >+ min = end; >+ } >+ local_irq_enable_hw(); >+ trace_overhead = ipipe_tsc2ns(min); >+ >+#ifdef CONFIG_IPIPE_TRACE_ENABLE >+ ipipe_trace_enable = 1; >+#ifdef CONFIG_IPIPE_TRACE_MCOUNT >+ ftrace_enabled = 1; >+ register_ftrace_function(&ipipe_trace_ops); >+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ >+#endif /* CONFIG_IPIPE_TRACE_ENABLE */ >+ >+ trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root); >+ >+ entry = create_proc_entry("max", 0644, trace_dir); >+ if (entry) >+ entry->proc_fops = &__ipipe_max_prtrace_fops; >+ >+ entry = create_proc_entry("frozen", 0644, trace_dir); >+ if (entry) >+ entry->proc_fops = &__ipipe_frozen_prtrace_fops; >+ >+ entry = create_proc_entry("trigger", 0644, trace_dir); >+ if (entry) { >+ entry->read_proc = __ipipe_rd_trigger; >+ entry->write_proc = __ipipe_wr_trigger; >+ } >+ >+ __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", >+ &pre_trace); >+ __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", >+ &post_trace); >+ __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", >+ &back_trace); >+ __ipipe_create_trace_proc_val(trace_dir, "verbose", >+ &verbose_trace); >+ entry = __ipipe_create_trace_proc_val(trace_dir, "enable", >+ &ipipe_trace_enable); >+#ifdef CONFIG_IPIPE_TRACE_MCOUNT >+ if (entry) >+ entry->write_proc = __ipipe_wr_enable; >+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ >+} >diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c >index baa5c4a..967f86c 100644 >--- 
a/kernel/irq/chip.c >+++ b/kernel/irq/chip.c >@@ -15,6 +15,7 @@ > #include <linux/module.h> > #include <linux/interrupt.h> > #include <linux/kernel_stat.h> >+#include <linux/ipipe.h> > > #include "internals.h" > >@@ -505,7 +506,9 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) > irqreturn_t action_ret; > > raw_spin_lock(&desc->lock); >+#ifndef CONFIG_IPIPE > mask_ack_irq(desc); >+#endif > > if (unlikely(desc->status & IRQ_INPROGRESS)) > goto out_unlock; >@@ -582,8 +585,13 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) > > raw_spin_lock(&desc->lock); > desc->status &= ~IRQ_INPROGRESS; >+#ifdef CONFIG_IPIPE >+ desc->irq_data.chip->irq_unmask(&desc->irq_data); >+out: >+#else > out: > desc->irq_data.chip->irq_eoi(&desc->irq_data); >+#endif > > raw_spin_unlock(&desc->lock); > } >@@ -625,7 +633,9 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) > kstat_incr_irqs_this_cpu(irq, desc); > > /* Start handling the irq */ >+#ifndef CONFIG_IPIPE > desc->irq_data.chip->irq_ack(&desc->irq_data); >+#endif > > /* Mark the IRQ currently in progress.*/ > desc->status |= IRQ_INPROGRESS; >@@ -678,8 +688,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) > > kstat_incr_irqs_this_cpu(irq, desc); > >+#ifndef CONFIG_IPIPE > if (desc->irq_data.chip->irq_ack) > desc->irq_data.chip->irq_ack(&desc->irq_data); >+#endif /* CONFIG_IPIPE */ > > action_ret = handle_IRQ_event(irq, desc->action); > if (!noirqdebug) >@@ -689,6 +701,135 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) > desc->irq_data.chip->irq_eoi(&desc->irq_data); > } > >+#ifdef CONFIG_IPIPE >+ >+void __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+void __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+void __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc) >+{ >+ mask_ack_irq(desc); >+} >+ >+void __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc) >+{ >+ if (desc->irq_data.chip->irq_unmask) >+ desc->irq_data.chip->irq_unmask(&desc->irq_data); >+} >+ >+void __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc) >+{ >+ if (desc->irq_data.chip->irq_eoi) >+ desc->irq_data.chip->irq_eoi(&desc->irq_data); >+} >+ >+void __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc) >+{ >+ /* >+ * Non-requestable IRQs should not be masked in EOI handler. 
>+ */ >+ if (!(desc->status & IRQ_NOREQUEST)) >+ desc->irq_data.chip->irq_unmask(&desc->irq_data); >+} >+ >+void __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc) >+{ >+ desc->irq_data.chip->irq_ack(&desc->irq_data); >+} >+ >+void __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc) >+{ >+ if (desc->irq_data.chip->irq_ack) >+ desc->irq_data.chip->irq_ack(&desc->irq_data); >+} >+ >+void __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc) >+{ >+ if (desc->irq_data.chip->irq_eoi) >+ desc->irq_data.chip->irq_eoi(&desc->irq_data); >+} >+ >+void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+void __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc) >+{ >+ static int done; >+ >+ handle_bad_irq(irq, desc); >+ >+ if (!done) { >+ printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n", >+ __FUNCTION__, irq); >+ done = 1; >+ } >+} >+ >+void __ipipe_noack_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+void __ipipe_noend_irq(unsigned irq, struct irq_desc *desc) >+{ >+} >+ >+irq_flow_handler_t >+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) >+{ >+ if (unlikely(handle == NULL)) { >+ desc->ipipe_ack = &__ipipe_ack_bad_irq; >+ desc->ipipe_end = &__ipipe_noend_irq; >+ } else { >+ if (is_chained) { >+ desc->ipipe_ack = handle; >+ desc->ipipe_end = &__ipipe_noend_irq; >+ handle = __ipipe_noack_irq; >+ } else if (handle == &handle_simple_irq) { >+ desc->ipipe_ack = &__ipipe_ack_simple_irq; >+ desc->ipipe_end = &__ipipe_end_simple_irq; >+ } else if (handle == &handle_level_irq) { >+ desc->ipipe_ack = &__ipipe_ack_level_irq; >+ desc->ipipe_end = &__ipipe_end_level_irq; >+ } else if (handle == &handle_edge_irq) { >+ desc->ipipe_ack = &__ipipe_ack_edge_irq; >+ desc->ipipe_end = &__ipipe_end_edge_irq; >+ } else if (handle == &handle_fasteoi_irq) { >+ desc->ipipe_ack = &__ipipe_ack_fasteoi_irq; >+ desc->ipipe_end = &__ipipe_end_fasteoi_irq; >+ } else if (handle == &handle_percpu_irq) { >+ desc->ipipe_ack = &__ipipe_ack_percpu_irq; >+ desc->ipipe_end = &__ipipe_end_percpu_irq; >+ } else if (desc->chip == &no_irq_chip) { >+ desc->ipipe_ack = &__ipipe_noack_irq; >+ desc->ipipe_end = &__ipipe_noend_irq; >+ } else { >+ desc->ipipe_ack = &__ipipe_ack_bad_irq; >+ desc->ipipe_end = &__ipipe_noend_irq; >+ } >+ } >+ >+ /* Suppress intermediate trampoline routine. */ >+ ipipe_root_domain->irqs[desc->irq].acknowledge = desc->ipipe_ack; >+ >+ return handle; >+} >+ >+#else /* !CONFIG_IPIPE */ >+ >+irq_flow_handler_t >+__fixup_irq_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained) >+{ >+ return handle; >+} >+ >+#endif /* !CONFIG_IPIPE */ >+ > void > __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, > const char *name) >@@ -720,6 +861,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, > chip_bus_lock(desc); > raw_spin_lock_irqsave(&desc->lock, flags); > >+ handle = __fixup_irq_handler(desc, handle, is_chained); >+ > /* Uninstall? 
*/ > if (handle == handle_bad_irq) { > if (desc->irq_data.chip != &no_irq_chip) >diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c >index e2347eb..887c0b8 100644 >--- a/kernel/irq/handle.c >+++ b/kernel/irq/handle.c >@@ -150,8 +150,10 @@ unsigned int __do_IRQ(unsigned int irq) > /* > * No locking required for CPU-local interrupts: > */ >+#ifndef CONFIG_IPIPE > if (desc->irq_data.chip->ack) > desc->irq_data.chip->ack(irq); >+#endif > if (likely(!(desc->status & IRQ_DISABLED))) { > action_ret = handle_IRQ_event(irq, desc->action); > if (!noirqdebug) >@@ -162,8 +164,10 @@ unsigned int __do_IRQ(unsigned int irq) > } > > raw_spin_lock(&desc->lock); >+#ifndef CONFIG_IPIPE > if (desc->irq_data.chip->ack) > desc->irq_data.chip->ack(irq); >+#endif > /* > * REPLAY is when Linux resends an IRQ that was dropped earlier > * WAITING is used by probe to mark irqs that are being tested >diff --git a/kernel/lockdep.c b/kernel/lockdep.c >index 42ba65d..2aeead2 100644 >--- a/kernel/lockdep.c >+++ b/kernel/lockdep.c >@@ -2334,7 +2334,7 @@ void trace_hardirqs_on_caller(unsigned long ip) > /* we'll do an OFF -> ON transition: */ > curr->hardirqs_enabled = 1; > >- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) >+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) > return; > if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) > return; >@@ -2377,7 +2377,7 @@ void trace_hardirqs_off_caller(unsigned long ip) > if (unlikely(!debug_locks || current->lockdep_recursion)) > return; > >- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) >+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) > return; > > if (curr->hardirqs_enabled) { >@@ -2409,7 +2409,7 @@ void trace_softirqs_on(unsigned long ip) > if (unlikely(!debug_locks)) > return; > >- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) >+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) > return; > > if (curr->softirqs_enabled) { >@@ -2443,7 +2443,7 @@ void trace_softirqs_off(unsigned long ip) > if (unlikely(!debug_locks)) > return; > >- if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) >+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled() && !irqs_disabled_hw())) > return; > > if (curr->softirqs_enabled) { >diff --git a/kernel/panic.c b/kernel/panic.c >index 4c13b1a..93a1543 100644 >--- a/kernel/panic.c >+++ b/kernel/panic.c >@@ -23,6 +23,7 @@ > #include <linux/init.h> > #include <linux/nmi.h> > #include <linux/dmi.h> >+#include <linux/ipipe_trace.h> > > #define PANIC_TIMER_STEP 100 > #define PANIC_BLINK_SPD 18 >@@ -318,6 +319,8 @@ void oops_enter(void) > { > tracing_off(); > /* can't trust the integrity of the kernel anymore: */ >+ ipipe_trace_panic_freeze(); >+ ipipe_disable_context_check(ipipe_processor_id()); > debug_locks_off(); > do_oops_enter_exit(); > } >diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c >index 048d0b5..7c4cdac 100644 >--- a/kernel/power/hibernate.c >+++ b/kernel/power/hibernate.c >@@ -270,6 +270,7 @@ static int create_image(int platform_mode) > goto Enable_cpus; > > local_irq_disable(); >+ local_irq_disable_hw_cond(); > > error = sysdev_suspend(PMSG_FREEZE); > if (error) { >@@ -301,6 +302,7 @@ static int create_image(int platform_mode) > */ > > Enable_irqs: >+ local_irq_enable_hw_cond(); > local_irq_enable(); > > Enable_cpus: >@@ -401,6 +403,7 @@ static int resume_target_kernel(bool platform_mode) > goto Enable_cpus; > > local_irq_disable(); >+ local_irq_disable_hw_cond(); > > error = sysdev_suspend(PMSG_QUIESCE); > if (error) >@@ -432,6 +435,7 @@ static int resume_target_kernel(bool platform_mode) > 
sysdev_resume(); > > Enable_irqs: >+ local_irq_enable_hw_cond(); > local_irq_enable(); > > Enable_cpus: >@@ -515,6 +519,7 @@ int hibernation_platform_enter(void) > goto Platform_finish; > > local_irq_disable(); >+ local_irq_disable_hw_cond(); > sysdev_suspend(PMSG_HIBERNATE); > if (!pm_check_wakeup_events()) { > error = -EAGAIN; >diff --git a/kernel/printk.c b/kernel/printk.c >index a23315d..289db66 100644 >--- a/kernel/printk.c >+++ b/kernel/printk.c >@@ -597,6 +597,41 @@ static int have_callable_console(void) > return 0; > } > >+#ifdef CONFIG_IPIPE >+ >+static IPIPE_DEFINE_SPINLOCK(__ipipe_printk_lock); >+ >+static int __ipipe_printk_fill; >+ >+static char __ipipe_printk_buf[__LOG_BUF_LEN]; >+ >+void __ipipe_flush_printk (unsigned virq, void *cookie) >+{ >+ char *p = __ipipe_printk_buf; >+ int len, lmax, out = 0; >+ unsigned long flags; >+ >+ goto start; >+ >+ do { >+ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); >+ start: >+ lmax = __ipipe_printk_fill; >+ while (out < lmax) { >+ len = strlen(p) + 1; >+ printk("%s",p); >+ p += len; >+ out += len; >+ } >+ spin_lock_irqsave(&__ipipe_printk_lock, flags); >+ } >+ while (__ipipe_printk_fill != lmax); >+ >+ __ipipe_printk_fill = 0; >+ >+ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); >+} >+ > /** > * printk - print a kernel message > * @fmt: format string >@@ -621,6 +656,65 @@ static int have_callable_console(void) > > asmlinkage int printk(const char *fmt, ...) > { >+ int r, fbytes, oldcount; >+ unsigned long flags; >+ int sprintk = 1; >+ int cs = -1; >+ va_list args; >+ >+ va_start(args, fmt); >+ >+ local_irq_save_hw(flags); >+ >+ if (test_bit(IPIPE_SPRINTK_FLAG, &__ipipe_current_domain->flags) || >+ oops_in_progress) >+ cs = ipipe_disable_context_check(ipipe_processor_id()); >+ else if (__ipipe_current_domain == ipipe_root_domain) { >+ struct ipipe_domain *dom; >+ >+ list_for_each_entry(dom, &__ipipe_pipeline, p_link) { >+ if (dom == ipipe_root_domain) >+ break; >+ if (test_bit(IPIPE_STALL_FLAG, >+ &ipipe_cpudom_var(dom, status))) >+ sprintk = 0; >+ } >+ } else >+ sprintk = 0; >+ >+ local_irq_restore_hw(flags); >+ >+ if (sprintk) { >+ r = vprintk(fmt, args); >+ if (cs != -1) >+ ipipe_restore_context_check(ipipe_processor_id(), cs); >+ goto out; >+ } >+ >+ spin_lock_irqsave(&__ipipe_printk_lock, flags); >+ >+ oldcount = __ipipe_printk_fill; >+ fbytes = __LOG_BUF_LEN - oldcount; >+ >+ if (fbytes > 1) { >+ r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, >+ fbytes, fmt, args) + 1; /* account for the null byte */ >+ __ipipe_printk_fill += r; >+ } else >+ r = 0; >+ >+ spin_unlock_irqrestore(&__ipipe_printk_lock, flags); >+ >+ if (oldcount == 0) >+ ipipe_trigger_irq(__ipipe_printk_virq); >+out: >+ va_end(args); >+ >+ return r; >+} >+#else /* !CONFIG_IPIPE */ >+asmlinkage int printk(const char *fmt, ...) >+{ > va_list args; > int r; > >@@ -638,6 +732,7 @@ asmlinkage int printk(const char *fmt, ...) 
> > return r; > } >+#endif /* CONFIG_IPIPE */ > > /* cpu currently holding logbuf_lock */ > static volatile unsigned int printk_cpu = UINT_MAX; >diff --git a/kernel/sched.c b/kernel/sched.c >index 297d1a0..abdee01 100644 >--- a/kernel/sched.c >+++ b/kernel/sched.c >@@ -2558,7 +2558,8 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, > > smp_wmb(); > rq = task_rq_lock(p, &flags); >- if (!(p->state & state)) >+ if (!(p->state & state) || >+ (p->state & (TASK_NOWAKEUP|TASK_ATOMICSWITCH))) > goto out; > > if (p->se.on_rq) >@@ -3031,22 +3032,29 @@ asmlinkage void schedule_tail(struct tas > #endif > if (current->set_child_tid) > put_user(task_pid_vnr(current), current->set_child_tid); >+ >+ ipipe_init_notify(current); > } > > /* > * context_switch - switch to the new MM and the new > * thread's register state. > */ >-static inline void >+int > context_switch(struct rq *rq, struct task_struct *prev, > struct task_struct *next) > { > struct mm_struct *mm, *oldmm; > >- prepare_task_switch(rq, prev, next); >- trace_sched_switch(prev, next); > mm = next->mm; > oldmm = prev->active_mm; >+ >+if (!rq) { >+ switch_mm(oldmm, next->active_mm, next); >+ if (!mm) enter_lazy_tlb(oldmm, next); >+} else { >+ prepare_task_switch(rq, prev, next); >+ trace_sched_switch(prev, next); > /* > * For paravirt, this is coupled with an exit in switch_to to > * combine the page table reload and the switch backend into >@@ -3074,11 +3082,24 @@ context_switch(struct rq *rq, struct tas > #ifndef __ARCH_WANT_UNLOCKED_CTXSW > spin_release(&rq->lock.dep_map, 1, _THIS_IP_); > #endif >- >+} >+#ifdef CONFIG_IPIPE >+ next->ptd[IPIPE_ROOT_NPTDKEYS - 1] = prev; >+#endif /* CONFIG_IPIPE */ > /* Here we just switch the register state and the stack. */ > switch_to(prev, next, prev); > > barrier(); >+ >+if (unlikely(rq)) { >+#if 1 // def CONFIG_IPIPE_DELAYED_ATOMICSW >+ current->state &= ~TASK_ATOMICSWITCH; >+#else >+ prev->state &= ~TASK_ATOMICSWITCH; >+#endif >+ if (task_hijacked(prev)) >+ return 1; __ipipe_dispatch_event(IPIPE_FIRST_EVENT - 2, 0); >+ > /* > * this_rq must be evaluated again because prev may have moved > * CPUs since it called schedule(), thus the 'rq' on its stack >@@ -3086,6 +3107,10 @@ context_switch(struct rq *rq, struct tas > */ > finish_task_switch(this_rq(), prev); > } >+ return 0; >+} >+ >+EXPORT_SYMBOL(context_switch); > > /* > * nr_running, nr_uninterruptible and nr_context_switches: >@@ -3937,6 +3962,7 @@ notrace unsigned long get_parent_ip(unsi > > void __kprobes add_preempt_count(int val) > { >+ ipipe_check_context(ipipe_root_domain); > #ifdef CONFIG_DEBUG_PREEMPT > /* > * Underflow? >@@ -3959,6 +3985,7 @@ EXPORT_SYMBOL(add_preempt_count); > > void __kprobes sub_preempt_count(int val) > { >+ ipipe_check_context(ipipe_root_domain); > #ifdef CONFIG_DEBUG_PREEMPT > /* > * Underflow? >@@ -4007,6 +4034,7 @@ static noinline void __schedule_bug(stru > */ > static inline void schedule_debug(struct task_struct *prev) > { >+ ipipe_check_context(ipipe_root_domain); > /* > * Test if we are atomic. Since do_exit() needs to call into > * schedule() atomically, we ignore that path for now. >@@ -4064,7 +4092,7 @@ pick_next_task(struct rq *rq) > /* > * schedule() is the main scheduler function. 
> */ >-asmlinkage void __sched schedule(void) >+asmlinkage int __sched schedule(void) > { > struct task_struct *prev, *next; > unsigned long *switch_count; >@@ -4078,6 +4106,10 @@ need_resched: > rcu_note_context_switch(cpu); > prev = rq->curr; > >+ if (unlikely(prev->state & TASK_ATOMICSWITCH)) >+ /* Pop one disable level -- one still remains. */ >+ preempt_enable(); >+ > release_kernel_lock(prev); > need_resched_nonpreemptible: > >@@ -4129,7 +4161,8 @@ need_resched_nonpreemptible: > rq->curr = next; > ++*switch_count; > >- context_switch(rq, prev, next); /* unlocks the rq */ >+ if (context_switch(rq, prev, next)) /* unlocks the rq */ >+ return 1; /* task hijacked by higher domain */ > /* > * The context switch have flipped the stack from under us > * and restored the local variables which were saved when >@@ -4138,8 +4171,10 @@ need_resched_nonpreemptible: > */ > cpu = smp_processor_id(); > rq = cpu_rq(cpu); >- } else >+ } else { >+ prev->state &= ~TASK_ATOMICSWITCH; > raw_spin_unlock_irq(&rq->lock); >+ } > > post_schedule(rq); > >@@ -4149,6 +4184,8 @@ need_resched_nonpreemptible: > preempt_enable_no_resched(); > if (need_resched()) > goto need_resched; >+ >+ return 0; > } > EXPORT_SYMBOL(schedule); > >@@ -4240,7 +4277,8 @@ asmlinkage void __sched notrace preempt_ > > do { > add_preempt_count_notrace(PREEMPT_ACTIVE); >- schedule(); >+ if (schedule()) >+ return; > sub_preempt_count_notrace(PREEMPT_ACTIVE); > > /* >@@ -5031,6 +5069,7 @@ recheck: > oldprio = p->prio; > prev_class = p->sched_class; > __setscheduler(rq, p, policy, param->sched_priority); >+ ipipe_setsched_notify(p); > > if (running) > p->sched_class->set_curr_task(rq); >@@ -5699,6 +5738,7 @@ void __cpuinit init_idle(struct task_str > #else > task_thread_info(idle)->preempt_count = 0; > #endif >+ ipipe_check_context(ipipe_root_domain); > /* > * The idle tasks have their own, simple scheduling class: > */ >@@ -9603,3 +9643,64 @@ void synchronize_sched_expedited(void) > EXPORT_SYMBOL_GPL(synchronize_sched_expedited); > > #endif /* #else #ifndef CONFIG_SMP */ >+ >+#ifdef CONFIG_IPIPE >+ >+int ipipe_setscheduler_root(struct task_struct *p, int policy, int prio) >+{ >+ const struct sched_class *prev_class; >+ int oldprio, on_rq, running; >+ unsigned long flags; >+ struct rq *rq; >+ >+ raw_spin_lock_irqsave(&p->pi_lock, flags); >+ rq = __task_rq_lock(p); >+ on_rq = p->se.on_rq; >+ running = task_current(rq, p); >+ if (on_rq) >+ deactivate_task(rq, p, 0); >+ if (running) >+ p->sched_class->put_prev_task(rq, p); >+ >+ p->sched_reset_on_fork = 0; >+ >+ oldprio = p->prio; >+ prev_class = p->sched_class; >+ __setscheduler(rq, p, policy, prio); >+ ipipe_setsched_notify(p); >+ >+ if (running) >+ p->sched_class->set_curr_task(rq); >+ if (on_rq) { >+ activate_task(rq, p, 0); >+ >+ check_class_changed(rq, p, prev_class, oldprio, running); >+ } >+ __task_rq_unlock(rq); >+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); >+ >+ rt_mutex_adjust_pi(p); >+ >+ return 0; >+} >+EXPORT_SYMBOL_GPL(ipipe_setscheduler_root); >+ >+int ipipe_reenter_root(struct task_struct *prev, int policy, int prio) >+{ >+ struct rq *rq = this_rq(); >+ >+ finish_task_switch(rq, prev); >+ >+ post_schedule(rq); >+ >+ (void)reacquire_kernel_lock(current); >+ preempt_enable_no_resched(); >+ >+ if (current->policy != policy || current->rt_priority != prio) >+ return ipipe_setscheduler_root(current, policy, prio); >+ >+ return 0; >+} >+EXPORT_SYMBOL_GPL(ipipe_reenter_root); >+ >+#endif /* CONFIG_IPIPE */ >diff --git a/kernel/signal.c b/kernel/signal.c >index 4e3cff1..8ffb89c 
100644 >--- a/kernel/signal.c >+++ b/kernel/signal.c >@@ -558,6 +558,7 @@ void signal_wake_up(struct task_struct *t, int resume) > unsigned int mask; > > set_tsk_thread_flag(t, TIF_SIGPENDING); >+ ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */ > > /* > * For SIGKILL, we want to wake it up in the stopped/traced/killable >diff --git a/kernel/spinlock.c b/kernel/spinlock.c >index be6517f..862aed4 100644 >--- a/kernel/spinlock.c >+++ b/kernel/spinlock.c >@@ -26,7 +26,9 @@ > * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are > * not re-enabled during lock-acquire (which the preempt-spin-ops do): > */ >-#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) >+#if !defined(CONFIG_GENERIC_LOCKBREAK) || \ >+ defined(CONFIG_DEBUG_LOCK_ALLOC) || \ >+ defined(CONFIG_IPIPE) > /* > * The __lock_function inlines are taken from > * include/linux/spinlock_api_smp.h >diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c >index b6b898d..75c2031 100644 >--- a/kernel/time/tick-common.c >+++ b/kernel/time/tick-common.c >@@ -69,7 +69,7 @@ static void tick_periodic(int cpu) > write_sequnlock(&xtime_lock); > } > >- update_process_times(user_mode(get_irq_regs())); >+ update_root_process_times(get_irq_regs()); > profile_tick(CPU_PROFILING); > } > >@@ -177,6 +177,10 @@ static void tick_setup_device(struct tick_device *td, > > td->evtdev = newdev; > >+ /* I-pipe: derive global tick IRQ from CPU 0 */ >+ if (cpu == 0) >+ ipipe_update_tick_evtdev(newdev); >+ > /* > * When the device is not per cpu, pin the interrupt to the > * current cpu: >diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c >index 3e216e0..9434a56 100644 >--- a/kernel/time/tick-sched.c >+++ b/kernel/time/tick-sched.c >@@ -598,7 +598,7 @@ static void tick_nohz_handler(struct clock_event_device *dev) > ts->idle_jiffies++; > } > >- update_process_times(user_mode(regs)); >+ update_root_process_times(regs); > profile_tick(CPU_PROFILING); > > while (tick_nohz_reprogram(ts, now)) { >@@ -758,7 +758,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) > touch_softlockup_watchdog(); > ts->idle_jiffies++; > } >- update_process_times(user_mode(regs)); >+ update_root_process_times(regs); > profile_tick(CPU_PROFILING); > } > >diff --git a/kernel/timer.c b/kernel/timer.c >index 353b922..4ac6134 100644 >--- a/kernel/timer.c >+++ b/kernel/timer.c >@@ -1293,6 +1293,25 @@ void update_process_times(int user_tick) > run_posix_cpu_timers(p); > } > >+#ifdef CONFIG_IPIPE >+ >+void update_root_process_times(struct pt_regs *regs) >+{ >+ int cpu, user_tick = user_mode(regs); >+ >+ if (__ipipe_root_tick_p(regs)) { >+ update_process_times(user_tick); >+ return; >+ } >+ >+ run_local_timers(); >+ cpu = smp_processor_id(); >+ rcu_check_callbacks(cpu, user_tick); >+ run_posix_cpu_timers(current); >+} >+ >+#endif >+ > /* > * This function runs timers and the timer-tq in bottom half context. 
> */ >diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c >index f3dadae..6f3f0fa 100644 >--- a/kernel/trace/ftrace.c >+++ b/kernel/trace/ftrace.c >@@ -29,6 +29,7 @@ > #include <linux/list.h> > #include <linux/hash.h> > #include <linux/rcupdate.h> >+#include <linux/ipipe.h> > > #include <trace/events/sched.h> > >@@ -1156,6 +1157,9 @@ static int __ftrace_modify_code(void *data) > > static void ftrace_run_update_code(int command) > { >+#ifdef CONFIG_IPIPE >+ unsigned long flags; >+#endif /* CONFIG_IPIPE */ > int ret; > > ret = ftrace_arch_code_modify_prepare(); >@@ -1163,7 +1167,13 @@ static void ftrace_run_update_code(int command) > if (ret) > return; > >+#ifdef CONFIG_IPIPE >+ flags = ipipe_critical_enter(NULL); >+ __ftrace_modify_code(&command); >+ ipipe_critical_exit(flags); >+#else /* !CONFIG_IPIPE */ > stop_machine(__ftrace_modify_code, &command, NULL); >+#endif /* !CONFIG_IPIPE */ > > ret = ftrace_arch_code_modify_post_process(); > FTRACE_WARN_ON(ret); >@@ -2722,9 +2732,9 @@ static int ftrace_process_locs(struct module *mod, > } > > /* disable interrupts to prevent kstop machine */ >- local_irq_save(flags); >+ local_irq_save_hw_notrace(flags); > ftrace_update_code(mod); >- local_irq_restore(flags); >+ local_irq_restore_hw_notrace(flags); > mutex_unlock(&ftrace_lock); > > return 0; >@@ -2803,9 +2813,9 @@ void __init ftrace_init(void) > /* Keep the ftrace pointer to the stub */ > addr = (unsigned long)ftrace_stub; > >- local_irq_save(flags); >+ local_irq_save_hw_notrace(flags); > ftrace_dyn_arch_init(&addr); >- local_irq_restore(flags); >+ local_irq_restore_hw_notrace(flags); > > /* ftrace_dyn_arch_init places the return code in addr */ > if (addr) >diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug >index 28b42b9..49f57b4 100644 >--- a/lib/Kconfig.debug >+++ b/lib/Kconfig.debug >@@ -136,6 +136,8 @@ config DEBUG_SECTION_MISMATCH > - Enable verbose reporting from modpost to help solving > the section mismatches reported. 
> >+source "kernel/ipipe/Kconfig.debug" >+ > config DEBUG_KERNEL > bool "Kernel debugging" > help >diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c >index 9681d54..2dba50c 100644 >--- a/lib/bust_spinlocks.c >+++ b/lib/bust_spinlocks.c >@@ -13,6 +13,7 @@ > #include <linux/wait.h> > #include <linux/vt_kern.h> > #include <linux/console.h> >+#include <linux/ipipe_trace.h> > > > void __attribute__((weak)) bust_spinlocks(int yes) >@@ -24,6 +25,7 @@ void __attribute__((weak)) bust_spinlocks(int yes) > unblank_screen(); > #endif > console_unblank(); >+ ipipe_trace_panic_dump(); > if (--oops_in_progress == 0) > wake_up_klogd(); > } >diff --git a/lib/ioremap.c b/lib/ioremap.c >index 5730ecd..cbd4ab9 100644 >--- a/lib/ioremap.c >+++ b/lib/ioremap.c >@@ -85,8 +85,8 @@ int ioremap_page_range(unsigned long addr, > if (err) > break; > } while (pgd++, addr = next, addr != end); >- >- flush_cache_vmap(start, end); >+ __ipipe_pin_range_globally(start, end); >+ flush_cache_vmap(start, end); > > return err; > } >diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c >index 4689cb0..3d12764 100644 >--- a/lib/smp_processor_id.c >+++ b/lib/smp_processor_id.c >@@ -12,10 +12,13 @@ notrace unsigned int debug_smp_processor_id(void) > unsigned long preempt_count = preempt_count(); > int this_cpu = raw_smp_processor_id(); > >+ if (!ipipe_root_domain_p) >+ goto out; >+ > if (likely(preempt_count)) > goto out; > >- if (irqs_disabled()) >+ if (irqs_disabled() || irqs_disabled_hw()) > goto out; > > /* >diff --git a/mm/memory.c b/mm/memory.c >index 02e48aa..ba7a45c 100644 >--- a/mm/memory.c >+++ b/mm/memory.c >@@ -640,6 +640,32 @@ out: > return pfn_to_page(pfn); > } > >+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) >+{ >+ /* >+ * If the source page was a PFN mapping, we don't have >+ * a "struct page" for it. We do a best-effort copy by >+ * just copying from the original user address. If that >+ * fails, we just zero-fill it. Live with it. >+ */ >+ if (unlikely(!src)) { >+ void *kaddr = kmap_atomic(dst, KM_USER0); >+ void __user *uaddr = (void __user *)(va & PAGE_MASK); >+ >+ /* >+ * This really shouldn't fail, because the page is there >+ * in the page tables. But it might just be unreadable, >+ * in which case we just give up and fill the result with >+ * zeroes. >+ */ >+ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) >+ clear_page(kaddr); >+ kunmap_atomic(kaddr, KM_USER0); >+ flush_dcache_page(dst); >+ } else >+ copy_user_highpage(dst, src, va, vma); >+} >+ > /* > * copy one vm_area from one task to the other. 
Assumes the page tables > * already present in the new task to be cleared in the whole range >@@ -648,8 +674,8 @@ out: > > static inline unsigned long > copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, >- pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, >- unsigned long addr, int *rss) >+ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, >+ unsigned long addr, int *rss, struct page *uncow_page) > { > unsigned long vm_flags = vma->vm_flags; > pte_t pte = *src_pte; >@@ -692,6 +718,21 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, > * in the parent and the child > */ > if (is_cow_mapping(vm_flags)) { >+#ifdef CONFIG_IPIPE >+ if (uncow_page) { >+ struct page *old_page = vm_normal_page(vma, addr, pte); >+ cow_user_page(uncow_page, old_page, addr, vma); >+ pte = mk_pte(uncow_page, vma->vm_page_prot); >+ >+ if (vm_flags & VM_SHARED) >+ pte = pte_mkclean(pte); >+ pte = pte_mkold(pte); >+ >+ page_add_new_anon_rmap(uncow_page, vma, addr); >+ rss[!!PageAnon(uncow_page)]++; >+ goto out_set_pte; >+ } >+#endif /* CONFIG_IPIPE */ > ptep_set_wrprotect(src_mm, addr, src_pte); > pte = pte_wrprotect(pte); > } >@@ -729,13 +770,27 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, > int progress = 0; > int rss[NR_MM_COUNTERS]; > swp_entry_t entry = (swp_entry_t){0}; >- >+ struct page *uncow_page = NULL; >+#ifdef CONFIG_IPIPE >+ int do_cow_break = 0; >+again: >+ if (do_cow_break) { >+ uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); >+ if (uncow_page == NULL) >+ return -ENOMEM; >+ do_cow_break = 0; >+ } >+#else > again: >+#endif > init_rss_vec(rss); > > dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); >- if (!dst_pte) >+ if (!dst_pte) { >+ if (uncow_page) >+ page_cache_release(uncow_page); > return -ENOMEM; >+ } > src_pte = pte_offset_map(src_pmd, addr); > src_ptl = pte_lockptr(src_mm, src_pmd); > spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); >@@ -758,8 +813,25 @@ again: > progress++; > continue; > } >+#ifdef CONFIG_IPIPE >+ if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) { >+ if (is_cow_mapping(vma->vm_flags) && >+ test_bit(MMF_VM_PINNED, &src_mm->flags) && >+ ((vma->vm_flags|src_mm->def_flags) & VM_LOCKED)) { >+ arch_leave_lazy_mmu_mode(); >+ spin_unlock(src_ptl); >+ pte_unmap(src_pte); >+ add_mm_rss_vec(dst_mm, rss); >+ pte_unmap_unlock(dst_pte, dst_ptl); >+ cond_resched(); >+ do_cow_break = 1; >+ goto again; >+ } >+ } >+#endif > entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, >- vma, addr, rss); >+ vma, addr, rss, uncow_page); >+ uncow_page = NULL; > if (entry.val) > break; > progress += 8; >@@ -2061,32 +2133,6 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) > return pte; > } > >-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) >-{ >- /* >- * If the source page was a PFN mapping, we don't have >- * a "struct page" for it. We do a best-effort copy by >- * just copying from the original user address. If that >- * fails, we just zero-fill it. Live with it. >- */ >- if (unlikely(!src)) { >- void *kaddr = kmap_atomic(dst, KM_USER0); >- void __user *uaddr = (void __user *)(va & PAGE_MASK); >- >- /* >- * This really shouldn't fail, because the page is there >- * in the page tables. But it might just be unreadable, >- * in which case we just give up and fill the result with >- * zeroes. 
>- */ >- if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) >- clear_page(kaddr); >- kunmap_atomic(kaddr, KM_USER0); >- flush_dcache_page(dst); >- } else >- copy_user_highpage(dst, src, va, vma); >-} >- > /* > * This routine handles present pages, when users try to write > * to a shared page. It is done by copying the page to a new address >@@ -3608,3 +3654,111 @@ void might_fault(void) > } > EXPORT_SYMBOL(might_fault); > #endif >+ >+#ifdef CONFIG_IPIPE >+ >+static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd, >+ struct vm_area_struct *vma, >+ unsigned long addr, unsigned long end) >+{ >+ spinlock_t *ptl; >+ pte_t *pte; >+ >+ do { >+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl); >+ if (!pte) >+ continue; >+ >+ if (!pte_present(*pte) || pte_write(*pte)) { >+ pte_unmap_unlock(pte, ptl); >+ continue; >+ } >+ >+ if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM) >+ return -ENOMEM; >+ } while (addr += PAGE_SIZE, addr != end); >+ return 0; >+} >+ >+static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud, >+ struct vm_area_struct *vma, >+ unsigned long addr, unsigned long end) >+{ >+ unsigned long next; >+ pmd_t *pmd; >+ >+ pmd = pmd_offset(pud, addr); >+ do { >+ next = pmd_addr_end(addr, end); >+ if (pmd_none_or_clear_bad(pmd)) >+ continue; >+ if (ipipe_pin_pte_range(mm, pmd, vma, addr, next)) >+ return -ENOMEM; >+ } while (pmd++, addr = next, addr != end); >+ return 0; >+} >+ >+static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd, >+ struct vm_area_struct *vma, >+ unsigned long addr, unsigned long end) >+{ >+ unsigned long next; >+ pud_t *pud; >+ >+ pud = pud_offset(pgd, addr); >+ do { >+ next = pud_addr_end(addr, end); >+ if (pud_none_or_clear_bad(pud)) >+ continue; >+ if (ipipe_pin_pmd_range(mm, pud, vma, addr, next)) >+ return -ENOMEM; >+ } while (pud++, addr = next, addr != end); >+ return 0; >+} >+ >+int ipipe_disable_ondemand_mappings(struct task_struct *tsk) >+{ >+ unsigned long addr, next, end; >+ struct vm_area_struct *vma; >+ struct mm_struct *mm; >+ int result = 0; >+ pgd_t *pgd; >+ >+ mm = get_task_mm(tsk); >+ if (!mm) >+ return -EPERM; >+ >+ down_write(&mm->mmap_sem); >+ if (test_bit(MMF_VM_PINNED, &mm->flags)) >+ goto done_mm; >+ >+ for (vma = mm->mmap; vma; vma = vma->vm_next) { >+ if (!is_cow_mapping(vma->vm_flags) >+ || !(vma->vm_flags & VM_WRITE)) >+ continue; >+ >+ addr = vma->vm_start; >+ end = vma->vm_end; >+ >+ pgd = pgd_offset(mm, addr); >+ do { >+ next = pgd_addr_end(addr, end); >+ if (pgd_none_or_clear_bad(pgd)) >+ continue; >+ if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) { >+ result = -ENOMEM; >+ goto done_mm; >+ } >+ } while (pgd++, addr = next, addr != end); >+ } >+ set_bit(MMF_VM_PINNED, &mm->flags); >+ >+ done_mm: >+ up_write(&mm->mmap_sem); >+ mmput(mm); >+ return result; >+} >+ >+EXPORT_SYMBOL(ipipe_disable_ondemand_mappings); >+ >+#endif >diff --git a/mm/mmu_context.c b/mm/mmu_context.c >index 9e82e93..a4bd34d 100644 >--- a/mm/mmu_context.c >+++ b/mm/mmu_context.c >@@ -24,15 +24,18 @@ void use_mm(struct mm_struct *mm) > { > struct mm_struct *active_mm; > struct task_struct *tsk = current; >+ unsigned long flags; > > task_lock(tsk); > active_mm = tsk->active_mm; >+ ipipe_mm_switch_protect(flags); > if (active_mm != mm) { > atomic_inc(&mm->mm_count); > tsk->active_mm = mm; > } > tsk->mm = mm; >- switch_mm(active_mm, mm, tsk); >+ __switch_mm(active_mm, mm, tsk); >+ ipipe_mm_switch_unprotect(flags); > task_unlock(tsk); > > if (active_mm != mm) >diff --git a/mm/vmalloc.c b/mm/vmalloc.c 
>index eb5cc7d..9d4c7ce 100644 >--- a/mm/vmalloc.c >+++ b/mm/vmalloc.c >@@ -171,6 +171,8 @@ static int vmap_page_range_noflush(unsigned long start, unsigned long end, > return err; > } while (pgd++, addr = next, addr != end); > >+ __ipipe_pin_range_globally(start, end); >+ > return nr; > } >