Merge branch 'x86-percpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 14 Sep 2009 15:01:28 +0000 (08:01 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 14 Sep 2009 15:01:28 +0000 (08:01 -0700)
* 'x86-percpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, percpu: Collect hot percpu variables into one cacheline
  x86, percpu: Fix DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED()
  x86, percpu: Add 'percpu_read_stable()' interface for cacheable accesses

arch/x86/include/asm/current.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/thread_info.h
arch/x86/kernel/cpu/common.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
include/linux/percpu-defs.h

index c68c361697e144649f9d0b40e69921d40ca2cbb8..4d447b732d82df76b4c55d77fde42cd4024c2971 100644 (file)
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);
 
 static __always_inline struct task_struct *get_current(void)
 {
-       return percpu_read(current_task);
+       return percpu_read_stable(current_task);
 }
 
 #define current get_current()
index 103f1ddb0d85b342498beef4153267a3af93f0a0..04eacefcfd26035d588f44c08a3114f6097f5c62 100644 (file)
@@ -49,7 +49,7 @@
 #define __percpu_arg(x)                "%%"__stringify(__percpu_seg)":%P" #x
 #define __my_cpu_offset                percpu_read(this_cpu_off)
 #else
-#define __percpu_arg(x)                "%" #x
+#define __percpu_arg(x)                "%P" #x
 #endif
 
 /*
@@ -104,36 +104,48 @@ do {                                                      \
        }                                               \
 } while (0)
 
-#define percpu_from_op(op, var)                                \
+#define percpu_from_op(op, var, constraint)            \
 ({                                                     \
        typeof(var) ret__;                              \
        switch (sizeof(var)) {                          \
        case 1:                                         \
                asm(op "b "__percpu_arg(1)",%0"         \
                    : "=q" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        case 2:                                         \
                asm(op "w "__percpu_arg(1)",%0"         \
                    : "=r" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        case 4:                                         \
                asm(op "l "__percpu_arg(1)",%0"         \
                    : "=r" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        case 8:                                         \
                asm(op "q "__percpu_arg(1)",%0"         \
                    : "=r" (ret__)                      \
-                   : "m" (var));                       \
+                   : constraint);                      \
                break;                                  \
        default: __bad_percpu_size();                   \
        }                                               \
        ret__;                                          \
 })
 
-#define percpu_read(var)       percpu_from_op("mov", per_cpu__##var)
+/*
+ * percpu_read() makes gcc load the percpu variable every time it is
+ * accessed while percpu_read_stable() allows the value to be cached.
+ * percpu_read_stable() is more efficient and can be used if its value
+ * is guaranteed to be valid across cpus.  The current users include
+ * get_current() and current_thread_info(), both of which are actually
+ * per-thread variables implemented as per-cpu variables and thus
+ * stable for the duration of the respective task.
+ */
+#define percpu_read(var)       percpu_from_op("mov", per_cpu__##var,   \
+                                              "m" (per_cpu__##var))
+#define percpu_read_stable(var)        percpu_from_op("mov", per_cpu__##var,   \
+                                              "p" (&per_cpu__##var))
 #define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
 #define percpu_add(var, val)   percpu_to_op("add", per_cpu__##var, val)
 #define percpu_sub(var, val)   percpu_to_op("sub", per_cpu__##var, val)
index 6f7786aea4fc5028194b472e46612416177a8cc7..d27d0a2fec4c227540fe4042ea823853fd82a8d3 100644 (file)
@@ -214,7 +214,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 static inline struct thread_info *current_thread_info(void)
 {
        struct thread_info *ti;
-       ti = (void *)(percpu_read(kernel_stack) +
+       ti = (void *)(percpu_read_stable(kernel_stack) +
                      KERNEL_STACK_OFFSET - THREAD_SIZE);
        return ti;
 }
index 55a6abe4039417ec90a1e5aaf8cde29c19c00a9f..2055fc2b2e6b8967040ce6d7e221c508c6e6bf4e 100644 (file)
@@ -987,13 +987,21 @@ struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
                     irq_stack_union) __aligned(PAGE_SIZE);
 
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
-       init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+/*
+ * The following four percpu variables are hot.  Align current_task to
+ * cacheline size such that all four fall in the same cacheline.
+ */
+DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
+       &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
        (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+       init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
 /*
@@ -1008,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
 };
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
-       __aligned(PAGE_SIZE);
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
 
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
@@ -1042,6 +1049,9 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);
 
 #else  /* CONFIG_X86_64 */
 
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
index a80eddd416588e6b8f4d86683481e81bab0cb8e0..4cf79567cdab0728b33c2f9698e3a5b535e4eb28 100644 (file)
@@ -61,9 +61,6 @@
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
 /*
  * Return saved PC of a blocked thread.
  */
index a28279dbb07ca21d657b372fb7404126b03b19bc..ad535b6831700bfdac237942d22f4380dbb43417 100644 (file)
@@ -55,9 +55,6 @@
 
 asmlinkage extern void ret_from_fork(void);
 
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
 DEFINE_PER_CPU(unsigned long, old_rsp);
 static DEFINE_PER_CPU(unsigned char, is_idle);
 
index 3058cf9dd3d43a94d7e0a027df734a55d93e0c5d..0761491b3eec791b6869d989c4c905429d2a686a 100644 (file)
 /*
  * Declaration/definition used for per-CPU variables that must be page aligned.
  */
-#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name)                               \
-       DECLARE_PER_CPU_SECTION(type, name, ".page_aligned")
+#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name)                       \
+       DECLARE_PER_CPU_SECTION(type, name, ".page_aligned")            \
+       __aligned(PAGE_SIZE)
 
 #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name)                                \
-       DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")
+       DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")             \
+       __aligned(PAGE_SIZE)
 
 /*
  * Intermodule exports for per-CPU variables.