Merge tag 'v3.8-rc7' into x86/asm
author     H. Peter Anvin <hpa@linux.intel.com>
           Tue, 12 Feb 2013 23:47:45 +0000 (15:47 -0800)
committer  H. Peter Anvin <hpa@linux.intel.com>
           Tue, 12 Feb 2013 23:47:45 +0000 (15:47 -0800)
Merge in the updates to head_32.S from the previous urgent branch, as
upcoming patches will make further changes.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Changed files:

arch/x86/Kconfig
arch/x86/include/asm/linkage.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/required-features.h
arch/x86/include/asm/xor.h
arch/x86/include/asm/xor_32.h
arch/x86/include/asm/xor_64.h
arch/x86/kernel/sys_x86_64.c
arch/x86/mm/init_64.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 225543bf45a5ca551f9609b18cf8711f729f71e7..430204cfc0f7c28840b0d13240c5bc7fb8ce9992 100644
@@ -114,6 +114,7 @@ config X86
        select MODULES_USE_ELF_RELA if X86_64
        select CLONE_BACKWARDS if X86_32
        select GENERIC_SIGALTSTACK
+       select ARCH_USE_BUILTIN_BSWAP
 
 config INSTRUCTION_DECODER
        def_bool y
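
Selecting ARCH_USE_BUILTIN_BSWAP lets the swab.h byte-swap helpers route
through the compiler builtins instead of hand-written asm. A minimal sketch
of the enabled path (illustration only, not part of this patch; assumes gcc
4.4 or newer, which is what the generic option expects):

    #include <linux/types.h>

    /* gcc may lower the builtin to a single bswap, or fold a load plus
     * swap into movbe on Atom -- which the NEED_MOVBE change further
     * down guarantees is available on CONFIG_MATOM kernels. */
    static inline __u32 swab32_builtin(__u32 x)
    {
            return __builtin_bswap32(x);
    }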
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 48142971b25d095b9724a06a6eff47b2cf302532..79327e9483a34ec33c3ba71bbdc94397f2456f46 100644
 #define __asmlinkage_protect0(ret) \
        __asmlinkage_protect_n(ret)
 #define __asmlinkage_protect1(ret, arg1) \
-       __asmlinkage_protect_n(ret, "g" (arg1))
+       __asmlinkage_protect_n(ret, "m" (arg1))
 #define __asmlinkage_protect2(ret, arg1, arg2) \
-       __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2))
+       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
 #define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
-       __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3))
+       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
 #define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
-       __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
-                             "g" (arg4))
+       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+                             "m" (arg4))
 #define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
-       __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
-                             "g" (arg4), "g" (arg5))
+       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+                             "m" (arg4), "m" (arg5))
 #define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
-       __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
-                             "g" (arg4), "g" (arg5), "g" (arg6))
+       __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+                             "m" (arg4), "m" (arg5), "m" (arg6))
 
 #endif /* CONFIG_X86_32 */
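
The constraint switch above is the point of this hunk: "g" lets gcc satisfy
the operand with a register or immediate copy of the value, so the on-stack
argument slot that an asmlinkage callee actually reads may go dead; "m"
makes the stack slot itself the operand, forcing it to stay live. A minimal
sketch of the difference (hypothetical macros, assuming the usual empty-asm
expansion of __asmlinkage_protect_n()):

    /* The empty asm only *uses* the value.  With "g" the compiler may
     * pick a register holding arg, leaving the stack slot dead; with
     * "m" the memory slot must remain valid across the asm. */
    #define keep_alive_weak(arg)    asm volatile ("" : : "g" (arg))
    #define keep_alive_strict(arg)  asm volatile ("" : : "m" (arg))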
 
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5199db2923d31ff88b94c54397daae2b279a7bc7..512ec6bc79782f398a583b13a2c7e11372f160d6 100644
@@ -781,6 +781,18 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
        memcpy(dst, src, count * sizeof(pgd_t));
 }
 
+/*
+ * The x86 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+               unsigned long addr, pte_t *ptep)
+{
+}
+static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+               unsigned long addr, pmd_t *pmd)
+{
+}
 
 #include <asm-generic/pgtable.h>
 #endif /* __ASSEMBLY__ */
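
Turning the old no-op macros into shared empty inlines (the per-variant
copies are deleted below) leaves one definition for 32- and 64-bit and gets
every call site type-checked. A small illustration, with a hypothetical
caller:

    /* The old do-while macro never evaluated its arguments, so a
     * wrongly typed pointer compiled silently; the inline stub
     * enforces the prototypes while still generating no code. */
    static void caller(struct vm_area_struct *vma, unsigned long addr,
                       pte_t *ptep)
    {
            update_mmu_cache(vma, addr, ptep);   /* types checked, no-op */
    }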
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 8faa215a503e54469359ab25376cc316e7703f82..9ee322103c6da79835fbae00772e9d6a7d60135c 100644
@@ -66,13 +66,6 @@ do {                                         \
        __flush_tlb_one((vaddr));               \
 } while (0)
 
-/*
- * The i386 doesn't have any external MMU info: the kernel page
- * tables contain all the necessary information.
- */
-#define update_mmu_cache(vma, address, ptep) do { } while (0)
-#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 47356f9df82e411ca3aa4e0025611d37451064b1..615b0c78449fd474257a3a63bd58c5c4ba52042f 100644
@@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
 #define pte_unmap(pte) ((void)(pte))/* NOP */
 
-#define update_mmu_cache(vma, address, ptep) do { } while (0)
-#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6c7fc25f2c34a0eab822c2d6ab17c3b1cacbf0a0..5c6e4fb370f5aac25ee36002022ed72a59592f5e 100644
 # define NEED_NOPL     0
 #endif
 
+#ifdef CONFIG_MATOM
+# define NEED_MOVBE    (1<<(X86_FEATURE_MOVBE & 31))
+#else
+# define NEED_MOVBE    0
+#endif
+
 #ifdef CONFIG_X86_64
 #ifdef CONFIG_PARAVIRT
 /* Paravirtualized systems may not have PSE or PGE available */
@@ -80,7 +86,7 @@
 
 #define REQUIRED_MASK2 0
 #define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 0
+#define REQUIRED_MASK4 (NEED_MOVBE)
 #define REQUIRED_MASK5 0
 #define REQUIRED_MASK6 0
 #define REQUIRED_MASK7 0
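
For reference, the mask arithmetic -- assuming the customary definition
X86_FEATURE_MOVBE == 4*32 + 22, i.e. CPUID.1:ECX bit 22 living in
cpufeature word 4:

    /* NEED_MOVBE = 1 << (X86_FEATURE_MOVBE & 31)
     *            = 1 << (150 & 31)
     *            = 1 << 22
     * Word 4 holds the CPUID level 1 ECX flags, hence the bit joins
     * REQUIRED_MASK4: CONFIG_MATOM kernels then refuse to boot on CPUs
     * without movbe and may use the instruction unconditionally. */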
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index f8fde90bc45e37171a7390723e3afff2ee9a1788..d8829751b3f895e9fd19fa6bd597758650f417b8 100644
 #ifdef CONFIG_KMEMCHECK
 /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
 # include <asm-generic/xor.h>
+#elif !defined(_ASM_X86_XOR_H)
+#define _ASM_X86_XOR_H
+
+/*
+ * Optimized RAID-5 checksumming functions for SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+/*
+ * Based on
+ * High-speed RAID5 checksumming functions utilizing SSE instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+/*
+ * x86-64 changes / gcc fixes from Andi Kleen.
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ *
+ * This hasn't been optimized for the hammer yet, but there are likely
+ * no advantages to be gotten from x86-64 here anyways.
+ */
+
+#include <asm/i387.h>
+
+#ifdef CONFIG_X86_32
+/* reduce register pressure */
+# define XOR_CONSTANT_CONSTRAINT "i"
 #else
+# define XOR_CONSTANT_CONSTRAINT "re"
+#endif
+
+#define OFFS(x)                "16*("#x")"
+#define PF_OFFS(x)     "256+16*("#x")"
+#define PF0(x)         "       prefetchnta "PF_OFFS(x)"(%[p1])         ;\n"
+#define LD(x, y)       "       movaps "OFFS(x)"(%[p1]), %%xmm"#y"      ;\n"
+#define ST(x, y)       "       movaps %%xmm"#y", "OFFS(x)"(%[p1])      ;\n"
+#define PF1(x)         "       prefetchnta "PF_OFFS(x)"(%[p2])         ;\n"
+#define PF2(x)         "       prefetchnta "PF_OFFS(x)"(%[p3])         ;\n"
+#define PF3(x)         "       prefetchnta "PF_OFFS(x)"(%[p4])         ;\n"
+#define PF4(x)         "       prefetchnta "PF_OFFS(x)"(%[p5])         ;\n"
+#define XO1(x, y)      "       xorps "OFFS(x)"(%[p2]), %%xmm"#y"       ;\n"
+#define XO2(x, y)      "       xorps "OFFS(x)"(%[p3]), %%xmm"#y"       ;\n"
+#define XO3(x, y)      "       xorps "OFFS(x)"(%[p4]), %%xmm"#y"       ;\n"
+#define XO4(x, y)      "       xorps "OFFS(x)"(%[p5]), %%xmm"#y"       ;\n"
+#define NOP(x)
+
+#define BLK64(pf, op, i)                               \
+               pf(i)                                   \
+               op(i, 0)                                \
+                       op(i + 1, 1)                    \
+                               op(i + 2, 2)            \
+                                       op(i + 3, 3)
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+       unsigned long lines = bytes >> 8;
+
+       kernel_fpu_begin();
+
+       asm volatile(
+#undef BLOCK
+#define BLOCK(i)                                       \
+               LD(i, 0)                                \
+                       LD(i + 1, 1)                    \
+               PF1(i)                                  \
+                               PF1(i + 2)              \
+                               LD(i + 2, 2)            \
+                                       LD(i + 3, 3)    \
+               PF0(i + 4)                              \
+                               PF0(i + 6)              \
+               XO1(i, 0)                               \
+                       XO1(i + 1, 1)                   \
+                               XO1(i + 2, 2)           \
+                                       XO1(i + 3, 3)   \
+               ST(i, 0)                                \
+                       ST(i + 1, 1)                    \
+                               ST(i + 2, 2)            \
+                                       ST(i + 3, 3)    \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+       "       add %[inc], %[p1]       ;\n"
+       "       add %[inc], %[p2]       ;\n"
+       "       dec %[cnt]              ;\n"
+       "       jnz 1b                  ;\n"
+       : [cnt] "+r" (lines),
+         [p1] "+r" (p1), [p2] "+r" (p2)
+       : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+       : "memory");
+
+       kernel_fpu_end();
+}
+
+static void
+xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+       unsigned long lines = bytes >> 8;
+
+       kernel_fpu_begin();
+
+       asm volatile(
+#undef BLOCK
+#define BLOCK(i)                       \
+               BLK64(PF0, LD, i)       \
+               BLK64(PF1, XO1, i)      \
+               BLK64(NOP, ST, i)       \
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+       "       add %[inc], %[p1]       ;\n"
+       "       add %[inc], %[p2]       ;\n"
+       "       dec %[cnt]              ;\n"
+       "       jnz 1b                  ;\n"
+       : [cnt] "+r" (lines),
+         [p1] "+r" (p1), [p2] "+r" (p2)
+       : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+       : "memory");
+
+       kernel_fpu_end();
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3)
+{
+       unsigned long lines = bytes >> 8;
+
+       kernel_fpu_begin();
+
+       asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i + 2)              \
+               LD(i, 0)                                \
+                       LD(i + 1, 1)                    \
+                               LD(i + 2, 2)            \
+                                       LD(i + 3, 3)    \
+               PF2(i)                                  \
+                               PF2(i + 2)              \
+               PF0(i + 4)                              \
+                               PF0(i + 6)              \
+               XO1(i, 0)                               \
+                       XO1(i + 1, 1)                   \
+                               XO1(i + 2, 2)           \
+                                       XO1(i + 3, 3)   \
+               XO2(i, 0)                               \
+                       XO2(i + 1, 1)                   \
+                               XO2(i + 2, 2)           \
+                                       XO2(i + 3, 3)   \
+               ST(i, 0)                                \
+                       ST(i + 1, 1)                    \
+                               ST(i + 2, 2)            \
+                                       ST(i + 3, 3)    \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+       "       add %[inc], %[p1]       ;\n"
+       "       add %[inc], %[p2]       ;\n"
+       "       add %[inc], %[p3]       ;\n"
+       "       dec %[cnt]              ;\n"
+       "       jnz 1b                  ;\n"
+       : [cnt] "+r" (lines),
+         [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+       : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+       : "memory");
+
+       kernel_fpu_end();
+}
+
+static void
+xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+              unsigned long *p3)
+{
+       unsigned long lines = bytes >> 8;
+
+       kernel_fpu_begin();
+
+       asm volatile(
+#undef BLOCK
+#define BLOCK(i)                       \
+               BLK64(PF0, LD, i)       \
+               BLK64(PF1, XO1, i)      \
+               BLK64(PF2, XO2, i)      \
+               BLK64(NOP, ST, i)       \
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+       "       add %[inc], %[p1]       ;\n"
+       "       add %[inc], %[p2]       ;\n"
+       "       add %[inc], %[p3]       ;\n"
+       "       dec %[cnt]              ;\n"
+       "       jnz 1b                  ;\n"
+       : [cnt] "+r" (lines),
+         [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+       : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+       : "memory");
+
+       kernel_fpu_end();
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3, unsigned long *p4)
+{
+       unsigned long lines = bytes >> 8;
+
+       kernel_fpu_begin();
+
+       asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i + 2)              \
+               LD(i, 0)                                \
+                       LD(i + 1, 1)                    \
+                               LD(i + 2, 2)            \
+                                       LD(i + 3, 3)    \
+               PF2(i)                                  \
+                               PF2(i + 2)              \
+               XO1(i, 0)                               \
+                       XO1(i + 1, 1)                   \
+                               XO1(i + 2, 2)           \
+                                       XO1(i + 3, 3)   \
+               PF3(i)                                  \
+                               PF3(i + 2)              \
+               PF0(i + 4)                              \
+                               PF0(i + 6)              \
+               XO2(i, 0)                               \
+                       XO2(i + 1, 1)                   \
+                               XO2(i + 2, 2)           \
+                                       XO2(i + 3, 3)   \
+               XO3(i, 0)                               \
+                       XO3(i + 1, 1)                   \
+                               XO3(i + 2, 2)           \
+                                       XO3(i + 3, 3)   \
+               ST(i, 0)                                \
+                       ST(i + 1, 1)                    \
+                               ST(i + 2, 2)            \
+                                       ST(i + 3, 3)    \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+       "       add %[inc], %[p1]       ;\n"
+       "       add %[inc], %[p2]       ;\n"
+       "       add %[inc], %[p3]       ;\n"
+       "       add %[inc], %[p4]       ;\n"
+       "       dec %[cnt]              ;\n"
+       "       jnz 1b                  ;\n"
+       : [cnt] "+r" (lines), [p1] "+r" (p1),
+         [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+       : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+       : "memory");
+
+       kernel_fpu_end();
+}
+
+static void
+xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+              unsigned long *p3, unsigned long *p4)
+{
+       unsigned long lines = bytes >> 8;
+
+       kernel_fpu_begin();
+
+       asm volatile(
+#undef BLOCK
+#define BLOCK(i)                       \
+               BLK64(PF0, LD, i)       \
+               BLK64(PF1, XO1, i)      \
+               BLK64(PF2, XO2, i)      \
+               BLK64(PF3, XO3, i)      \
+               BLK64(NOP, ST, i)       \
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+       "       add %[inc], %[p1]       ;\n"
+       "       add %[inc], %[p2]       ;\n"
+       "       add %[inc], %[p3]       ;\n"
+       "       add %[inc], %[p4]       ;\n"
+       "       dec %[cnt]              ;\n"
+       "       jnz 1b                  ;\n"
+       : [cnt] "+r" (lines), [p1] "+r" (p1),
+         [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+       : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+       : "memory");
+
+       kernel_fpu_end();
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+       unsigned long lines = bytes >> 8;
+
+       kernel_fpu_begin();
+
+       asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i + 2)              \
+               LD(i, 0)                                \
+                       LD(i + 1, 1)                    \
+                               LD(i + 2, 2)            \
+                                       LD(i + 3, 3)    \
+               PF2(i)                                  \
+                               PF2(i + 2)              \
+               XO1(i, 0)                               \
+                       XO1(i + 1, 1)                   \
+                               XO1(i + 2, 2)           \
+                                       XO1(i + 3, 3)   \
+               PF3(i)                                  \
+                               PF3(i + 2)              \
+               XO2(i, 0)                               \
+                       XO2(i + 1, 1)                   \
+                               XO2(i + 2, 2)           \
+                                       XO2(i + 3, 3)   \
+               PF4(i)                                  \
+                               PF4(i + 2)              \
+               PF0(i + 4)                              \
+                               PF0(i + 6)              \
+               XO3(i, 0)                               \
+                       XO3(i + 1, 1)                   \
+                               XO3(i + 2, 2)           \
+                                       XO3(i + 3, 3)   \
+               XO4(i, 0)                               \
+                       XO4(i + 1, 1)                   \
+                               XO4(i + 2, 2)           \
+                                       XO4(i + 3, 3)   \
+               ST(i, 0)                                \
+                       ST(i + 1, 1)                    \
+                               ST(i + 2, 2)            \
+                                       ST(i + 3, 3)    \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+       "       add %[inc], %[p1]       ;\n"
+       "       add %[inc], %[p2]       ;\n"
+       "       add %[inc], %[p3]       ;\n"
+       "       add %[inc], %[p4]       ;\n"
+       "       add %[inc], %[p5]       ;\n"
+       "       dec %[cnt]              ;\n"
+       "       jnz 1b                  ;\n"
+       : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
+         [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
+       : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+       : "memory");
+
+       kernel_fpu_end();
+}
+
+static void
+xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+              unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+       unsigned long lines = bytes >> 8;
+
+       kernel_fpu_begin();
+
+       asm volatile(
+#undef BLOCK
+#define BLOCK(i)                       \
+               BLK64(PF0, LD, i)       \
+               BLK64(PF1, XO1, i)      \
+               BLK64(PF2, XO2, i)      \
+               BLK64(PF3, XO3, i)      \
+               BLK64(PF4, XO4, i)      \
+               BLK64(NOP, ST, i)       \
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+       "       add %[inc], %[p1]       ;\n"
+       "       add %[inc], %[p2]       ;\n"
+       "       add %[inc], %[p3]       ;\n"
+       "       add %[inc], %[p4]       ;\n"
+       "       add %[inc], %[p5]       ;\n"
+       "       dec %[cnt]              ;\n"
+       "       jnz 1b                  ;\n"
+       : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
+         [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
+       : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+       : "memory");
+
+       kernel_fpu_end();
+}
+
+static struct xor_block_template xor_block_sse_pf64 = {
+       .name = "prefetch64-sse",
+       .do_2 = xor_sse_2_pf64,
+       .do_3 = xor_sse_3_pf64,
+       .do_4 = xor_sse_4_pf64,
+       .do_5 = xor_sse_5_pf64,
+};
+
+#undef LD
+#undef XO1
+#undef XO2
+#undef XO3
+#undef XO4
+#undef ST
+#undef NOP
+#undef BLK64
+#undef BLOCK
+
+#undef XOR_CONSTANT_CONSTRAINT
+
 #ifdef CONFIG_X86_32
 # include <asm/xor_32.h>
 #else
 # include <asm/xor_64.h>
 #endif
-#endif
+
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+       AVX_SELECT(FASTEST)
+
+#endif /* _ASM_X86_XOR_H */
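
Each xor_sse_N() above consumes 256-byte "lines": four BLOCK()s of four
16-byte movaps/xorps pairs, with prefetchnta streaming the next line past
the caches (the _pf64 variants prefetch 64 bytes at a time via BLK64
instead). As a plain-C reference for what xor_sse_2() computes -- a sketch
only, without the SSE registers or prefetch:

    static void xor_ref_2(unsigned long bytes, unsigned long *p1,
                          const unsigned long *p2)
    {
            unsigned long i;

            /* bytes is a multiple of 256, exactly as the asm assumes */
            for (i = 0; i < bytes / sizeof(unsigned long); i++)
                    p1[i] ^= p2[i];         /* result accumulates in *p1 */
    }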
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index f79cb7ec0e06a4c7f36361a381c3156d45dab365..ce05722e3c68bce4d72a1bcdc9e798b5014581cf 100644
@@ -2,7 +2,7 @@
 #define _ASM_X86_XOR_32_H
 
 /*
- * Optimized RAID-5 checksumming functions for MMX and SSE.
+ * Optimized RAID-5 checksumming functions for MMX.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = {
        .do_5 = xor_p5_mmx_5,
 };
 
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-#define OFFS(x)                "16*("#x")"
-#define PF_OFFS(x)     "256+16*("#x")"
-#define        PF0(x)          "       prefetchnta "PF_OFFS(x)"(%1)            ;\n"
-#define LD(x, y)       "       movaps   "OFFS(x)"(%1), %%xmm"#y"       ;\n"
-#define ST(x, y)       "       movaps %%xmm"#y",   "OFFS(x)"(%1)       ;\n"
-#define PF1(x)         "       prefetchnta "PF_OFFS(x)"(%2)            ;\n"
-#define PF2(x)         "       prefetchnta "PF_OFFS(x)"(%3)            ;\n"
-#define PF3(x)         "       prefetchnta "PF_OFFS(x)"(%4)            ;\n"
-#define PF4(x)         "       prefetchnta "PF_OFFS(x)"(%5)            ;\n"
-#define PF5(x)         "       prefetchnta "PF_OFFS(x)"(%6)            ;\n"
-#define XO1(x, y)      "       xorps   "OFFS(x)"(%2), %%xmm"#y"        ;\n"
-#define XO2(x, y)      "       xorps   "OFFS(x)"(%3), %%xmm"#y"        ;\n"
-#define XO3(x, y)      "       xorps   "OFFS(x)"(%4), %%xmm"#y"        ;\n"
-#define XO4(x, y)      "       xorps   "OFFS(x)"(%5), %%xmm"#y"        ;\n"
-#define XO5(x, y)      "       xorps   "OFFS(x)"(%6), %%xmm"#y"        ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
-       unsigned long lines = bytes >> 8;
-
-       kernel_fpu_begin();
-
-       asm volatile(
-#undef BLOCK
-#define BLOCK(i)                                       \
-               LD(i, 0)                                \
-                       LD(i + 1, 1)                    \
-               PF1(i)                                  \
-                               PF1(i + 2)              \
-                               LD(i + 2, 2)            \
-                                       LD(i + 3, 3)    \
-               PF0(i + 4)                              \
-                               PF0(i + 6)              \
-               XO1(i, 0)                               \
-                       XO1(i + 1, 1)                   \
-                               XO1(i + 2, 2)           \
-                                       XO1(i + 3, 3)   \
-               ST(i, 0)                                \
-                       ST(i + 1, 1)                    \
-                               ST(i + 2, 2)            \
-                                       ST(i + 3, 3)    \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-       "       addl $256, %1           ;\n"
-       "       addl $256, %2           ;\n"
-       "       decl %0                 ;\n"
-       "       jnz 1b                  ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2)
-       :
-       : "memory");
-
-       kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3)
-{
-       unsigned long lines = bytes >> 8;
-
-       kernel_fpu_begin();
-
-       asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-               PF1(i)                                  \
-                               PF1(i + 2)              \
-               LD(i,0)                                 \
-                       LD(i + 1, 1)                    \
-                               LD(i + 2, 2)            \
-                                       LD(i + 3, 3)    \
-               PF2(i)                                  \
-                               PF2(i + 2)              \
-               PF0(i + 4)                              \
-                               PF0(i + 6)              \
-               XO1(i,0)                                \
-                       XO1(i + 1, 1)                   \
-                               XO1(i + 2, 2)           \
-                                       XO1(i + 3, 3)   \
-               XO2(i,0)                                \
-                       XO2(i + 1, 1)                   \
-                               XO2(i + 2, 2)           \
-                                       XO2(i + 3, 3)   \
-               ST(i,0)                                 \
-                       ST(i + 1, 1)                    \
-                               ST(i + 2, 2)            \
-                                       ST(i + 3, 3)    \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-       "       addl $256, %1           ;\n"
-       "       addl $256, %2           ;\n"
-       "       addl $256, %3           ;\n"
-       "       decl %0                 ;\n"
-       "       jnz 1b                  ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r"(p2), "+r"(p3)
-       :
-       : "memory" );
-
-       kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3, unsigned long *p4)
-{
-       unsigned long lines = bytes >> 8;
-
-       kernel_fpu_begin();
-
-       asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-               PF1(i)                                  \
-                               PF1(i + 2)              \
-               LD(i,0)                                 \
-                       LD(i + 1, 1)                    \
-                               LD(i + 2, 2)            \
-                                       LD(i + 3, 3)    \
-               PF2(i)                                  \
-                               PF2(i + 2)              \
-               XO1(i,0)                                \
-                       XO1(i + 1, 1)                   \
-                               XO1(i + 2, 2)           \
-                                       XO1(i + 3, 3)   \
-               PF3(i)                                  \
-                               PF3(i + 2)              \
-               PF0(i + 4)                              \
-                               PF0(i + 6)              \
-               XO2(i,0)                                \
-                       XO2(i + 1, 1)                   \
-                               XO2(i + 2, 2)           \
-                                       XO2(i + 3, 3)   \
-               XO3(i,0)                                \
-                       XO3(i + 1, 1)                   \
-                               XO3(i + 2, 2)           \
-                                       XO3(i + 3, 3)   \
-               ST(i,0)                                 \
-                       ST(i + 1, 1)                    \
-                               ST(i + 2, 2)            \
-                                       ST(i + 3, 3)    \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-       "       addl $256, %1           ;\n"
-       "       addl $256, %2           ;\n"
-       "       addl $256, %3           ;\n"
-       "       addl $256, %4           ;\n"
-       "       decl %0                 ;\n"
-       "       jnz 1b                  ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
-       :
-       : "memory" );
-
-       kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
-       unsigned long lines = bytes >> 8;
-
-       kernel_fpu_begin();
-
-       /* Make sure GCC forgets anything it knows about p4 or p5,
-          such that it won't pass to the asm volatile below a
-          register that is shared with any other variable.  That's
-          because we modify p4 and p5 there, but we can't mark them
-          as read/write, otherwise we'd overflow the 10-asm-operands
-          limit of GCC < 3.1.  */
-       asm("" : "+r" (p4), "+r" (p5));
-
-       asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-               PF1(i)                                  \
-                               PF1(i + 2)              \
-               LD(i,0)                                 \
-                       LD(i + 1, 1)                    \
-                               LD(i + 2, 2)            \
-                                       LD(i + 3, 3)    \
-               PF2(i)                                  \
-                               PF2(i + 2)              \
-               XO1(i,0)                                \
-                       XO1(i + 1, 1)                   \
-                               XO1(i + 2, 2)           \
-                                       XO1(i + 3, 3)   \
-               PF3(i)                                  \
-                               PF3(i + 2)              \
-               XO2(i,0)                                \
-                       XO2(i + 1, 1)                   \
-                               XO2(i + 2, 2)           \
-                                       XO2(i + 3, 3)   \
-               PF4(i)                                  \
-                               PF4(i + 2)              \
-               PF0(i + 4)                              \
-                               PF0(i + 6)              \
-               XO3(i,0)                                \
-                       XO3(i + 1, 1)                   \
-                               XO3(i + 2, 2)           \
-                                       XO3(i + 3, 3)   \
-               XO4(i,0)                                \
-                       XO4(i + 1, 1)                   \
-                               XO4(i + 2, 2)           \
-                                       XO4(i + 3, 3)   \
-               ST(i,0)                                 \
-                       ST(i + 1, 1)                    \
-                               ST(i + 2, 2)            \
-                                       ST(i + 3, 3)    \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-       "       addl $256, %1           ;\n"
-       "       addl $256, %2           ;\n"
-       "       addl $256, %3           ;\n"
-       "       addl $256, %4           ;\n"
-       "       addl $256, %5           ;\n"
-       "       decl %0                 ;\n"
-       "       jnz 1b                  ;\n"
-       : "+r" (lines),
-         "+r" (p1), "+r" (p2), "+r" (p3)
-       : "r" (p4), "r" (p5)
-       : "memory");
-
-       /* p4 and p5 were modified, and now the variables are dead.
-          Clobber them just to be sure nobody does something stupid
-          like assuming they have some legal value.  */
-       asm("" : "=r" (p4), "=r" (p5));
-
-       kernel_fpu_end();
-}
-
 static struct xor_block_template xor_block_pIII_sse = {
        .name = "pIII_sse",
        .do_2 = xor_sse_2,
@@ -827,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = {
 /* Also try the generic routines.  */
 #include <asm-generic/xor.h>
 
+/* We force the use of the SSE xor block because it can write around L2.
+   We may also be able to load into the L1 only depending on how the cpu
+   deals with a load to a line that is being prefetched.  */
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES                              \
 do {                                                   \
-       xor_speed(&xor_block_8regs);                    \
-       xor_speed(&xor_block_8regs_p);                  \
-       xor_speed(&xor_block_32regs);                   \
-       xor_speed(&xor_block_32regs_p);                 \
        AVX_XOR_SPEED;                                  \
-       if (cpu_has_xmm)                                \
+       if (cpu_has_xmm) {                              \
                xor_speed(&xor_block_pIII_sse);         \
-       if (cpu_has_mmx) {                              \
+               xor_speed(&xor_block_sse_pf64);         \
+       } else if (cpu_has_mmx) {                       \
                xor_speed(&xor_block_pII_mmx);          \
                xor_speed(&xor_block_p5_mmx);           \
+       } else {                                        \
+               xor_speed(&xor_block_8regs);            \
+               xor_speed(&xor_block_8regs_p);          \
+               xor_speed(&xor_block_32regs);           \
+               xor_speed(&xor_block_32regs_p);         \
        }                                               \
 } while (0)
 
-/* We force the use of the SSE xor block because it can write around L2.
-   We may also be able to load into the L1 only depending on how the cpu
-   deals with a load to a line that is being prefetched.  */
-#define XOR_SELECT_TEMPLATE(FASTEST)                   \
-       AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
-
 #endif /* _ASM_X86_XOR_32_H */
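
The reworked XOR_TRY_TEMPLATES now benchmarks only candidates that can win
on the running CPU: SSE machines skip the MMX and integer-register
templates entirely. Roughly how the candidates are consumed -- a simplified
model of the calibration in crypto/xor.c, with benchmark() standing in for
the real timing loop:

    /* Each xor_speed() call times one template; the fastest survives
     * and becomes the system-wide xor implementation, so pruning
     * hopeless templates shortens boot-time calibration. */
    static struct xor_block_template *fastest;

    static void xor_speed_model(struct xor_block_template *tmpl)
    {
            tmpl->speed = benchmark(tmpl);          /* hypothetical */
            if (!fastest || tmpl->speed > fastest->speed)
                    fastest = tmpl;
    }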
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 87ac522c4af53b9770297f38f78a02d065bb2cbf..546f1e3b87cce3d346f95fec83ff2630342da47c 100644
@@ -1,301 +1,6 @@
 #ifndef _ASM_X86_XOR_64_H
 #define _ASM_X86_XOR_64_H
 
-/*
- * Optimized RAID-5 checksumming functions for MMX and SSE.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-/*
- * Based on
- * High-speed RAID5 checksumming functions utilizing SSE instructions.
- * Copyright (C) 1998 Ingo Molnar.
- */
-
-/*
- * x86-64 changes / gcc fixes from Andi Kleen.
- * Copyright 2002 Andi Kleen, SuSE Labs.
- *
- * This hasn't been optimized for the hammer yet, but there are likely
- * no advantages to be gotten from x86-64 here anyways.
- */
-
-#include <asm/i387.h>
-
-#define OFFS(x)                "16*("#x")"
-#define PF_OFFS(x)     "256+16*("#x")"
-#define        PF0(x)          "       prefetchnta "PF_OFFS(x)"(%[p1])         ;\n"
-#define LD(x, y)       "       movaps   "OFFS(x)"(%[p1]), %%xmm"#y"    ;\n"
-#define ST(x, y)       "       movaps %%xmm"#y",   "OFFS(x)"(%[p1])    ;\n"
-#define PF1(x)         "       prefetchnta "PF_OFFS(x)"(%[p2])         ;\n"
-#define PF2(x)         "       prefetchnta "PF_OFFS(x)"(%[p3])         ;\n"
-#define PF3(x)         "       prefetchnta "PF_OFFS(x)"(%[p4])         ;\n"
-#define PF4(x)         "       prefetchnta "PF_OFFS(x)"(%[p5])         ;\n"
-#define PF5(x)         "       prefetchnta "PF_OFFS(x)"(%[p6])         ;\n"
-#define XO1(x, y)      "       xorps   "OFFS(x)"(%[p2]), %%xmm"#y"     ;\n"
-#define XO2(x, y)      "       xorps   "OFFS(x)"(%[p3]), %%xmm"#y"     ;\n"
-#define XO3(x, y)      "       xorps   "OFFS(x)"(%[p4]), %%xmm"#y"     ;\n"
-#define XO4(x, y)      "       xorps   "OFFS(x)"(%[p5]), %%xmm"#y"     ;\n"
-#define XO5(x, y)      "       xorps   "OFFS(x)"(%[p6]), %%xmm"#y"     ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
-       unsigned int lines = bytes >> 8;
-
-       kernel_fpu_begin();
-
-       asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-               LD(i, 0)                                \
-                       LD(i + 1, 1)                    \
-               PF1(i)                                  \
-                               PF1(i + 2)              \
-                               LD(i + 2, 2)            \
-                                       LD(i + 3, 3)    \
-               PF0(i + 4)                              \
-                               PF0(i + 6)              \
-               XO1(i, 0)                               \
-                       XO1(i + 1, 1)                   \
-                               XO1(i + 2, 2)           \
-                                       XO1(i + 3, 3)   \
-               ST(i, 0)                                \
-                       ST(i + 1, 1)                    \
-                               ST(i + 2, 2)            \
-                                       ST(i + 3, 3)    \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-       "       addq %[inc], %[p1]           ;\n"
-       "       addq %[inc], %[p2]           ;\n"
-               "               decl %[cnt] ; jnz 1b"
-       : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
-       : [inc] "r" (256UL)
-       : "memory");
-
-       kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3)
-{
-       unsigned int lines = bytes >> 8;
-
-       kernel_fpu_begin();
-       asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-               PF1(i)                                  \
-                               PF1(i + 2)              \
-               LD(i, 0)                                        \
-                       LD(i + 1, 1)                    \
-                               LD(i + 2, 2)            \
-                                       LD(i + 3, 3)    \
-               PF2(i)                                  \
-                               PF2(i + 2)              \
-               PF0(i + 4)                              \
-                               PF0(i + 6)              \
-               XO1(i, 0)                               \
-                       XO1(i + 1, 1)                   \
-                               XO1(i + 2, 2)           \
-                                       XO1(i + 3, 3)   \
-               XO2(i, 0)                               \
-                       XO2(i + 1, 1)                   \
-                               XO2(i + 2, 2)           \
-                                       XO2(i + 3, 3)   \
-               ST(i, 0)                                \
-                       ST(i + 1, 1)                    \
-                               ST(i + 2, 2)            \
-                                       ST(i + 3, 3)    \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-       "       addq %[inc], %[p1]           ;\n"
-       "       addq %[inc], %[p2]          ;\n"
-       "       addq %[inc], %[p3]           ;\n"
-               "               decl %[cnt] ; jnz 1b"
-       : [cnt] "+r" (lines),
-         [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
-       : [inc] "r" (256UL)
-       : "memory");
-       kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3, unsigned long *p4)
-{
-       unsigned int lines = bytes >> 8;
-
-       kernel_fpu_begin();
-
-       asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-               PF1(i)                                  \
-                               PF1(i + 2)              \
-               LD(i, 0)                                \
-                       LD(i + 1, 1)                    \
-                               LD(i + 2, 2)            \
-                                       LD(i + 3, 3)    \
-               PF2(i)                                  \
-                               PF2(i + 2)              \
-               XO1(i, 0)                               \
-                       XO1(i + 1, 1)                   \
-                               XO1(i + 2, 2)           \
-                                       XO1(i + 3, 3)   \
-               PF3(i)                                  \
-                               PF3(i + 2)              \
-               PF0(i + 4)                              \
-                               PF0(i + 6)              \
-               XO2(i, 0)                               \
-                       XO2(i + 1, 1)                   \
-                               XO2(i + 2, 2)           \
-                                       XO2(i + 3, 3)   \
-               XO3(i, 0)                               \
-                       XO3(i + 1, 1)                   \
-                               XO3(i + 2, 2)           \
-                                       XO3(i + 3, 3)   \
-               ST(i, 0)                                \
-                       ST(i + 1, 1)                    \
-                               ST(i + 2, 2)            \
-                                       ST(i + 3, 3)    \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-       "       addq %[inc], %[p1]           ;\n"
-       "       addq %[inc], %[p2]           ;\n"
-       "       addq %[inc], %[p3]           ;\n"
-       "       addq %[inc], %[p4]           ;\n"
-       "       decl %[cnt] ; jnz 1b"
-       : [cnt] "+c" (lines),
-         [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
-       : [inc] "r" (256UL)
-       : "memory" );
-
-       kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
-       unsigned int lines = bytes >> 8;
-
-       kernel_fpu_begin();
-
-       asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
-               PF1(i)                                  \
-                               PF1(i + 2)              \
-               LD(i, 0)                                \
-                       LD(i + 1, 1)                    \
-                               LD(i + 2, 2)            \
-                                       LD(i + 3, 3)    \
-               PF2(i)                                  \
-                               PF2(i + 2)              \
-               XO1(i, 0)                               \
-                       XO1(i + 1, 1)                   \
-                               XO1(i + 2, 2)           \
-                                       XO1(i + 3, 3)   \
-               PF3(i)                                  \
-                               PF3(i + 2)              \
-               XO2(i, 0)                               \
-                       XO2(i + 1, 1)                   \
-                               XO2(i + 2, 2)           \
-                                       XO2(i + 3, 3)   \
-               PF4(i)                                  \
-                               PF4(i + 2)              \
-               PF0(i + 4)                              \
-                               PF0(i + 6)              \
-               XO3(i, 0)                               \
-                       XO3(i + 1, 1)                   \
-                               XO3(i + 2, 2)           \
-                                       XO3(i + 3, 3)   \
-               XO4(i, 0)                               \
-                       XO4(i + 1, 1)                   \
-                               XO4(i + 2, 2)           \
-                                       XO4(i + 3, 3)   \
-               ST(i, 0)                                \
-                       ST(i + 1, 1)                    \
-                               ST(i + 2, 2)            \
-                                       ST(i + 3, 3)    \
-
-
-               PF0(0)
-                               PF0(2)
-
-       " .align 32                     ;\n"
-       " 1:                            ;\n"
-
-               BLOCK(0)
-               BLOCK(4)
-               BLOCK(8)
-               BLOCK(12)
-
-       "       addq %[inc], %[p1]           ;\n"
-       "       addq %[inc], %[p2]           ;\n"
-       "       addq %[inc], %[p3]           ;\n"
-       "       addq %[inc], %[p4]           ;\n"
-       "       addq %[inc], %[p5]           ;\n"
-       "       decl %[cnt] ; jnz 1b"
-       : [cnt] "+c" (lines),
-         [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
-         [p5] "+r" (p5)
-       : [inc] "r" (256UL)
-       : "memory");
-
-       kernel_fpu_end();
-}
-
 static struct xor_block_template xor_block_sse = {
        .name = "generic_sse",
        .do_2 = xor_sse_2,
@@ -308,17 +13,15 @@ static struct xor_block_template xor_block_sse = {
 /* Also try the AVX routines */
 #include <asm/xor_avx.h>
 
+/* We force the use of the SSE xor block because it can write around L2.
+   We may also be able to load into the L1 only depending on how the cpu
+   deals with a load to a line that is being prefetched.  */
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES                      \
 do {                                           \
        AVX_XOR_SPEED;                          \
+       xor_speed(&xor_block_sse_pf64);         \
        xor_speed(&xor_block_sse);              \
 } while (0)
 
-/* We force the use of the SSE xor block because it can write around L2.
-   We may also be able to load into the L1 only depending on how the cpu
-   deals with a load to a line that is being prefetched.  */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
-       AVX_SELECT(&xor_block_sse)
-
 #endif /* _ASM_X86_XOR_64_H */
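
With the local XOR_SELECT_TEMPLATE removed here (and in xor_32.h), the
common definition now in xor.h takes over. The net behavioral change,
sketched:

    /* Before: AVX_SELECT(&xor_block_sse)  -- calibration result ignored,
     *         generic_sse forced whenever AVX is unavailable.
     * After:  AVX_SELECT(FASTEST)         -- the benchmark winner (e.g.
     *         prefetch64-sse) is honored; AVX still wins when present. */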
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 97ef74b88e0f8f6213308c6168303779302adec0..dbded5aedb818d511a46967ccb1d26d77233110f 100644
@@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        if (flags & MAP_FIXED)
                return addr;
 
-       /* for MAP_32BIT mappings we force the legact mmap base */
+       /* for MAP_32BIT mappings we force the legacy mmap base */
        if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
                goto bottomup;
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2ead3c8a4c8419da92a61fbba35eb695e5205dde..e779e0bb1dfd2e90e35a5df844093ad4a8949a4a 100644
@@ -605,7 +605,7 @@ kernel_physical_mapping_init(unsigned long start,
        }
 
        if (pgd_changed)
-               sync_global_pgds(addr, end);
+               sync_global_pgds(addr, end - 1);
 
        __flush_tlb_all();
 
@@ -981,7 +981,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
                }
 
        }
-       sync_global_pgds((unsigned long)start_page, end);
+       sync_global_pgds((unsigned long)start_page, end - 1);
        return 0;
 }
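
The "- 1" in both hunks matters because sync_global_pgds() treats its
bounds as inclusive addresses, while the end passed here is exclusive. A
sketch of the off-by-one, assuming the usual 512 GB PGDIR_SIZE on x86-64:

    /* With an exclusive end sitting exactly on a pgd boundary:
     *
     *   start = 0xffff880000000000
     *   end   = 0xffff888000000000      (start + PGDIR_SIZE, exclusive)
     *
     * an inclusive walk such as
     *
     *   for (addr = start; addr <= end; addr += PGDIR_SIZE)
     *
     * visits two pgd entries instead of one; passing end - 1 keeps the
     * final address inside the range that was actually mapped. */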