Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[~shefty/rdma-dev.git] / arch / x86 / mm / init_64.c
index d6eeead..3eba7f4 100644 (file)
 #include <asm/uv/uv.h>
 #include <asm/setup.h>
 
+#include "mm_internal.h"
+
+static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+                          unsigned long addr, unsigned long end)
+{      /* Fill non-present PMD slots covering [addr, end) in pmd_page. */
+       addr &= PMD_MASK;       /* step from the PMD boundary at or below addr */
+       for (; addr < end; addr += PMD_SIZE) {
+               pmd_t *pmd = pmd_page + pmd_index(addr);
+
+               if (!pmd_present(*pmd)) /* entries already present are kept as-is */
+                       set_pmd(pmd, __pmd(addr | pmd_flag)); /* value is addr itself plus flags */
+       }
+}
+static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
+                         unsigned long addr, unsigned long end)
+{      /* Walk PUD slots for [addr, end); returns 0, or -ENOMEM if alloc fails. */
+       unsigned long next;
+
+       for (; addr < end; addr = next) {
+               pud_t *pud = pud_page + pud_index(addr);
+               pmd_t *pmd;
+
+               next = (addr & PUD_MASK) + PUD_SIZE;    /* following PUD boundary... */
+               if (next > end)
+                       next = end;     /* ...clamped to the requested end */
+
+               if (pud_present(*pud)) {        /* NOTE(review): large-page PUD not checked */
+                       pmd = pmd_offset(pud, 0);       /* reuse the PMD page already linked */
+                       ident_pmd_init(info->pmd_flag, pmd, addr, next);
+                       continue;
+               }
+               pmd = (pmd_t *)info->alloc_pgt_page(info->context); /* caller-supplied allocator */
+               if (!pmd)       /* NOTE(review): page presumably comes back zeroed -- confirm */
+                       return -ENOMEM;
+               ident_pmd_init(info->pmd_flag, pmd, addr, next);
+               set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); /* link only after filling */
+       }
+
+       return 0;
+}
+
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+                             unsigned long addr, unsigned long end)
+{      /* Populate pgd_page for [addr, end); returns 0 or the first error seen. */
+       unsigned long next;
+       int result;
+       int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; /* PGD slot shift */
+
+       for (; addr < end; addr = next) {
+               pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+               pud_t *pud;
+
+               next = (addr & PGDIR_MASK) + PGDIR_SIZE;        /* following PGD boundary... */
+               if (next > end)
+                       next = end;     /* ...clamped to the requested end */
+
+               if (pgd_present(*pgd)) {        /* extend the PUD page already linked */
+                       pud = pud_offset(pgd, 0);
+                       result = ident_pud_init(info, pud, addr, next);
+                       if (result)
+                               return result;
+                       continue;
+               }
+
+               pud = (pud_t *)info->alloc_pgt_page(info->context); /* caller-supplied allocator */
+               if (!pud)
+                       return -ENOMEM;
+               result = ident_pud_init(info, pud, addr, next);
+               if (result)     /* NOTE(review): pud is neither linked nor released here */
+                       return result;
+               set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); /* link only after filling */
+       }
+
+       return 0;
+}
+
 static int __init parse_direct_gbpages_off(char *arg)
 {
        direct_gbpages = 0;
@@ -302,10 +378,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
 void __init cleanup_highmap(void)
 {
        unsigned long vaddr = __START_KERNEL_map;
-       unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+       unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE;
        unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
        pmd_t *pmd = level2_kernel_pgt;
 
+       /*
+        * Native path, max_pfn_mapped is not set yet.
+        * Xen has valid max_pfn_mapped set in
+        *      arch/x86/xen/mmu.c:xen_setup_kernel_pagetable().
+        */
+       if (max_pfn_mapped)
+               vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT);
+
        for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) {
                if (pmd_none(*pmd))
                        continue;
@@ -314,69 +398,24 @@ void __init cleanup_highmap(void)
        }
 }
 
-static __ref void *alloc_low_page(unsigned long *phys)
-{
-       unsigned long pfn = pgt_buf_end++;
-       void *adr;
-
-       if (after_bootmem) {
-               adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
-               *phys = __pa(adr);
-
-               return adr;
-       }
-
-       if (pfn >= pgt_buf_top)
-               panic("alloc_low_page: ran out of memory");
-
-       adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
-       clear_page(adr);
-       *phys  = pfn * PAGE_SIZE;
-       return adr;
-}
-
-static __ref void *map_low_page(void *virt)
-{
-       void *adr;
-       unsigned long phys, left;
-
-       if (after_bootmem)
-               return virt;
-
-       phys = __pa(virt);
-       left = phys & (PAGE_SIZE - 1);
-       adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
-       adr = (void *)(((unsigned long)adr) | left);
-
-       return adr;
-}
-
-static __ref void unmap_low_page(void *adr)
-{
-       if (after_bootmem)
-               return;
-
-       early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
-}
-
 static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
              pgprot_t prot)
 {
-       unsigned pages = 0;
+       unsigned long pages = 0, next;
        unsigned long last_map_addr = end;
        int i;
 
        pte_t *pte = pte_page + pte_index(addr);
 
-       for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
-
+       for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) {
+               next = (addr & PAGE_MASK) + PAGE_SIZE;
                if (addr >= end) {
-                       if (!after_bootmem) {
-                               for(; i < PTRS_PER_PTE; i++, pte++)
-                                       set_pte(pte, __pte(0));
-                       }
-                       break;
+                       if (!after_bootmem &&
+                           !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) &&
+                           !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN))
+                               set_pte(pte, __pte(0));
+                       continue;
                }
 
                /*
@@ -414,28 +453,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
        int i = pmd_index(address);
 
        for (; i < PTRS_PER_PMD; i++, address = next) {
-               unsigned long pte_phys;
                pmd_t *pmd = pmd_page + pmd_index(address);
                pte_t *pte;
                pgprot_t new_prot = prot;
 
+               next = (address & PMD_MASK) + PMD_SIZE;
                if (address >= end) {
-                       if (!after_bootmem) {
-                               for (; i < PTRS_PER_PMD; i++, pmd++)
-                                       set_pmd(pmd, __pmd(0));
-                       }
-                       break;
+                       if (!after_bootmem &&
+                           !e820_any_mapped(address & PMD_MASK, next, E820_RAM) &&
+                           !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN))
+                               set_pmd(pmd, __pmd(0));
+                       continue;
                }
 
-               next = (address & PMD_MASK) + PMD_SIZE;
-
                if (pmd_val(*pmd)) {
                        if (!pmd_large(*pmd)) {
                                spin_lock(&init_mm.page_table_lock);
-                               pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+                               pte = (pte_t *)pmd_page_vaddr(*pmd);
                                last_map_addr = phys_pte_init(pte, address,
                                                                end, prot);
-                               unmap_low_page(pte);
                                spin_unlock(&init_mm.page_table_lock);
                                continue;
                        }
@@ -464,19 +500,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
                        pages++;
                        spin_lock(&init_mm.page_table_lock);
                        set_pte((pte_t *)pmd,
-                               pfn_pte(address >> PAGE_SHIFT,
+                               pfn_pte((address & PMD_MASK) >> PAGE_SHIFT,
                                        __pgprot(pgprot_val(prot) | _PAGE_PSE)));
                        spin_unlock(&init_mm.page_table_lock);
                        last_map_addr = next;
                        continue;
                }
 
-               pte = alloc_low_page(&pte_phys);
+               pte = alloc_low_page();
                last_map_addr = phys_pte_init(pte, address, end, new_prot);
-               unmap_low_page(pte);
 
                spin_lock(&init_mm.page_table_lock);
-               pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
+               pmd_populate_kernel(&init_mm, pmd, pte);
                spin_unlock(&init_mm.page_table_lock);
        }
        update_page_count(PG_LEVEL_2M, pages);
@@ -492,27 +527,24 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
        int i = pud_index(addr);
 
        for (; i < PTRS_PER_PUD; i++, addr = next) {
-               unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;
                pgprot_t prot = PAGE_KERNEL;
 
-               if (addr >= end)
-                       break;
-
                next = (addr & PUD_MASK) + PUD_SIZE;
-
-               if (!after_bootmem && !e820_any_mapped(addr, next, 0)) {
-                       set_pud(pud, __pud(0));
+               if (addr >= end) {
+                       if (!after_bootmem &&
+                           !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) &&
+                           !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN))
+                               set_pud(pud, __pud(0));
                        continue;
                }
 
                if (pud_val(*pud)) {
                        if (!pud_large(*pud)) {
-                               pmd = map_low_page(pmd_offset(pud, 0));
+                               pmd = pmd_offset(pud, 0);
                                last_map_addr = phys_pmd_init(pmd, addr, end,
                                                         page_size_mask, prot);
-                               unmap_low_page(pmd);
                                __flush_tlb_all();
                                continue;
                        }
@@ -541,19 +573,19 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
                        pages++;
                        spin_lock(&init_mm.page_table_lock);
                        set_pte((pte_t *)pud,
-                               pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+                               pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT,
+                                       PAGE_KERNEL_LARGE));
                        spin_unlock(&init_mm.page_table_lock);
                        last_map_addr = next;
                        continue;
                }
 
-               pmd = alloc_low_page(&pmd_phys);
+               pmd = alloc_low_page();
                last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
                                              prot);
-               unmap_low_page(pmd);
 
                spin_lock(&init_mm.page_table_lock);
-               pud_populate(&init_mm, pud, __va(pmd_phys));
+               pud_populate(&init_mm, pud, pmd);
                spin_unlock(&init_mm.page_table_lock);
        }
        __flush_tlb_all();
@@ -578,28 +610,23 @@ kernel_physical_mapping_init(unsigned long start,
 
        for (; start < end; start = next) {
                pgd_t *pgd = pgd_offset_k(start);
-               unsigned long pud_phys;
                pud_t *pud;
 
-               next = (start + PGDIR_SIZE) & PGDIR_MASK;
-               if (next > end)
-                       next = end;
+               next = (start & PGDIR_MASK) + PGDIR_SIZE;
 
                if (pgd_val(*pgd)) {
-                       pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+                       pud = (pud_t *)pgd_page_vaddr(*pgd);
                        last_map_addr = phys_pud_init(pud, __pa(start),
                                                 __pa(end), page_size_mask);
-                       unmap_low_page(pud);
                        continue;
                }
 
-               pud = alloc_low_page(&pud_phys);
-               last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
+               pud = alloc_low_page();
+               last_map_addr = phys_pud_init(pud, __pa(start), __pa(end),
                                                 page_size_mask);
-               unmap_low_page(pud);
 
                spin_lock(&init_mm.page_table_lock);
-               pgd_populate(&init_mm, pgd, __va(pud_phys));
+               pgd_populate(&init_mm, pgd, pud);
                spin_unlock(&init_mm.page_table_lock);
                pgd_changed = true;
        }
@@ -664,13 +691,11 @@ int arch_add_memory(int nid, u64 start, u64 size)
 {
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
-       unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
+       unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;
 
-       last_mapped_pfn = init_memory_mapping(start, start + size);
-       if (last_mapped_pfn > max_pfn_mapped)
-               max_pfn_mapped = last_mapped_pfn;
+       init_memory_mapping(start, start + size);
 
        ret = __add_pages(nid, zone, start_pfn, nr_pages);
        WARN_ON_ONCE(ret);
@@ -686,6 +711,16 @@ EXPORT_SYMBOL_GPL(arch_add_memory);
 
 static struct kcore_list kcore_vsyscall;
 
+static void __init register_page_bootmem_info(void)
+{      /* Body is compiled only with CONFIG_NUMA; otherwise this is a no-op. */
+#ifdef CONFIG_NUMA
+       int i;
+
+       for_each_online_node(i) /* one call per online node, on its NODE_DATA() */
+               register_page_bootmem_info_node(NODE_DATA(i));
+#endif
+}
+
 void __init mem_init(void)
 {
        long codesize, reservedpages, datasize, initsize;
@@ -698,11 +733,8 @@ void __init mem_init(void)
        reservedpages = 0;
 
        /* this will put all low memory onto the freelists */
-#ifdef CONFIG_NUMA
-       totalram_pages = numa_free_all_bootmem();
-#else
+       register_page_bootmem_info();
        totalram_pages = free_all_bootmem();
-#endif
 
        absent_pages = absent_pages_in_range(0, max_pfn);
        reservedpages = max_pfn - totalram_pages - absent_pages;
@@ -772,12 +804,11 @@ void set_kernel_text_ro(void)
 void mark_rodata_ro(void)
 {
        unsigned long start = PFN_ALIGN(_text);
-       unsigned long rodata_start =
-               ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+       unsigned long rodata_start = PFN_ALIGN(__start_rodata);
        unsigned long end = (unsigned long) &__end_rodata_hpage_align;
-       unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
-       unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
-       unsigned long data_start = (unsigned long) &_sdata;
+       unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
+       unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
+       unsigned long all_end = PFN_ALIGN(&_end);
 
        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
               (end - start) >> 10);
@@ -786,10 +817,10 @@ void mark_rodata_ro(void)
        kernel_set_to_readonly = 1;
 
        /*
-        * The rodata section (but not the kernel text!) should also be
-        * not-executable.
+        * The rodata/data/bss/brk section (but not the kernel text!)
+        * should also be not-executable.
         */
-       set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
+       set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
 
        rodata_test();
 
@@ -802,12 +833,12 @@ void mark_rodata_ro(void)
 #endif
 
        free_init_pages("unused kernel memory",
-                       (unsigned long) page_address(virt_to_page(text_end)),
-                       (unsigned long)
-                                page_address(virt_to_page(rodata_start)));
+                       (unsigned long) __va(__pa_symbol(text_end)),
+                       (unsigned long) __va(__pa_symbol(rodata_start)));
+
        free_init_pages("unused kernel memory",
-                       (unsigned long) page_address(virt_to_page(rodata_end)),
-                       (unsigned long) page_address(virt_to_page(data_start)));
+                       (unsigned long) __va(__pa_symbol(rodata_end)),
+                       (unsigned long) __va(__pa_symbol(_sdata)));
 }
 
 #endif