]> git.openfabrics.org - ~shefty/rdma-dev.git/blob - arch/tile/mm/hugetlbpage.c
mm: use vm_unmapped_area() in hugetlbfs on tile architecture
[~shefty/rdma-dev.git] / arch / tile / mm / hugetlbpage.c
1 /*
2  * Copyright 2010 Tilera Corporation. All Rights Reserved.
3  *
4  *   This program is free software; you can redistribute it and/or
5  *   modify it under the terms of the GNU General Public License
6  *   as published by the Free Software Foundation, version 2.
7  *
8  *   This program is distributed in the hope that it will be useful, but
9  *   WITHOUT ANY WARRANTY; without even the implied warranty of
10  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11  *   NON INFRINGEMENT.  See the GNU General Public License for
12  *   more details.
13  *
14  * TILE Huge TLB Page Support for Kernel.
15  * Taken from i386 hugetlb implementation:
16  * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
17  */
18
19 #include <linux/init.h>
20 #include <linux/fs.h>
21 #include <linux/mm.h>
22 #include <linux/hugetlb.h>
23 #include <linux/pagemap.h>
24 #include <linux/slab.h>
25 #include <linux/err.h>
26 #include <linux/sysctl.h>
27 #include <linux/mman.h>
28 #include <asm/tlb.h>
29 #include <asm/tlbflush.h>
30 #include <asm/setup.h>
31
32 #ifdef CONFIG_HUGETLB_SUPER_PAGES
33
34 /*
35  * Provide an additional huge page size (in addition to the regular default
36  * huge page size) if no "hugepagesz" arguments are specified.
37  * Note that it must be smaller than the default huge page size so
38  * that it's possible to allocate them on demand from the buddy allocator.
39  * You can change this to 64K (on a 16K build), 256K, 1M, or 4M,
40  * or not define it at all.
41  */
42 #define ADDITIONAL_HUGE_SIZE (1024 * 1024UL)
43
44 /* "Extra" page-size multipliers, one per level of the page table. */
45 int huge_shift[HUGE_SHIFT_ENTRIES] = {
46 #ifdef ADDITIONAL_HUGE_SIZE
47 #define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE)
48         [HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT
49 #endif
50 };
51
52 /*
53  * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel().
54  * It assumes that L2 PTEs are never in HIGHMEM (we don't support that).
55  * It locks the user pagetable, and bumps up the mm->nr_ptes field,
56  * but otherwise allocate the page table using the kernel versions.
57  */
58 static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd,
59                                 unsigned long address)
60 {
61         pte_t *new;
62
63         if (pmd_none(*pmd)) {
64                 new = pte_alloc_one_kernel(mm, address);
65                 if (!new)
66                         return NULL;
67
68                 smp_wmb(); /* See comment in __pte_alloc */
69
70                 spin_lock(&mm->page_table_lock);
71                 if (likely(pmd_none(*pmd))) {  /* Has another populated it ? */
72                         mm->nr_ptes++;
73                         pmd_populate_kernel(mm, pmd, new);
74                         new = NULL;
75                 } else
76                         VM_BUG_ON(pmd_trans_splitting(*pmd));
77                 spin_unlock(&mm->page_table_lock);
78                 if (new)
79                         pte_free_kernel(mm, new);
80         }
81
82         return pte_offset_kernel(pmd, address);
83 }
84 #endif
85
86 pte_t *huge_pte_alloc(struct mm_struct *mm,
87                       unsigned long addr, unsigned long sz)
88 {
89         pgd_t *pgd;
90         pud_t *pud;
91
92         addr &= -sz;   /* Mask off any low bits in the address. */
93
94         pgd = pgd_offset(mm, addr);
95         pud = pud_alloc(mm, pgd, addr);
96
97 #ifdef CONFIG_HUGETLB_SUPER_PAGES
98         if (sz >= PGDIR_SIZE) {
99                 BUG_ON(sz != PGDIR_SIZE &&
100                        sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]);
101                 return (pte_t *)pud;
102         } else {
103                 pmd_t *pmd = pmd_alloc(mm, pud, addr);
104                 if (sz >= PMD_SIZE) {
105                         BUG_ON(sz != PMD_SIZE &&
106                                sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD]));
107                         return (pte_t *)pmd;
108                 }
109                 else {
110                         if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
111                                 panic("Unexpected page size %#lx\n", sz);
112                         return pte_alloc_hugetlb(mm, pmd, addr);
113                 }
114         }
115 #else
116         BUG_ON(sz != PMD_SIZE);
117         return (pte_t *) pmd_alloc(mm, pud, addr);
118 #endif
119 }
120
121 static pte_t *get_pte(pte_t *base, int index, int level)
122 {
123         pte_t *ptep = base + index;
124 #ifdef CONFIG_HUGETLB_SUPER_PAGES
125         if (!pte_present(*ptep) && huge_shift[level] != 0) {
126                 unsigned long mask = -1UL << huge_shift[level];
127                 pte_t *super_ptep = base + (index & mask);
128                 pte_t pte = *super_ptep;
129                 if (pte_present(pte) && pte_super(pte))
130                         ptep = super_ptep;
131         }
132 #endif
133         return ptep;
134 }
135
136 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
137 {
138         pgd_t *pgd;
139         pud_t *pud;
140         pmd_t *pmd;
141 #ifdef CONFIG_HUGETLB_SUPER_PAGES
142         pte_t *pte;
143 #endif
144
145         /* Get the top-level page table entry. */
146         pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
147         if (!pgd_present(*pgd))
148                 return NULL;
149
150         /* We don't have four levels. */
151         pud = pud_offset(pgd, addr);
152 #ifndef __PAGETABLE_PUD_FOLDED
153 # error support fourth page table level
154 #endif
155
156         /* Check for an L0 huge PTE, if we have three levels. */
157 #ifndef __PAGETABLE_PMD_FOLDED
158         if (pud_huge(*pud))
159                 return (pte_t *)pud;
160
161         pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud),
162                                pmd_index(addr), 1);
163         if (!pmd_present(*pmd))
164                 return NULL;
165 #else
166         pmd = pmd_offset(pud, addr);
167 #endif
168
169         /* Check for an L1 huge PTE. */
170         if (pmd_huge(*pmd))
171                 return (pte_t *)pmd;
172
173 #ifdef CONFIG_HUGETLB_SUPER_PAGES
174         /* Check for an L2 huge PTE. */
175         pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2);
176         if (!pte_present(*pte))
177                 return NULL;
178         if (pte_super(*pte))
179                 return pte;
180 #endif
181
182         return NULL;
183 }
184
185 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
186                               int write)
187 {
188         return ERR_PTR(-EINVAL);
189 }
190
191 int pmd_huge(pmd_t pmd)
192 {
193         return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE);
194 }
195
196 int pud_huge(pud_t pud)
197 {
198         return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
199 }
200
201 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
202                              pmd_t *pmd, int write)
203 {
204         struct page *page;
205
206         page = pte_page(*(pte_t *)pmd);
207         if (page)
208                 page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
209         return page;
210 }
211
212 struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
213                              pud_t *pud, int write)
214 {
215         struct page *page;
216
217         page = pte_page(*(pte_t *)pud);
218         if (page)
219                 page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
220         return page;
221 }
222
223 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
224 {
225         return 0;
226 }
227
228 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
229 static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
230                 unsigned long addr, unsigned long len,
231                 unsigned long pgoff, unsigned long flags)
232 {
233         struct hstate *h = hstate_file(file);
234         struct vm_unmapped_area_info info;
235
236         info.flags = 0;
237         info.length = len;
238         info.low_limit = TASK_UNMAPPED_BASE;
239         info.high_limit = TASK_SIZE;
240         info.align_mask = PAGE_MASK & ~huge_page_mask(h);
241         info.align_offset = 0;
242         return vm_unmapped_area(&info);
243 }
244
245 static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
246                 unsigned long addr0, unsigned long len,
247                 unsigned long pgoff, unsigned long flags)
248 {
249         struct hstate *h = hstate_file(file);
250         struct vm_unmapped_area_info info;
251         unsigned long addr;
252
253         info.flags = VM_UNMAPPED_AREA_TOPDOWN;
254         info.length = len;
255         info.low_limit = PAGE_SIZE;
256         info.high_limit = current->mm->mmap_base;
257         info.align_mask = PAGE_MASK & ~huge_page_mask(h);
258         info.align_offset = 0;
259         addr = vm_unmapped_area(&info);
260
261         /*
262          * A failed mmap() very likely causes application failure,
263          * so fall back to the bottom-up function here. This scenario
264          * can happen with large stack limits and large mmap()
265          * allocations.
266          */
267         if (addr & ~PAGE_MASK) {
268                 VM_BUG_ON(addr != -ENOMEM);
269                 info.flags = 0;
270                 info.low_limit = TASK_UNMAPPED_BASE;
271                 info.high_limit = TASK_SIZE;
272                 addr = vm_unmapped_area(&info);
273         }
274
275         return addr;
276 }
277
278 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
279                 unsigned long len, unsigned long pgoff, unsigned long flags)
280 {
281         struct hstate *h = hstate_file(file);
282         struct mm_struct *mm = current->mm;
283         struct vm_area_struct *vma;
284
285         if (len & ~huge_page_mask(h))
286                 return -EINVAL;
287         if (len > TASK_SIZE)
288                 return -ENOMEM;
289
290         if (flags & MAP_FIXED) {
291                 if (prepare_hugepage_range(file, addr, len))
292                         return -EINVAL;
293                 return addr;
294         }
295
296         if (addr) {
297                 addr = ALIGN(addr, huge_page_size(h));
298                 vma = find_vma(mm, addr);
299                 if (TASK_SIZE - len >= addr &&
300                     (!vma || addr + len <= vma->vm_start))
301                         return addr;
302         }
303         if (current->mm->get_unmapped_area == arch_get_unmapped_area)
304                 return hugetlb_get_unmapped_area_bottomup(file, addr, len,
305                                 pgoff, flags);
306         else
307                 return hugetlb_get_unmapped_area_topdown(file, addr, len,
308                                 pgoff, flags);
309 }
310 #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
311
312 #ifdef CONFIG_HUGETLB_SUPER_PAGES
313 static __init int __setup_hugepagesz(unsigned long ps)
314 {
315         int log_ps = __builtin_ctzl(ps);
316         int level, base_shift;
317
318         if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
319                 pr_warn("Not enabling %ld byte huge pages;"
320                         " must be a power of four.\n", ps);
321                 return -EINVAL;
322         }
323
324         if (ps > 64*1024*1024*1024UL) {
325                 pr_warn("Not enabling %ld MB huge pages;"
326                         " largest legal value is 64 GB .\n", ps >> 20);
327                 return -EINVAL;
328         } else if (ps >= PUD_SIZE) {
329                 static long hv_jpage_size;
330                 if (hv_jpage_size == 0)
331                         hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
332                 if (hv_jpage_size != PUD_SIZE) {
333                         pr_warn("Not enabling >= %ld MB huge pages:"
334                                 " hypervisor reports size %ld\n",
335                                 PUD_SIZE >> 20, hv_jpage_size);
336                         return -EINVAL;
337                 }
338                 level = 0;
339                 base_shift = PUD_SHIFT;
340         } else if (ps >= PMD_SIZE) {
341                 level = 1;
342                 base_shift = PMD_SHIFT;
343         } else if (ps > PAGE_SIZE) {
344                 level = 2;
345                 base_shift = PAGE_SHIFT;
346         } else {
347                 pr_err("hugepagesz: huge page size %ld too small\n", ps);
348                 return -EINVAL;
349         }
350
351         if (log_ps != base_shift) {
352                 int shift_val = log_ps - base_shift;
353                 if (huge_shift[level] != 0) {
354                         int old_shift = base_shift + huge_shift[level];
355                         pr_warn("Not enabling %ld MB huge pages;"
356                                 " already have size %ld MB.\n",
357                                 ps >> 20, (1UL << old_shift) >> 20);
358                         return -EINVAL;
359                 }
360                 if (hv_set_pte_super_shift(level, shift_val) != 0) {
361                         pr_warn("Not enabling %ld MB huge pages;"
362                                 " no hypervisor support.\n", ps >> 20);
363                         return -EINVAL;
364                 }
365                 printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
366                 huge_shift[level] = shift_val;
367         }
368
369         hugetlb_add_hstate(log_ps - PAGE_SHIFT);
370
371         return 0;
372 }
373
374 static bool saw_hugepagesz;
375
376 static __init int setup_hugepagesz(char *opt)
377 {
378         if (!saw_hugepagesz) {
379                 saw_hugepagesz = true;
380                 memset(huge_shift, 0, sizeof(huge_shift));
381         }
382         return __setup_hugepagesz(memparse(opt, NULL));
383 }
384 __setup("hugepagesz=", setup_hugepagesz);
385
386 #ifdef ADDITIONAL_HUGE_SIZE
387 /*
388  * Provide an additional huge page size if no "hugepagesz" args are given.
389  * In that case, all the cores have properly set up their hv super_shift
390  * already, but we need to notify the hugetlb code to enable the
391  * new huge page size from the Linux point of view.
392  */
393 static __init int add_default_hugepagesz(void)
394 {
395         if (!saw_hugepagesz) {
396                 BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE ||
397                              ADDITIONAL_HUGE_SIZE <= PAGE_SIZE);
398                 BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) !=
399                              ADDITIONAL_HUGE_SIZE);
400                 BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1);
401                 hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT);
402         }
403         return 0;
404 }
405 arch_initcall(add_default_hugepagesz);
406 #endif
407
408 #endif /* CONFIG_HUGETLB_SUPER_PAGES */