VM: add "vm_brk()" helper function
[~shefty/rdma-dev.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38 #include <asm/exec.h>
39
40 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
41 static int load_elf_library(struct file *);
42 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
43                                 int, int, unsigned long);
44
45 /*
46  * If we don't support core dumping, then supply a NULL so we
47  * don't even try.
48  */
49 #ifdef CONFIG_ELF_CORE
50 static int elf_core_dump(struct coredump_params *cprm);
51 #else
52 #define elf_core_dump   NULL
53 #endif
54
55 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
56 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
57 #else
58 #define ELF_MIN_ALIGN   PAGE_SIZE
59 #endif
60
61 #ifndef ELF_CORE_EFLAGS
62 #define ELF_CORE_EFLAGS 0
63 #endif
64
/*
 * Page-granularity helpers, all in units of ELF_MIN_ALIGN:
 *   ELF_PAGESTART  - round _v down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET - offset of _v within its ELF_MIN_ALIGN unit
 *   ELF_PAGEALIGN  - round _v up to an ELF_MIN_ALIGN boundary
 *
 * The rounding masks are widened to unsigned long before being
 * complemented, so that a 32-bit unsigned ELF_MIN_ALIGN cannot clear the
 * upper half of a 64-bit address.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
        (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
68
/*
 * Operations table for the ELF binary format.  It points at the loaders
 * declared above; core_dump is NULL when CONFIG_ELF_CORE is not set,
 * because elf_core_dump is then #defined to NULL.
 */
69 static struct linux_binfmt elf_format = {
70         .module         = THIS_MODULE,
71         .load_binary    = load_elf_binary,
72         .load_shlib     = load_elf_library,
73         .core_dump      = elf_core_dump,
74         .min_coredump   = ELF_EXEC_PAGESIZE,
75 };
76
/*
 * True when x, taken as an unsigned address, lies at or above TASK_SIZE.
 * The code in this file applies it to the values returned by the mapping
 * helpers (vm_brk(), elf_map()) to decide whether the call failed.
 */
77 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
78
79 static int set_brk(unsigned long start, unsigned long end)
80 {
81         start = ELF_PAGEALIGN(start);
82         end = ELF_PAGEALIGN(end);
83         if (end > start) {
84                 unsigned long addr;
85                 addr = vm_brk(start, end - start);
86                 if (BAD_ADDR(addr))
87                         return addr;
88         }
89         current->mm->start_brk = current->mm->brk = end;
90         return 0;
91 }
92
93 /* We need to explicitly zero any fractional pages
94    after the data section (i.e. bss).  This would
95    contain the junk from the file that should not
96    be in memory
97  */
98 static int padzero(unsigned long elf_bss)
99 {
100         unsigned long nbyte;
101
102         nbyte = ELF_PAGEOFFSET(elf_bss);
103         if (nbyte) {
104                 nbyte = ELF_MIN_ALIGN - nbyte;
105                 if (clear_user((void __user *) elf_bss, nbyte))
106                         return -EFAULT;
107         }
108         return 0;
109 }
110
/*
 * Stack manipulation helpers, in two flavours depending on the direction
 * in which the stack grows:
 *   STACK_ADD(sp, items)   - address "items" elf_addr_t slots further on
 *   STACK_ROUND(sp, items) - same, rounded to a 16-byte boundary
 *   STACK_ALLOC(sp, len)   - move sp by len and yield the start of the
 *                            newly reserved area (modifies sp)
 *
 * Every macro argument is parenthesized so that expression arguments such
 * as "a + b" are evaluated as a unit.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
125
126 #ifndef ELF_BASE_PLATFORM
127 /*
128  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
129  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
130  * will be copied to the user stack in the same manner as AT_PLATFORM.
131  */
132 #define ELF_BASE_PLATFORM NULL
133 #endif
134
/*
 * create_elf_tables() - lay out the initial user stack for a new ELF image.
 *
 * Starting from bprm->p, this copies the platform strings and 16 random
 * bytes onto the user stack, builds the auxiliary vector in
 * current->mm->saved_auxv, and then writes, in this order: argc, the
 * argv[] pointers, a 0 terminator, the envp[] pointers, a 0 terminator and
 * the auxiliary vector.  While walking the strings from
 * current->mm->arg_start it sets arg_end, env_start and env_end in
 * current->mm.  The final stack pointer is left in bprm->p.
 *
 * @bprm:             exec state; p, argc, envc, exec, interp_flags and
 *                    interp_data are read, p is updated (and exec too
 *                    under CONFIG_STACK_GROWSUP).
 * @exec:             ELF header of the program; e_phoff, e_phnum and
 *                    e_entry are used for AT_PHDR, AT_PHNUM and AT_ENTRY.
 * @load_addr:        added to exec->e_phoff to form AT_PHDR.
 * @interp_load_addr: reported as AT_BASE.
 *
 * Returns 0 on success, -EFAULT when a write to user space or
 * find_extend_vma() fails, or -EINVAL when strnlen_user() reports 0 or a
 * length above MAX_ARG_STRLEN for an argument or environment string.
 */
135 static int
136 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
137                 unsigned long load_addr, unsigned long interp_load_addr)
138 {
139         unsigned long p = bprm->p;
140         int argc = bprm->argc;
141         int envc = bprm->envc;
142         elf_addr_t __user *argv;
143         elf_addr_t __user *envp;
144         elf_addr_t __user *sp;
145         elf_addr_t __user *u_platform;
146         elf_addr_t __user *u_base_platform;
147         elf_addr_t __user *u_rand_bytes;
148         const char *k_platform = ELF_PLATFORM;
149         const char *k_base_platform = ELF_BASE_PLATFORM;
150         unsigned char k_rand_bytes[16];
151         int items;
152         elf_addr_t *elf_info;
153         int ei_index = 0;
154         const struct cred *cred = current_cred();
155         struct vm_area_struct *vma;
156
157         /*
158          * In some cases (e.g. Hyper-Threading), we want to avoid L1
159          * evictions by the processes running on the same package. One
160          * thing we can do is to shuffle the initial stack for them.
161          */
162
163         p = arch_align_stack(p);
164
165         /*
166          * If this architecture has a platform capability string, copy it
167          * to userspace.  In some cases (Sparc), this info is impossible
168          * for userspace to get any other way, in others (i386) it is
169          * merely difficult.
170          */
171         u_platform = NULL;
172         if (k_platform) {
173                 size_t len = strlen(k_platform) + 1;
174
175                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
176                 if (__copy_to_user(u_platform, k_platform, len))
177                         return -EFAULT;
178         }
179
180         /*
181          * If this architecture has a "base" platform capability
182          * string, copy it to userspace.
183          */
184         u_base_platform = NULL;
185         if (k_base_platform) {
186                 size_t len = strlen(k_base_platform) + 1;
187
188                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
189                 if (__copy_to_user(u_base_platform, k_base_platform, len))
190                         return -EFAULT;
191         }
192
193         /*
194          * Generate 16 random bytes for userspace PRNG seeding.
195          */
196         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
197         u_rand_bytes = (elf_addr_t __user *)
198                        STACK_ALLOC(p, sizeof(k_rand_bytes));
199         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
200                 return -EFAULT;
201
202         /* Create the ELF interpreter info */
        /* The vector is built in mm->saved_auxv and copied to the stack last. */
203         elf_info = (elf_addr_t *)current->mm->saved_auxv;
204         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
        /* Each entry is an (id, value) pair and advances ei_index by two. */
205 #define NEW_AUX_ENT(id, val) \
206         do { \
207                 elf_info[ei_index++] = id; \
208                 elf_info[ei_index++] = val; \
209         } while (0)
210
211 #ifdef ARCH_DLINFO
212         /* 
213          * ARCH_DLINFO must come first so PPC can do its special alignment of
214          * AUXV.
215          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
216          * ARCH_DLINFO changes
217          */
218         ARCH_DLINFO;
219 #endif
220         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
221         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
222         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
223         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
224         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
225         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
226         NEW_AUX_ENT(AT_BASE, interp_load_addr);
227         NEW_AUX_ENT(AT_FLAGS, 0);
228         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
229         NEW_AUX_ENT(AT_UID, cred->uid);
230         NEW_AUX_ENT(AT_EUID, cred->euid);
231         NEW_AUX_ENT(AT_GID, cred->gid);
232         NEW_AUX_ENT(AT_EGID, cred->egid);
233         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
234         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
235         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
236         if (k_platform) {
237                 NEW_AUX_ENT(AT_PLATFORM,
238                             (elf_addr_t)(unsigned long)u_platform);
239         }
240         if (k_base_platform) {
241                 NEW_AUX_ENT(AT_BASE_PLATFORM,
242                             (elf_addr_t)(unsigned long)u_base_platform);
243         }
244         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
245                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
246         }
247 #undef NEW_AUX_ENT
248         /* AT_NULL is zero; clear the rest too */
249         memset(&elf_info[ei_index], 0,
250                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
251
252         /* And advance past the AT_NULL entry.  */
253         ei_index += 2;
254
255         sp = STACK_ADD(p, ei_index);
256
        /* argv[] plus terminator, envp[] plus terminator, and one slot for argc */
257         items = (argc + 1) + (envc + 1) + 1;
258         bprm->p = STACK_ROUND(sp, items);
259
260         /* Point sp at the lowest address on the stack */
261 #ifdef CONFIG_STACK_GROWSUP
262         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
263         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
264 #else
265         sp = (elf_addr_t __user *)bprm->p;
266 #endif
267
268
269         /*
270          * Grow the stack manually; some architectures have a limit on how
271          * far ahead a user-space access may be in order to grow the stack.
272          */
273         vma = find_extend_vma(current->mm, bprm->p);
274         if (!vma)
275                 return -EFAULT;
276
277         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
278         if (__put_user(argc, sp++))
279                 return -EFAULT;
280         argv = sp;
281         envp = argv + argc + 1;
282
283         /* Populate argv and envp */
        /* p is reused here as a cursor over the strings, from mm->arg_start */
284         p = current->mm->arg_end = current->mm->arg_start;
285         while (argc-- > 0) {
286                 size_t len;
287                 if (__put_user((elf_addr_t)p, argv++))
288                         return -EFAULT;
289                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
290                 if (!len || len > MAX_ARG_STRLEN)
291                         return -EINVAL;
292                 p += len;
293         }
294         if (__put_user(0, argv))
295                 return -EFAULT;
296         current->mm->arg_end = current->mm->env_start = p;
297         while (envc-- > 0) {
298                 size_t len;
299                 if (__put_user((elf_addr_t)p, envp++))
300                         return -EFAULT;
301                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
302                 if (!len || len > MAX_ARG_STRLEN)
303                         return -EINVAL;
304                 p += len;
305         }
306         if (__put_user(0, envp))
307                 return -EFAULT;
308         current->mm->env_end = p;
309
310         /* Put the elf_info on the stack in the right place.  */
        /* envp sits on its terminator slot here, so the vector starts one slot on */
311         sp = (elf_addr_t __user *)envp + 1;
312         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
313                 return -EFAULT;
314         return 0;
315 }
316
317 static unsigned long elf_map(struct file *filep, unsigned long addr,
318                 struct elf_phdr *eppnt, int prot, int type,
319                 unsigned long total_size)
320 {
321         unsigned long map_addr;
322         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
323         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
324         addr = ELF_PAGESTART(addr);
325         size = ELF_PAGEALIGN(size);
326
327         /* mmap() will return -EINVAL if given a zero size, but a
328          * segment with zero filesize is perfectly valid */
329         if (!size)
330                 return addr;
331
332         down_write(&current->mm->mmap_sem);
333         /*
334         * total_size is the size of the ELF (interpreter) image.
335         * The _first_ mmap needs to know the full size, otherwise
336         * randomization might put this image into an overlapping
337         * position with the ELF binary image. (since size < total_size)
338         * So we first map the 'big' image - and unmap the remainder at
339         * the end. (which unmap is needed for ELF images with holes.)
340         */
341         if (total_size) {
342                 total_size = ELF_PAGEALIGN(total_size);
343                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
344                 if (!BAD_ADDR(map_addr))
345                         do_munmap(current->mm, map_addr+size, total_size-size);
346         } else
347                 map_addr = do_mmap(filep, addr, size, prot, type, off);
348
349         up_write(&current->mm->mmap_sem);
350         return(map_addr);
351 }
352
353 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
354 {
355         int i, first_idx = -1, last_idx = -1;
356
357         for (i = 0; i < nr; i++) {
358                 if (cmds[i].p_type == PT_LOAD) {
359                         last_idx = i;
360                         if (first_idx == -1)
361                                 first_idx = i;
362                 }
363         }
364         if (first_idx == -1)
365                 return 0;
366
367         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
368                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
369 }
370
371
372 /* This is much more generalized than the library routine read function,
373    so we keep this separate.  Technically the library read function
374    is only provided so that we can read a.out libraries that have
375    an ELF header */
376
377 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
378                 struct file *interpreter, unsigned long *interp_map_addr,
379                 unsigned long no_base)
380 {
381         struct elf_phdr *elf_phdata;
382         struct elf_phdr *eppnt;
383         unsigned long load_addr = 0;
384         int load_addr_set = 0;
385         unsigned long last_bss = 0, elf_bss = 0;
386         unsigned long error = ~0UL;
387         unsigned long total_size;
388         int retval, i, size;
389
390         /* First of all, some simple consistency checks */
391         if (interp_elf_ex->e_type != ET_EXEC &&
392             interp_elf_ex->e_type != ET_DYN)
393                 goto out;
394         if (!elf_check_arch(interp_elf_ex))
395                 goto out;
396         if (!interpreter->f_op || !interpreter->f_op->mmap)
397                 goto out;
398
399         /*
400          * If the size of this structure has changed, then punt, since
401          * we will be doing the wrong thing.
402          */
403         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
404                 goto out;
405         if (interp_elf_ex->e_phnum < 1 ||
406                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
407                 goto out;
408
409         /* Now read in all of the header information */
410         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
411         if (size > ELF_MIN_ALIGN)
412                 goto out;
413         elf_phdata = kmalloc(size, GFP_KERNEL);
414         if (!elf_phdata)
415                 goto out;
416
417         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
418                              (char *)elf_phdata, size);
419         error = -EIO;
420         if (retval != size) {
421                 if (retval < 0)
422                         error = retval; 
423                 goto out_close;
424         }
425
426         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
427         if (!total_size) {
428                 error = -EINVAL;
429                 goto out_close;
430         }
431
432         eppnt = elf_phdata;
433         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
434                 if (eppnt->p_type == PT_LOAD) {
435                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
436                         int elf_prot = 0;
437                         unsigned long vaddr = 0;
438                         unsigned long k, map_addr;
439
440                         if (eppnt->p_flags & PF_R)
441                                 elf_prot = PROT_READ;
442                         if (eppnt->p_flags & PF_W)
443                                 elf_prot |= PROT_WRITE;
444                         if (eppnt->p_flags & PF_X)
445                                 elf_prot |= PROT_EXEC;
446                         vaddr = eppnt->p_vaddr;
447                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
448                                 elf_type |= MAP_FIXED;
449                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
450                                 load_addr = -vaddr;
451
452                         map_addr = elf_map(interpreter, load_addr + vaddr,
453                                         eppnt, elf_prot, elf_type, total_size);
454                         total_size = 0;
455                         if (!*interp_map_addr)
456                                 *interp_map_addr = map_addr;
457                         error = map_addr;
458                         if (BAD_ADDR(map_addr))
459                                 goto out_close;
460
461                         if (!load_addr_set &&
462                             interp_elf_ex->e_type == ET_DYN) {
463                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
464                                 load_addr_set = 1;
465                         }
466
467                         /*
468                          * Check to see if the section's size will overflow the
469                          * allowed task size. Note that p_filesz must always be
470                          * <= p_memsize so it's only necessary to check p_memsz.
471                          */
472                         k = load_addr + eppnt->p_vaddr;
473                         if (BAD_ADDR(k) ||
474                             eppnt->p_filesz > eppnt->p_memsz ||
475                             eppnt->p_memsz > TASK_SIZE ||
476                             TASK_SIZE - eppnt->p_memsz < k) {
477                                 error = -ENOMEM;
478                                 goto out_close;
479                         }
480
481                         /*
482                          * Find the end of the file mapping for this phdr, and
483                          * keep track of the largest address we see for this.
484                          */
485                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
486                         if (k > elf_bss)
487                                 elf_bss = k;
488
489                         /*
490                          * Do the same thing for the memory mapping - between
491                          * elf_bss and last_bss is the bss section.
492                          */
493                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
494                         if (k > last_bss)
495                                 last_bss = k;
496                 }
497         }
498
499         if (last_bss > elf_bss) {
500                 /*
501                  * Now fill out the bss section.  First pad the last page up
502                  * to the page boundary, and then perform a mmap to make sure
503                  * that there are zero-mapped pages up to and including the
504                  * last bss page.
505                  */
506                 if (padzero(elf_bss)) {
507                         error = -EFAULT;
508                         goto out_close;
509                 }
510
511                 /* What we have mapped so far */
512                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
513
514                 /* Map the last of the bss segment */
515                 error = vm_brk(elf_bss, last_bss - elf_bss);
516                 if (BAD_ADDR(error))
517                         goto out_close;
518         }
519
520         error = load_addr;
521
522 out_close:
523         kfree(elf_phdata);
524 out:
525         return error;
526 }
527
528 /*
529  * These are the functions used to load ELF style executables and shared
530  * libraries.  There is no binary dependent code anywhere else.
531  */
532
533 #define INTERPRETER_NONE 0
534 #define INTERPRETER_ELF 2
535
536 #ifndef STACK_RND_MASK
537 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
538 #endif
539
540 static unsigned long randomize_stack_top(unsigned long stack_top)
541 {
542         unsigned int random_variable = 0;
543
544         if ((current->flags & PF_RANDOMIZE) &&
545                 !(current->personality & ADDR_NO_RANDOMIZE)) {
546                 random_variable = get_random_int() & STACK_RND_MASK;
547                 random_variable <<= PAGE_SHIFT;
548         }
549 #ifdef CONFIG_STACK_GROWSUP
550         return PAGE_ALIGN(stack_top) + random_variable;
551 #else
552         return PAGE_ALIGN(stack_top) - random_variable;
553 #endif
554 }
555
556 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
557 {
558         struct file *interpreter = NULL; /* to shut gcc up */
559         unsigned long load_addr = 0, load_bias = 0;
560         int load_addr_set = 0;
561         char * elf_interpreter = NULL;
562         unsigned long error;
563         struct elf_phdr *elf_ppnt, *elf_phdata;
564         unsigned long elf_bss, elf_brk;
565         int retval, i;
566         unsigned int size;
567         unsigned long elf_entry;
568         unsigned long interp_load_addr = 0;
569         unsigned long start_code, end_code, start_data, end_data;
570         unsigned long reloc_func_desc __maybe_unused = 0;
571         int executable_stack = EXSTACK_DEFAULT;
572         unsigned long def_flags = 0;
573         struct {
574                 struct elfhdr elf_ex;
575                 struct elfhdr interp_elf_ex;
576         } *loc;
577
578         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
579         if (!loc) {
580                 retval = -ENOMEM;
581                 goto out_ret;
582         }
583         
584         /* Get the exec-header */
585         loc->elf_ex = *((struct elfhdr *)bprm->buf);
586
587         retval = -ENOEXEC;
588         /* First of all, some simple consistency checks */
589         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
590                 goto out;
591
592         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
593                 goto out;
594         if (!elf_check_arch(&loc->elf_ex))
595                 goto out;
596         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
597                 goto out;
598
599         /* Now read in all of the header information */
600         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
601                 goto out;
602         if (loc->elf_ex.e_phnum < 1 ||
603                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
604                 goto out;
605         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
606         retval = -ENOMEM;
607         elf_phdata = kmalloc(size, GFP_KERNEL);
608         if (!elf_phdata)
609                 goto out;
610
611         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
612                              (char *)elf_phdata, size);
613         if (retval != size) {
614                 if (retval >= 0)
615                         retval = -EIO;
616                 goto out_free_ph;
617         }
618
619         elf_ppnt = elf_phdata;
620         elf_bss = 0;
621         elf_brk = 0;
622
623         start_code = ~0UL;
624         end_code = 0;
625         start_data = 0;
626         end_data = 0;
627
628         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
629                 if (elf_ppnt->p_type == PT_INTERP) {
630                         /* This is the program interpreter used for
631                          * shared libraries - for now assume that this
632                          * is an a.out format binary
633                          */
634                         retval = -ENOEXEC;
635                         if (elf_ppnt->p_filesz > PATH_MAX || 
636                             elf_ppnt->p_filesz < 2)
637                                 goto out_free_ph;
638
639                         retval = -ENOMEM;
640                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
641                                                   GFP_KERNEL);
642                         if (!elf_interpreter)
643                                 goto out_free_ph;
644
645                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
646                                              elf_interpreter,
647                                              elf_ppnt->p_filesz);
648                         if (retval != elf_ppnt->p_filesz) {
649                                 if (retval >= 0)
650                                         retval = -EIO;
651                                 goto out_free_interp;
652                         }
653                         /* make sure path is NULL terminated */
654                         retval = -ENOEXEC;
655                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
656                                 goto out_free_interp;
657
658                         interpreter = open_exec(elf_interpreter);
659                         retval = PTR_ERR(interpreter);
660                         if (IS_ERR(interpreter))
661                                 goto out_free_interp;
662
663                         /*
664                          * If the binary is not readable then enforce
665                          * mm->dumpable = 0 regardless of the interpreter's
666                          * permissions.
667                          */
668                         would_dump(bprm, interpreter);
669
670                         retval = kernel_read(interpreter, 0, bprm->buf,
671                                              BINPRM_BUF_SIZE);
672                         if (retval != BINPRM_BUF_SIZE) {
673                                 if (retval >= 0)
674                                         retval = -EIO;
675                                 goto out_free_dentry;
676                         }
677
678                         /* Get the exec headers */
679                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
680                         break;
681                 }
682                 elf_ppnt++;
683         }
684
685         elf_ppnt = elf_phdata;
686         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
687                 if (elf_ppnt->p_type == PT_GNU_STACK) {
688                         if (elf_ppnt->p_flags & PF_X)
689                                 executable_stack = EXSTACK_ENABLE_X;
690                         else
691                                 executable_stack = EXSTACK_DISABLE_X;
692                         break;
693                 }
694
695         /* Some simple consistency checks for the interpreter */
696         if (elf_interpreter) {
697                 retval = -ELIBBAD;
698                 /* Not an ELF interpreter */
699                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
700                         goto out_free_dentry;
701                 /* Verify the interpreter has a valid arch */
702                 if (!elf_check_arch(&loc->interp_elf_ex))
703                         goto out_free_dentry;
704         }
705
706         /* Flush all traces of the currently running executable */
707         retval = flush_old_exec(bprm);
708         if (retval)
709                 goto out_free_dentry;
710
711         /* OK, This is the point of no return */
712         current->mm->def_flags = def_flags;
713
714         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
715            may depend on the personality.  */
716         SET_PERSONALITY(loc->elf_ex);
717         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
718                 current->personality |= READ_IMPLIES_EXEC;
719
720         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
721                 current->flags |= PF_RANDOMIZE;
722
723         setup_new_exec(bprm);
724
725         /* Do this so that we can load the interpreter, if need be.  We will
726            change some of these later */
727         current->mm->free_area_cache = current->mm->mmap_base;
728         current->mm->cached_hole_size = 0;
729         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
730                                  executable_stack);
731         if (retval < 0) {
732                 send_sig(SIGKILL, current, 0);
733                 goto out_free_dentry;
734         }
735         
736         current->mm->start_stack = bprm->p;
737
738         /* Now we do a little grungy work by mmapping the ELF image into
739            the correct location in memory. */
740         for(i = 0, elf_ppnt = elf_phdata;
741             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
742                 int elf_prot = 0, elf_flags;
743                 unsigned long k, vaddr;
744
745                 if (elf_ppnt->p_type != PT_LOAD)
746                         continue;
747
748                 if (unlikely (elf_brk > elf_bss)) {
749                         unsigned long nbyte;
750                     
751                         /* There was a PT_LOAD segment with p_memsz > p_filesz
752                            before this one. Map anonymous pages, if needed,
753                            and clear the area.  */
754                         retval = set_brk(elf_bss + load_bias,
755                                          elf_brk + load_bias);
756                         if (retval) {
757                                 send_sig(SIGKILL, current, 0);
758                                 goto out_free_dentry;
759                         }
760                         nbyte = ELF_PAGEOFFSET(elf_bss);
761                         if (nbyte) {
762                                 nbyte = ELF_MIN_ALIGN - nbyte;
763                                 if (nbyte > elf_brk - elf_bss)
764                                         nbyte = elf_brk - elf_bss;
765                                 if (clear_user((void __user *)elf_bss +
766                                                         load_bias, nbyte)) {
767                                         /*
768                                          * This bss-zeroing can fail if the ELF
769                                          * file specifies odd protections. So
770                                          * we don't check the return value
771                                          */
772                                 }
773                         }
774                 }
775
776                 if (elf_ppnt->p_flags & PF_R)
777                         elf_prot |= PROT_READ;
778                 if (elf_ppnt->p_flags & PF_W)
779                         elf_prot |= PROT_WRITE;
780                 if (elf_ppnt->p_flags & PF_X)
781                         elf_prot |= PROT_EXEC;
782
783                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
784
785                 vaddr = elf_ppnt->p_vaddr;
786                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
787                         elf_flags |= MAP_FIXED;
788                 } else if (loc->elf_ex.e_type == ET_DYN) {
789                         /* Try and get dynamic programs out of the way of the
790                          * default mmap base, as well as whatever program they
791                          * might try to exec.  This is because the brk will
792                          * follow the loader, and is not movable.  */
793 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
794                         /* Memory randomization might have been switched off
795                          * in runtime via sysctl.
796                          * If that is the case, retain the original non-zero
797                          * load_bias value in order to establish proper
798                          * non-randomized mappings.
799                          */
800                         if (current->flags & PF_RANDOMIZE)
801                                 load_bias = 0;
802                         else
803                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
804 #else
805                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
806 #endif
807                 }
808
809                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
810                                 elf_prot, elf_flags, 0);
811                 if (BAD_ADDR(error)) {
812                         send_sig(SIGKILL, current, 0);
813                         retval = IS_ERR((void *)error) ?
814                                 PTR_ERR((void*)error) : -EINVAL;
815                         goto out_free_dentry;
816                 }
817
818                 if (!load_addr_set) {
819                         load_addr_set = 1;
820                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
821                         if (loc->elf_ex.e_type == ET_DYN) {
822                                 load_bias += error -
823                                              ELF_PAGESTART(load_bias + vaddr);
824                                 load_addr += load_bias;
825                                 reloc_func_desc = load_bias;
826                         }
827                 }
828                 k = elf_ppnt->p_vaddr;
829                 if (k < start_code)
830                         start_code = k;
831                 if (start_data < k)
832                         start_data = k;
833
834                 /*
835                  * Check to see if the section's size will overflow the
836                  * allowed task size. Note that p_filesz must always be
837                  * <= p_memsz so it is only necessary to check p_memsz.
838                  */
839                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
840                     elf_ppnt->p_memsz > TASK_SIZE ||
841                     TASK_SIZE - elf_ppnt->p_memsz < k) {
842                         /* set_brk can never work. Avoid overflows. */
843                         send_sig(SIGKILL, current, 0);
844                         retval = -EINVAL;
845                         goto out_free_dentry;
846                 }
847
848                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
849
850                 if (k > elf_bss)
851                         elf_bss = k;
852                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
853                         end_code = k;
854                 if (end_data < k)
855                         end_data = k;
856                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
857                 if (k > elf_brk)
858                         elf_brk = k;
859         }
860
861         loc->elf_ex.e_entry += load_bias;
862         elf_bss += load_bias;
863         elf_brk += load_bias;
864         start_code += load_bias;
865         end_code += load_bias;
866         start_data += load_bias;
867         end_data += load_bias;
868
869         /* Calling set_brk effectively mmaps the pages that we need
870          * for the bss and break sections.  We must do this before
871          * mapping in the interpreter, to make sure it doesn't wind
872          * up getting placed where the bss needs to go.
873          */
874         retval = set_brk(elf_bss, elf_brk);
875         if (retval) {
876                 send_sig(SIGKILL, current, 0);
877                 goto out_free_dentry;
878         }
879         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
880                 send_sig(SIGSEGV, current, 0);
881                 retval = -EFAULT; /* Nobody gets to see this, but.. */
882                 goto out_free_dentry;
883         }
884
885         if (elf_interpreter) {
886                 unsigned long uninitialized_var(interp_map_addr);
887
888                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
889                                             interpreter,
890                                             &interp_map_addr,
891                                             load_bias);
892                 if (!IS_ERR((void *)elf_entry)) {
893                         /*
894                          * load_elf_interp() returns relocation
895                          * adjustment
896                          */
897                         interp_load_addr = elf_entry;
898                         elf_entry += loc->interp_elf_ex.e_entry;
899                 }
900                 if (BAD_ADDR(elf_entry)) {
901                         force_sig(SIGSEGV, current);
902                         retval = IS_ERR((void *)elf_entry) ?
903                                         (int)elf_entry : -EINVAL;
904                         goto out_free_dentry;
905                 }
906                 reloc_func_desc = interp_load_addr;
907
908                 allow_write_access(interpreter);
909                 fput(interpreter);
910                 kfree(elf_interpreter);
911         } else {
912                 elf_entry = loc->elf_ex.e_entry;
913                 if (BAD_ADDR(elf_entry)) {
914                         force_sig(SIGSEGV, current);
915                         retval = -EINVAL;
916                         goto out_free_dentry;
917                 }
918         }
919
920         kfree(elf_phdata);
921
922         set_binfmt(&elf_format);
923
924 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
925         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
926         if (retval < 0) {
927                 send_sig(SIGKILL, current, 0);
928                 goto out;
929         }
930 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
931
932         install_exec_creds(bprm);
933         retval = create_elf_tables(bprm, &loc->elf_ex,
934                           load_addr, interp_load_addr);
935         if (retval < 0) {
936                 send_sig(SIGKILL, current, 0);
937                 goto out;
938         }
939         /* N.B. passed_fileno might not be initialized? */
940         current->mm->end_code = end_code;
941         current->mm->start_code = start_code;
942         current->mm->start_data = start_data;
943         current->mm->end_data = end_data;
944         current->mm->start_stack = bprm->p;
945
946 #ifdef arch_randomize_brk
947         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
948                 current->mm->brk = current->mm->start_brk =
949                         arch_randomize_brk(current->mm);
950 #ifdef CONFIG_COMPAT_BRK
951                 current->brk_randomized = 1;
952 #endif
953         }
954 #endif
955
956         if (current->personality & MMAP_PAGE_ZERO) {
957                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
958                    and some applications "depend" upon this behavior.
959                    Since we do not have the power to recompile these, we
960                    emulate the SVr4 behavior. Sigh. */
961                 down_write(&current->mm->mmap_sem);
962                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
963                                 MAP_FIXED | MAP_PRIVATE, 0);
964                 up_write(&current->mm->mmap_sem);
965         }
966
967 #ifdef ELF_PLAT_INIT
968         /*
969          * The ABI may specify that certain registers be set up in special
970          * ways (on i386 %edx is the address of a DT_FINI function, for
971          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
972          * that the e_entry field is the address of the function descriptor
973          * for the startup routine, rather than the address of the startup
974          * routine itself.  This macro performs whatever initialization to
975          * the regs structure is required as well as any relocations to the
976          * function descriptor entries when executing dynamically links apps.
977          */
978         ELF_PLAT_INIT(regs, reloc_func_desc);
979 #endif
980
981         start_thread(regs, elf_entry, bprm->p);
982         retval = 0;
983 out:
984         kfree(loc);
985 out_ret:
986         return retval;
987
988         /* error cleanup */
989 out_free_dentry:
990         allow_write_access(interpreter);
991         if (interpreter)
992                 fput(interpreter);
993 out_free_interp:
994         kfree(elf_interpreter);
995 out_free_ph:
996         kfree(elf_phdata);
997         goto out;
998 }
999
1000 /* This is really simpleminded and specialized - we are loading an
1001    a.out library that is given an ELF header. */
1002 static int load_elf_library(struct file *file)
1003 {
1004         struct elf_phdr *elf_phdata;
1005         struct elf_phdr *eppnt;
1006         unsigned long elf_bss, bss, len;
1007         int retval, error, i, j;
1008         struct elfhdr elf_ex;
1009
1010         error = -ENOEXEC;
1011         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1012         if (retval != sizeof(elf_ex))
1013                 goto out;
1014
1015         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1016                 goto out;
1017
1018         /* First of all, some simple consistency checks */
1019         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1020             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1021                 goto out;
1022
1023         /* Now read in all of the header information */
1024
1025         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1026         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1027
1028         error = -ENOMEM;
1029         elf_phdata = kmalloc(j, GFP_KERNEL);
1030         if (!elf_phdata)
1031                 goto out;
1032
1033         eppnt = elf_phdata;
1034         error = -ENOEXEC;
1035         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1036         if (retval != j)
1037                 goto out_free_ph;
1038
1039         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1040                 if ((eppnt + i)->p_type == PT_LOAD)
1041                         j++;
1042         if (j != 1)
1043                 goto out_free_ph;
1044
1045         while (eppnt->p_type != PT_LOAD)
1046                 eppnt++;
1047
1048         /* Now use mmap to map the library into memory. */
1049         down_write(&current->mm->mmap_sem);
1050         error = do_mmap(file,
1051                         ELF_PAGESTART(eppnt->p_vaddr),
1052                         (eppnt->p_filesz +
1053                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1054                         PROT_READ | PROT_WRITE | PROT_EXEC,
1055                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1056                         (eppnt->p_offset -
1057                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1058         up_write(&current->mm->mmap_sem);
1059         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1060                 goto out_free_ph;
1061
1062         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1063         if (padzero(elf_bss)) {
1064                 error = -EFAULT;
1065                 goto out_free_ph;
1066         }
1067
1068         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1069                             ELF_MIN_ALIGN - 1);
1070         bss = eppnt->p_memsz + eppnt->p_vaddr;
1071         if (bss > len)
1072                 vm_brk(len, bss - len);
1073         error = 0;
1074
1075 out_free_ph:
1076         kfree(elf_phdata);
1077 out:
1078         return error;
1079 }
1080
1081 #ifdef CONFIG_ELF_CORE
1082 /*
1083  * ELF core dumper
1084  *
1085  * Modelled on fs/exec.c:aout_core_dump()
1086  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1087  */
1088
1089 /*
1090  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1091  * that are useful for post-mortem analysis are included in every core dump.
1092  * In that way we ensure that the core dump is fully interpretable later
1093  * without matching up the same kernel and hardware config to see what PC values
1094  * meant. These special mappings include - vDSO, vsyscall, and other
1095  * architecture specific mappings
1096  */
1097 static bool always_dump_vma(struct vm_area_struct *vma)
1098 {
1099         /* Any vsyscall mappings? */
1100         if (vma == get_gate_vma(vma->vm_mm))
1101                 return true;
1102         /*
1103          * arch_vma_name() returns non-NULL for special architecture mappings,
1104          * such as vDSO sections.
1105          */
1106         if (arch_vma_name(vma))
1107                 return true;
1108
1109         return false;
1110 }
1111
1112 /*
1113  * Decide what to dump of a segment, part, all or none.
1114  */
1115 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1116                                    unsigned long mm_flags)
1117 {
1118 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1119
1120         /* always dump the vdso and vsyscall sections */
1121         if (always_dump_vma(vma))
1122                 goto whole;
1123
1124         if (vma->vm_flags & VM_NODUMP)
1125                 return 0;
1126
1127         /* Hugetlb memory check */
1128         if (vma->vm_flags & VM_HUGETLB) {
1129                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1130                         goto whole;
1131                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1132                         goto whole;
1133         }
1134
1135         /* Do not dump I/O mapped devices or special mappings */
1136         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1137                 return 0;
1138
1139         /* By default, dump shared memory if mapped from an anonymous file. */
1140         if (vma->vm_flags & VM_SHARED) {
1141                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1142                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1143                         goto whole;
1144                 return 0;
1145         }
1146
1147         /* Dump segments that have been written to.  */
1148         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1149                 goto whole;
1150         if (vma->vm_file == NULL)
1151                 return 0;
1152
1153         if (FILTER(MAPPED_PRIVATE))
1154                 goto whole;
1155
1156         /*
1157          * If this looks like the beginning of a DSO or executable mapping,
1158          * check for an ELF header.  If we find one, dump the first page to
1159          * aid in determining what was mapped here.
1160          */
1161         if (FILTER(ELF_HEADERS) &&
1162             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1163                 u32 __user *header = (u32 __user *) vma->vm_start;
1164                 u32 word;
1165                 mm_segment_t fs = get_fs();
1166                 /*
1167                  * Doing it this way gets the constant folded by GCC.
1168                  */
1169                 union {
1170                         u32 cmp;
1171                         char elfmag[SELFMAG];
1172                 } magic;
1173                 BUILD_BUG_ON(SELFMAG != sizeof word);
1174                 magic.elfmag[EI_MAG0] = ELFMAG0;
1175                 magic.elfmag[EI_MAG1] = ELFMAG1;
1176                 magic.elfmag[EI_MAG2] = ELFMAG2;
1177                 magic.elfmag[EI_MAG3] = ELFMAG3;
1178                 /*
1179                  * Switch to the user "segment" for get_user(),
1180                  * then put back what elf_core_dump() had in place.
1181                  */
1182                 set_fs(USER_DS);
1183                 if (unlikely(get_user(word, header)))
1184                         word = 0;
1185                 set_fs(fs);
1186                 if (word == magic.cmp)
1187                         return PAGE_SIZE;
1188         }
1189
1190 #undef  FILTER
1191
1192         return 0;
1193
1194 whole:
1195         return vma->vm_end - vma->vm_start;
1196 }
1197
/*
 * In-memory description of one ELF note, filled in by fill_note() and
 * later sized by notesize() and emitted by writenote().
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, stored into n_type on output */
	unsigned int datasz;	/* number of bytes at @data */
	void *data;		/* note payload (descriptor) */
};
1206
1207 static int notesize(struct memelfnote *en)
1208 {
1209         int sz;
1210
1211         sz = sizeof(struct elf_note);
1212         sz += roundup(strlen(en->name) + 1, 4);
1213         sz += roundup(en->datasz, 4);
1214
1215         return sz;
1216 }
1217
/*
 * Write @nr bytes from @addr to the core file and advance *@foffset by
 * the same amount.
 *
 * Beware of the hidden control flow: if dump_write() fails, this macro
 * executes "return 0" in the *calling* function.  It also expects a
 * variable named "file" to be in scope at the point of use.
 */
#define DUMP_WRITE(addr, nr, foffset)				\
	do {							\
		if (!dump_write(file, (addr), (nr)))		\
			return 0;				\
		*foffset += (nr);				\
	} while(0)
1220
1221 static int alignfile(struct file *file, loff_t *foffset)
1222 {
1223         static const char buf[4] = { 0, };
1224         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1225         return 1;
1226 }
1227
1228 static int writenote(struct memelfnote *men, struct file *file,
1229                         loff_t *foffset)
1230 {
1231         struct elf_note en;
1232         en.n_namesz = strlen(men->name) + 1;
1233         en.n_descsz = men->datasz;
1234         en.n_type = men->type;
1235
1236         DUMP_WRITE(&en, sizeof(en), foffset);
1237         DUMP_WRITE(men->name, en.n_namesz, foffset);
1238         if (!alignfile(file, foffset))
1239                 return 0;
1240         DUMP_WRITE(men->data, men->datasz, foffset);
1241         if (!alignfile(file, foffset))
1242                 return 0;
1243
1244         return 1;
1245 }
1246 #undef DUMP_WRITE
1247
1248 static void fill_elf_header(struct elfhdr *elf, int segs,
1249                             u16 machine, u32 flags, u8 osabi)
1250 {
1251         memset(elf, 0, sizeof(*elf));
1252
1253         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1254         elf->e_ident[EI_CLASS] = ELF_CLASS;
1255         elf->e_ident[EI_DATA] = ELF_DATA;
1256         elf->e_ident[EI_VERSION] = EV_CURRENT;
1257         elf->e_ident[EI_OSABI] = ELF_OSABI;
1258
1259         elf->e_type = ET_CORE;
1260         elf->e_machine = machine;
1261         elf->e_version = EV_CURRENT;
1262         elf->e_phoff = sizeof(struct elfhdr);
1263         elf->e_flags = flags;
1264         elf->e_ehsize = sizeof(struct elfhdr);
1265         elf->e_phentsize = sizeof(struct elf_phdr);
1266         elf->e_phnum = segs;
1267
1268         return;
1269 }
1270
1271 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1272 {
1273         phdr->p_type = PT_NOTE;
1274         phdr->p_offset = offset;
1275         phdr->p_vaddr = 0;
1276         phdr->p_paddr = 0;
1277         phdr->p_filesz = sz;
1278         phdr->p_memsz = 0;
1279         phdr->p_flags = 0;
1280         phdr->p_align = 0;
1281         return;
1282 }
1283
1284 static void fill_note(struct memelfnote *note, const char *name, int type, 
1285                 unsigned int sz, void *data)
1286 {
1287         note->name = name;
1288         note->type = type;
1289         note->datasz = sz;
1290         note->data = data;
1291         return;
1292 }
1293
1294 /*
1295  * fill up all the fields in prstatus from the given task struct, except
1296  * registers which need to be filled up separately.
1297  */
1298 static void fill_prstatus(struct elf_prstatus *prstatus,
1299                 struct task_struct *p, long signr)
1300 {
1301         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1302         prstatus->pr_sigpend = p->pending.signal.sig[0];
1303         prstatus->pr_sighold = p->blocked.sig[0];
1304         rcu_read_lock();
1305         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1306         rcu_read_unlock();
1307         prstatus->pr_pid = task_pid_vnr(p);
1308         prstatus->pr_pgrp = task_pgrp_vnr(p);
1309         prstatus->pr_sid = task_session_vnr(p);
1310         if (thread_group_leader(p)) {
1311                 struct task_cputime cputime;
1312
1313                 /*
1314                  * This is the record for the group leader.  It shows the
1315                  * group-wide total, not its individual thread total.
1316                  */
1317                 thread_group_cputime(p, &cputime);
1318                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1319                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1320         } else {
1321                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1322                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1323         }
1324         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1325         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1326 }
1327
1328 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1329                        struct mm_struct *mm)
1330 {
1331         const struct cred *cred;
1332         unsigned int i, len;
1333         
1334         /* first copy the parameters from user space */
1335         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1336
1337         len = mm->arg_end - mm->arg_start;
1338         if (len >= ELF_PRARGSZ)
1339                 len = ELF_PRARGSZ-1;
1340         if (copy_from_user(&psinfo->pr_psargs,
1341                            (const char __user *)mm->arg_start, len))
1342                 return -EFAULT;
1343         for(i = 0; i < len; i++)
1344                 if (psinfo->pr_psargs[i] == 0)
1345                         psinfo->pr_psargs[i] = ' ';
1346         psinfo->pr_psargs[len] = 0;
1347
1348         rcu_read_lock();
1349         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1350         rcu_read_unlock();
1351         psinfo->pr_pid = task_pid_vnr(p);
1352         psinfo->pr_pgrp = task_pgrp_vnr(p);
1353         psinfo->pr_sid = task_session_vnr(p);
1354
1355         i = p->state ? ffz(~p->state) + 1 : 0;
1356         psinfo->pr_state = i;
1357         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1358         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1359         psinfo->pr_nice = task_nice(p);
1360         psinfo->pr_flag = p->flags;
1361         rcu_read_lock();
1362         cred = __task_cred(p);
1363         SET_UID(psinfo->pr_uid, cred->uid);
1364         SET_GID(psinfo->pr_gid, cred->gid);
1365         rcu_read_unlock();
1366         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1367         
1368         return 0;
1369 }
1370
1371 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1372 {
1373         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1374         int i = 0;
1375         do
1376                 i += 2;
1377         while (auxv[i - 2] != AT_NULL);
1378         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1379 }
1380
1381 #ifdef CORE_DUMP_USE_REGSET
1382 #include <linux/regset.h>
1383
1384 struct elf_thread_core_info {
1385         struct elf_thread_core_info *next;
1386         struct task_struct *task;
1387         struct elf_prstatus prstatus;
1388         struct memelfnote notes[0];
1389 };
1390
1391 struct elf_note_info {
1392         struct elf_thread_core_info *thread;
1393         struct memelfnote psinfo;
1394         struct memelfnote auxv;
1395         size_t size;
1396         int thread_notes;
1397 };
1398
1399 /*
1400  * When a regset has a writeback hook, we call it on each thread before
1401  * dumping user memory.  On register window machines, this makes sure the
1402  * user memory backing the register data is up to date before we read it.
1403  */
1404 static void do_thread_regset_writeback(struct task_struct *task,
1405                                        const struct user_regset *regset)
1406 {
1407         if (regset->writeback)
1408                 regset->writeback(task, regset, 1);
1409 }
1410
/*
 * Default accessors for the NT_PRSTATUS payload, used by
 * fill_thread_core_info().  Each one is only defined here when nothing
 * earlier in the compilation has already provided its own definition.
 */

/* Size in bytes of the register block S. */
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

/* Size in bytes of the whole prstatus object S. */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* Address of the register block inside the prstatus pointed to by S. */
#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* Set the pr_fpvalid field of the prstatus pointed to by S to V. */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1426
1427 static int fill_thread_core_info(struct elf_thread_core_info *t,
1428                                  const struct user_regset_view *view,
1429                                  long signr, size_t *total)
1430 {
1431         unsigned int i;
1432
1433         /*
1434          * NT_PRSTATUS is the one special case, because the regset data
1435          * goes into the pr_reg field inside the note contents, rather
1436          * than being the whole note contents.  We fill the reset in here.
1437          * We assume that regset 0 is NT_PRSTATUS.
1438          */
1439         fill_prstatus(&t->prstatus, t->task, signr);
1440         (void) view->regsets[0].get(t->task, &view->regsets[0],
1441                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1442                                     PR_REG_PTR(&t->prstatus), NULL);
1443
1444         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1445                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1446         *total += notesize(&t->notes[0]);
1447
1448         do_thread_regset_writeback(t->task, &view->regsets[0]);
1449
1450         /*
1451          * Each other regset might generate a note too.  For each regset
1452          * that has no core_note_type or is inactive, we leave t->notes[i]
1453          * all zero and we'll know to skip writing it later.
1454          */
1455         for (i = 1; i < view->n; ++i) {
1456                 const struct user_regset *regset = &view->regsets[i];
1457                 do_thread_regset_writeback(t->task, regset);
1458                 if (regset->core_note_type && regset->get &&
1459                     (!regset->active || regset->active(t->task, regset))) {
1460                         int ret;
1461                         size_t size = regset->n * regset->size;
1462                         void *data = kmalloc(size, GFP_KERNEL);
1463                         if (unlikely(!data))
1464                                 return 0;
1465                         ret = regset->get(t->task, regset,
1466                                           0, size, data, NULL);
1467                         if (unlikely(ret))
1468                                 kfree(data);
1469                         else {
1470                                 if (regset->core_note_type != NT_PRFPREG)
1471                                         fill_note(&t->notes[i], "LINUX",
1472                                                   regset->core_note_type,
1473                                                   size, data);
1474                                 else {
1475                                         SET_PR_FPVALID(&t->prstatus, 1);
1476                                         fill_note(&t->notes[i], "CORE",
1477                                                   NT_PRFPREG, size, data);
1478                                 }
1479                                 *total += notesize(&t->notes[i]);
1480                         }
1481                 }
1482         }
1483
1484         return 1;
1485 }
1486
1487 static int fill_note_info(struct elfhdr *elf, int phdrs,
1488                           struct elf_note_info *info,
1489                           long signr, struct pt_regs *regs)
1490 {
1491         struct task_struct *dump_task = current;
1492         const struct user_regset_view *view = task_user_regset_view(dump_task);
1493         struct elf_thread_core_info *t;
1494         struct elf_prpsinfo *psinfo;
1495         struct core_thread *ct;
1496         unsigned int i;
1497
1498         info->size = 0;
1499         info->thread = NULL;
1500
1501         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1502         if (psinfo == NULL)
1503                 return 0;
1504
1505         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1506
1507         /*
1508          * Figure out how many notes we're going to need for each thread.
1509          */
1510         info->thread_notes = 0;
1511         for (i = 0; i < view->n; ++i)
1512                 if (view->regsets[i].core_note_type != 0)
1513                         ++info->thread_notes;
1514
1515         /*
1516          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1517          * since it is our one special case.
1518          */
1519         if (unlikely(info->thread_notes == 0) ||
1520             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1521                 WARN_ON(1);
1522                 return 0;
1523         }
1524
1525         /*
1526          * Initialize the ELF file header.
1527          */
1528         fill_elf_header(elf, phdrs,
1529                         view->e_machine, view->e_flags, view->ei_osabi);
1530
1531         /*
1532          * Allocate a structure for each thread.
1533          */
1534         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1535                 t = kzalloc(offsetof(struct elf_thread_core_info,
1536                                      notes[info->thread_notes]),
1537                             GFP_KERNEL);
1538                 if (unlikely(!t))
1539                         return 0;
1540
1541                 t->task = ct->task;
1542                 if (ct->task == dump_task || !info->thread) {
1543                         t->next = info->thread;
1544                         info->thread = t;
1545                 } else {
1546                         /*
1547                          * Make sure to keep the original task at
1548                          * the head of the list.
1549                          */
1550                         t->next = info->thread->next;
1551                         info->thread->next = t;
1552                 }
1553         }
1554
1555         /*
1556          * Now fill in each thread's information.
1557          */
1558         for (t = info->thread; t != NULL; t = t->next)
1559                 if (!fill_thread_core_info(t, view, signr, &info->size))
1560                         return 0;
1561
1562         /*
1563          * Fill in the two process-wide notes.
1564          */
1565         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1566         info->size += notesize(&info->psinfo);
1567
1568         fill_auxv_note(&info->auxv, current->mm);
1569         info->size += notesize(&info->auxv);
1570
1571         return 1;
1572 }
1573
1574 static size_t get_note_info_size(struct elf_note_info *info)
1575 {
1576         return info->size;
1577 }
1578
1579 /*
1580  * Write all the notes for each thread.  When writing the first thread, the
1581  * process-wide notes are interleaved after the first thread-specific note.
1582  */
1583 static int write_note_info(struct elf_note_info *info,
1584                            struct file *file, loff_t *foffset)
1585 {
1586         bool first = 1;
1587         struct elf_thread_core_info *t = info->thread;
1588
1589         do {
1590                 int i;
1591
1592                 if (!writenote(&t->notes[0], file, foffset))
1593                         return 0;
1594
1595                 if (first && !writenote(&info->psinfo, file, foffset))
1596                         return 0;
1597                 if (first && !writenote(&info->auxv, file, foffset))
1598                         return 0;
1599
1600                 for (i = 1; i < info->thread_notes; ++i)
1601                         if (t->notes[i].data &&
1602                             !writenote(&t->notes[i], file, foffset))
1603                                 return 0;
1604
1605                 first = 0;
1606                 t = t->next;
1607         } while (t);
1608
1609         return 1;
1610 }
1611
1612 static void free_note_info(struct elf_note_info *info)
1613 {
1614         struct elf_thread_core_info *threads = info->thread;
1615         while (threads) {
1616                 unsigned int i;
1617                 struct elf_thread_core_info *t = threads;
1618                 threads = t->next;
1619                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1620                 for (i = 1; i < info->thread_notes; ++i)
1621                         kfree(t->notes[i].data);
1622                 kfree(t);
1623         }
1624         kfree(info->psinfo.data);
1625 }
1626
1627 #else
1628
/*
 * Here is the structure in which the status of each thread is captured:
 * the thread's register state plus the notes that describe it.
 */
struct elf_thread_status
{
        struct list_head list;          /* link in elf_note_info.thread_list */
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct task_struct *thread;     /* task this record describes */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
#endif
        struct memelfnote notes[3];     /* notes referring to the buffers above */
        int num_notes;                  /* how many notes were filled in */
};
1642
1643 /*
1644  * In order to add the specific thread information for the elf file format,
1645  * we need to keep a linked list of every threads pr_status and then create
1646  * a single section for them in the final core file.
1647  */
1648 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1649 {
1650         int sz = 0;
1651         struct task_struct *p = t->thread;
1652         t->num_notes = 0;
1653
1654         fill_prstatus(&t->prstatus, p, signr);
1655         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1656         
1657         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1658                   &(t->prstatus));
1659         t->num_notes++;
1660         sz += notesize(&t->notes[0]);
1661
1662         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1663                                                                 &t->fpu))) {
1664                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1665                           &(t->fpu));
1666                 t->num_notes++;
1667                 sz += notesize(&t->notes[1]);
1668         }
1669
1670 #ifdef ELF_CORE_COPY_XFPREGS
1671         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1672                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1673                           sizeof(t->xfpu), &t->xfpu);
1674                 t->num_notes++;
1675                 sz += notesize(&t->notes[2]);
1676         }
1677 #endif  
1678         return sz;
1679 }
1680
/*
 * Everything collected for the notes segment of a core dump: the notes and
 * register buffers for the dumping task, plus one elf_thread_status record
 * per additional thread.
 */
struct elf_note_info {
        struct memelfnote *notes;       /* note array for the dumping task */
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;   /* elf_thread_status records */
        elf_fpregset_t *fpu;            /* NT_PRFPREG buffer */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t *xfpu;          /* ELF_CORE_XFPREG_TYPE buffer */
#endif
        int thread_status_size;         /* summed note size of thread_list */
        int numnote;                    /* entries of notes[] in use */
};
1693
1694 static int elf_note_info_init(struct elf_note_info *info)
1695 {
1696         memset(info, 0, sizeof(*info));
1697         INIT_LIST_HEAD(&info->thread_list);
1698
1699         /* Allocate space for six ELF notes */
1700         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1701         if (!info->notes)
1702                 return 0;
1703         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1704         if (!info->psinfo)
1705                 goto notes_free;
1706         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1707         if (!info->prstatus)
1708                 goto psinfo_free;
1709         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1710         if (!info->fpu)
1711                 goto prstatus_free;
1712 #ifdef ELF_CORE_COPY_XFPREGS
1713         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1714         if (!info->xfpu)
1715                 goto fpu_free;
1716 #endif
1717         return 1;
1718 #ifdef ELF_CORE_COPY_XFPREGS
1719  fpu_free:
1720         kfree(info->fpu);
1721 #endif
1722  prstatus_free:
1723         kfree(info->prstatus);
1724  psinfo_free:
1725         kfree(info->psinfo);
1726  notes_free:
1727         kfree(info->notes);
1728         return 0;
1729 }
1730
1731 static int fill_note_info(struct elfhdr *elf, int phdrs,
1732                           struct elf_note_info *info,
1733                           long signr, struct pt_regs *regs)
1734 {
1735         struct list_head *t;
1736
1737         if (!elf_note_info_init(info))
1738                 return 0;
1739
1740         if (signr) {
1741                 struct core_thread *ct;
1742                 struct elf_thread_status *ets;
1743
1744                 for (ct = current->mm->core_state->dumper.next;
1745                                                 ct; ct = ct->next) {
1746                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1747                         if (!ets)
1748                                 return 0;
1749
1750                         ets->thread = ct->task;
1751                         list_add(&ets->list, &info->thread_list);
1752                 }
1753
1754                 list_for_each(t, &info->thread_list) {
1755                         int sz;
1756
1757                         ets = list_entry(t, struct elf_thread_status, list);
1758                         sz = elf_dump_thread_status(signr, ets);
1759                         info->thread_status_size += sz;
1760                 }
1761         }
1762         /* now collect the dump for the current */
1763         memset(info->prstatus, 0, sizeof(*info->prstatus));
1764         fill_prstatus(info->prstatus, current, signr);
1765         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1766
1767         /* Set up header */
1768         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1769
1770         /*
1771          * Set up the notes in similar form to SVR4 core dumps made
1772          * with info from their /proc.
1773          */
1774
1775         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1776                   sizeof(*info->prstatus), info->prstatus);
1777         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1778         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1779                   sizeof(*info->psinfo), info->psinfo);
1780
1781         info->numnote = 2;
1782
1783         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1784
1785         /* Try to dump the FPU. */
1786         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1787                                                                info->fpu);
1788         if (info->prstatus->pr_fpvalid)
1789                 fill_note(info->notes + info->numnote++,
1790                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1791 #ifdef ELF_CORE_COPY_XFPREGS
1792         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1793                 fill_note(info->notes + info->numnote++,
1794                           "LINUX", ELF_CORE_XFPREG_TYPE,
1795                           sizeof(*info->xfpu), info->xfpu);
1796 #endif
1797
1798         return 1;
1799 }
1800
1801 static size_t get_note_info_size(struct elf_note_info *info)
1802 {
1803         int sz = 0;
1804         int i;
1805
1806         for (i = 0; i < info->numnote; i++)
1807                 sz += notesize(info->notes + i);
1808
1809         sz += info->thread_status_size;
1810
1811         return sz;
1812 }
1813
1814 static int write_note_info(struct elf_note_info *info,
1815                            struct file *file, loff_t *foffset)
1816 {
1817         int i;
1818         struct list_head *t;
1819
1820         for (i = 0; i < info->numnote; i++)
1821                 if (!writenote(info->notes + i, file, foffset))
1822                         return 0;
1823
1824         /* write out the thread status notes section */
1825         list_for_each(t, &info->thread_list) {
1826                 struct elf_thread_status *tmp =
1827                                 list_entry(t, struct elf_thread_status, list);
1828
1829                 for (i = 0; i < tmp->num_notes; i++)
1830                         if (!writenote(&tmp->notes[i], file, foffset))
1831                                 return 0;
1832         }
1833
1834         return 1;
1835 }
1836
1837 static void free_note_info(struct elf_note_info *info)
1838 {
1839         while (!list_empty(&info->thread_list)) {
1840                 struct list_head *tmp = info->thread_list.next;
1841                 list_del(tmp);
1842                 kfree(list_entry(tmp, struct elf_thread_status, list));
1843         }
1844
1845         kfree(info->prstatus);
1846         kfree(info->psinfo);
1847         kfree(info->notes);
1848         kfree(info->fpu);
1849 #ifdef ELF_CORE_COPY_XFPREGS
1850         kfree(info->xfpu);
1851 #endif
1852 }
1853
1854 #endif
1855
1856 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1857                                         struct vm_area_struct *gate_vma)
1858 {
1859         struct vm_area_struct *ret = tsk->mm->mmap;
1860
1861         if (ret)
1862                 return ret;
1863         return gate_vma;
1864 }
1865 /*
1866  * Helper function for iterating across a vma list.  It ensures that the caller
1867  * will visit `gate_vma' prior to terminating the search.
1868  */
1869 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1870                                         struct vm_area_struct *gate_vma)
1871 {
1872         struct vm_area_struct *ret;
1873
1874         ret = this_vma->vm_next;
1875         if (ret)
1876                 return ret;
1877         if (this_vma == gate_vma)
1878                 return NULL;
1879         return gate_vma;
1880 }
1881
1882 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1883                              elf_addr_t e_shoff, int segs)
1884 {
1885         elf->e_shoff = e_shoff;
1886         elf->e_shentsize = sizeof(*shdr4extnum);
1887         elf->e_shnum = 1;
1888         elf->e_shstrndx = SHN_UNDEF;
1889
1890         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1891
1892         shdr4extnum->sh_type = SHT_NULL;
1893         shdr4extnum->sh_size = elf->e_shnum;
1894         shdr4extnum->sh_link = elf->e_shstrndx;
1895         shdr4extnum->sh_info = segs;
1896 }
1897
1898 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1899                                      unsigned long mm_flags)
1900 {
1901         struct vm_area_struct *vma;
1902         size_t size = 0;
1903
1904         for (vma = first_vma(current, gate_vma); vma != NULL;
1905              vma = next_vma(vma, gate_vma))
1906                 size += vma_dump_size(vma, mm_flags);
1907         return size;
1908 }
1909
/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 *
 * Returns 0 when the ELF header buffer cannot be allocated or when
 * fill_note_info() fails.  Returns 1 once the notes have been collected,
 * including when a later write or seek stops the dump early.
 */
static int elf_core_dump(struct coredump_params *cprm)
{
        int has_dumped = 0;
        mm_segment_t fs;
        int segs;
        /* Bytes accounted so far against cprm->limit. */
        size_t size = 0;
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        /*
         * offset:  layout cursor used to compute where things will land.
         * dataoff: page-aligned start of the memory contents.
         * foffset: file position handed to the note writers.
         */
        loff_t offset = 0, dataoff, foffset;
        struct elf_note_info info;
        struct elf_phdr *phdr4note = NULL;
        struct elf_shdr *shdr4extnum = NULL;
        Elf_Half e_phnum;
        elf_addr_t e_shoff;

        /*
         * We no longer stop all VM operations.
         *
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated. So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */

        /* alloc memory for large data structures: too large to be on stack */
        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
        if (!elf)
                goto out;
        /*
         * The number of segs is recorded in the ELF header as a 16-bit value.
         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
         */
        segs = current->mm->map_count;
        segs += elf_core_extra_phdrs();

        gate_vma = get_gate_vma(current->mm);
        if (gate_vma != NULL)
                segs++;

        /* for notes section */
        segs++;

        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
         * this, kernel supports extended numbering. Have a look at
         * include/linux/elf.h for further information. */
        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

        /*
         * Collect all the non-memory information about the process for the
         * notes.  This also sets up the file header.
         */
        if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
                goto cleanup;

        /* From here on the function reports 1, whatever happens below. */
        has_dumped = 1;
        current->flags |= PF_DUMPCORE;

        /*
         * Save the current address limit and switch to KERNEL_DS; it is
         * restored at end_coredump.
         * NOTE(review): presumably needed because dump_write() is handed
         * kernel buffers below - confirm against dump_write().
         */
        fs = get_fs();
        set_fs(KERNEL_DS);

        /* First pass: work out where each part of the file will live. */
        offset += sizeof(*elf);                         /* Elf header */
        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
        foffset = offset;

        /* Write notes phdr entry */
        {
                size_t sz = get_note_info_size(&info);

                sz += elf_coredump_extra_notes_size();

                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
                if (!phdr4note)
                        goto end_coredump;

                fill_elf_note_phdr(phdr4note, sz, offset);
                offset += sz;
        }

        /* Memory contents start on the next ELF_EXEC_PAGESIZE boundary. */
        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

        /* The section header, if one is needed, follows all dumped data. */
        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
        offset += elf_core_extra_data_size();
        e_shoff = offset;

        if (e_phnum == PN_XNUM) {
                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
                if (!shdr4extnum)
                        goto end_coredump;
                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
        }

        /* Rewind so the PT_LOAD p_offset values start at the data area. */
        offset = dataoff;

        /* Second pass: write, checking the limit before each piece. */
        size += sizeof(*elf);
        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
                goto end_coredump;

        size += sizeof(*phdr4note);
        if (size > cprm->limit
            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
                goto end_coredump;

        /* Write program headers for segments dump */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                struct elf_phdr phdr;

                phdr.p_type = PT_LOAD;
                phdr.p_offset = offset;
                phdr.p_vaddr = vma->vm_start;
                phdr.p_paddr = 0;
                /* p_filesz is what gets dumped; p_memsz is the full vma. */
                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
                phdr.p_memsz = vma->vm_end - vma->vm_start;
                offset += phdr.p_filesz;
                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
                if (vma->vm_flags & VM_WRITE)
                        phdr.p_flags |= PF_W;
                if (vma->vm_flags & VM_EXEC)
                        phdr.p_flags |= PF_X;
                phdr.p_align = ELF_EXEC_PAGESIZE;

                size += sizeof(phdr);
                if (size > cprm->limit
                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
                        goto end_coredump;
        }

        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
                goto end_coredump;

        /* write out the notes section */
        if (!write_note_info(&info, cprm->file, &foffset))
                goto end_coredump;

        /* Unlike the calls around it, a non-zero return here aborts. */
        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
                goto end_coredump;

        /* Align to page */
        if (!dump_seek(cprm->file, dataoff - foffset))
                goto end_coredump;

        /* Dump the memory contents, one page at a time. */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                unsigned long addr;
                unsigned long end;

                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);

                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
                        struct page *page;
                        int stop;

                        page = get_dump_page(addr);
                        if (page) {
                                void *kaddr = kmap(page);
                                /* Stop on hitting the limit or a failed write. */
                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
                                        !dump_write(cprm->file, kaddr,
                                                    PAGE_SIZE);
                                kunmap(page);
                                page_cache_release(page);
                        } else
                                /* No page: seek past it; size is not charged. */
                                stop = !dump_seek(cprm->file, PAGE_SIZE);
                        if (stop)
                                goto end_coredump;
                }
        }

        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
                goto end_coredump;

        /* The extended-numbering section header goes last, at e_shoff. */
        if (e_phnum == PN_XNUM) {
                size += sizeof(*shdr4extnum);
                if (size > cprm->limit
                    || !dump_write(cprm->file, shdr4extnum,
                                   sizeof(*shdr4extnum)))
                        goto end_coredump;
        }

end_coredump:
        set_fs(fs);

        /*
         * Also reached directly when fill_note_info() fails, which skips
         * the set_fs() above because fs has not been saved at that point.
         */
cleanup:
        free_note_info(&info);
        kfree(shdr4extnum);
        kfree(phdr4note);
        kfree(elf);
out:
        return has_dumped;
}
2109
2110 #endif          /* CONFIG_ELF_CORE */
2111
/* Init hook: register elf_format as a binary format.  Always returns 0. */
static int __init init_elf_binfmt(void)
{
        register_binfmt(&elf_format);
        return 0;
}
2117
/* Exit hook: undo the registration done by init_elf_binfmt(). */
static void __exit exit_elf_binfmt(void)
{
        /* Remove the ELF loader. */
        unregister_binfmt(&elf_format);
}
2123
/* Wire the init hook in as a core initcall and the exit hook as module exit. */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");