Merge branch 'x86-efi-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 28 Feb 2013 00:17:42 +0000 (16:17 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 28 Feb 2013 00:17:42 +0000 (16:17 -0800)
Pull x86/EFI changes from Peter Anvin:

 - Improve the initrd handling in the EFI boot stub by allowing forward
   slashes in the pathname - from Chun-Yi Lee.

 - Clean up code duplication in the EFI mixed kernel/firmware code - from
   Satoru Takeuchi.

 - efivarfs bug fixes for more strict filename validation, with lots of
   input from Al Viro.

* 'x86-efi-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, efi: remove duplicate code in setup_arch() by using, efi_is_native()
  efivarfs: guid part of filenames are case-insensitive
  efivarfs: Validate filenames much more aggressively
  efivarfs: Use sizeof() instead of magic number
  x86, efi: Allow slash in file path of initrd

1  2 
arch/x86/kernel/setup.c
arch/x86/platform/efi/efi.c
drivers/firmware/efivars.c

diff --combined arch/x86/kernel/setup.c
  #include <asm/topology.h>
  #include <asm/apicdef.h>
  #include <asm/amd_nb.h>
 -#ifdef CONFIG_X86_64
 -#include <asm/numa_64.h>
 -#endif
  #include <asm/mce.h>
  #include <asm/alternative.h>
  #include <asm/prom.h>
  
  /*
 - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
 - * The direct mapping extends to max_pfn_mapped, so that we can directly access
 - * apertures, ACPI and other tables without having to play with fixmaps.
 + * max_low_pfn_mapped: highest direct mapped pfn under 4GB
 + * max_pfn_mapped:     highest direct mapped pfn over 4GB
 + *
 + * The direct mapping only covers E820_RAM regions, so the ranges and gaps are
 + * represented by pfn_mapped
   */
  unsigned long max_low_pfn_mapped;
  unsigned long max_pfn_mapped;
@@@ -275,7 -276,18 +275,7 @@@ void * __init extend_brk(size_t size, s
        return ret;
  }
  
 -#ifdef CONFIG_X86_64
 -static void __init init_gbpages(void)
 -{
 -      if (direct_gbpages && cpu_has_gbpages)
 -              printk(KERN_INFO "Using GB pages for direct mapping\n");
 -      else
 -              direct_gbpages = 0;
 -}
 -#else
 -static inline void init_gbpages(void)
 -{
 -}
 +#ifdef CONFIG_X86_32
  static void __init cleanup_highmap(void)
  {
  }
  static void __init reserve_brk(void)
  {
        if (_brk_end > _brk_start)
 -              memblock_reserve(__pa(_brk_start),
 -                               __pa(_brk_end) - __pa(_brk_start));
 +              memblock_reserve(__pa_symbol(_brk_start),
 +                               _brk_end - _brk_start);
  
        /* Mark brk area as locked down and no longer taking any
           new allocations */
  
  #ifdef CONFIG_BLK_DEV_INITRD
  
 +static u64 __init get_ramdisk_image(void)
 +{
 +      u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 +
 +      ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32;
 +
 +      return ramdisk_image;
 +}
 +static u64 __init get_ramdisk_size(void)
 +{
 +      u64 ramdisk_size = boot_params.hdr.ramdisk_size;
 +
 +      ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32;
 +
 +      return ramdisk_size;
 +}
 +
  #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
  static void __init relocate_initrd(void)
  {
        /* Assume only end is not page aligned */
 -      u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 -      u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 +      u64 ramdisk_image = get_ramdisk_image();
 +      u64 ramdisk_size  = get_ramdisk_size();
        u64 area_size     = PAGE_ALIGN(ramdisk_size);
 -      u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
        u64 ramdisk_here;
        unsigned long slop, clen, mapaddr;
        char *p, *q;
  
 -      /* We need to move the initrd down into lowmem */
 -      ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
 -                                       PAGE_SIZE);
 +      /* We need to move the initrd down into directly mapped mem */
 +      ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 +                                               area_size, PAGE_SIZE);
  
        if (!ramdisk_here)
                panic("Cannot find place for new RAMDISK of size %lld\n",
                         ramdisk_size);
  
 -      /* Note: this includes all the lowmem currently occupied by
 +      /* Note: this includes all the mem currently occupied by
           the initrd, we rely on that fact to keep the data intact. */
        memblock_reserve(ramdisk_here, area_size);
        initrd_start = ramdisk_here + PAGE_OFFSET;
  
        q = (char *)initrd_start;
  
 -      /* Copy any lowmem portion of the initrd */
 -      if (ramdisk_image < end_of_lowmem) {
 -              clen = end_of_lowmem - ramdisk_image;
 -              p = (char *)__va(ramdisk_image);
 -              memcpy(q, p, clen);
 -              q += clen;
 -              ramdisk_image += clen;
 -              ramdisk_size  -= clen;
 -      }
 -
 -      /* Copy the highmem portion of the initrd */
 +      /* Copy the initrd */
        while (ramdisk_size) {
                slop = ramdisk_image & ~PAGE_MASK;
                clen = ramdisk_size;
                ramdisk_image += clen;
                ramdisk_size  -= clen;
        }
 -      /* high pages is not converted by early_res_to_bootmem */
 -      ramdisk_image = boot_params.hdr.ramdisk_image;
 -      ramdisk_size  = boot_params.hdr.ramdisk_size;
 +
 +      ramdisk_image = get_ramdisk_image();
 +      ramdisk_size  = get_ramdisk_size();
        printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to"
                " [mem %#010llx-%#010llx]\n",
                ramdisk_image, ramdisk_image + ramdisk_size - 1,
                ramdisk_here, ramdisk_here + ramdisk_size - 1);
  }
  
 +static void __init early_reserve_initrd(void)
 +{
 +      /* Assume only end is not page aligned */
 +      u64 ramdisk_image = get_ramdisk_image();
 +      u64 ramdisk_size  = get_ramdisk_size();
 +      u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
 +
 +      if (!boot_params.hdr.type_of_loader ||
 +          !ramdisk_image || !ramdisk_size)
 +              return;         /* No initrd provided by bootloader */
 +
 +      memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
 +}
  static void __init reserve_initrd(void)
  {
        /* Assume only end is not page aligned */
 -      u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 -      u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 +      u64 ramdisk_image = get_ramdisk_image();
 +      u64 ramdisk_size  = get_ramdisk_size();
        u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
 -      u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 +      u64 mapped_size;
  
        if (!boot_params.hdr.type_of_loader ||
            !ramdisk_image || !ramdisk_size)
  
        initrd_start = 0;
  
 -      if (ramdisk_size >= (end_of_lowmem>>1)) {
 +      mapped_size = memblock_mem_size(max_pfn_mapped);
 +      if (ramdisk_size >= (mapped_size>>1))
                panic("initrd too large to handle, "
                       "disabling initrd (%lld needed, %lld available)\n",
 -                     ramdisk_size, end_of_lowmem>>1);
 -      }
 +                     ramdisk_size, mapped_size>>1);
  
        printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
                        ramdisk_end - 1);
  
 -
 -      if (ramdisk_end <= end_of_lowmem) {
 -              /* All in lowmem, easy case */
 -              /*
 -               * don't need to reserve again, already reserved early
 -               * in i386_start_kernel
 -               */
 +      if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image),
 +                              PFN_DOWN(ramdisk_end))) {
 +              /* All are mapped, easy case */
                initrd_start = ramdisk_image + PAGE_OFFSET;
                initrd_end = initrd_start + ramdisk_size;
                return;
        memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
  }
  #else
 +static void __init early_reserve_initrd(void)
 +{
 +}
  static void __init reserve_initrd(void)
  {
  }
@@@ -425,6 -419,8 +425,6 @@@ static void __init parse_setup_data(voi
        struct setup_data *data;
        u64 pa_data;
  
 -      if (boot_params.hdr.version < 0x0209)
 -              return;
        pa_data = boot_params.hdr.setup_data;
        while (pa_data) {
                u32 data_len, map_len;
@@@ -460,6 -456,8 +460,6 @@@ static void __init e820_reserve_setup_d
        u64 pa_data;
        int found = 0;
  
 -      if (boot_params.hdr.version < 0x0209)
 -              return;
        pa_data = boot_params.hdr.setup_data;
        while (pa_data) {
                data = early_memremap(pa_data, sizeof(*data));
@@@ -483,6 -481,8 +483,6 @@@ static void __init memblock_x86_reserve
        struct setup_data *data;
        u64 pa_data;
  
 -      if (boot_params.hdr.version < 0x0209)
 -              return;
        pa_data = boot_params.hdr.setup_data;
        while (pa_data) {
                data = early_memremap(pa_data, sizeof(*data));
  /*
   * Keep the crash kernel below this limit.  On 32 bits earlier kernels
   * would limit the kernel to the low 512 MiB due to mapping restrictions.
 - * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
 - * limit once kexec-tools are fixed.
   */
  #ifdef CONFIG_X86_32
  # define CRASH_KERNEL_ADDR_MAX        (512 << 20)
  #else
 -# define CRASH_KERNEL_ADDR_MAX        (896 << 20)
 +# define CRASH_KERNEL_ADDR_MAX        MAXMEM
  #endif
  
 +static void __init reserve_crashkernel_low(void)
 +{
 +#ifdef CONFIG_X86_64
 +      const unsigned long long alignment = 16<<20;    /* 16M */
 +      unsigned long long low_base = 0, low_size = 0;
 +      unsigned long total_low_mem;
 +      unsigned long long base;
 +      int ret;
 +
 +      total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
 +      ret = parse_crashkernel_low(boot_command_line, total_low_mem,
 +                                              &low_size, &base);
 +      if (ret != 0 || low_size <= 0)
 +              return;
 +
 +      low_base = memblock_find_in_range(low_size, (1ULL<<32),
 +                                      low_size, alignment);
 +
 +      if (!low_base) {
 +              pr_info("crashkernel low reservation failed - No suitable area found.\n");
 +
 +              return;
 +      }
 +
 +      memblock_reserve(low_base, low_size);
 +      pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
 +                      (unsigned long)(low_size >> 20),
 +                      (unsigned long)(low_base >> 20),
 +                      (unsigned long)(total_low_mem >> 20));
 +      crashk_low_res.start = low_base;
 +      crashk_low_res.end   = low_base + low_size - 1;
 +      insert_resource(&iomem_resource, &crashk_low_res);
 +#endif
 +}
 +
  static void __init reserve_crashkernel(void)
  {
 +      const unsigned long long alignment = 16<<20;    /* 16M */
        unsigned long long total_mem;
        unsigned long long crash_size, crash_base;
        int ret;
  
        /* 0 means: find the address automatically */
        if (crash_base <= 0) {
 -              const unsigned long long alignment = 16<<20;    /* 16M */
 -
                /*
                 *  kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
                 */
                        pr_info("crashkernel reservation failed - No suitable area found.\n");
                        return;
                }
 +
        } else {
                unsigned long long start;
  
        crashk_res.start = crash_base;
        crashk_res.end   = crash_base + crash_size - 1;
        insert_resource(&iomem_resource, &crashk_res);
 +
 +      if (crash_base >= (1ULL<<32))
 +              reserve_crashkernel_low();
  }
  #else
  static void __init reserve_crashkernel(void)
@@@ -644,6 -608,8 +644,6 @@@ static __init void reserve_ibft_region(
                memblock_reserve(addr, size);
  }
  
 -static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
 -
  static bool __init snb_gfx_workaround_needed(void)
  {
  #ifdef CONFIG_PCI
@@@ -732,7 -698,8 +732,7 @@@ static void __init trim_bios_range(void
         * since some BIOSes are known to corrupt low memory.  See the
         * Kconfig help text for X86_RESERVE_LOW.
         */
 -      e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
 -                        E820_RAM, E820_RESERVED);
 +      e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
  
        /*
         * special case: Some BIOSen report the PC BIOS
        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
  }
  
 +/* called before trim_bios_range() to spare extra sanitize */
 +static void __init e820_add_kernel_range(void)
 +{
 +      u64 start = __pa_symbol(_text);
 +      u64 size = __pa_symbol(_end) - start;
 +
 +      /*
 +       * Complain if .text .data and .bss are not marked as E820_RAM and
 +       * attempt to fix it by adding the range. We may have a confused BIOS,
 +       * or the user may have used memmap=exactmap or memmap=xxM$yyM to
 +       * exclude kernel range. If we really are running on top non-RAM,
 +       * we will crash later anyways.
 +       */
 +      if (e820_all_mapped(start, start + size, E820_RAM))
 +              return;
 +
 +      pr_warn(".text .data .bss are not marked as E820_RAM!\n");
 +      e820_remove_range(start, size, E820_RAM, 0);
 +      e820_add_region(start, size, E820_RAM);
 +}
 +
 +static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
 +
  static int __init parse_reservelow(char *p)
  {
        unsigned long long size;
  
  early_param("reservelow", parse_reservelow);
  
 +static void __init trim_low_memory_range(void)
 +{
 +      memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
 +}
 +      
  /*
   * Determine if we were loaded by an EFI loader.  If so, then we have also been
   * passed the efi memmap, systab, etc., so we should use these data structures
  
  void __init setup_arch(char **cmdline_p)
  {
 +      memblock_reserve(__pa_symbol(_text),
 +                       (unsigned long)__bss_stop - (unsigned long)_text);
 +
 +      early_reserve_initrd();
 +
 +      /*
 +       * At this point everything still needed from the boot loader
 +       * or BIOS or kernel text should be early reserved or marked not
 +       * RAM in e820. All other memory is free game.
 +       */
 +
  #ifdef CONFIG_X86_32
        memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
        visws_early_detect();
        init_mm.end_data = (unsigned long) _edata;
        init_mm.brk = _brk_end;
  
 -      code_resource.start = virt_to_phys(_text);
 -      code_resource.end = virt_to_phys(_etext)-1;
 -      data_resource.start = virt_to_phys(_etext);
 -      data_resource.end = virt_to_phys(_edata)-1;
 -      bss_resource.start = virt_to_phys(&__bss_start);
 -      bss_resource.end = virt_to_phys(&__bss_stop)-1;
 +      code_resource.start = __pa_symbol(_text);
 +      code_resource.end = __pa_symbol(_etext)-1;
 +      data_resource.start = __pa_symbol(_etext);
 +      data_resource.end = __pa_symbol(_edata)-1;
 +      bss_resource.start = __pa_symbol(__bss_start);
 +      bss_resource.end = __pa_symbol(__bss_stop)-1;
  
  #ifdef CONFIG_CMDLINE_BOOL
  #ifdef CONFIG_CMDLINE_OVERRIDE
        insert_resource(&iomem_resource, &data_resource);
        insert_resource(&iomem_resource, &bss_resource);
  
 +      e820_add_kernel_range();
        trim_bios_range();
  #ifdef CONFIG_X86_32
        if (ppro_with_ram_bug()) {
  
        reserve_ibft_region();
  
 +      early_alloc_pgt_buf();
 +
        /*
         * Need to conclude brk, before memblock_x86_fill()
         *  it could use memblock_find_in_range, could overlap with
  
        cleanup_highmap();
  
 -      memblock.current_limit = get_max_mapped();
 +      memblock.current_limit = ISA_END_ADDRESS;
        memblock_x86_fill();
  
        /*
        setup_bios_corruption_check();
  #endif
  
 +      /*
 +       * In the memory hotplug case, the kernel needs info from SRAT to
 +       * determine which memory is hotpluggable before allocating memory
 +       * using memblock.
 +       */
 +      acpi_boot_table_init();
 +      early_acpi_boot_init();
 +      early_parse_srat();
 +
 +#ifdef CONFIG_X86_32
        printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
                        (max_pfn_mapped<<PAGE_SHIFT) - 1);
 +#endif
  
 -      setup_real_mode();
 +      reserve_real_mode();
  
        trim_platform_memory_ranges();
 +      trim_low_memory_range();
  
 -      init_gbpages();
 +      init_mem_mapping();
  
 -      /* max_pfn_mapped is updated here */
 -      max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 -      max_pfn_mapped = max_low_pfn_mapped;
 +      early_trap_pf_init();
  
 -#ifdef CONFIG_X86_64
 -      if (max_pfn > max_low_pfn) {
 -              int i;
 -              unsigned long start, end;
 -              unsigned long start_pfn, end_pfn;
 -
 -              for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
 -                                                       NULL) {
 -
 -                      end = PFN_PHYS(end_pfn);
 -                      if (end <= (1UL<<32))
 -                              continue;
 -
 -                      start = PFN_PHYS(start_pfn);
 -                      max_pfn_mapped = init_memory_mapping(
 -                                              max((1UL<<32), start), end);
 -              }
 +      setup_real_mode();
  
 -              /* can we preseve max_low_pfn ?*/
 -              max_low_pfn = max_pfn;
 -      }
 -#endif
        memblock.current_limit = get_max_mapped();
        dma_contiguous_reserve(0);
  
        /*
         * Parse the ACPI tables for possible boot-time SMP configuration.
         */
 -      acpi_boot_table_init();
 -
 -      early_acpi_boot_init();
 -
        initmem_init();
        memblock_find_dma_reserve();
  
         * mismatched firmware/kernel archtectures since there is no
         * support for runtime services.
         */
-       if (efi_enabled(EFI_BOOT) &&
-           IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) {
+       if (efi_enabled(EFI_BOOT) && !efi_is_native()) {
                pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
                efi_unmap_memmap();
        }
@@@ -69,11 -69,6 +69,6 @@@ struct efi_memory_map memmap
  static struct efi efi_phys __initdata;
  static efi_system_table_t efi_systab __initdata;
  
- static inline bool efi_is_native(void)
- {
-       return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
- }
  unsigned long x86_efi_facility;
  
  /*
@@@ -85,10 -80,9 +80,10 @@@ int efi_enabled(int facility
  }
  EXPORT_SYMBOL(efi_enabled);
  
 +static bool __initdata disable_runtime = false;
  static int __init setup_noefi(char *arg)
  {
 -      clear_bit(EFI_BOOT, &x86_efi_facility);
 +      disable_runtime = true;
        return 0;
  }
  early_param("noefi", setup_noefi);
@@@ -417,8 -411,8 +412,8 @@@ void __init efi_reserve_boot_services(v
                 * - Not within any part of the kernel
                 * - Not the bios reserved area
                */
 -              if ((start+size >= virt_to_phys(_text)
 -                              && start <= virt_to_phys(_end)) ||
 +              if ((start+size >= __pa_symbol(_text)
 +                              && start <= __pa_symbol(_end)) ||
                        !e820_all_mapped(start, start+size, E820_RAM) ||
                        memblock_is_region_reserved(start, size)) {
                        /* Could not reserve, skip it */
@@@ -735,7 -729,7 +730,7 @@@ void __init efi_init(void
        if (!efi_is_native())
                pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
        else {
 -              if (efi_runtime_init())
 +              if (disable_runtime || efi_runtime_init())
                        return;
                set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
        }
@@@ -844,7 -838,7 +839,7 @@@ void __init efi_enter_virtual_mode(void
        efi_memory_desc_t *md, *prev_md = NULL;
        efi_status_t status;
        unsigned long size;
 -      u64 end, systab, end_pfn;
 +      u64 end, systab, start_pfn, end_pfn;
        void *p, *va, *new_memmap = NULL;
        int count = 0;
  
                size = md->num_pages << EFI_PAGE_SHIFT;
                end = md->phys_addr + size;
  
 +              start_pfn = PFN_DOWN(md->phys_addr);
                end_pfn = PFN_UP(end);
 -              if (end_pfn <= max_low_pfn_mapped
 -                  || (end_pfn > (1UL << (32 - PAGE_SHIFT))
 -                      && end_pfn <= max_pfn_mapped)) {
 +              if (pfn_range_is_mapped(start_pfn, end_pfn)) {
                        va = __va(md->phys_addr);
  
                        if (!(md->attribute & EFI_MEMORY_WB))
@@@ -79,6 -79,7 +79,7 @@@
  #include <linux/device.h>
  #include <linux/slab.h>
  #include <linux/pstore.h>
+ #include <linux/ctype.h>
  
  #include <linux/fs.h>
  #include <linux/ramfs.h>
@@@ -158,13 -159,6 +159,13 @@@ efivar_create_sysfs_entry(struct efivar
                          efi_char16_t *variable_name,
                          efi_guid_t *vendor_guid);
  
 +/*
 + * Prototype for workqueue functions updating sysfs entry
 + */
 +
 +static void efivar_update_sysfs_entries(struct work_struct *);
 +static DECLARE_WORK(efivar_work, efivar_update_sysfs_entries);
 +
  /* Return the number of unicode characters in data */
  static unsigned long
  utf16_strnlen(efi_char16_t *s, size_t maxlength)
@@@ -412,11 -406,10 +413,11 @@@ static efi_status_
  get_var_data(struct efivars *efivars, struct efi_variable *var)
  {
        efi_status_t status;
 +      unsigned long flags;
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irqsave(&efivars->lock, flags);
        status = get_var_data_locked(efivars, var);
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irqrestore(&efivars->lock, flags);
  
        if (status != EFI_SUCCESS) {
                printk(KERN_WARNING "efivars: get_variable() failed 0x%lx!\n",
@@@ -545,14 -538,14 +546,14 @@@ efivar_store_raw(struct efivar_entry *e
                return -EINVAL;
        }
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
        status = efivars->ops->set_variable(new_var->VariableName,
                                            &new_var->VendorGuid,
                                            new_var->Attributes,
                                            new_var->DataSize,
                                            new_var->Data);
  
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
  
        if (status != EFI_SUCCESS) {
                printk(KERN_WARNING "efivars: set_variable() failed: status=%lx\n",
@@@ -721,7 -714,7 +722,7 @@@ static ssize_t efivarfs_file_write(stru
         * amounts of memory. Pick a default size of 64K if
         * QueryVariableInfo() isn't supported by the firmware.
         */
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
  
        if (!efivars->ops->query_variable_info)
                status = EFI_UNSUPPORTED;
                                                   &remaining_size, &max_size);
        }
  
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
  
        if (status != EFI_SUCCESS) {
                if (status != EFI_UNSUPPORTED)
         * set_variable call, and removal of the variable from the efivars
         * list (in the case of an authenticated delete).
         */
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
  
        status = efivars->ops->set_variable(var->var.VariableName,
                                            &var->var.VendorGuid,
                                            data);
  
        if (status != EFI_SUCCESS) {
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                kfree(data);
  
                return efi_status_to_err(status);
                                            NULL);
  
        if (status == EFI_BUFFER_TOO_SMALL) {
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                mutex_lock(&inode->i_mutex);
                i_size_write(inode, newdatasize + sizeof(attributes));
                mutex_unlock(&inode->i_mutex);
  
        } else if (status == EFI_NOT_FOUND) {
                list_del(&var->list);
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                efivar_unregister(var);
                drop_nlink(inode);
                d_delete(file->f_dentry);
                dput(file->f_dentry);
  
        } else {
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                pr_warn("efivarfs: inconsistent EFI variable implementation? "
                                "status = %lx\n", status);
        }
@@@ -827,11 -820,11 +828,11 @@@ static ssize_t efivarfs_file_read(struc
        void *data;
        ssize_t size = 0;
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
        status = efivars->ops->get_variable(var->var.VariableName,
                                            &var->var.VendorGuid,
                                            &attributes, &datasize, NULL);
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
  
        if (status != EFI_BUFFER_TOO_SMALL)
                return efi_status_to_err(status);
        if (!data)
                return -ENOMEM;
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
        status = efivars->ops->get_variable(var->var.VariableName,
                                            &var->var.VendorGuid,
                                            &attributes, &datasize,
                                            (data + sizeof(attributes)));
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
  
        if (status != EFI_SUCCESS) {
                size = efi_status_to_err(status);
@@@ -908,6 -901,48 +909,48 @@@ static struct inode *efivarfs_get_inode
        return inode;
  }
  
+ /*
+  * Return true if 'str' is a valid efivarfs filename of the form,
+  *
+  *    VariableName-12345678-1234-1234-1234-1234567891bc
+  *
+  * 'len' is the length of 'str' in bytes. Only the trailing GUID is
+  * format-checked; the variable name part may itself contain '-'
+  * characters, so the separator is located relative to the end of the
+  * name instead of by searching for the first '-'.
+  */
+ static bool efivarfs_valid_name(const char *str, int len)
+ {
+       static const char dashes[GUID_LEN] = {
+               [8] = 1, [13] = 1, [18] = 1, [23] = 1
+       };
+       const char *s;
+       int i;
+       /*
+        * We need a GUID, plus at least one letter for the variable name,
+        * plus the '-' separator
+        */
+       if (len < GUID_LEN + 2)
+               return false;
+       /*
+        * Only form the GUID pointer once the length is known to be
+        * sufficient, so that it never points before the start of 'str'.
+        */
+       s = str + len - GUID_LEN;
+       /* The character right before the GUID must be the '-' separator */
+       if (*(s - 1) != '-')
+               return false;
+       /*
+        * Validate that 's' is of the correct format, e.g.
+        *
+        *      12345678-1234-1234-1234-123456789abc
+        */
+       for (i = 0; i < GUID_LEN; i++) {
+               if (dashes[i]) {
+                       if (*s++ != '-')
+                               return false;
+               } else {
+                       if (!isxdigit(*s++))
+                               return false;
+               }
+       }
+       return true;
+ }
  static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid)
  {
        guid->b[0] = hex_to_bin(str[6]) << 4 | hex_to_bin(str[7]);
@@@ -936,11 -971,7 +979,7 @@@ static int efivarfs_create(struct inod
        struct efivar_entry *var;
        int namelen, i = 0, err = 0;
  
-       /*
-        * We need a GUID, plus at least one letter for the variable name,
-        * plus the '-' separator
-        */
-       if (dentry->d_name.len < GUID_LEN + 2)
+       if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len))
                return -EINVAL;
  
        inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0);
                goto out;
  
        kobject_uevent(&var->kobj, KOBJ_ADD);
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
        list_add(&var->list, &efivars->list);
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
        d_instantiate(dentry, inode);
        dget(dentry);
  out:
@@@ -993,7 -1024,7 +1032,7 @@@ static int efivarfs_unlink(struct inod
        struct efivars *efivars = var->efivars;
        efi_status_t status;
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
  
        status = efivars->ops->set_variable(var->var.VariableName,
                                            &var->var.VendorGuid,
  
        if (status == EFI_SUCCESS || status == EFI_NOT_FOUND) {
                list_del(&var->list);
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                efivar_unregister(var);
                drop_nlink(dentry->d_inode);
                dput(dentry);
                return 0;
        }
  
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
        return -EINVAL;
  };
  
+ /*
+  * Compare two efivarfs file names.
+  *
+  * An efivarfs filename is composed of two parts,
+  *
+  *    1. A case-sensitive variable name
+  *    2. A case-insensitive GUID
+  *
+  * So we need to perform a case-sensitive match on part 1 and a
+  * case-insensitive match on part 2.
+  */
+ static int efivarfs_d_compare(const struct dentry *parent, const struct inode *pinode,
+                             const struct dentry *dentry, const struct inode *inode,
+                             unsigned int len, const char *str,
+                             const struct qstr *name)
+ {
+       int guid = len - GUID_LEN;
+       if (name->len != len)
+               return 1;
+       /* Case-sensitive compare for the variable name */
+       if (memcmp(str, name->name, guid))
+               return 1;
+       /* Case-insensitive compare for the GUID */
+       return strncasecmp(name->name + guid, str + guid, GUID_LEN);
+ }
+ /*
+  * Hash an efivarfs filename: the variable name part is hashed as-is and
+  * the trailing GUID_LEN characters are lower-cased first, so that names
+  * differing only in the case of the GUID get the same hash.
+  *
+  * Returns 0 with qstr->hash filled in, or -EINVAL if the name is
+  * rejected by efivarfs_valid_name().
+  */
+ static int efivarfs_d_hash(const struct dentry *dentry,
+                          const struct inode *inode, struct qstr *qstr)
+ {
+       unsigned long hash = init_name_hash();
+       const unsigned char *s = qstr->name;
+       unsigned int len = qstr->len;
+       unsigned int namelen, i;
+       if (!efivarfs_valid_name(s, len))
+               return -EINVAL;
+       /* A valid name is longer than GUID_LEN, so this cannot underflow */
+       namelen = len - GUID_LEN;
+       /* Variable name (and separator) are case-sensitive. */
+       for (i = 0; i < namelen; i++)
+               hash = partial_name_hash(*s++, hash);
+       /*
+        * GUID is case-insensitive. Count the GUID characters explicitly
+        * so that every one of them, including the last, is hashed.
+        */
+       for (i = 0; i < GUID_LEN; i++)
+               hash = partial_name_hash(tolower(*s++), hash);
+       qstr->hash = end_name_hash(hash);
+       return 0;
+ }
+ /*
+  * Retaining negative dentries for an in-memory filesystem just wastes
+  * memory and lookup time: arrange for them to be deleted immediately.
+  */
+ static int efivarfs_delete_dentry(const struct dentry *dentry)
+ {
+       return 1;
+ }
+ static struct dentry_operations efivarfs_d_ops = {
+       .d_compare = efivarfs_d_compare,
+       .d_hash = efivarfs_d_hash,
+       .d_delete = efivarfs_delete_dentry,
+ };
+ static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
+ {
+       struct qstr q;
+       q.name = name;
+       q.len = strlen(name);
+       if (efivarfs_d_hash(NULL, NULL, &q))
+               return NULL;
+       return d_alloc(parent, &q);
+ }
  static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
  {
        struct inode *inode = NULL;
        sb->s_blocksize_bits    = PAGE_CACHE_SHIFT;
        sb->s_magic             = EFIVARFS_MAGIC;
        sb->s_op                = &efivarfs_ops;
+       sb->s_d_op              = &efivarfs_d_ops;
        sb->s_time_gran         = 1;
  
        inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0);
                if (!inode)
                        goto fail_name;
  
-               dentry = d_alloc_name(root, name);
+               dentry = efivarfs_alloc_dentry(root, name);
                if (!dentry)
                        goto fail_inode;
  
                /* copied by the above to local storage in the dentry. */
                kfree(name);
  
 -              spin_lock(&efivars->lock);
 +              spin_lock_irq(&efivars->lock);
                efivars->ops->get_variable(entry->var.VariableName,
                                           &entry->var.VendorGuid,
                                           &entry->var.Attributes,
                                           &size,
                                           NULL);
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
  
                mutex_lock(&inode->i_mutex);
                inode->i_private = entry;
-               i_size_write(inode, size+4);
+               i_size_write(inode, size + sizeof(entry->var.Attributes));
                mutex_unlock(&inode->i_mutex);
                d_add(dentry, inode);
        }
@@@ -1117,8 -1227,20 +1235,20 @@@ static struct file_system_type efivarfs
        .kill_sb = efivarfs_kill_sb,
  };
  
+ /*
+  * Directory ->lookup() for efivarfs: handle negative dentries.
+  *
+  * A name longer than NAME_MAX is refused with -ENAMETOOLONG. Any other name
+  * has its dentry attached to a NULL inode via d_add(), which makes it a
+  * negative dentry, and NULL is returned. @dir and @flags are not used.
+  */
+ static struct dentry *efivarfs_lookup(struct inode *dir, struct dentry *dentry,
+                                     unsigned int flags)
+ {
+       if (dentry->d_name.len > NAME_MAX)
+               return ERR_PTR(-ENAMETOOLONG);
+       d_add(dentry, NULL);
+       return NULL;
+ }
  static const struct inode_operations efivarfs_dir_inode_operations = {
-       .lookup = simple_lookup,
+       .lookup = efivarfs_lookup,
        .unlink = efivarfs_unlink,
        .create = efivarfs_create,
  };
@@@ -1131,7 -1253,7 +1261,7 @@@ static int efi_pstore_open(struct pstor
  {
        struct efivars *efivars = psi->data;
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
        efivars->walk_entry = list_first_entry(&efivars->list,
                                               struct efivar_entry, list);
        return 0;
@@@ -1141,7 -1263,7 +1271,7 @@@ static int efi_pstore_close(struct psto
  {
        struct efivars *efivars = psi->data;
  
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
        return 0;
  }
  
@@@ -1217,18 -1339,8 +1347,18 @@@ static int efi_pstore_write(enum pstore
        int i, ret = 0;
        u64 storage_space, remaining_space, max_variable_size;
        efi_status_t status = EFI_NOT_FOUND;
 -
 -      spin_lock(&efivars->lock);
 +      unsigned long flags;
 +
 +      if (pstore_cannot_block_path(reason)) {
 +              /*
 +               * If the lock is taken by another cpu in non-blocking path,
 +               * this driver returns without entering firmware to avoid
 +               * hanging up.
 +               */
 +              if (!spin_trylock_irqsave(&efivars->lock, flags))
 +                      return -EBUSY;
 +      } else
 +              spin_lock_irqsave(&efivars->lock, flags);
  
        /*
         * Check if there is a space enough to log.
                                                   &remaining_space,
                                                   &max_variable_size);
        if (status || remaining_space < size + DUMP_NAME_LEN * 2) {
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irqrestore(&efivars->lock, flags);
                *id = part;
                return -ENOSPC;
        }
        efivars->ops->set_variable(efi_name, &vendor, PSTORE_EFI_ATTRIBUTES,
                                   size, psi->buf);
  
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irqrestore(&efivars->lock, flags);
  
 -      if (size)
 -              ret = efivar_create_sysfs_entry(efivars,
 -                                        utf16_strsize(efi_name,
 -                                                      DUMP_NAME_LEN * 2),
 -                                        efi_name, &vendor);
 +      if (reason == KMSG_DUMP_OOPS)
 +              schedule_work(&efivar_work);
  
        *id = part;
        return ret;
@@@ -1278,7 -1393,7 +1408,7 @@@ static int efi_pstore_erase(enum pstore
        sprintf(name, "dump-type%u-%u-%d-%lu", type, (unsigned int)id, count,
                time.tv_sec);
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
  
        for (i = 0; i < DUMP_NAME_LEN; i++)
                efi_name[i] = name[i];
        if (found)
                list_del(&found->list);
  
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
  
        if (found)
                efivar_unregister(found);
@@@ -1392,7 -1507,7 +1522,7 @@@ static ssize_t efivar_create(struct fil
                return -EINVAL;
        }
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
  
        /*
         * Does this variable already exist?
                }
        }
        if (found) {
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                return -EINVAL;
        }
  
        if (status != EFI_SUCCESS) {
                printk(KERN_WARNING "efivars: set_variable() failed: status=%lx\n",
                        status);
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                return -EIO;
        }
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
  
        /* Create the entry in sysfs.  Locking is not required here */
        status = efivar_create_sysfs_entry(efivars,
@@@ -1455,7 -1570,7 +1585,7 @@@ static ssize_t efivar_delete(struct fil
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
  
        /*
         * Does this variable already exist?
                }
        }
        if (!found) {
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                return -EINVAL;
        }
        /* force the Attributes/DataSize to 0 to ensure deletion */
        if (status != EFI_SUCCESS) {
                printk(KERN_WARNING "efivars: set_variable() failed: status=%lx\n",
                        status);
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                return -EIO;
        }
        list_del(&search_efivar->list);
        /* We need to release this lock before unregistering. */
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
        efivar_unregister(search_efivar);
  
        /* It's dead Jim.... */
        return count;
  }
  
 +/*
 + * Report whether a variable with the given name and vendor GUID already has
 + * an entry on the __efivars list.
 + *
 + * Two names match when utf16_strsize() (called with a limit of 1024) returns
 + * the same value for both and memcmp() over that many bytes finds no
 + * difference. Vendor GUIDs are compared with efi_guidcmp().
 + *
 + * No lock is taken here. The caller visible in this file,
 + * efivar_update_sysfs_entries(), invokes it with efivars->lock held.
 + *
 + * Returns true on the first matching entry, false if the list has none.
 + */
 +static bool variable_is_present(efi_char16_t *variable_name, efi_guid_t *vendor)
 +{
 +      struct efivar_entry *entry, *n;
 +      struct efivars *efivars = &__efivars;
 +      unsigned long strsize1, strsize2;
 +      bool found = false;
 +
 +      strsize1 = utf16_strsize(variable_name, 1024);
 +      /*
 +       * NOTE(review): the _safe iterator is used although nothing is removed
 +       * from the list inside this loop.
 +       */
 +      list_for_each_entry_safe(entry, n, &efivars->list, list) {
 +              strsize2 = utf16_strsize(entry->var.VariableName, 1024);
 +              if (strsize1 == strsize2 &&
 +                      !memcmp(variable_name, &(entry->var.VariableName),
 +                              strsize2) &&
 +                      !efi_guidcmp(entry->var.VendorGuid,
 +                              *vendor)) {
 +                      found = true;
 +                      break;
 +              }
 +      }
 +      return found;
 +}
 +
 +/*
 + * Create sysfs entries for firmware variables that are not yet on the
 + * __efivars list.
 + *
 + * Takes a work_struct, so it is presumably the handler behind efivar_work,
 + * which efi_pstore_write() schedules - the binding is not visible here.
 + *
 + * Each pass of the outer loop allocates a zeroed 1024-byte name buffer,
 + * takes efivars->lock and calls ops->get_next_variable() repeatedly until it
 + * either fails or yields a variable for which variable_is_present() is
 + * false. If such a variable was found, efivar_create_sysfs_entry() is called
 + * for it after the lock is dropped and the outer loop runs again; otherwise
 + * the buffer is freed and the function returns.
 + *
 + * NOTE(review): on the "found" path variable_name is not freed here. Whether
 + * efivar_create_sysfs_entry() copies the name or takes ownership of the
 + * buffer cannot be seen from this hunk - if it copies, each created entry
 + * leaks one buffer.
 + *
 + * NOTE(review): the return value of efivar_create_sysfs_entry() is ignored.
 + * If it can fail without adding the variable to the list, the next pass
 + * would presumably find the same variable again - confirm this cannot loop
 + * forever.
 + */
 +static void efivar_update_sysfs_entries(struct work_struct *work)
 +{
 +      struct efivars *efivars = &__efivars;
 +      efi_guid_t vendor;
 +      efi_char16_t *variable_name;
 +      unsigned long variable_name_size = 1024;
 +      efi_status_t status = EFI_NOT_FOUND;
 +      bool found;
 +
 +      /* Add new sysfs entries */
 +      while (1) {
 +              variable_name = kzalloc(variable_name_size, GFP_KERNEL);
 +              if (!variable_name) {
 +                      pr_err("efivars: Memory allocation failed.\n");
 +                      return;
 +              }
 +
 +              spin_lock_irq(&efivars->lock);
 +              found = false;
 +              while (1) {
 +                      /*
 +                       * The size is handed to the firmware call by pointer,
 +                       * so put the buffer size back before every call.
 +                       */
 +                      variable_name_size = 1024;
 +                      status = efivars->ops->get_next_variable(
 +                                                      &variable_name_size,
 +                                                      variable_name,
 +                                                      &vendor);
 +                      if (status != EFI_SUCCESS) {
 +                              break;
 +                      } else {
 +                              if (!variable_is_present(variable_name,
 +                                  &vendor)) {
 +                                      found = true;
 +                                      break;
 +                              }
 +                      }
 +              }
 +              spin_unlock_irq(&efivars->lock);
 +
 +              if (!found) {
 +                      kfree(variable_name);
 +                      break;
 +              } else
 +                      efivar_create_sysfs_entry(efivars,
 +                                                variable_name_size,
 +                                                variable_name, &vendor);
 +      }
 +}
 +
  /*
   * Let's not leave out systab information that snuck into
   * the efivars driver
@@@ -1678,9 -1724,9 +1808,9 @@@ efivar_create_sysfs_entry(struct efivar
        kfree(short_name);
        short_name = NULL;
  
 -      spin_lock(&efivars->lock);
 +      spin_lock_irq(&efivars->lock);
        list_add(&new_efivar->list, &efivars->list);
 -      spin_unlock(&efivars->lock);
 +      spin_unlock_irq(&efivars->lock);
  
        return 0;
  }
@@@ -1749,9 -1795,9 +1879,9 @@@ void unregister_efivars(struct efivars 
        struct efivar_entry *entry, *n;
  
        list_for_each_entry_safe(entry, n, &efivars->list, list) {
 -              spin_lock(&efivars->lock);
 +              spin_lock_irq(&efivars->lock);
                list_del(&entry->list);
 -              spin_unlock(&efivars->lock);
 +              spin_unlock_irq(&efivars->lock);
                efivar_unregister(entry);
        }
        if (efivars->new_var)
@@@ -1907,8 -1953,6 +2037,8 @@@ err_put
  static void __exit
  efivars_exit(void)
  {
 +      cancel_work_sync(&efivar_work);
 +
        if (efi_enabled(EFI_RUNTIME_SERVICES)) {
                unregister_efivars(&__efivars);
                kobject_put(efi_kobj);