Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
author	Linus Torvalds <torvalds@linux-foundation.org>	Tue, 10 Jan 2012 19:08:21 +0000 (11:08 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>	Tue, 10 Jan 2012 19:08:21 +0000 (11:08 -0800)
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (53 commits)
  iommu/amd: Set IOTLB invalidation timeout
  iommu/amd: Init stats for iommu=pt
  iommu/amd: Remove unnecessary cache flushes in amd_iommu_resume
  iommu/amd: Add invalidate-context call-back
  iommu/amd: Add amd_iommu_device_info() function
  iommu/amd: Adapt IOMMU driver to PCI register name changes
  iommu/amd: Add invalid_ppr callback
  iommu/amd: Implement notifiers for IOMMUv2
  iommu/amd: Implement IO page-fault handler
  iommu/amd: Add routines to bind/unbind a pasid
  iommu/amd: Implement device acquisition code for IOMMUv2
  iommu/amd: Add driver stub for AMD IOMMUv2 support
  iommu/amd: Add stat counter for IOMMUv2 events
  iommu/amd: Add device errata handling
  iommu/amd: Add function to get IOMMUv2 domain for pdev
  iommu/amd: Implement function to send PPR completions
  iommu/amd: Implement functions to manage GCR3 table
  iommu/amd: Implement IOMMUv2 TLB flushing routines
  iommu/amd: Add support for IOMMUv2 domain mode
  iommu/amd: Add amd_iommu_domain_direct_map function
  ...

43 files changed:
Documentation/ABI/testing/sysfs-bus-pci
Documentation/kernel-parameters.txt
arch/arm/mach-omap2/devices.c
arch/arm/plat-omap/include/plat/iommu.h
arch/arm/plat-omap/include/plat/iovmm.h
arch/ia64/include/asm/iommu.h
arch/ia64/kernel/pci-dma.c
arch/x86/include/asm/iommu.h
arch/x86/kernel/pci-dma.c
drivers/acpi/pci_root.c
drivers/iommu/Kconfig
drivers/iommu/Makefile
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/amd_iommu_proto.h
drivers/iommu/amd_iommu_types.h
drivers/iommu/amd_iommu_v2.c [new file with mode: 0644]
drivers/iommu/intel-iommu.c
drivers/iommu/iommu.c
drivers/iommu/msm_iommu.c
drivers/iommu/omap-iommu.c
drivers/iommu/omap-iovmm.c
drivers/media/video/omap3isp/isp.c
drivers/media/video/omap3isp/isp.h
drivers/media/video/omap3isp/ispccdc.c
drivers/media/video/omap3isp/ispstat.c
drivers/media/video/omap3isp/ispvideo.c
drivers/pci/ats.c
drivers/pci/hotplug/pciehp.h
drivers/pci/hotplug/pciehp_core.c
drivers/pci/hotplug/pciehp_ctrl.c
drivers/pci/hotplug/pciehp_hpc.c
drivers/pci/msi.c
drivers/pci/pci-acpi.c
drivers/pci/pcie/aspm.c
include/linux/acpi.h
include/linux/amd-iommu.h
include/linux/iommu.h
include/linux/msi.h
include/linux/pci-aspm.h
include/linux/pci.h
include/linux/pci_regs.h
virt/kvm/iommu.c

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf26ce108440a5bd0c9fe9414cfa984aed9d..34f51100f0299cb5a96a77984def99dbfdd9367d 100644
@@ -66,6 +66,24 @@ Description:
                re-discover previously removed devices.
                Depends on CONFIG_HOTPLUG.
 
+What:          /sys/bus/pci/devices/.../msi_irqs/
+Date:          September 2011
+Contact:       Neil Horman <nhorman@tuxdriver.com>
+Description:
+               The /sys/devices/.../msi_irqs directory contains a variable set
+               of sub-directories, with each sub-directory being named after a
+               corresponding msi irq vector allocated to that device.  Each
+               numbered sub-directory N contains attributes of that irq.
+               Note that this directory is not created for device drivers which
+               do not support msi irqs.
+
+What:          /sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date:          September 2011
+Contact:       Neil Horman <nhorman@tuxdriver.com>
+Description:
+               This attribute indicates the mode that the irq vector named by
+               the parent directory is in (msi vs. msix).
+
 What:          /sys/bus/pci/devices/.../remove
 Date:          January 2009
 Contact:       Linux PCI developers <linux-pci@vger.kernel.org>
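
To illustrate the msi_irqs ABI added above: the mode attribute is an ordinary
sysfs file, so userspace can read it directly. A minimal userspace sketch (the
device address and vector number below are hypothetical; substitute a real
MSI-capable device):

#include <stdio.h>

int main(void)
{
	/* Hypothetical device and vector number */
	const char *path = "/sys/bus/pci/devices/0000:00:19.0/msi_irqs/42/mode";
	char mode[16];
	FILE *f = fopen(path, "r");

	if (!f)
		return 1;
	if (fscanf(f, "%15s", mode) == 1)
		printf("vector 42 mode: %s\n", mode);	/* "msi" or "msix" */
	fclose(f);
	return 0;
}
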
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e69a461a06c233573431f19ec9fdf95ee8cdcb8a..9373d95319c15754069f2af1600a7ef61aa31e17 100644
@@ -329,6 +329,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                    is a lot faster
                        off       - do not initialize any AMD IOMMU found in
                                    the system
+                       force_isolation - Force device isolation for all
+                                         devices. The IOMMU driver is no
+                                         longer allowed to lift isolation
+                                         requirements as needed. This option
+                                         does not override iommu=pt.
 
        amijoy.map=     [HW,JOY] Amiga joystick support
                        Map of devices attached to JOY0DAT and JOY1DAT
@@ -1059,7 +1064,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                nomerge
                forcesac
                soft
-               pt      [x86, IA-64]
+               pt              [x86, IA-64]
+               group_mf        [x86, IA-64]
+
 
        io7=            [HW] IO7 for Marvel based alpha systems
                        See comment before marvel_specify_io7 in
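
Tying the two kernel-parameters hunks above together: full device isolation and
multi-function grouping are both plain boot parameters, e.g. (illustrative
command-line fragment, assuming an AMD IOMMU system):

	amd_iommu=force_isolation iommu=group_mf
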
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 35d5dffab7e11c4fa3f4efefe2397d3cf429dc5d..46dfd1ae8f71a6001f90ecdcea237103f906dcc4 100644
@@ -28,6 +28,7 @@
 #include <plat/board.h>
 #include <plat/mcbsp.h>
 #include <plat/mmc.h>
+#include <plat/iommu.h>
 #include <plat/dma.h>
 #include <plat/omap_hwmod.h>
 #include <plat/omap_device.h>
@@ -211,9 +212,15 @@ static struct platform_device omap3isp_device = {
        .resource       = omap3isp_resources,
 };
 
+static struct omap_iommu_arch_data omap3_isp_iommu = {
+       .name = "isp",
+};
+
 int omap3_init_camera(struct isp_platform_data *pdata)
 {
        omap3isp_device.dev.platform_data = pdata;
+       omap3isp_device.dev.archdata.iommu = &omap3_isp_iommu;
+
        return platform_device_register(&omap3isp_device);
 }
 
diff --git a/arch/arm/plat-omap/include/plat/iommu.h b/arch/arm/plat-omap/include/plat/iommu.h
index a1d79ee192503e3c14fd19fddb1bd24af933e0ad..88be3e628b339f49e6943f6fb6ad792546730aa6 100644
@@ -111,6 +111,32 @@ struct iommu_platform_data {
        u32 da_end;
 };
 
+/**
+ * struct omap_iommu_arch_data - omap iommu private data
+ * @name: name of the iommu device
+ * @iommu_dev: handle of the iommu device
+ *
+ * This is an omap iommu private data object, which binds an iommu user
+ * to its iommu device. This object should be placed at the iommu user's
+ * dev_archdata so the generic IOMMU API can be used without having to
+ * utilize omap-specific plumbing anymore.
+ */
+struct omap_iommu_arch_data {
+       const char *name;
+       struct omap_iommu *iommu_dev;
+};
+
+/**
+ * dev_to_omap_iommu() - retrieves an omap iommu object from a user device
+ * @dev: iommu client device
+ */
+static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
+{
+       struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+
+       return arch_data->iommu_dev;
+}
+
 /* IOMMU errors */
 #define OMAP_IOMMU_ERR_TLB_MISS                (1 << 0)
 #define OMAP_IOMMU_ERR_TRANS_FAULT     (1 << 1)
@@ -163,8 +189,8 @@ extern int omap_iommu_set_isr(const char *name,
                                    void *priv),
                         void *isr_priv);
 
-extern void omap_iommu_save_ctx(struct omap_iommu *obj);
-extern void omap_iommu_restore_ctx(struct omap_iommu *obj);
+extern void omap_iommu_save_ctx(struct device *dev);
+extern void omap_iommu_restore_ctx(struct device *dev);
 
 extern int omap_install_iommu_arch(const struct iommu_functions *ops);
 extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops);
@@ -176,6 +202,5 @@ extern ssize_t
 omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len);
 extern size_t
 omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len);
-struct device *omap_find_iommu_device(const char *name);
 
 #endif /* __MACH_IOMMU_H */
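
The devices.c hunk above shows the producer side of this binding (board code
sets dev->archdata.iommu); on the consumer side, a client driver now passes its
own struct device to the context-save/restore API instead of an omap_iommu
handle. A minimal sketch (hypothetical client driver, assuming the arch data
was set up as above):

#include <plat/iommu.h>

/* Hypothetical OMAP IOMMU client: board code already pointed
 * dev->archdata.iommu at its struct omap_iommu_arch_data. */
static void my_client_suspend(struct device *dev)
{
	/* The omap_iommu is resolved internally via dev_to_omap_iommu() */
	omap_iommu_save_ctx(dev);
}

static void my_client_resume(struct device *dev)
{
	omap_iommu_restore_ctx(dev);
}
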
diff --git a/arch/arm/plat-omap/include/plat/iovmm.h b/arch/arm/plat-omap/include/plat/iovmm.h
index 6af1a91c0f36311996f91f2328cfe84da770064a..498e57cda6cd4babfebd0c474a4a71a101ff064b 100644
@@ -72,18 +72,18 @@ struct iovm_struct {
 #define IOVMF_DA_FIXED         (1 << (4 + IOVMF_SW_SHIFT))
 
 
-extern struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da);
+extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da);
 extern u32
-omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
+omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
                        const struct sg_table *sgt, u32 flags);
 extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain,
-                               struct omap_iommu *obj, u32 da);
+                               struct device *dev, u32 da);
 extern u32
-omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj,
+omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev,
                                u32 da, size_t bytes, u32 flags);
 extern void
-omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj,
+omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
                                const u32 da);
-extern void *omap_da_to_va(struct omap_iommu *obj, u32 da);
+extern void *omap_da_to_va(struct device *dev, u32 da);
 
 #endif /* __IOMMU_MMAP_H */
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 105c93b00b1bc53ce22f04ffd249595b5001c4d2..b6a809fa2995fc989bc1175fca12ab483c28790e 100644
@@ -11,10 +11,12 @@ extern void no_iommu_init(void);
 extern int force_iommu, no_iommu;
 extern int iommu_pass_through;
 extern int iommu_detected;
+extern int iommu_group_mf;
 #else
 #define iommu_pass_through     (0)
 #define no_iommu               (1)
 #define iommu_detected         (0)
+#define iommu_group_mf         (0)
 #endif
 extern void iommu_dma_init(void);
 extern void machvec_init(const char *name);
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index c16162c70860a7ca1e878474b85348da233ad21d..eb11757200500f574aaf6665ea9c3f17b9a3d566 100644
@@ -33,6 +33,7 @@ int force_iommu __read_mostly;
 #endif
 
 int iommu_pass_through;
+int iommu_group_mf;
 
 /* Dummy device used for NULL arguments (normally ISA). Better would
    be probably a smaller DMA mask, but this is bug-to-bug compatible
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 345c99cef15262dda6415b5eff3d2140c37bafcc..dffc38ee6255f95944eaf3aa0eb283d4f3ba808a 100644
@@ -5,6 +5,7 @@ extern struct dma_map_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 extern int iommu_pass_through;
+extern int iommu_group_mf;
 
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 80dc793b3f6331747cd29ea5d0a1303e515f07db..1c4d769e21ea07053f81f75b7aed2b21ab1f397f 100644
@@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0;
  */
 int iommu_pass_through __read_mostly;
 
+/*
+ * Group multi-function PCI devices into a single device-group for the
+ * iommu_device_group interface.  This tells the iommu driver to pretend
+ * it cannot distinguish between functions of a device, exposing only one
+ * group for the device.  Useful for disallowing use of individual PCI
+ * functions from userspace drivers.
+ */
+int iommu_group_mf __read_mostly;
+
 extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
 
 /* Dummy device used for NULL arguments (normally ISA). */
@@ -169,6 +178,8 @@ static __init int iommu_setup(char *p)
 #endif
                if (!strncmp(p, "pt", 2))
                        iommu_pass_through = 1;
+               if (!strncmp(p, "group_mf", 8))
+                       iommu_group_mf = 1;
 
                gart_parse_options(p);
 
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 2672c798272fa1f419c98aeba0540fd39601f163..7aff6312ce7c75a66d731b5ea0a10743e7b34c5f 100644
@@ -596,6 +596,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
                if (ACPI_SUCCESS(status)) {
                        dev_info(root->bus->bridge,
                                "ACPI _OSC control (0x%02x) granted\n", flags);
+                       if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
+                               /*
+                                * We have ASPM control, but the FADT indicates
+                                * that it's unsupported. Clear it.
+                                */
+                               pcie_clear_aspm(root->bus);
+                       }
                } else {
                        dev_info(root->bus->bridge,
                                "ACPI _OSC request failed (%s), "
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 5414253b185a3ada15daa7c4adc423d65dfe0285..6bea6962f8ee6d8d8f5a73c7f5309654725054e5 100644
@@ -34,7 +34,9 @@ config AMD_IOMMU
        bool "AMD IOMMU support"
        select SWIOTLB
        select PCI_MSI
-       select PCI_IOV
+       select PCI_ATS
+       select PCI_PRI
+       select PCI_PASID
        select IOMMU_API
        depends on X86_64 && PCI && ACPI
        ---help---
@@ -58,6 +60,15 @@ config AMD_IOMMU_STATS
          information to userspace via debugfs.
          If unsure, say N.
 
+config AMD_IOMMU_V2
+       tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)"
+       depends on AMD_IOMMU && PROFILING && EXPERIMENTAL
+       select MMU_NOTIFIER
+       ---help---
+         This option enables support for the AMD IOMMUv2 features of the IOMMU
+         hardware. Select this option if you want to use devices that support
+         the PCI PRI and PASID interfaces.
+
 # Intel IOMMU support
 config DMAR_TABLE
        bool
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 2f4448794bc793133d5f1e8a3145be4c1cc685b3..0e36b4934affc0a3b4a2f7debc8809babd719734 100644
@@ -1,6 +1,7 @@
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
 obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
+obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o
 obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4ee277a8521a49eb41b5056daebf13cadc3d68dd..cce1f03b8895324d7d6e92cd6094cc39e7673cbe 100644
@@ -17,6 +17,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#include <linux/ratelimit.h>
 #include <linux/pci.h>
 #include <linux/pci-ats.h>
 #include <linux/bitmap.h>
@@ -28,6 +29,8 @@
 #include <linux/iommu.h>
 #include <linux/delay.h>
 #include <linux/amd-iommu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
 #include <asm/msidef.h>
 #include <asm/proto.h>
 #include <asm/iommu.h>
 
 #define LOOP_TIMEOUT   100000
 
+/*
+ * This bitmap is used to advertise the page sizes our hardware supports
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * Traditionally the IOMMU core just handed us the mappings directly,
+ * after making sure the size is a power-of-two multiple of 4KiB and
+ * that the mapping has natural alignment.
+ *
+ * To retain this behavior, we currently advertise that we support
+ * all page sizes that are a power-of-two multiple of 4KiB.
+ *
+ * If at some point we'd like to utilize the IOMMU core's new behavior,
+ * we could change this to advertise the real page sizes we support.
+ */
+#define AMD_IOMMU_PGSIZES      (~0xFFFUL)
+
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 
 /* A list of preallocated protection domains */
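
If the driver ever switches to the IOMMU core's new splitting behavior, as the
comment above contemplates, the bitmap would list explicit sizes instead of
~0xFFFUL. A hypothetical alternative (not part of this patch) advertising only
4KiB, 2MiB and 1GiB mappings would look like:

/* Hypothetical: advertise explicit page sizes instead of ~0xFFFUL */
#define AMD_IOMMU_PGSIZES_EXPLICIT	((1UL << 12) | (1UL << 21) | (1UL << 30))
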
@@ -59,6 +80,9 @@ static struct protection_domain *pt_domain;
 
 static struct iommu_ops amd_iommu_ops;
 
+static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
+int amd_iommu_max_glx_val = -1;
+
 /*
  * general struct to manage commands send to an IOMMU
  */
@@ -67,6 +91,7 @@ struct iommu_cmd {
 };
 
 static void update_domain(struct protection_domain *domain);
+static int __init alloc_passthrough_domain(void);
 
 /****************************************************************************
  *
@@ -147,6 +172,33 @@ static struct iommu_dev_data *get_dev_data(struct device *dev)
        return dev->archdata.iommu;
 }
 
+static bool pci_iommuv2_capable(struct pci_dev *pdev)
+{
+       static const int caps[] = {
+               PCI_EXT_CAP_ID_ATS,
+               PCI_EXT_CAP_ID_PRI,
+               PCI_EXT_CAP_ID_PASID,
+       };
+       int i, pos;
+
+       for (i = 0; i < 3; ++i) {
+               pos = pci_find_ext_capability(pdev, caps[i]);
+               if (pos == 0)
+                       return false;
+       }
+
+       return true;
+}
+
+static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
+{
+       struct iommu_dev_data *dev_data;
+
+       dev_data = get_dev_data(&pdev->dev);
+
+       return dev_data->errata & (1 << erratum) ? true : false;
+}
+
 /*
  * In this function the list of preallocated protection domains is traversed to
  * find the domain for a specific device
@@ -204,6 +256,7 @@ static bool check_device(struct device *dev)
 
 static int iommu_init_device(struct device *dev)
 {
+       struct pci_dev *pdev = to_pci_dev(dev);
        struct iommu_dev_data *dev_data;
        u16 alias;
 
@@ -228,6 +281,13 @@ static int iommu_init_device(struct device *dev)
                dev_data->alias_data = alias_data;
        }
 
+       if (pci_iommuv2_capable(pdev)) {
+               struct amd_iommu *iommu;
+
+               iommu              = amd_iommu_rlookup_table[dev_data->devid];
+               dev_data->iommu_v2 = iommu->is_iommu_v2;
+       }
+
        dev->archdata.iommu = dev_data;
 
        return 0;
@@ -317,6 +377,11 @@ DECLARE_STATS_COUNTER(domain_flush_single);
 DECLARE_STATS_COUNTER(domain_flush_all);
 DECLARE_STATS_COUNTER(alloced_io_mem);
 DECLARE_STATS_COUNTER(total_map_requests);
+DECLARE_STATS_COUNTER(complete_ppr);
+DECLARE_STATS_COUNTER(invalidate_iotlb);
+DECLARE_STATS_COUNTER(invalidate_iotlb_all);
+DECLARE_STATS_COUNTER(pri_requests);
+
 
 static struct dentry *stats_dir;
 static struct dentry *de_fflush;
@@ -351,6 +416,10 @@ static void amd_iommu_stats_init(void)
        amd_iommu_stats_add(&domain_flush_all);
        amd_iommu_stats_add(&alloced_io_mem);
        amd_iommu_stats_add(&total_map_requests);
+       amd_iommu_stats_add(&complete_ppr);
+       amd_iommu_stats_add(&invalidate_iotlb);
+       amd_iommu_stats_add(&invalidate_iotlb_all);
+       amd_iommu_stats_add(&pri_requests);
 }
 
 #endif
@@ -365,8 +434,8 @@ static void dump_dte_entry(u16 devid)
 {
        int i;
 
-       for (i = 0; i < 8; ++i)
-               pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
+       for (i = 0; i < 4; ++i)
+               pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
                        amd_iommu_dev_table[devid].data[i]);
 }
 
@@ -461,12 +530,84 @@ static void iommu_poll_events(struct amd_iommu *iommu)
        spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
+static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
+{
+       struct amd_iommu_fault fault;
+       volatile u64 *raw;
+       int i;
+
+       INC_STATS_COUNTER(pri_requests);
+
+       raw = (u64 *)(iommu->ppr_log + head);
+
+       /*
+        * Hardware bug: Interrupt may arrive before the entry is written to
+        * memory. If this happens we need to wait for the entry to arrive.
+        */
+       for (i = 0; i < LOOP_TIMEOUT; ++i) {
+               if (PPR_REQ_TYPE(raw[0]) != 0)
+                       break;
+               udelay(1);
+       }
+
+       if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
+               pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
+               return;
+       }
+
+       fault.address   = raw[1];
+       fault.pasid     = PPR_PASID(raw[0]);
+       fault.device_id = PPR_DEVID(raw[0]);
+       fault.tag       = PPR_TAG(raw[0]);
+       fault.flags     = PPR_FLAGS(raw[0]);
+
+       /*
+        * To detect the hardware bug we need to clear the entry
+        * back to zero.
+        */
+       raw[0] = raw[1] = 0;
+
+       atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
+}
+
+static void iommu_poll_ppr_log(struct amd_iommu *iommu)
+{
+       unsigned long flags;
+       u32 head, tail;
+
+       if (iommu->ppr_log == NULL)
+               return;
+
+       spin_lock_irqsave(&iommu->lock, flags);
+
+       head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+       tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+
+       while (head != tail) {
+
+               /* Handle PPR entry */
+               iommu_handle_ppr_entry(iommu, head);
+
+               /* Update and refresh ring-buffer state */
+               head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
+               writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+               tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+       }
+
+       /* enable ppr interrupts again */
+       writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+       spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
 irqreturn_t amd_iommu_int_thread(int irq, void *data)
 {
        struct amd_iommu *iommu;
 
-       for_each_iommu(iommu)
+       for_each_iommu(iommu) {
                iommu_poll_events(iommu);
+               iommu_poll_ppr_log(iommu);
+       }
 
        return IRQ_HANDLED;
 }
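
Each decoded fault is fanned out through ppr_notifier, so a consumer such as
the new amd_iommu_v2 driver registers a notifier block and receives the struct
amd_iommu_fault filled in by iommu_handle_ppr_entry() above. A minimal consumer
sketch (hypothetical handler; the registration entry point is exported later in
this patch):

static int my_ppr_handler(struct notifier_block *nb,
			  unsigned long action, void *data)
{
	struct amd_iommu_fault *fault = data;

	/* Fields decoded from the raw PPR log entry above */
	pr_debug("PPR fault: devid %04x pasid %d address %llx\n",
		 fault->device_id, fault->pasid, fault->address);

	/* ...resolve the fault, then answer via amd_iommu_complete_ppr()... */

	return NOTIFY_OK;
}

static struct notifier_block my_ppr_nb = {
	.notifier_call = my_ppr_handler,
};

/* registered with amd_iommu_register_ppr_notifier(&my_ppr_nb) */
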
@@ -595,6 +736,60 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
                cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
 }
 
+static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
+                                 u64 address, bool size)
+{
+       memset(cmd, 0, sizeof(*cmd));
+
+       address &= ~(0xfffULL);
+
+       cmd->data[0]  = pasid & PASID_MASK;
+       cmd->data[1]  = domid;
+       cmd->data[2]  = lower_32_bits(address);
+       cmd->data[3]  = upper_32_bits(address);
+       cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+       cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
+       if (size)
+               cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+       CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+}
+
+static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
+                                 int qdep, u64 address, bool size)
+{
+       memset(cmd, 0, sizeof(*cmd));
+
+       address &= ~(0xfffULL);
+
+       cmd->data[0]  = devid;
+       cmd->data[0] |= (pasid & 0xff) << 16;
+       cmd->data[0] |= (qdep  & 0xff) << 24;
+       cmd->data[1]  = devid;
+       cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16;
+       cmd->data[2]  = lower_32_bits(address);
+       cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
+       cmd->data[3]  = upper_32_bits(address);
+       if (size)
+               cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+       CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
+}
+
+static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
+                              int status, int tag, bool gn)
+{
+       memset(cmd, 0, sizeof(*cmd));
+
+       cmd->data[0]  = devid;
+       if (gn) {
+               cmd->data[1]  = pasid & PASID_MASK;
+               cmd->data[2]  = CMD_INV_IOMMU_PAGES_GN_MASK;
+       }
+       cmd->data[3]  = tag & 0x1ff;
+       cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;
+
+       CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
+}
+
 static void build_inv_all(struct iommu_cmd *cmd)
 {
        memset(cmd, 0, sizeof(*cmd));
@@ -1496,6 +1691,48 @@ static void free_pagetable(struct protection_domain *domain)
        domain->pt_root = NULL;
 }
 
+static void free_gcr3_tbl_level1(u64 *tbl)
+{
+       u64 *ptr;
+       int i;
+
+       for (i = 0; i < 512; ++i) {
+               if (!(tbl[i] & GCR3_VALID))
+                       continue;
+
+               ptr = __va(tbl[i] & PAGE_MASK);
+
+               free_page((unsigned long)ptr);
+       }
+}
+
+static void free_gcr3_tbl_level2(u64 *tbl)
+{
+       u64 *ptr;
+       int i;
+
+       for (i = 0; i < 512; ++i) {
+               if (!(tbl[i] & GCR3_VALID))
+                       continue;
+
+               ptr = __va(tbl[i] & PAGE_MASK);
+
+               free_gcr3_tbl_level1(ptr);
+       }
+}
+
+static void free_gcr3_table(struct protection_domain *domain)
+{
+       if (domain->glx == 2)
+               free_gcr3_tbl_level2(domain->gcr3_tbl);
+       else if (domain->glx == 1)
+               free_gcr3_tbl_level1(domain->gcr3_tbl);
+       else if (domain->glx != 0)
+               BUG();
+
+       free_page((unsigned long)domain->gcr3_tbl);
+}
+
 /*
  * Free a domain, only used if something went wrong in the
  * allocation path and we need to free an already allocated page table
@@ -1582,20 +1819,52 @@ static bool dma_ops_domain(struct protection_domain *domain)
 
 static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
 {
-       u64 pte_root = virt_to_phys(domain->pt_root);
-       u32 flags = 0;
+       u64 pte_root = 0;
+       u64 flags = 0;
+
+       if (domain->mode != PAGE_MODE_NONE)
+               pte_root = virt_to_phys(domain->pt_root);
 
        pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
                    << DEV_ENTRY_MODE_SHIFT;
        pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
 
+       flags = amd_iommu_dev_table[devid].data[1];
+
        if (ats)
                flags |= DTE_FLAG_IOTLB;
 
-       amd_iommu_dev_table[devid].data[3] |= flags;
-       amd_iommu_dev_table[devid].data[2]  = domain->id;
-       amd_iommu_dev_table[devid].data[1]  = upper_32_bits(pte_root);
-       amd_iommu_dev_table[devid].data[0]  = lower_32_bits(pte_root);
+       if (domain->flags & PD_IOMMUV2_MASK) {
+               u64 gcr3 = __pa(domain->gcr3_tbl);
+               u64 glx  = domain->glx;
+               u64 tmp;
+
+               pte_root |= DTE_FLAG_GV;
+               pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
+
+               /* First mask out possible old values for GCR3 table */
+               tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
+               flags    &= ~tmp;
+
+               tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
+               flags    &= ~tmp;
+
+               /* Encode GCR3 table into DTE */
+               tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
+               pte_root |= tmp;
+
+               tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
+               flags    |= tmp;
+
+               tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
+               flags    |= tmp;
+       }
+
+       flags &= ~(0xffffUL);
+       flags |= domain->id;
+
+       amd_iommu_dev_table[devid].data[1]  = flags;
+       amd_iommu_dev_table[devid].data[0]  = pte_root;
 }
 
 static void clear_dte_entry(u16 devid)
@@ -1603,7 +1872,6 @@ static void clear_dte_entry(u16 devid)
        /* remove entry from the device table seen by the hardware */
        amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
        amd_iommu_dev_table[devid].data[1] = 0;
-       amd_iommu_dev_table[devid].data[2] = 0;
 
        amd_iommu_apply_erratum_63(devid);
 }
@@ -1696,6 +1964,93 @@ out_unlock:
        return ret;
 }
 
+
+static void pdev_iommuv2_disable(struct pci_dev *pdev)
+{
+       pci_disable_ats(pdev);
+       pci_disable_pri(pdev);
+       pci_disable_pasid(pdev);
+}
+
+/* FIXME: Change generic reset-function to do the same */
+static int pri_reset_while_enabled(struct pci_dev *pdev)
+{
+       u16 control;
+       int pos;
+
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+       if (!pos)
+               return -EINVAL;
+
+       pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+       control |= PCI_PRI_CTRL_RESET;
+       pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
+
+       return 0;
+}
+
+static int pdev_iommuv2_enable(struct pci_dev *pdev)
+{
+       bool reset_enable;
+       int reqs, ret;
+
+       /* FIXME: Hardcode number of outstanding requests for now */
+       reqs = 32;
+       if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
+               reqs = 1;
+       reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
+
+       /* Only allow access to user-accessible pages */
+       ret = pci_enable_pasid(pdev, 0);
+       if (ret)
+               goto out_err;
+
+       /* First reset the PRI state of the device */
+       ret = pci_reset_pri(pdev);
+       if (ret)
+               goto out_err;
+
+       /* Enable PRI */
+       ret = pci_enable_pri(pdev, reqs);
+       if (ret)
+               goto out_err;
+
+       if (reset_enable) {
+               ret = pri_reset_while_enabled(pdev);
+               if (ret)
+                       goto out_err;
+       }
+
+       ret = pci_enable_ats(pdev, PAGE_SHIFT);
+       if (ret)
+               goto out_err;
+
+       return 0;
+
+out_err:
+       pci_disable_pri(pdev);
+       pci_disable_pasid(pdev);
+
+       return ret;
+}
+
+/* FIXME: Move this to PCI code */
+#define PCI_PRI_TLP_OFF                (1 << 2)
+
+bool pci_pri_tlp_required(struct pci_dev *pdev)
+{
+       u16 control;
+       int pos;
+
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+       if (!pos)
+               return false;
+
+       pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+
+       return (control & PCI_PRI_TLP_OFF) ? true : false;
+}
+
 /*
  * If a device is not yet associated with a domain, this function
  * assigns it to a domain and makes it visible to the hardware
@@ -1710,7 +2065,18 @@ static int attach_device(struct device *dev,
 
        dev_data = get_dev_data(dev);
 
-       if (amd_iommu_iotlb_sup && pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
+       if (domain->flags & PD_IOMMUV2_MASK) {
+               if (!dev_data->iommu_v2 || !dev_data->passthrough)
+                       return -EINVAL;
+
+               if (pdev_iommuv2_enable(pdev) != 0)
+                       return -EINVAL;
+
+               dev_data->ats.enabled = true;
+               dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
+               dev_data->pri_tlp     = pci_pri_tlp_required(pdev);
+       } else if (amd_iommu_iotlb_sup &&
+                  pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
                dev_data->ats.enabled = true;
                dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
        }
@@ -1760,7 +2126,7 @@ static void __detach_device(struct iommu_dev_data *dev_data)
         * passthrough domain if it is detached from any other domain.
         * Make sure we can deassign from the pt_domain itself.
         */
-       if (iommu_pass_through &&
+       if (dev_data->passthrough &&
            (dev_data->domain == NULL && domain != pt_domain))
                __attach_device(dev_data, pt_domain);
 }
@@ -1770,20 +2136,24 @@ static void __detach_device(struct iommu_dev_data *dev_data)
  */
 static void detach_device(struct device *dev)
 {
+       struct protection_domain *domain;
        struct iommu_dev_data *dev_data;
        unsigned long flags;
 
        dev_data = get_dev_data(dev);
+       domain   = dev_data->domain;
 
        /* lock device table */
        write_lock_irqsave(&amd_iommu_devtable_lock, flags);
        __detach_device(dev_data);
        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
-       if (dev_data->ats.enabled) {
+       if (domain->flags & PD_IOMMUV2_MASK)
+               pdev_iommuv2_disable(to_pci_dev(dev));
+       else if (dev_data->ats.enabled)
                pci_disable_ats(to_pci_dev(dev));
-               dev_data->ats.enabled = false;
-       }
+
+       dev_data->ats.enabled = false;
 }
 
 /*
@@ -1818,18 +2188,20 @@ static struct protection_domain *domain_for_device(struct device *dev)
 static int device_change_notifier(struct notifier_block *nb,
                                  unsigned long action, void *data)
 {
-       struct device *dev = data;
-       u16 devid;
-       struct protection_domain *domain;
        struct dma_ops_domain *dma_domain;
+       struct protection_domain *domain;
+       struct iommu_dev_data *dev_data;
+       struct device *dev = data;
        struct amd_iommu *iommu;
        unsigned long flags;
+       u16 devid;
 
        if (!check_device(dev))
                return 0;
 
-       devid  = get_device_id(dev);
-       iommu  = amd_iommu_rlookup_table[devid];
+       devid    = get_device_id(dev);
+       iommu    = amd_iommu_rlookup_table[devid];
+       dev_data = get_dev_data(dev);
 
        switch (action) {
        case BUS_NOTIFY_UNBOUND_DRIVER:
@@ -1838,7 +2210,7 @@ static int device_change_notifier(struct notifier_block *nb,
 
                if (!domain)
                        goto out;
-               if (iommu_pass_through)
+               if (dev_data->passthrough)
                        break;
                detach_device(dev);
                break;
@@ -2434,8 +2806,9 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
  */
 static void prealloc_protection_domains(void)
 {
-       struct pci_dev *dev = NULL;
+       struct iommu_dev_data *dev_data;
        struct dma_ops_domain *dma_dom;
+       struct pci_dev *dev = NULL;
        u16 devid;
 
        for_each_pci_dev(dev) {
@@ -2444,6 +2817,16 @@ static void prealloc_protection_domains(void)
                if (!check_device(&dev->dev))
                        continue;
 
+               dev_data = get_dev_data(&dev->dev);
+               if (!amd_iommu_force_isolation && dev_data->iommu_v2) {
+                       /* Make sure passthrough domain is allocated */
+                       alloc_passthrough_domain();
+                       dev_data->passthrough = true;
+                       attach_device(&dev->dev, pt_domain);
+                       pr_info("AMD-Vi: Using passthough domain for device %s\n",
+                               dev_name(&dev->dev));
+               }
+
                /* Is there already any domain for it? */
                if (domain_for_device(&dev->dev))
                        continue;
@@ -2474,6 +2857,7 @@ static struct dma_map_ops amd_iommu_dma_ops = {
 
 static unsigned device_dma_ops_init(void)
 {
+       struct iommu_dev_data *dev_data;
        struct pci_dev *pdev = NULL;
        unsigned unhandled = 0;
 
@@ -2483,7 +2867,12 @@ static unsigned device_dma_ops_init(void)
                        continue;
                }
 
-               pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
+               dev_data = get_dev_data(&pdev->dev);
+
+               if (!dev_data->passthrough)
+                       pdev->dev.archdata.dma_ops = &amd_iommu_dma_ops;
+               else
+                       pdev->dev.archdata.dma_ops = &nommu_dma_ops;
        }
 
        return unhandled;
@@ -2610,6 +2999,20 @@ out_err:
        return NULL;
 }
 
+static int __init alloc_passthrough_domain(void)
+{
+       if (pt_domain != NULL)
+               return 0;
+
+       /* allocate passthrough domain */
+       pt_domain = protection_domain_alloc();
+       if (!pt_domain)
+               return -ENOMEM;
+
+       pt_domain->mode = PAGE_MODE_NONE;
+
+       return 0;
+}
+
 static int amd_iommu_domain_init(struct iommu_domain *dom)
 {
        struct protection_domain *domain;
@@ -2623,6 +3026,8 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
        if (!domain->pt_root)
                goto out_free;
 
+       domain->iommu_domain = dom;
+
        dom->priv = domain;
 
        return 0;
@@ -2645,7 +3050,11 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
 
        BUG_ON(domain->dev_cnt != 0);
 
-       free_pagetable(domain);
+       if (domain->mode != PAGE_MODE_NONE)
+               free_pagetable(domain);
+
+       if (domain->flags & PD_IOMMUV2_MASK)
+               free_gcr3_table(domain);
 
        protection_domain_free(domain);
 
@@ -2702,13 +3111,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 }
 
 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
-                        phys_addr_t paddr, int gfp_order, int iommu_prot)
+                        phys_addr_t paddr, size_t page_size, int iommu_prot)
 {
-       unsigned long page_size = 0x1000UL << gfp_order;
        struct protection_domain *domain = dom->priv;
        int prot = 0;
        int ret;
 
+       if (domain->mode == PAGE_MODE_NONE)
+               return -EINVAL;
+
        if (iommu_prot & IOMMU_READ)
                prot |= IOMMU_PROT_IR;
        if (iommu_prot & IOMMU_WRITE)
@@ -2721,13 +3132,14 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
        return ret;
 }
 
-static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
-                          int gfp_order)
+static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
+                          size_t page_size)
 {
        struct protection_domain *domain = dom->priv;
-       unsigned long page_size, unmap_size;
+       size_t unmap_size;
 
-       page_size  = 0x1000UL << gfp_order;
+       if (domain->mode == PAGE_MODE_NONE)
+               return -EINVAL;
 
        mutex_lock(&domain->api_lock);
        unmap_size = iommu_unmap_page(domain, iova, page_size);
@@ -2735,7 +3147,7 @@ static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 
        domain_flush_tlb_pde(domain);
 
-       return get_order(unmap_size);
+       return unmap_size;
 }
 
 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2746,6 +3158,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
        phys_addr_t paddr;
        u64 *pte, __pte;
 
+       if (domain->mode == PAGE_MODE_NONE)
+               return iova;
+
        pte = fetch_pte(domain, iova);
 
        if (!pte || !IOMMU_PTE_PRESENT(*pte))
@@ -2773,6 +3188,26 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
        return 0;
 }
 
+static int amd_iommu_device_group(struct device *dev, unsigned int *groupid)
+{
+       struct iommu_dev_data *dev_data = dev->archdata.iommu;
+       struct pci_dev *pdev = to_pci_dev(dev);
+       u16 devid;
+
+       if (!dev_data)
+               return -ENODEV;
+
+       if (pdev->is_virtfn || !iommu_group_mf)
+               devid = dev_data->devid;
+       else
+               devid = calc_devid(pdev->bus->number,
+                                  PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
+
+       *groupid = amd_iommu_alias_table[devid];
+
+       return 0;
+}
+
 static struct iommu_ops amd_iommu_ops = {
        .domain_init = amd_iommu_domain_init,
        .domain_destroy = amd_iommu_domain_destroy,
@@ -2782,6 +3217,8 @@ static struct iommu_ops amd_iommu_ops = {
        .unmap = amd_iommu_unmap,
        .iova_to_phys = amd_iommu_iova_to_phys,
        .domain_has_cap = amd_iommu_domain_has_cap,
+       .device_group = amd_iommu_device_group,
+       .pgsize_bitmap  = AMD_IOMMU_PGSIZES,
 };
 
 /*****************************************************************************
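
To make the group_mf behavior in amd_iommu_device_group() above concrete
(hypothetical topology): with iommu=group_mf on the command line, functions
02:00.0 and 02:00.1 of a multi-function device both compute their devid from
PCI_DEVFN(PCI_SLOT(devfn), 0), i.e. that of 02:00.0, and therefore report the
same groupid out of the alias table; an SR-IOV virtual function (is_virtfn)
always keeps its own devid.
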
@@ -2796,21 +3233,23 @@ static struct iommu_ops amd_iommu_ops = {
 
 int __init amd_iommu_init_passthrough(void)
 {
-       struct amd_iommu *iommu;
+       struct iommu_dev_data *dev_data;
        struct pci_dev *dev = NULL;
+       struct amd_iommu *iommu;
        u16 devid;
+       int ret;
 
-       /* allocate passthrough domain */
-       pt_domain = protection_domain_alloc();
-       if (!pt_domain)
-               return -ENOMEM;
-
-       pt_domain->mode |= PAGE_MODE_NONE;
+       ret = alloc_passthrough_domain();
+       if (ret)
+               return ret;
 
        for_each_pci_dev(dev) {
                if (!check_device(&dev->dev))
                        continue;
 
+               dev_data = get_dev_data(&dev->dev);
+               dev_data->passthrough = true;
+
                devid = get_device_id(&dev->dev);
 
                iommu = amd_iommu_rlookup_table[devid];
@@ -2820,7 +3259,375 @@ int __init amd_iommu_init_passthrough(void)
                attach_device(&dev->dev, pt_domain);
        }
 
+       amd_iommu_stats_init();
+
        pr_info("AMD-Vi: Initialized for Passthrough Mode\n");
 
        return 0;
 }
+
+/* IOMMUv2 specific functions */
+int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
+{
+       return atomic_notifier_chain_register(&ppr_notifier, nb);
+}
+EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
+
+int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
+{
+       return atomic_notifier_chain_unregister(&ppr_notifier, nb);
+}
+EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
+
+void amd_iommu_domain_direct_map(struct iommu_domain *dom)
+{
+       struct protection_domain *domain = dom->priv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&domain->lock, flags);
+
+       /* Update data structure */
+       domain->mode    = PAGE_MODE_NONE;
+       domain->updated = true;
+
+       /* Make changes visible to IOMMUs */
+       update_domain(domain);
+
+       /* Page-table is not visible to IOMMU anymore, so free it */
+       free_pagetable(domain);
+
+       spin_unlock_irqrestore(&domain->lock, flags);
+}
+EXPORT_SYMBOL(amd_iommu_domain_direct_map);
+
+int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
+{
+       struct protection_domain *domain = dom->priv;
+       unsigned long flags;
+       int levels, ret;
+
+       if (pasids <= 0 || pasids > (PASID_MASK + 1))
+               return -EINVAL;
+
+       /* Number of GCR3 table levels required */
+       for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
+               levels += 1;
+
+       if (levels > amd_iommu_max_glx_val)
+               return -EINVAL;
+
+       spin_lock_irqsave(&domain->lock, flags);
+
+       /*
+        * Spare us the sanity checks of whether the devices already in
+        * the domain support IOMMUv2. Just require that the domain has
+        * no devices attached when it is switched into IOMMUv2 mode.
+        */
+       ret = -EBUSY;
+       if (domain->dev_cnt > 0 || domain->flags & PD_IOMMUV2_MASK)
+               goto out;
+
+       ret = -ENOMEM;
+       domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
+       if (domain->gcr3_tbl == NULL)
+               goto out;
+
+       domain->glx      = levels;
+       domain->flags   |= PD_IOMMUV2_MASK;
+       domain->updated  = true;
+
+       update_domain(domain);
+
+       ret = 0;
+
+out:
+       spin_unlock_irqrestore(&domain->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
+
+static int __flush_pasid(struct protection_domain *domain, int pasid,
+                        u64 address, bool size)
+{
+       struct iommu_dev_data *dev_data;
+       struct iommu_cmd cmd;
+       int i, ret;
+
+       if (!(domain->flags & PD_IOMMUV2_MASK))
+               return -EINVAL;
+
+       build_inv_iommu_pasid(&cmd, domain->id, pasid, address, size);
+
+       /*
+        * IOMMU TLB needs to be flushed before Device TLB to
+        * prevent device TLB refill from IOMMU TLB
+        */
+       for (i = 0; i < amd_iommus_present; ++i) {
+               if (domain->dev_iommu[i] == 0)
+                       continue;
+
+               ret = iommu_queue_command(amd_iommus[i], &cmd);
+               if (ret != 0)
+                       goto out;
+       }
+
+       /* Wait until IOMMU TLB flushes are complete */
+       domain_flush_complete(domain);
+
+       /* Now flush device TLBs */
+       list_for_each_entry(dev_data, &domain->dev_list, list) {
+               struct amd_iommu *iommu;
+               int qdep;
+
+               BUG_ON(!dev_data->ats.enabled);
+
+               qdep  = dev_data->ats.qdep;
+               iommu = amd_iommu_rlookup_table[dev_data->devid];
+
+               build_inv_iotlb_pasid(&cmd, dev_data->devid, pasid,
+                                     qdep, address, size);
+
+               ret = iommu_queue_command(iommu, &cmd);
+               if (ret != 0)
+                       goto out;
+       }
+
+       /* Wait until all device TLBs are flushed */
+       domain_flush_complete(domain);
+
+       ret = 0;
+
+out:
+
+       return ret;
+}
+
+static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
+                                 u64 address)
+{
+       INC_STATS_COUNTER(invalidate_iotlb);
+
+       return __flush_pasid(domain, pasid, address, false);
+}
+
+int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+                        u64 address)
+{
+       struct protection_domain *domain = dom->priv;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&domain->lock, flags);
+       ret = __amd_iommu_flush_page(domain, pasid, address);
+       spin_unlock_irqrestore(&domain->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(amd_iommu_flush_page);
+
+static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
+{
+       INC_STATS_COUNTER(invalidate_iotlb_all);
+
+       return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+                            true);
+}
+
+int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
+{
+       struct protection_domain *domain = dom->priv;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&domain->lock, flags);
+       ret = __amd_iommu_flush_tlb(domain, pasid);
+       spin_unlock_irqrestore(&domain->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(amd_iommu_flush_tlb);
+
+static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
+{
+       int index;
+       u64 *pte;
+
+       while (true) {
+
+               index = (pasid >> (9 * level)) & 0x1ff;
+               pte   = &root[index];
+
+               if (level == 0)
+                       break;
+
+               if (!(*pte & GCR3_VALID)) {
+                       if (!alloc)
+                               return NULL;
+
+                       root = (void *)get_zeroed_page(GFP_ATOMIC);
+                       if (root == NULL)
+                               return NULL;
+
+                       *pte = __pa(root) | GCR3_VALID;
+               }
+
+               root = __va(*pte & PAGE_MASK);
+
+               level -= 1;
+       }
+
+       return pte;
+}
+
+static int __set_gcr3(struct protection_domain *domain, int pasid,
+                     unsigned long cr3)
+{
+       u64 *pte;
+
+       if (domain->mode != PAGE_MODE_NONE)
+               return -EINVAL;
+
+       pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
+       if (pte == NULL)
+               return -ENOMEM;
+
+       *pte = (cr3 & PAGE_MASK) | GCR3_VALID;
+
+       return __amd_iommu_flush_tlb(domain, pasid);
+}
+
+static int __clear_gcr3(struct protection_domain *domain, int pasid)
+{
+       u64 *pte;
+
+       if (domain->mode != PAGE_MODE_NONE)
+               return -EINVAL;
+
+       pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
+       if (pte == NULL)
+               return 0;
+
+       *pte = 0;
+
+       return __amd_iommu_flush_tlb(domain, pasid);
+}
+
+int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+                             unsigned long cr3)
+{
+       struct protection_domain *domain = dom->priv;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&domain->lock, flags);
+       ret = __set_gcr3(domain, pasid, cr3);
+       spin_unlock_irqrestore(&domain->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
+
+int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
+{
+       struct protection_domain *domain = dom->priv;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&domain->lock, flags);
+       ret = __clear_gcr3(domain, pasid);
+       spin_unlock_irqrestore(&domain->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
+
+int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+                          int status, int tag)
+{
+       struct iommu_dev_data *dev_data;
+       struct amd_iommu *iommu;
+       struct iommu_cmd cmd;
+
+       INC_STATS_COUNTER(complete_ppr);
+
+       dev_data = get_dev_data(&pdev->dev);
+       iommu    = amd_iommu_rlookup_table[dev_data->devid];
+
+       build_complete_ppr(&cmd, dev_data->devid, pasid, status,
+                          tag, dev_data->pri_tlp);
+
+       return iommu_queue_command(iommu, &cmd);
+}
+EXPORT_SYMBOL(amd_iommu_complete_ppr);
+
+struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
+{
+       struct protection_domain *domain;
+
+       domain = get_domain(&pdev->dev);
+       if (IS_ERR(domain))
+               return NULL;
+
+       /* Only return IOMMUv2 domains */
+       if (!(domain->flags & PD_IOMMUV2_MASK))
+               return NULL;
+
+       return domain->iommu_domain;
+}
+EXPORT_SYMBOL(amd_iommu_get_v2_domain);
+
+void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
+{
+       struct iommu_dev_data *dev_data;
+
+       if (!amd_iommu_v2_supported())
+               return;
+
+       dev_data = get_dev_data(&pdev->dev);
+       dev_data->errata |= (1 << erratum);
+}
+EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
+
+int amd_iommu_device_info(struct pci_dev *pdev,
+                          struct amd_iommu_device_info *info)
+{
+       int max_pasids;
+       int pos;
+
+       if (pdev == NULL || info == NULL)
+               return -EINVAL;
+
+       if (!amd_iommu_v2_supported())
+               return -EINVAL;
+
+       memset(info, 0, sizeof(*info));
+
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS);
+       if (pos)
+               info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
+
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+       if (pos)
+               info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
+
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
+       if (pos) {
+               int features;
+
+               max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
+               max_pasids = min(max_pasids, (1 << 20));
+
+               info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+               info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
+
+               features = pci_pasid_features(pdev);
+               if (features & PCI_PASID_CAP_EXEC)
+                       info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
+               if (features & PCI_PASID_CAP_PRIV)
+                       info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(amd_iommu_device_info);
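
Taken together, the exported IOMMUv2 entry points above suggest the following
driver-side setup flow. This is a minimal sketch under assumed context (a bound
iommu_domain and a process page-table root; attach and teardown are omitted),
not a complete driver:

static int setup_iommuv2(struct pci_dev *pdev, struct iommu_domain *dom,
			 int pasid, unsigned long cr3)
{
	struct amd_iommu_device_info info;
	int ret;

	ret = amd_iommu_device_info(pdev, &info);
	if (ret || !(info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP))
		return -ENODEV;

	/* Drop the host-managed page-table; the domain now only carries
	 * GCR3-based translation state */
	amd_iommu_domain_direct_map(dom);

	ret = amd_iommu_domain_enable_v2(dom, info.max_pasids);
	if (ret)
		return ret;

	/* Bind the PASID to the process page-table */
	return amd_iommu_domain_set_gcr3(dom, pasid, cr3);
}
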
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 82d2410f4205d8e5977fce0a9a6ba59ad01b7088..bdea288dc185c619e7e944a2959ed215e2a22c10 100644
@@ -25,6 +25,7 @@
 #include <linux/interrupt.h>
 #include <linux/msi.h>
 #include <linux/amd-iommu.h>
+#include <linux/export.h>
 #include <asm/pci-direct.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -141,6 +142,12 @@ int amd_iommus_present;
 bool amd_iommu_np_cache __read_mostly;
 bool amd_iommu_iotlb_sup __read_mostly = true;
 
+u32 amd_iommu_max_pasids __read_mostly = ~0;
+
+bool amd_iommu_v2_present __read_mostly;
+
+bool amd_iommu_force_isolation __read_mostly;
+
 /*
  * The ACPI table parsing functions set this variable on an error
  */
@@ -299,6 +306,16 @@ static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
        writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 }
 
+static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
+{
+       u32 ctrl;
+
+       ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+       ctrl &= ~CTRL_INV_TO_MASK;
+       ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
+       writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
+}
+
 /* Function to enable the hardware */
 static void iommu_enable(struct amd_iommu *iommu)
 {
@@ -581,21 +598,69 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
        free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
 }
 
+/* allocates the memory where the IOMMU will log its PPR requests to */
+static u8 * __init alloc_ppr_log(struct amd_iommu *iommu)
+{
+       iommu->ppr_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+                                               get_order(PPR_LOG_SIZE));
+
+       if (iommu->ppr_log == NULL)
+               return NULL;
+
+       return iommu->ppr_log;
+}
+
+static void iommu_enable_ppr_log(struct amd_iommu *iommu)
+{
+       u64 entry;
+
+       if (iommu->ppr_log == NULL)
+               return;
+
+       entry = (u64)virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
+
+       memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
+                   &entry, sizeof(entry));
+
+       /* set head and tail to zero manually */
+       writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+       writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+
+       iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
+       iommu_feature_enable(iommu, CONTROL_PPR_EN);
+}
+
+static void __init free_ppr_log(struct amd_iommu *iommu)
+{
+       if (iommu->ppr_log == NULL)
+               return;
+
+       free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
+}
+
+static void iommu_enable_gt(struct amd_iommu *iommu)
+{
+       if (!iommu_feature(iommu, FEATURE_GT))
+               return;
+
+       iommu_feature_enable(iommu, CONTROL_GT_EN);
+}
+
 /* sets a specific bit in the device table entry. */
 static void set_dev_entry_bit(u16 devid, u8 bit)
 {
-       int i = (bit >> 5) & 0x07;
-       int _bit = bit & 0x1f;
+       int i = (bit >> 6) & 0x03;
+       int _bit = bit & 0x3f;
 
-       amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
+       amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
 }
 
 static int get_dev_entry_bit(u16 devid, u8 bit)
 {
-       int i = (bit >> 5) & 0x07;
-       int _bit = bit & 0x1f;
+       int i = (bit >> 6) & 0x03;
+       int _bit = bit & 0x3f;
 
-       return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
+       return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
 }
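
As a worked example of the new 64-bit layout: bit 96 now decomposes as
i = (96 >> 6) & 0x03 = 1 and _bit = 96 & 0x3f = 32, i.e. data[1] bit 32,
where the old 32-bit layout would have selected data[3] bit 0. This matches
the dump_dte_entry() change earlier, which drops from eight 32-bit words to
four 64-bit words per device table entry.
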
 
 
@@ -699,6 +764,32 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 
        iommu->features = ((u64)high << 32) | low;
 
+       if (iommu_feature(iommu, FEATURE_GT)) {
+               int glxval;
+               u32 pasids;
+               u64 shift;
+
+               shift   = iommu->features & FEATURE_PASID_MASK;
+               shift >>= FEATURE_PASID_SHIFT;
+               pasids  = (1 << shift);
+
+               amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids);
+
+               glxval   = iommu->features & FEATURE_GLXVAL_MASK;
+               glxval >>= FEATURE_GLXVAL_SHIFT;
+
+               if (amd_iommu_max_glx_val == -1)
+                       amd_iommu_max_glx_val = glxval;
+               else
+                       amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
+       }
+
+       if (iommu_feature(iommu, FEATURE_GT) &&
+           iommu_feature(iommu, FEATURE_PPR)) {
+               iommu->is_iommu_v2   = true;
+               amd_iommu_v2_present = true;
+       }
+
        if (!is_rd890_iommu(iommu->dev))
                return;
 
@@ -901,6 +992,7 @@ static void __init free_iommu_one(struct amd_iommu *iommu)
 {
        free_command_buffer(iommu);
        free_event_buffer(iommu);
+       free_ppr_log(iommu);
        iommu_unmap_mmio_space(iommu);
 }
 
@@ -964,6 +1056,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
        init_iommu_from_acpi(iommu, h);
        init_iommu_devices(iommu);
 
+       if (iommu_feature(iommu, FEATURE_PPR)) {
+               iommu->ppr_log = alloc_ppr_log(iommu);
+               if (!iommu->ppr_log)
+                       return -ENOMEM;
+       }
+
        if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
                amd_iommu_np_cache = true;
 
@@ -1050,6 +1148,9 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
        iommu->int_enabled = true;
        iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
 
+       if (iommu->ppr_log != NULL)
+               iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
+
        return 0;
 }
 
@@ -1209,6 +1310,9 @@ static void iommu_init_flags(struct amd_iommu *iommu)
         * make IOMMU memory accesses cache coherent
         */
        iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
+
+       /* Set IOTLB invalidation timeout to 1s */
+       iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
 }
 
 static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
@@ -1274,6 +1378,8 @@ static void enable_iommus(void)
                iommu_set_device_table(iommu);
                iommu_enable_command_buffer(iommu);
                iommu_enable_event_buffer(iommu);
+               iommu_enable_ppr_log(iommu);
+               iommu_enable_gt(iommu);
                iommu_set_exclusion_range(iommu);
                iommu_init_msi(iommu);
                iommu_enable(iommu);
@@ -1303,13 +1409,6 @@ static void amd_iommu_resume(void)
 
        /* re-load the hardware */
        enable_iommus();
-
-       /*
-        * we have to flush after the IOMMUs are enabled because a
-        * disabled IOMMU will never execute the commands we send
-        */
-       for_each_iommu(iommu)
-               iommu_flush_all_caches(iommu);
 }
 
 static int amd_iommu_suspend(void)
@@ -1560,6 +1659,8 @@ static int __init parse_amd_iommu_options(char *str)
                        amd_iommu_unmap_flush = true;
                if (strncmp(str, "off", 3) == 0)
                        amd_iommu_disabled = true;
+               if (strncmp(str, "force_isolation", 15) == 0)
+                       amd_iommu_force_isolation = true;
        }
 
        return 1;
@@ -1572,3 +1673,9 @@ IOMMU_INIT_FINISH(amd_iommu_detect,
                  gart_iommu_hole_init,
                  0,
                  0);
+
+bool amd_iommu_v2_supported(void)
+{
+       return amd_iommu_v2_present;
+}
+EXPORT_SYMBOL(amd_iommu_v2_supported);
index 7ffaa64410b0c9cda6a59f4fece123413d0e0118..1a7f41c6cc66b4eff17092ef62d46a41f93d676d 100644 (file)
@@ -31,6 +31,30 @@ extern int amd_iommu_init_devices(void);
 extern void amd_iommu_uninit_devices(void);
 extern void amd_iommu_init_notifier(void);
 extern void amd_iommu_init_api(void);
+
+/* IOMMUv2 specific functions */
+struct iommu_domain;
+
+extern bool amd_iommu_v2_supported(void);
+extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
+extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
+extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
+extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+                               u64 address);
+extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
+extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+                                    unsigned long cr3);
+extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
+
+#define PPR_SUCCESS                    0x0
+#define PPR_INVALID                    0x1
+#define PPR_FAILURE                    0xf
+
+extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+                                 int status, int tag);
+
 #ifndef CONFIG_AMD_IOMMU_STATS
 
 static inline void amd_iommu_stats_init(void) { }
index 5b9c5075e81a67bd11938d228b674628aa61933b..2452f3b7173619c449c04f025e2e2ff73cc44f29 100644 (file)
 #define MMIO_EXCL_BASE_OFFSET   0x0020
 #define MMIO_EXCL_LIMIT_OFFSET  0x0028
 #define MMIO_EXT_FEATURES      0x0030
+#define MMIO_PPR_LOG_OFFSET    0x0038
 #define MMIO_CMD_HEAD_OFFSET   0x2000
 #define MMIO_CMD_TAIL_OFFSET   0x2008
 #define MMIO_EVT_HEAD_OFFSET   0x2010
 #define MMIO_EVT_TAIL_OFFSET   0x2018
 #define MMIO_STATUS_OFFSET     0x2020
+#define MMIO_PPR_HEAD_OFFSET   0x2030
+#define MMIO_PPR_TAIL_OFFSET   0x2038
 
 
 /* Extended Feature Bits */
 #define FEATURE_HE             (1ULL<<8)
 #define FEATURE_PC             (1ULL<<9)
 
+#define FEATURE_PASID_SHIFT    32
+#define FEATURE_PASID_MASK     (0x1fULL << FEATURE_PASID_SHIFT)
+
+#define FEATURE_GLXVAL_SHIFT   14
+#define FEATURE_GLXVAL_MASK    (0x03ULL << FEATURE_GLXVAL_SHIFT)
+
+#define PASID_MASK             0x000fffff
+
 /* MMIO status bits */
-#define MMIO_STATUS_COM_WAIT_INT_MASK  0x04
+#define MMIO_STATUS_COM_WAIT_INT_MASK  (1 << 2)
+#define MMIO_STATUS_PPR_INT_MASK       (1 << 6)
 
 /* event logging constants */
 #define EVENT_ENTRY_SIZE       0x10
 #define CONTROL_EVT_LOG_EN      0x02ULL
 #define CONTROL_EVT_INT_EN      0x03ULL
 #define CONTROL_COMWAIT_EN      0x04ULL
+#define CONTROL_INV_TIMEOUT    0x05ULL
 #define CONTROL_PASSPW_EN       0x08ULL
 #define CONTROL_RESPASSPW_EN    0x09ULL
 #define CONTROL_COHERENT_EN     0x0aULL
 #define CONTROL_CMDBUF_EN       0x0cULL
 #define CONTROL_PPFLOG_EN       0x0dULL
 #define CONTROL_PPFINT_EN       0x0eULL
+#define CONTROL_PPR_EN          0x0fULL
+#define CONTROL_GT_EN           0x10ULL
+
+#define CTRL_INV_TO_MASK       (7 << CONTROL_INV_TIMEOUT)
+#define CTRL_INV_TO_NONE       0
+#define CTRL_INV_TO_1MS                1
+#define CTRL_INV_TO_10MS       2
+#define CTRL_INV_TO_100MS      3
+#define CTRL_INV_TO_1S         4
+#define CTRL_INV_TO_10S                5
+#define CTRL_INV_TO_100S       6
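
These encodings fill the 3-bit invalidation-timeout field at control-register bits 7:5 (CONTROL_INV_TIMEOUT is the bit offset, CTRL_INV_TO_MASK the field mask). The body of iommu_set_inv_tlb_timeout() is not part of this hunk; the following is only a minimal sketch of how such a helper could program the field, assuming the usual read-modify-write of the MMIO control register at MMIO_CONTROL_OFFSET:

    static void sketch_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
    {
            u32 ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);

            ctrl &= ~CTRL_INV_TO_MASK;      /* clear the old timeout value */
            ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
            writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
    }

Called with CTRL_INV_TO_1S (4), this would set the field to 0b100, i.e. a one-second timeout.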
 
 /* command specific defines */
 #define CMD_COMPL_WAIT          0x01
 #define CMD_INV_DEV_ENTRY       0x02
 #define CMD_INV_IOMMU_PAGES    0x03
 #define CMD_INV_IOTLB_PAGES    0x04
+#define CMD_COMPLETE_PPR       0x07
 #define CMD_INV_ALL            0x08
 
 #define CMD_COMPL_WAIT_STORE_MASK      0x01
 #define CMD_COMPL_WAIT_INT_MASK                0x02
 #define CMD_INV_IOMMU_PAGES_SIZE_MASK  0x01
 #define CMD_INV_IOMMU_PAGES_PDE_MASK   0x02
+#define CMD_INV_IOMMU_PAGES_GN_MASK    0x04
+
+#define PPR_STATUS_MASK                        0xf
+#define PPR_STATUS_SHIFT               12
 
 #define CMD_INV_IOMMU_ALL_PAGES_ADDRESS        0x7fffffffffffffffULL
 
 #define EVT_BUFFER_SIZE                8192 /* 512 entries */
 #define EVT_LEN_MASK           (0x9ULL << 56)
 
+/* Constants for PPR Log handling */
+#define PPR_LOG_ENTRIES                512
+#define PPR_LOG_SIZE_SHIFT     56
+#define PPR_LOG_SIZE_512       (0x9ULL << PPR_LOG_SIZE_SHIFT)
+#define PPR_ENTRY_SIZE         16
+#define PPR_LOG_SIZE           (PPR_ENTRY_SIZE * PPR_LOG_ENTRIES)
+
+#define PPR_REQ_TYPE(x)                (((x) >> 60) & 0xfULL)
+#define PPR_FLAGS(x)           (((x) >> 48) & 0xfffULL)
+#define PPR_DEVID(x)           ((x) & 0xffffULL)
+#define PPR_TAG(x)             (((x) >> 32) & 0x3ffULL)
+#define PPR_PASID1(x)          (((x) >> 16) & 0xffffULL)
+#define PPR_PASID2(x)          (((x) >> 42) & 0xfULL)
+#define PPR_PASID(x)           ((PPR_PASID2(x) << 16) | PPR_PASID1(x))
+
+#define PPR_REQ_FAULT          0x01
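
A PPR log entry stores the 20-bit PASID in two pieces, and PPR_PASID() stitches bits 19:16 (raw bits 45:42) on top of bits 15:0 (raw bits 31:16). A worked example with a made-up raw qword:

    /* hypothetical first qword of a PPR log entry */
    u64 raw   = (0x9ULL << 42) | (0xabcdULL << 16);  /* PASID2 = 0x9, PASID1 = 0xabcd */
    u32 pasid = PPR_PASID(raw);        /* (0x9 << 16) | 0xabcd = 0x9abcd */
    /* 0x9abcd fits within PASID_MASK (0x000fffff), as every PASID must */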
+
 #define PAGE_MODE_NONE    0x00
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
-#define DTE_FLAG_IOTLB 0x01
+#define DTE_FLAG_IOTLB (0x01UL << 32)
+#define DTE_FLAG_GV    (0x01ULL << 55)
+#define DTE_GLX_SHIFT  (56)
+#define DTE_GLX_MASK   (3)
+
+#define DTE_GCR3_VAL_A(x)      (((x) >> 12) & 0x00007ULL)
+#define DTE_GCR3_VAL_B(x)      (((x) >> 15) & 0x0ffffULL)
+#define DTE_GCR3_VAL_C(x)      (((x) >> 31) & 0xfffffULL)
+
+#define DTE_GCR3_INDEX_A       0
+#define DTE_GCR3_INDEX_B       1
+#define DTE_GCR3_INDEX_C       1
+
+#define DTE_GCR3_SHIFT_A       58
+#define DTE_GCR3_SHIFT_B       16
+#define DTE_GCR3_SHIFT_C       43
+
+#define GCR3_VALID             0x01ULL
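
The guest CR3 root pointer is likewise scattered across the device table entry: DTE_GCR3_VAL_A/B/C slice the physical address into three fields, and the INDEX/SHIFT pairs say which 64-bit DTE word each slice lands in and at what offset. A hedged sketch of how a helper could apply them; the real logic lives in amd_iommu.c's GCR3 routines, which are not shown in this hunk:

    /* illustrative only: scatter a gcr3 root pointer into a u64 data[4] DTE */
    static void sketch_set_dte_gcr3(u64 *data, unsigned long gcr3)
    {
            data[DTE_GCR3_INDEX_A] |= DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
            data[DTE_GCR3_INDEX_B] |= DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
            data[DTE_GCR3_INDEX_C] |= DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
    }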
 
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
 #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
                                              domain for an IOMMU */
 #define PD_PASSTHROUGH_MASK    (1UL << 2) /* domain has no page
                                              translation */
+#define PD_IOMMUV2_MASK                (1UL << 3) /* domain has gcr3 table */
 
 extern bool amd_iommu_dump;
 #define DUMP_printk(format, arg...)                                    \
@@ -285,6 +349,29 @@ extern bool amd_iommu_iotlb_sup;
 #define APERTURE_RANGE_INDEX(a)        ((a) >> APERTURE_RANGE_SHIFT)
 #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
 
+
+/*
+ * This struct is used to pass information about
+ * incoming PPR faults around.
+ */
+struct amd_iommu_fault {
+       u64 address;    /* IO virtual address of the fault */
+       u32 pasid;      /* Address space identifier */
+       u16 device_id;  /* Originating PCI device id */
+       u16 tag;        /* PPR tag */
+       u16 flags;      /* Fault flags */
+};
+
+#define PPR_FAULT_EXEC (1 << 1)
+#define PPR_FAULT_READ  (1 << 2)
+#define PPR_FAULT_WRITE (1 << 5)
+#define PPR_FAULT_USER  (1 << 6)
+#define PPR_FAULT_RSVD  (1 << 7)
+#define PPR_FAULT_GN    (1 << 8)
+
+struct iommu_domain;
+
 /*
  * This structure contains generic data for IOMMU protection domains
  * independent of their use.
@@ -297,11 +384,15 @@ struct protection_domain {
        u16 id;                 /* the domain id written to the device table */
        int mode;               /* paging mode (0-6 levels) */
        u64 *pt_root;           /* page table root pointer */
+       int glx;                /* Number of levels for GCR3 table */
+       u64 *gcr3_tbl;          /* Guest CR3 table */
        unsigned long flags;    /* flags to find out type of domain */
        bool updated;           /* complete domain flush required */
        unsigned dev_cnt;       /* devices assigned to this domain */
        unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
        void *priv;             /* private data */
+       struct iommu_domain *iommu_domain; /* Pointer to generic
+                                             domain structure */
 
 };
 
@@ -315,10 +406,15 @@ struct iommu_dev_data {
        struct protection_domain *domain; /* Domain the device is bound to */
        atomic_t bind;                    /* Domain attach reference count */
        u16 devid;                        /* PCI Device ID */
+       bool iommu_v2;                    /* Device can make use of IOMMUv2 */
+       bool passthrough;                 /* Default for device is pt_domain */
        struct {
                bool enabled;
                int qdep;
        } ats;                            /* ATS state */
+       bool pri_tlp;                     /* PASID TLB required for
+                                            PPR completions */
+       u32 errata;                       /* Bitmap for errata to apply */
 };
 
 /*
@@ -399,6 +495,9 @@ struct amd_iommu {
        /* Extended features */
        u64 features;
 
+       /* IOMMUv2 */
+       bool is_iommu_v2;
+
        /*
         * Capability pointer. There could be more than one IOMMU per PCI
         * device function if there are more than one AMD IOMMU capability
@@ -431,6 +530,9 @@ struct amd_iommu {
        /* MSI number for event interrupt */
        u16 evt_msi_num;
 
+       /* Base of the PPR log, if present */
+       u8 *ppr_log;
+
        /* true if interrupts for this IOMMU are already enabled */
        bool int_enabled;
 
@@ -484,7 +586,7 @@ extern struct list_head amd_iommu_pd_list;
  * Structure defining one entry in the device table
  */
 struct dev_table_entry {
-       u32 data[8];
+       u64 data[4];
 };
 
 /*
@@ -549,6 +651,16 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap;
  */
 extern bool amd_iommu_unmap_flush;
 
+/* Smallest number of PASIDs supported by any IOMMU in the system */
+extern u32 amd_iommu_max_pasids;
+
+extern bool amd_iommu_v2_present;
+
+extern bool amd_iommu_force_isolation;
+
+/* Max levels of glxval supported */
+extern int amd_iommu_max_glx_val;
+
 /* takes bus and device/function and returns the device id
  * FIXME: should that be in generic PCI code? */
 static inline u16 calc_devid(u8 bus, u8 devfn)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
new file mode 100644 (file)
index 0000000..8add9f1
--- /dev/null
@@ -0,0 +1,994 @@
+/*
+ * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/mmu_notifier.h>
+#include <linux/amd-iommu.h>
+#include <linux/mm_types.h>
+#include <linux/profile.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/iommu.h>
+#include <linux/wait.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+
+#include "amd_iommu_types.h"
+#include "amd_iommu_proto.h"
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
+
+#define MAX_DEVICES            0x10000
+#define PRI_QUEUE_SIZE         512
+
+struct pri_queue {
+       atomic_t inflight;
+       bool finish;
+       int status;
+};
+
+struct pasid_state {
+       struct list_head list;                  /* For global state-list */
+       atomic_t count;                         /* Reference count */
+       struct task_struct *task;               /* Task bound to this PASID */
+       struct mm_struct *mm;                   /* mm_struct for the faults */
+       struct mmu_notifier mn;                 /* mmu_notifier handle */
+       struct pri_queue pri[PRI_QUEUE_SIZE];   /* PRI tag states */
+       struct device_state *device_state;      /* Link to our device_state */
+       int pasid;                              /* PASID index */
+       spinlock_t lock;                        /* Protect pri_queues */
+       wait_queue_head_t wq;                   /* To wait for count == 0 */
+};
+
+struct device_state {
+       atomic_t count;
+       struct pci_dev *pdev;
+       struct pasid_state **states;
+       struct iommu_domain *domain;
+       int pasid_levels;
+       int max_pasids;
+       amd_iommu_invalid_ppr_cb inv_ppr_cb;
+       amd_iommu_invalidate_ctx inv_ctx_cb;
+       spinlock_t lock;
+       wait_queue_head_t wq;
+};
+
+struct fault {
+       struct work_struct work;
+       struct device_state *dev_state;
+       struct pasid_state *state;
+       struct mm_struct *mm;
+       u64 address;
+       u16 devid;
+       u32 pasid;      /* PASIDs are up to 20 bits, u16 would truncate */
+       u16 tag;
+       u16 finish;
+       u16 flags;
+};
+
+static struct device_state **state_table;
+static spinlock_t state_lock;
+
+/* List and lock for all pasid_states */
+static LIST_HEAD(pasid_state_list);
+static DEFINE_SPINLOCK(ps_lock);
+
+static struct workqueue_struct *iommu_wq;
+
+/*
+ * Empty page table - Used between
+ * mmu_notifier_invalidate_range_start and
+ * mmu_notifier_invalidate_range_end
+ */
+static u64 *empty_page_table;
+
+static void free_pasid_states(struct device_state *dev_state);
+static void unbind_pasid(struct device_state *dev_state, int pasid);
+static int task_exit(struct notifier_block *nb, unsigned long e, void *data);
+
+static u16 device_id(struct pci_dev *pdev)
+{
+       u16 devid;
+
+       devid = pdev->bus->number;
+       devid = (devid << 8) | pdev->devfn;
+
+       return devid;
+}
+
+static struct device_state *get_device_state(u16 devid)
+{
+       struct device_state *dev_state;
+       unsigned long flags;
+
+       spin_lock_irqsave(&state_lock, flags);
+       dev_state = state_table[devid];
+       if (dev_state != NULL)
+               atomic_inc(&dev_state->count);
+       spin_unlock_irqrestore(&state_lock, flags);
+
+       return dev_state;
+}
+
+static void free_device_state(struct device_state *dev_state)
+{
+       /*
+        * First detach device from domain - No more PRI requests will arrive
+        * from that device after it is unbound from the IOMMUv2 domain.
+        */
+       iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);
+
+       /* Everything is down now, free the IOMMUv2 domain */
+       iommu_domain_free(dev_state->domain);
+
+       /* Finally get rid of the device-state */
+       kfree(dev_state);
+}
+
+static void put_device_state(struct device_state *dev_state)
+{
+       if (atomic_dec_and_test(&dev_state->count))
+               wake_up(&dev_state->wq);
+}
+
+static void put_device_state_wait(struct device_state *dev_state)
+{
+       DEFINE_WAIT(wait);
+
+       prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
+       if (!atomic_dec_and_test(&dev_state->count))
+               schedule();
+       finish_wait(&dev_state->wq, &wait);
+
+       free_device_state(dev_state);
+}
+
+static struct notifier_block profile_nb = {
+       .notifier_call = task_exit,
+};
+
+static void link_pasid_state(struct pasid_state *pasid_state)
+{
+       spin_lock(&ps_lock);
+       list_add_tail(&pasid_state->list, &pasid_state_list);
+       spin_unlock(&ps_lock);
+}
+
+static void __unlink_pasid_state(struct pasid_state *pasid_state)
+{
+       list_del(&pasid_state->list);
+}
+
+static void unlink_pasid_state(struct pasid_state *pasid_state)
+{
+       spin_lock(&ps_lock);
+       __unlink_pasid_state(pasid_state);
+       spin_unlock(&ps_lock);
+}
+
+/* Must be called under dev_state->lock */
+static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
+                                                 int pasid, bool alloc)
+{
+       struct pasid_state **root, **ptr;
+       int level, index;
+
+       level = dev_state->pasid_levels;
+       root  = dev_state->states;
+
+       while (true) {
+
+               index = (pasid >> (9 * level)) & 0x1ff;
+               ptr   = &root[index];
+
+               if (level == 0)
+                       break;
+
+               if (*ptr == NULL) {
+                       if (!alloc)
+                               return NULL;
+
+                       *ptr = (void *)get_zeroed_page(GFP_ATOMIC);
+                       if (*ptr == NULL)
+                               return NULL;
+               }
+
+               root   = (struct pasid_state **)*ptr;
+               level -= 1;
+       }
+
+       return ptr;
+}
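
The states table is a small radix tree with 512 (2^9) slots per level; dev_state->pasid_levels, computed in amd_iommu_init_device() below, is the number of levels above the leaf. For 65536 supported PASIDs that computation yields pasid_levels == 1, and a lookup then peels off 9 bits per step. A worked example with an arbitrary PASID:

    /* pasid 0x2a7f with pasid_levels == 1 */
    int pasid = 0x2a7f;
    int idx1  = (pasid >> 9) & 0x1ff;  /* 0x15 - slot in the root table */
    int idx0  = pasid & 0x1ff;         /* 0x7f - slot in the leaf table */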
+
+static int set_pasid_state(struct device_state *dev_state,
+                          struct pasid_state *pasid_state,
+                          int pasid)
+{
+       struct pasid_state **ptr;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&dev_state->lock, flags);
+       ptr = __get_pasid_state_ptr(dev_state, pasid, true);
+
+       ret = -ENOMEM;
+       if (ptr == NULL)
+               goto out_unlock;
+
+       ret = -ENOMEM;
+       if (*ptr != NULL)
+               goto out_unlock;
+
+       *ptr = pasid_state;
+
+       ret = 0;
+
+out_unlock:
+       spin_unlock_irqrestore(&dev_state->lock, flags);
+
+       return ret;
+}
+
+static void clear_pasid_state(struct device_state *dev_state, int pasid)
+{
+       struct pasid_state **ptr;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dev_state->lock, flags);
+       ptr = __get_pasid_state_ptr(dev_state, pasid, false);
+
+       if (ptr == NULL)
+               goto out_unlock;
+
+       *ptr = NULL;
+
+out_unlock:
+       spin_unlock_irqrestore(&dev_state->lock, flags);
+}
+
+static struct pasid_state *get_pasid_state(struct device_state *dev_state,
+                                          int pasid)
+{
+       struct pasid_state **ptr, *ret = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dev_state->lock, flags);
+       ptr = __get_pasid_state_ptr(dev_state, pasid, false);
+
+       if (ptr == NULL)
+               goto out_unlock;
+
+       ret = *ptr;
+       if (ret)
+               atomic_inc(&ret->count);
+
+out_unlock:
+       spin_unlock_irqrestore(&dev_state->lock, flags);
+
+       return ret;
+}
+
+static void free_pasid_state(struct pasid_state *pasid_state)
+{
+       kfree(pasid_state);
+}
+
+static void put_pasid_state(struct pasid_state *pasid_state)
+{
+       if (atomic_dec_and_test(&pasid_state->count)) {
+               put_device_state(pasid_state->device_state);
+               wake_up(&pasid_state->wq);
+       }
+}
+
+static void put_pasid_state_wait(struct pasid_state *pasid_state)
+{
+       DEFINE_WAIT(wait);
+
+       prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
+
+       if (atomic_dec_and_test(&pasid_state->count))
+               put_device_state(pasid_state->device_state);
+       else
+               schedule();
+
+       finish_wait(&pasid_state->wq, &wait);
+       mmput(pasid_state->mm);
+       free_pasid_state(pasid_state);
+}
+
+static void __unbind_pasid(struct pasid_state *pasid_state)
+{
+       struct iommu_domain *domain;
+
+       domain = pasid_state->device_state->domain;
+
+       amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
+       clear_pasid_state(pasid_state->device_state, pasid_state->pasid);
+
+       /* Make sure no more pending faults are in the queue */
+       flush_workqueue(iommu_wq);
+
+       mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
+       put_pasid_state(pasid_state); /* Reference taken in bind() function */
+}
+
+static void unbind_pasid(struct device_state *dev_state, int pasid)
+{
+       struct pasid_state *pasid_state;
+
+       pasid_state = get_pasid_state(dev_state, pasid);
+       if (pasid_state == NULL)
+               return;
+
+       unlink_pasid_state(pasid_state);
+       __unbind_pasid(pasid_state);
+       put_pasid_state_wait(pasid_state); /* Reference taken in this function */
+}
+
+static void free_pasid_states_level1(struct pasid_state **tbl)
+{
+       int i;
+
+       for (i = 0; i < 512; ++i) {
+               if (tbl[i] == NULL)
+                       continue;
+
+               free_page((unsigned long)tbl[i]);
+       }
+}
+
+static void free_pasid_states_level2(struct pasid_state **tbl)
+{
+       struct pasid_state **ptr;
+       int i;
+
+       for (i = 0; i < 512; ++i) {
+               if (tbl[i] == NULL)
+                       continue;
+
+               ptr = (struct pasid_state **)tbl[i];
+               free_pasid_states_level1(ptr);
+       }
+}
+
+static void free_pasid_states(struct device_state *dev_state)
+{
+       struct pasid_state *pasid_state;
+       int i;
+
+       for (i = 0; i < dev_state->max_pasids; ++i) {
+               pasid_state = get_pasid_state(dev_state, i);
+               if (pasid_state == NULL)
+                       continue;
+
+               put_pasid_state(pasid_state);
+               unbind_pasid(dev_state, i);
+       }
+
+       if (dev_state->pasid_levels == 2)
+               free_pasid_states_level2(dev_state->states);
+       else if (dev_state->pasid_levels == 1)
+               free_pasid_states_level1(dev_state->states);
+       else if (dev_state->pasid_levels != 0)
+               BUG();
+
+       free_page((unsigned long)dev_state->states);
+}
+
+static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
+{
+       return container_of(mn, struct pasid_state, mn);
+}
+
+static void __mn_flush_page(struct mmu_notifier *mn,
+                           unsigned long address)
+{
+       struct pasid_state *pasid_state;
+       struct device_state *dev_state;
+
+       pasid_state = mn_to_state(mn);
+       dev_state   = pasid_state->device_state;
+
+       amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
+}
+
+static int mn_clear_flush_young(struct mmu_notifier *mn,
+                               struct mm_struct *mm,
+                               unsigned long address)
+{
+       __mn_flush_page(mn, address);
+
+       return 0;
+}
+
+static void mn_change_pte(struct mmu_notifier *mn,
+                         struct mm_struct *mm,
+                         unsigned long address,
+                         pte_t pte)
+{
+       __mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_page(struct mmu_notifier *mn,
+                              struct mm_struct *mm,
+                              unsigned long address)
+{
+       __mn_flush_page(mn, address);
+}
+
+static void mn_invalidate_range_start(struct mmu_notifier *mn,
+                                     struct mm_struct *mm,
+                                     unsigned long start, unsigned long end)
+{
+       struct pasid_state *pasid_state;
+       struct device_state *dev_state;
+
+       pasid_state = mn_to_state(mn);
+       dev_state   = pasid_state->device_state;
+
+       amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+                                 __pa(empty_page_table));
+}
+
+static void mn_invalidate_range_end(struct mmu_notifier *mn,
+                                   struct mm_struct *mm,
+                                   unsigned long start, unsigned long end)
+{
+       struct pasid_state *pasid_state;
+       struct device_state *dev_state;
+
+       pasid_state = mn_to_state(mn);
+       dev_state   = pasid_state->device_state;
+
+       amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
+                                 __pa(pasid_state->mm->pgd));
+}
+
+static struct mmu_notifier_ops iommu_mn = {
+       .clear_flush_young      = mn_clear_flush_young,
+       .change_pte             = mn_change_pte,
+       .invalidate_page        = mn_invalidate_page,
+       .invalidate_range_start = mn_invalidate_range_start,
+       .invalidate_range_end   = mn_invalidate_range_end,
+};
+
+static void set_pri_tag_status(struct pasid_state *pasid_state,
+                              u16 tag, int status)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&pasid_state->lock, flags);
+       pasid_state->pri[tag].status = status;
+       spin_unlock_irqrestore(&pasid_state->lock, flags);
+}
+
+static void finish_pri_tag(struct device_state *dev_state,
+                          struct pasid_state *pasid_state,
+                          u16 tag)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&pasid_state->lock, flags);
+       if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
+           pasid_state->pri[tag].finish) {
+               amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
+                                      pasid_state->pri[tag].status, tag);
+               pasid_state->pri[tag].finish = false;
+               pasid_state->pri[tag].status = PPR_SUCCESS;
+       }
+       spin_unlock_irqrestore(&pasid_state->lock, flags);
+}
+
+static void do_fault(struct work_struct *work)
+{
+       struct fault *fault = container_of(work, struct fault, work);
+       int npages, write;
+       struct page *page;
+
+       write = !!(fault->flags & PPR_FAULT_WRITE);
+
+       npages = get_user_pages(fault->state->task, fault->state->mm,
+                               fault->address, 1, write, 0, &page, NULL);
+
+       if (npages == 1) {
+               put_page(page);
+       } else if (fault->dev_state->inv_ppr_cb) {
+               int status;
+
+               status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
+                                                     fault->pasid,
+                                                     fault->address,
+                                                     fault->flags);
+               switch (status) {
+               case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
+                       set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
+                       break;
+               case AMD_IOMMU_INV_PRI_RSP_INVALID:
+                       set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+                       break;
+               case AMD_IOMMU_INV_PRI_RSP_FAIL:
+                       set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
+                       break;
+               default:
+                       BUG();
+               }
+       } else {
+               set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
+       }
+
+       finish_pri_tag(fault->dev_state, fault->state, fault->tag);
+
+       put_pasid_state(fault->state);
+
+       kfree(fault);
+}
+
+static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
+{
+       struct amd_iommu_fault *iommu_fault;
+       struct pasid_state *pasid_state;
+       struct device_state *dev_state;
+       unsigned long flags;
+       struct fault *fault;
+       bool finish;
+       u16 tag;
+       int ret;
+
+       iommu_fault = data;
+       tag         = iommu_fault->tag & 0x1ff;
+       finish      = (iommu_fault->tag >> 9) & 1;
+
+       ret = NOTIFY_DONE;
+       dev_state = get_device_state(iommu_fault->device_id);
+       if (dev_state == NULL)
+               goto out;
+
+       pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
+       if (pasid_state == NULL) {
+               /* We know the device but not the PASID -> send INVALID */
+               amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
+                                      PPR_INVALID, tag);
+               goto out_drop_state;
+       }
+
+       spin_lock_irqsave(&pasid_state->lock, flags);
+       atomic_inc(&pasid_state->pri[tag].inflight);
+       if (finish)
+               pasid_state->pri[tag].finish = true;
+       spin_unlock_irqrestore(&pasid_state->lock, flags);
+
+       fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
+       if (fault == NULL) {
+               /* We are OOM - send success and let the device re-fault */
+               finish_pri_tag(dev_state, pasid_state, tag);
+               goto out_drop_state;
+       }
+
+       fault->dev_state = dev_state;
+       fault->address   = iommu_fault->address;
+       fault->state     = pasid_state;
+       fault->tag       = tag;
+       fault->finish    = finish;
+       fault->flags     = iommu_fault->flags;
+       INIT_WORK(&fault->work, do_fault);
+
+       queue_work(iommu_wq, &fault->work);
+
+       ret = NOTIFY_OK;
+
+out_drop_state:
+       put_device_state(dev_state);
+
+out:
+       return ret;
+}
+
+static struct notifier_block ppr_nb = {
+       .notifier_call = ppr_notifier,
+};
+
+static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
+{
+       struct pasid_state *pasid_state;
+       struct task_struct *task;
+
+       task = data;
+
+       /*
+        * Using this notifier is a hack - but there is no other choice
+        * at the moment. What I really want is a sleeping notifier that
+        * is called when an MM goes down. But such a notifier doesn't
+        * exist yet. The notifier needs to sleep because it has to make
+        * sure that the device does not use the PASID and the address
+        * space anymore before it is destroyed. This includes waiting
+        * for pending PRI requests to pass the workqueue. The
+        * MMU-Notifiers would be a good fit, but they use RCU and so
+        * they are not allowed to sleep. Let's see how we can solve this
+        * in a more intelligent way in the future.
+        */
+again:
+       spin_lock(&ps_lock);
+       list_for_each_entry(pasid_state, &pasid_state_list, list) {
+               struct device_state *dev_state;
+               int pasid;
+
+               if (pasid_state->task != task)
+                       continue;
+
+               /* Drop Lock and unbind */
+               spin_unlock(&ps_lock);
+
+               dev_state = pasid_state->device_state;
+               pasid     = pasid_state->pasid;
+
+               if (pasid_state->device_state->inv_ctx_cb)
+                       dev_state->inv_ctx_cb(dev_state->pdev, pasid);
+
+               unbind_pasid(dev_state, pasid);
+
+               /* Task may be in the list multiple times */
+               goto again;
+       }
+       spin_unlock(&ps_lock);
+
+       return NOTIFY_OK;
+}
+
+int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+                        struct task_struct *task)
+{
+       struct pasid_state *pasid_state;
+       struct device_state *dev_state;
+       u16 devid;
+       int ret;
+
+       might_sleep();
+
+       if (!amd_iommu_v2_supported())
+               return -ENODEV;
+
+       devid     = device_id(pdev);
+       dev_state = get_device_state(devid);
+
+       if (dev_state == NULL)
+               return -EINVAL;
+
+       ret = -EINVAL;
+       if (pasid < 0 || pasid >= dev_state->max_pasids)
+               goto out;
+
+       ret = -ENOMEM;
+       pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
+       if (pasid_state == NULL)
+               goto out;
+
+       atomic_set(&pasid_state->count, 1);
+       init_waitqueue_head(&pasid_state->wq);
+       pasid_state->task         = task;
+       pasid_state->mm           = get_task_mm(task);
+       pasid_state->device_state = dev_state;
+       pasid_state->pasid        = pasid;
+       pasid_state->mn.ops       = &iommu_mn;
+
+       if (pasid_state->mm == NULL)
+               goto out_free;
+
+       mmu_notifier_register(&pasid_state->mn, pasid_state->mm);
+
+       ret = set_pasid_state(dev_state, pasid_state, pasid);
+       if (ret)
+               goto out_unregister;
+
+       ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
+                                       __pa(pasid_state->mm->pgd));
+       if (ret)
+               goto out_clear_state;
+
+       link_pasid_state(pasid_state);
+
+       return 0;
+
+out_clear_state:
+       clear_pasid_state(dev_state, pasid);
+
+out_unregister:
+       mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
+
+out_free:
+       /* Drop the reference taken with get_task_mm() above, if any */
+       if (pasid_state->mm)
+               mmput(pasid_state->mm);
+       free_pasid_state(pasid_state);
+
+out:
+       put_device_state(dev_state);
+
+       return ret;
+}
+EXPORT_SYMBOL(amd_iommu_bind_pasid);
+
+void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
+{
+       struct device_state *dev_state;
+       u16 devid;
+
+       might_sleep();
+
+       if (!amd_iommu_v2_supported())
+               return;
+
+       devid = device_id(pdev);
+       dev_state = get_device_state(devid);
+       if (dev_state == NULL)
+               return;
+
+       if (pasid < 0 || pasid >= dev_state->max_pasids)
+               goto out;
+
+       unbind_pasid(dev_state, pasid);
+
+out:
+       put_device_state(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_unbind_pasid);
+
+int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
+{
+       struct device_state *dev_state;
+       unsigned long flags;
+       int ret, tmp;
+       u16 devid;
+
+       might_sleep();
+
+       if (!amd_iommu_v2_supported())
+               return -ENODEV;
+
+       if (pasids <= 0 || pasids > (PASID_MASK + 1))
+               return -EINVAL;
+
+       devid = device_id(pdev);
+
+       dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
+       if (dev_state == NULL)
+               return -ENOMEM;
+
+       spin_lock_init(&dev_state->lock);
+       init_waitqueue_head(&dev_state->wq);
+       dev_state->pdev = pdev;
+
+       tmp = pasids;
+       for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
+               dev_state->pasid_levels += 1;
+
+       atomic_set(&dev_state->count, 1);
+       dev_state->max_pasids = pasids;
+
+       ret = -ENOMEM;
+       dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
+       if (dev_state->states == NULL)
+               goto out_free_dev_state;
+
+       dev_state->domain = iommu_domain_alloc(&pci_bus_type);
+       if (dev_state->domain == NULL)
+               goto out_free_states;
+
+       amd_iommu_domain_direct_map(dev_state->domain);
+
+       ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
+       if (ret)
+               goto out_free_domain;
+
+       ret = iommu_attach_device(dev_state->domain, &pdev->dev);
+       if (ret != 0)
+               goto out_free_domain;
+
+       spin_lock_irqsave(&state_lock, flags);
+
+       if (state_table[devid] != NULL) {
+               spin_unlock_irqrestore(&state_lock, flags);
+               ret = -EBUSY;
+               goto out_free_domain;
+       }
+
+       state_table[devid] = dev_state;
+
+       spin_unlock_irqrestore(&state_lock, flags);
+
+       return 0;
+
+out_free_domain:
+       iommu_domain_free(dev_state->domain);
+
+out_free_states:
+       free_page((unsigned long)dev_state->states);
+
+out_free_dev_state:
+       kfree(dev_state);
+
+       return ret;
+}
+EXPORT_SYMBOL(amd_iommu_init_device);
+
+void amd_iommu_free_device(struct pci_dev *pdev)
+{
+       struct device_state *dev_state;
+       unsigned long flags;
+       u16 devid;
+
+       if (!amd_iommu_v2_supported())
+               return;
+
+       devid = device_id(pdev);
+
+       spin_lock_irqsave(&state_lock, flags);
+
+       dev_state = state_table[devid];
+       if (dev_state == NULL) {
+               spin_unlock_irqrestore(&state_lock, flags);
+               return;
+       }
+
+       state_table[devid] = NULL;
+
+       spin_unlock_irqrestore(&state_lock, flags);
+
+       /* Get rid of any remaining pasid states */
+       free_pasid_states(dev_state);
+
+       put_device_state_wait(dev_state);
+}
+EXPORT_SYMBOL(amd_iommu_free_device);
+
+int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
+                                amd_iommu_invalid_ppr_cb cb)
+{
+       struct device_state *dev_state;
+       unsigned long flags;
+       u16 devid;
+       int ret;
+
+       if (!amd_iommu_v2_supported())
+               return -ENODEV;
+
+       devid = device_id(pdev);
+
+       spin_lock_irqsave(&state_lock, flags);
+
+       ret = -EINVAL;
+       dev_state = state_table[devid];
+       if (dev_state == NULL)
+               goto out_unlock;
+
+       dev_state->inv_ppr_cb = cb;
+
+       ret = 0;
+
+out_unlock:
+       spin_unlock_irqrestore(&state_lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
+
+int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
+                                   amd_iommu_invalidate_ctx cb)
+{
+       struct device_state *dev_state;
+       unsigned long flags;
+       u16 devid;
+       int ret;
+
+       if (!amd_iommu_v2_supported())
+               return -ENODEV;
+
+       devid = device_id(pdev);
+
+       spin_lock_irqsave(&state_lock, flags);
+
+       ret = -EINVAL;
+       dev_state = state_table[devid];
+       if (dev_state == NULL)
+               goto out_unlock;
+
+       dev_state->inv_ctx_cb = cb;
+
+       ret = 0;
+
+out_unlock:
+       spin_unlock_irqrestore(&state_lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
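
Taken together, the exported functions give a device driver a simple lifecycle: set up per-device state and callbacks once, then bind and unbind PASIDs as address spaces come and go. A minimal consumer sketch; my_invalid_ppr_cb, my_invalidate_ctx_cb and the function names are hypothetical placeholders, not part of the API:

    /* hypothetical IOMMUv2 consumer, error handling kept minimal */
    static int my_drv_setup(struct pci_dev *pdev, struct task_struct *task)
    {
            int ret;

            ret = amd_iommu_init_device(pdev, 16);     /* up to 16 PASIDs */
            if (ret)
                    return ret;

            amd_iommu_set_invalid_ppr_cb(pdev, my_invalid_ppr_cb);
            amd_iommu_set_invalidate_ctx_cb(pdev, my_invalidate_ctx_cb);

            ret = amd_iommu_bind_pasid(pdev, 0, task); /* bind PASID 0 */
            if (ret)
                    amd_iommu_free_device(pdev);

            return ret;
    }

    static void my_drv_teardown(struct pci_dev *pdev)
    {
            amd_iommu_unbind_pasid(pdev, 0);
            amd_iommu_free_device(pdev);
    }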
+
+static int __init amd_iommu_v2_init(void)
+{
+       size_t state_table_size;
+       int ret;
+
+       pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>");
+
+       spin_lock_init(&state_lock);
+
+       state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+       state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+                                              get_order(state_table_size));
+       if (state_table == NULL)
+               return -ENOMEM;
+
+       ret = -ENOMEM;
+       iommu_wq = create_workqueue("amd_iommu_v2");
+       if (iommu_wq == NULL)
+               goto out_free;
+
+       ret = -ENOMEM;
+       empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
+       if (empty_page_table == NULL)
+               goto out_destroy_wq;
+
+       amd_iommu_register_ppr_notifier(&ppr_nb);
+       profile_event_register(PROFILE_TASK_EXIT, &profile_nb);
+
+       return 0;
+
+out_destroy_wq:
+       destroy_workqueue(iommu_wq);
+
+out_free:
+       free_pages((unsigned long)state_table, get_order(state_table_size));
+
+       return ret;
+}
+
+static void __exit amd_iommu_v2_exit(void)
+{
+       struct device_state *dev_state;
+       size_t state_table_size;
+       int i;
+
+       profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
+       amd_iommu_unregister_ppr_notifier(&ppr_nb);
+
+       flush_workqueue(iommu_wq);
+
+       /*
+        * The loop below might call flush_workqueue(), so call
+        * destroy_workqueue() after it
+        */
+       for (i = 0; i < MAX_DEVICES; ++i) {
+               dev_state = get_device_state(i);
+
+               if (dev_state == NULL)
+                       continue;
+
+               WARN_ON_ONCE(1);
+
+               put_device_state(dev_state);
+               amd_iommu_free_device(dev_state->pdev);
+       }
+
+       destroy_workqueue(iommu_wq);
+
+       state_table_size = MAX_DEVICES * sizeof(struct device_state *);
+       free_pages((unsigned long)state_table, get_order(state_table_size));
+
+       free_page((unsigned long)empty_page_table);
+}
+
+module_init(amd_iommu_v2_init);
+module_exit(amd_iommu_v2_exit);
index 31053a951c3452640d7dd4e2a6f4c9c2460dee63..c9c6053198d403626d08e2d0826fde7b633baef9 100644 (file)
 #define LEVEL_STRIDE           (9)
 #define LEVEL_MASK             (((u64)1 << LEVEL_STRIDE) - 1)
 
+/*
+ * This bitmap is used to advertise the page sizes our hardware supports
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * Traditionally the IOMMU core just handed us the mappings directly,
+ * after making sure the size is a power-of-two multiple of 4KiB and
+ * that the mapping has natural alignment.
+ *
+ * To retain this behavior, we currently advertise that we support
+ * all page sizes that are a power-of-two multiple of 4KiB.
+ *
+ * If at some point we'd like to utilize the IOMMU core's new behavior,
+ * we could change this to advertise the real page sizes we support.
+ */
+#define INTEL_IOMMU_PGSIZES    (~0xFFFUL)
+
 static inline int agaw_to_level(int agaw)
 {
        return agaw + 2;
@@ -3979,12 +3997,11 @@ static void intel_iommu_detach_device(struct iommu_domain *domain,
 
 static int intel_iommu_map(struct iommu_domain *domain,
                           unsigned long iova, phys_addr_t hpa,
-                          int gfp_order, int iommu_prot)
+                          size_t size, int iommu_prot)
 {
        struct dmar_domain *dmar_domain = domain->priv;
        u64 max_addr;
        int prot = 0;
-       size_t size;
        int ret;
 
        if (iommu_prot & IOMMU_READ)
@@ -3994,7 +4011,6 @@ static int intel_iommu_map(struct iommu_domain *domain,
        if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
                prot |= DMA_PTE_SNP;
 
-       size     = PAGE_SIZE << gfp_order;
        max_addr = iova + size;
        if (dmar_domain->max_addr < max_addr) {
                u64 end;
@@ -4017,11 +4033,10 @@ static int intel_iommu_map(struct iommu_domain *domain,
        return ret;
 }
 
-static int intel_iommu_unmap(struct iommu_domain *domain,
-                            unsigned long iova, int gfp_order)
+static size_t intel_iommu_unmap(struct iommu_domain *domain,
+                            unsigned long iova, size_t size)
 {
        struct dmar_domain *dmar_domain = domain->priv;
-       size_t size = PAGE_SIZE << gfp_order;
        int order;
 
        order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
@@ -4030,7 +4045,7 @@ static int intel_iommu_unmap(struct iommu_domain *domain,
        if (dmar_domain->max_addr == iova + size)
                dmar_domain->max_addr = iova;
 
-       return order;
+       return PAGE_SIZE << order;
 }
 
 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -4060,6 +4075,54 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
        return 0;
 }
 
+/*
+ * Group numbers are arbitrary.  Devices with the same group number
+ * indicate that the iommu cannot differentiate between them.  To avoid
+ * tracking used groups we just use the seg|bus|devfn of the lowest
+ * level at which we're able to differentiate devices.
+ */
+static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct pci_dev *bridge;
+       union {
+               struct {
+                       u8 devfn;
+                       u8 bus;
+                       u16 segment;
+               } pci;
+               u32 group;
+       } id;
+
+       if (iommu_no_mapping(dev))
+               return -ENODEV;
+
+       id.pci.segment = pci_domain_nr(pdev->bus);
+       id.pci.bus = pdev->bus->number;
+       id.pci.devfn = pdev->devfn;
+
+       if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
+               return -ENODEV;
+
+       bridge = pci_find_upstream_pcie_bridge(pdev);
+       if (bridge) {
+               if (pci_is_pcie(bridge)) {
+                       id.pci.bus = bridge->subordinate->number;
+                       id.pci.devfn = 0;
+               } else {
+                       id.pci.bus = bridge->bus->number;
+                       id.pci.devfn = bridge->devfn;
+               }
+       }
+
+       if (!pdev->is_virtfn && iommu_group_mf)
+               id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
+
+       *groupid = id.group;
+
+       return 0;
+}
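
The union packs devfn into byte 0, bus into byte 1 and the segment into bytes 2-3, so on a little-endian machine the resulting group id reads as seg << 16 | bus << 8 | devfn; any consistent packing would do, since the values only need to be unique. For example, a hypothetical device 0000:03:00.1 with no upstream bridge, assuming iommu_group_mf is unset:

    /* id.pci = { .devfn = PCI_DEVFN(0, 1), .bus = 0x03, .segment = 0 } */
    /* little-endian: id.group == (0 << 16) | (0x03 << 8) | 0x01 == 0x0301 */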
+
 static struct iommu_ops intel_iommu_ops = {
        .domain_init    = intel_iommu_domain_init,
        .domain_destroy = intel_iommu_domain_destroy,
@@ -4069,6 +4132,8 @@ static struct iommu_ops intel_iommu_ops = {
        .unmap          = intel_iommu_unmap,
        .iova_to_phys   = intel_iommu_iova_to_phys,
        .domain_has_cap = intel_iommu_domain_has_cap,
+       .device_group   = intel_iommu_device_group,
+       .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
 };
 
 static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
index 5b5fa5cdaa3108da74b7358ae187dd4ee8a00181..2198b2dbbcd3ad964b03a13dd6fd8dd336f27bed 100644 (file)
@@ -16,6 +16,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
+#define pr_fmt(fmt)    "%s: " fmt, __func__
+
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/errno.h>
 #include <linux/iommu.h>
 
+static ssize_t show_iommu_group(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       unsigned int groupid;
+
+       if (iommu_device_group(dev, &groupid))
+               return 0;
+
+       return sprintf(buf, "%u\n", groupid);
+}
+static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL);
+
+static int add_iommu_group(struct device *dev, void *data)
+{
+       unsigned int groupid;
+
+       if (iommu_device_group(dev, &groupid) == 0)
+               return device_create_file(dev, &dev_attr_iommu_group);
+
+       return 0;
+}
+
+static int remove_iommu_group(struct device *dev)
+{
+       unsigned int groupid;
+
+       if (iommu_device_group(dev, &groupid) == 0)
+               device_remove_file(dev, &dev_attr_iommu_group);
+
+       return 0;
+}
+
+static int iommu_device_notifier(struct notifier_block *nb,
+                                unsigned long action, void *data)
+{
+       struct device *dev = data;
+
+       if (action == BUS_NOTIFY_ADD_DEVICE)
+               return add_iommu_group(dev, NULL);
+       else if (action == BUS_NOTIFY_DEL_DEVICE)
+               return remove_iommu_group(dev);
+
+       return 0;
+}
+
+static struct notifier_block iommu_device_nb = {
+       .notifier_call = iommu_device_notifier,
+};
+
 static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops)
 {
+       bus_register_notifier(bus, &iommu_device_nb);
+       bus_for_each_dev(bus, NULL, NULL, add_iommu_group);
 }
 
 /**
@@ -157,32 +210,134 @@ int iommu_domain_has_cap(struct iommu_domain *domain,
 EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
 
 int iommu_map(struct iommu_domain *domain, unsigned long iova,
-             phys_addr_t paddr, int gfp_order, int prot)
+             phys_addr_t paddr, size_t size, int prot)
 {
-       size_t size;
+       unsigned long orig_iova = iova;
+       unsigned int min_pagesz;
+       size_t orig_size = size;
+       int ret = 0;
 
        if (unlikely(domain->ops->map == NULL))
                return -ENODEV;
 
-       size         = PAGE_SIZE << gfp_order;
+       /* find out the minimum page size supported */
+       min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+       /*
+        * both the virtual address and the physical one, as well as
+        * the size of the mapping, must be aligned (at least) to the
+        * size of the smallest page supported by the hardware
+        */
+       if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+               pr_err("unaligned: iova 0x%lx pa 0x%lx size 0x%lx min_pagesz "
+                       "0x%x\n", iova, (unsigned long)paddr,
+                       (unsigned long)size, min_pagesz);
+               return -EINVAL;
+       }
+
+       pr_debug("map: iova 0x%lx pa 0x%lx size 0x%lx\n", iova,
+                               (unsigned long)paddr, (unsigned long)size);
+
+       while (size) {
+               unsigned long pgsize, addr_merge = iova | paddr;
+               unsigned int pgsize_idx;
+
+               /* Max page size that still fits into 'size' */
+               pgsize_idx = __fls(size);
+
+               /* need to consider alignment requirements? */
+               if (likely(addr_merge)) {
+                       /* Max page size allowed by both iova and paddr */
+                       unsigned int align_pgsize_idx = __ffs(addr_merge);
+
+                       pgsize_idx = min(pgsize_idx, align_pgsize_idx);
+               }
+
+               /* build a mask of acceptable page sizes */
+               pgsize = (1UL << (pgsize_idx + 1)) - 1;
 
-       BUG_ON(!IS_ALIGNED(iova | paddr, size));
+               /* throw away page sizes not supported by the hardware */
+               pgsize &= domain->ops->pgsize_bitmap;
 
-       return domain->ops->map(domain, iova, paddr, gfp_order, prot);
+               /* make sure we're still sane */
+               BUG_ON(!pgsize);
+
+               /* pick the biggest page */
+               pgsize_idx = __fls(pgsize);
+               pgsize = 1UL << pgsize_idx;
+
+               pr_debug("mapping: iova 0x%lx pa 0x%lx pgsize %lu\n", iova,
+                                       (unsigned long)paddr, pgsize);
+
+               ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
+               if (ret)
+                       break;
+
+               iova += pgsize;
+               paddr += pgsize;
+               size -= pgsize;
+       }
+
+       /* unroll mapping in case something went wrong */
+       if (ret)
+               iommu_unmap(domain, orig_iova, orig_size - size);
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_map);
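
The loop above picks, at each step, the largest page that fits the remaining size, respects the combined alignment of iova and paddr, and is advertised in the driver's pgsize_bitmap. A worked example assuming a hypothetical bitmap of SZ_4K | SZ_64K:

    unsigned long iova = 0x10000, paddr = 0x20000;
    size_t size = 0x11000;   /* 68 KiB */
    /* pass 1: __fls(size) = 16, __ffs(iova | paddr) = 16, so candidates
     *         are sizes up to 64 KiB; masked with the bitmap -> map 64 KiB
     * pass 2: 0x1000 bytes remain; only a 4 KiB page still fits -> map it
     */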
 
-int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
+size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
 {
-       size_t size;
+       size_t unmapped_page, unmapped = 0;
+       unsigned int min_pagesz;
 
        if (unlikely(domain->ops->unmap == NULL))
                return -ENODEV;
 
-       size         = PAGE_SIZE << gfp_order;
+       /* find out the minimum page size supported */
+       min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+       /*
+        * The virtual address, as well as the size of the mapping, must be
+        * aligned (at least) to the size of the smallest page supported
+        * by the hardware
+        */
+       if (!IS_ALIGNED(iova | size, min_pagesz)) {
+               pr_err("unaligned: iova 0x%lx size 0x%lx min_pagesz 0x%x\n",
+                                       iova, (unsigned long)size, min_pagesz);
+               return -EINVAL;
+       }
+
+       pr_debug("unmap this: iova 0x%lx size 0x%lx\n", iova,
+                                                       (unsigned long)size);
+
+       /*
+        * Keep iterating until we either unmap 'size' bytes (or more)
+        * or we hit an area that isn't mapped.
+        */
+       while (unmapped < size) {
+               size_t left = size - unmapped;
+
+               unmapped_page = domain->ops->unmap(domain, iova, left);
+               if (!unmapped_page)
+                       break;
+
+               pr_debug("unmapped: iova 0x%lx size %lx\n", iova,
+                                       (unsigned long)unmapped_page);
+
+               iova += unmapped_page;
+               unmapped += unmapped_page;
+       }
+
+       return unmapped;
+}
+EXPORT_SYMBOL_GPL(iommu_unmap);
 
-       BUG_ON(!IS_ALIGNED(iova, size));
+int iommu_device_group(struct device *dev, unsigned int *groupid)
+{
+       if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group)
+               return dev->bus->iommu_ops->device_group(dev, groupid);
 
-       return domain->ops->unmap(domain, iova, gfp_order);
+       return -ENODEV;
 }
-EXPORT_SYMBOL_GPL(iommu_unmap);
+EXPORT_SYMBOL_GPL(iommu_device_group);
index 5865dd2e28f928b0cf55ff1c5ebddfe5786e1d16..08a90b88e40d80feb9d01185e534a1351cf7473d 100644 (file)
@@ -42,6 +42,9 @@ __asm__ __volatile__ (                                                        \
 #define RCP15_PRRR(reg)                MRC(reg, p15, 0, c10, c2, 0)
 #define RCP15_NMRR(reg)                MRC(reg, p15, 0, c10, c2, 1)
 
+/* bitmap of the page sizes currently supported */
+#define MSM_IOMMU_PGSIZES      (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
+
 static int msm_iommu_tex_class[4];
 
 DEFINE_SPINLOCK(msm_iommu_lock);
@@ -352,7 +355,7 @@ fail:
 }
 
 static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
-                        phys_addr_t pa, int order, int prot)
+                        phys_addr_t pa, size_t len, int prot)
 {
        struct msm_priv *priv;
        unsigned long flags;
@@ -363,7 +366,6 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
        unsigned long *sl_pte;
        unsigned long sl_offset;
        unsigned int pgprot;
-       size_t len = 0x1000UL << order;
        int ret = 0, tex, sh;
 
        spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -463,8 +465,8 @@ fail:
        return ret;
 }
 
-static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
-                           int order)
+static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
+                           size_t len)
 {
        struct msm_priv *priv;
        unsigned long flags;
@@ -474,7 +476,6 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
        unsigned long *sl_table;
        unsigned long *sl_pte;
        unsigned long sl_offset;
-       size_t len = 0x1000UL << order;
        int i, ret = 0;
 
        spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -544,15 +545,12 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
 
        ret = __flush_iotlb(domain);
 
-       /*
-        * the IOMMU API requires us to return the order of the unmapped
-        * page (on success).
-        */
-       if (!ret)
-               ret = order;
 fail:
        spin_unlock_irqrestore(&msm_iommu_lock, flags);
-       return ret;
+
+       /* the IOMMU API requires us to return how many bytes were unmapped */
+       len = ret ? 0 : len;
+       return len;
 }
 
 static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -684,7 +682,8 @@ static struct iommu_ops msm_iommu_ops = {
        .map = msm_iommu_map,
        .unmap = msm_iommu_unmap,
        .iova_to_phys = msm_iommu_iova_to_phys,
-       .domain_has_cap = msm_iommu_domain_has_cap
+       .domain_has_cap = msm_iommu_domain_has_cap,
+       .pgsize_bitmap = MSM_IOMMU_PGSIZES,
 };
 
 static int __init get_tex_class(int icp, int ocp, int mt, int nos)
index 8f32b2bf758777686c4bf9c7a85df3edaf9ac3af..d8edd979d01b2c3d84ffb49a9d2c3c53829518d9 100644 (file)
@@ -33,6 +33,9 @@
             (__i < (n)) && (cr = __iotlb_read_cr((obj), __i), true);   \
             __i++)
 
+/* bitmap of the page sizes currently supported */
+#define OMAP_IOMMU_PGSIZES     (SZ_4K | SZ_64K | SZ_1M | SZ_16M)
+
 /**
  * struct omap_iommu_domain - omap iommu domain
  * @pgtable:   the page table
@@ -86,20 +89,24 @@ EXPORT_SYMBOL_GPL(omap_uninstall_iommu_arch);
 
 /**
  * omap_iommu_save_ctx - Save registers for pm off-mode support
- * @obj:       target iommu
+ * @dev:       client device
  **/
-void omap_iommu_save_ctx(struct omap_iommu *obj)
+void omap_iommu_save_ctx(struct device *dev)
 {
+       struct omap_iommu *obj = dev_to_omap_iommu(dev);
+
        arch_iommu->save_ctx(obj);
 }
 EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
 
 /**
  * omap_iommu_restore_ctx - Restore registers for pm off-mode support
- * @obj:       target iommu
+ * @dev:       client device
  **/
-void omap_iommu_restore_ctx(struct omap_iommu *obj)
+void omap_iommu_restore_ctx(struct device *dev)
 {
+       struct omap_iommu *obj = dev_to_omap_iommu(dev);
+
        arch_iommu->restore_ctx(obj);
 }
 EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
@@ -819,36 +826,24 @@ static int device_match_by_alias(struct device *dev, void *data)
        return strcmp(obj->name, name) == 0;
 }
 
-/**
- * omap_find_iommu_device() - find an omap iommu device by name
- * @name:      name of the iommu device
- *
- * The generic iommu API requires the caller to provide the device
- * he wishes to attach to a certain iommu domain.
- *
- * Drivers generally should not bother with this as it should just
- * be taken care of by the DMA-API using dev_archdata.
- *
- * This function is provided as an interim solution until the latter
- * materializes, and omap3isp is fully migrated to the DMA-API.
- */
-struct device *omap_find_iommu_device(const char *name)
-{
-       return driver_find_device(&omap_iommu_driver.driver, NULL,
-                               (void *)name,
-                               device_match_by_alias);
-}
-EXPORT_SYMBOL_GPL(omap_find_iommu_device);
-
 /**
  * omap_iommu_attach() - attach iommu device to an iommu domain
- * @dev:       target omap iommu device
+ * @name:      name of target omap iommu device
  * @iopgd:     page table
  **/
-static struct omap_iommu *omap_iommu_attach(struct device *dev, u32 *iopgd)
+static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd)
 {
        int err = -ENOMEM;
-       struct omap_iommu *obj = to_iommu(dev);
+       struct device *dev;
+       struct omap_iommu *obj;
+
+       dev = driver_find_device(&omap_iommu_driver.driver, NULL,
+                               (void *)name,
+                               device_match_by_alias);
+       if (!dev)
+               return NULL;
+
+       obj = to_iommu(dev);
 
        spin_lock(&obj->iommu_lock);
 
@@ -1019,12 +1014,11 @@ static void iopte_cachep_ctor(void *iopte)
 }
 
 static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
-                        phys_addr_t pa, int order, int prot)
+                        phys_addr_t pa, size_t bytes, int prot)
 {
        struct omap_iommu_domain *omap_domain = domain->priv;
        struct omap_iommu *oiommu = omap_domain->iommu_dev;
        struct device *dev = oiommu->dev;
-       size_t bytes = PAGE_SIZE << order;
        struct iotlb_entry e;
        int omap_pgsz;
        u32 ret, flags;
@@ -1049,19 +1043,16 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
        return ret;
 }
 
-static int omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
-                           int order)
+static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
+                           size_t size)
 {
        struct omap_iommu_domain *omap_domain = domain->priv;
        struct omap_iommu *oiommu = omap_domain->iommu_dev;
        struct device *dev = oiommu->dev;
-       size_t unmap_size;
-
-       dev_dbg(dev, "unmapping da 0x%lx order %d\n", da, order);
 
-       unmap_size = iopgtable_clear_entry(oiommu, da);
+       dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size);
 
-       return unmap_size ? get_order(unmap_size) : -EINVAL;
+       return iopgtable_clear_entry(oiommu, da);
 }
 
 static int
@@ -1069,6 +1060,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
        struct omap_iommu_domain *omap_domain = domain->priv;
        struct omap_iommu *oiommu;
+       struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
        int ret = 0;
 
        spin_lock(&omap_domain->lock);
@@ -1081,14 +1073,14 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
        }
 
        /* get a handle to and enable the omap iommu */
-       oiommu = omap_iommu_attach(dev, omap_domain->pgtable);
+       oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable);
        if (IS_ERR(oiommu)) {
                ret = PTR_ERR(oiommu);
                dev_err(dev, "can't get omap iommu: %d\n", ret);
                goto out;
        }
 
-       omap_domain->iommu_dev = oiommu;
+       omap_domain->iommu_dev = arch_data->iommu_dev = oiommu;
        oiommu->domain = domain;
 
 out:
@@ -1100,7 +1092,8 @@ static void omap_iommu_detach_dev(struct iommu_domain *domain,
                                 struct device *dev)
 {
        struct omap_iommu_domain *omap_domain = domain->priv;
-       struct omap_iommu *oiommu = to_iommu(dev);
+       struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+       struct omap_iommu *oiommu = dev_to_omap_iommu(dev);
 
        spin_lock(&omap_domain->lock);
 
@@ -1114,7 +1107,7 @@ static void omap_iommu_detach_dev(struct iommu_domain *domain,
 
        omap_iommu_detach(oiommu);
 
-       omap_domain->iommu_dev = NULL;
+       omap_domain->iommu_dev = arch_data->iommu_dev = NULL;
 
 out:
        spin_unlock(&omap_domain->lock);
@@ -1183,14 +1176,14 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain,
                else if (iopte_is_large(*pte))
                        ret = omap_iommu_translate(*pte, da, IOLARGE_MASK);
                else
-                       dev_err(dev, "bogus pte 0x%x", *pte);
+                       dev_err(dev, "bogus pte 0x%x, da 0x%lx", *pte, da);
        } else {
                if (iopgd_is_section(*pgd))
                        ret = omap_iommu_translate(*pgd, da, IOSECTION_MASK);
                else if (iopgd_is_super(*pgd))
                        ret = omap_iommu_translate(*pgd, da, IOSUPER_MASK);
                else
-                       dev_err(dev, "bogus pgd 0x%x", *pgd);
+                       dev_err(dev, "bogus pgd 0x%x, da 0x%lx", *pgd, da);
        }
 
        return ret;
@@ -1211,6 +1204,7 @@ static struct iommu_ops omap_iommu_ops = {
        .unmap          = omap_iommu_unmap,
        .iova_to_phys   = omap_iommu_iova_to_phys,
        .domain_has_cap = omap_iommu_domain_has_cap,
+       .pgsize_bitmap  = OMAP_IOMMU_PGSIZES,
 };
 
 static int __init omap_iommu_init(void)
index 46be456fcc00e01c10b2ae8106a83310b41d29b9..2e10c3e0a7aee3eaa3815f473e06d6999ce8866b 100644 (file)
@@ -231,12 +231,14 @@ static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj,
 
 /**
  * omap_find_iovm_area  -  find iovma which includes @da
+ * @dev:       client device
  * @da:                iommu device virtual address
  *
  * Find the existing iovma starting at @da
  */
-struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da)
+struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da)
 {
+       struct omap_iommu *obj = dev_to_omap_iommu(dev);
        struct iovm_struct *area;
 
        mutex_lock(&obj->mmap_lock);
@@ -343,14 +345,15 @@ static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area)
 
 /**
  * omap_da_to_va - convert (d) to (v)
- * @obj:       objective iommu
+ * @dev:       client device
  * @da:                iommu device virtual address
  * @va:                mpu virtual address
  *
  * Returns mpu virtual addr which corresponds to a given device virtual addr
  */
-void *omap_da_to_va(struct omap_iommu *obj, u32 da)
+void *omap_da_to_va(struct device *dev, u32 da)
 {
+       struct omap_iommu *obj = dev_to_omap_iommu(dev);
        void *va = NULL;
        struct iovm_struct *area;
 
@@ -410,7 +413,6 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
        unsigned int i, j;
        struct scatterlist *sg;
        u32 da = new->da_start;
-       int order;
 
        if (!domain || !sgt)
                return -EINVAL;
@@ -429,12 +431,10 @@ static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new,
                if (bytes_to_iopgsz(bytes) < 0)
                        goto err_out;
 
-               order = get_order(bytes);
-
                pr_debug("%s: [%d] %08x %08x(%x)\n", __func__,
                         i, da, pa, bytes);
 
-               err = iommu_map(domain, da, pa, order, flags);
+               err = iommu_map(domain, da, pa, bytes, flags);
                if (err)
                        goto err_out;
 
@@ -449,10 +449,9 @@ err_out:
                size_t bytes;
 
                bytes = sg->length + sg->offset;
-               order = get_order(bytes);
 
                /* ignore failures.. we're already handling one */
-               iommu_unmap(domain, da, order);
+               iommu_unmap(domain, da, bytes);
 
                da += bytes;
        }
@@ -467,7 +466,8 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
        size_t total = area->da_end - area->da_start;
        const struct sg_table *sgt = area->sgt;
        struct scatterlist *sg;
-       int i, err;
+       int i;
+       size_t unmapped;
 
        BUG_ON(!sgtable_ok(sgt));
        BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE));
@@ -475,13 +475,11 @@ static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj,
        start = area->da_start;
        for_each_sg(sgt->sgl, sg, sgt->nents, i) {
                size_t bytes;
-               int order;
 
                bytes = sg->length + sg->offset;
-               order = get_order(bytes);
 
-               err = iommu_unmap(domain, start, order);
-               if (err < 0)
+               unmapped = iommu_unmap(domain, start, bytes);
+               if (unmapped < bytes)
                        break;
 
                dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n",
@@ -582,16 +580,18 @@ __iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj,
 
 /**
  * omap_iommu_vmap  -  (d)-(p)-(v) address mapper
- * @obj:       objective iommu
+ * @domain:    iommu domain
+ * @dev:       client device
  * @sgt:       address of scatter gather table
  * @flags:     iovma and page property
  *
  * Creates 1-n-1 mapping with given @sgt and returns @da.
 * All @sgt elements must be io page size aligned.
  */
-u32 omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
+u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
                const struct sg_table *sgt, u32 flags)
 {
+       struct omap_iommu *obj = dev_to_omap_iommu(dev);
        size_t bytes;
        void *va = NULL;
 
@@ -622,15 +622,17 @@ EXPORT_SYMBOL_GPL(omap_iommu_vmap);
 
 /**
  * omap_iommu_vunmap  -  release virtual mapping obtained by 'omap_iommu_vmap()'
- * @obj:       objective iommu
+ * @domain:    iommu domain
+ * @dev:       client device
  * @da:                iommu device virtual address
  *
  * Free the iommu virtually contiguous memory area starting at
  * @da, which was returned by 'omap_iommu_vmap()'.
  */
 struct sg_table *
-omap_iommu_vunmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da)
+omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da)
 {
+       struct omap_iommu *obj = dev_to_omap_iommu(dev);
        struct sg_table *sgt;
        /*
         * 'sgt' is allocated before 'omap_iommu_vmalloc()' is called.
@@ -647,7 +649,7 @@ EXPORT_SYMBOL_GPL(omap_iommu_vunmap);
 
 /**
  * omap_iommu_vmalloc  -  (d)-(p)-(v) address allocator and mapper
- * @obj:       objective iommu
+ * @dev:       client device
  * @da:                contiguous iommu virtual memory
  * @bytes:     allocation size
  * @flags:     iovma and page property
@@ -656,9 +658,10 @@ EXPORT_SYMBOL_GPL(omap_iommu_vunmap);
  * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set.
  */
 u32
-omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
+omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da,
                                                size_t bytes, u32 flags)
 {
+       struct omap_iommu *obj = dev_to_omap_iommu(dev);
        void *va;
        struct sg_table *sgt;
 
@@ -698,15 +701,16 @@ EXPORT_SYMBOL_GPL(omap_iommu_vmalloc);
 
 /**
  * omap_iommu_vfree  -  release memory allocated by 'omap_iommu_vmalloc()'
- * @obj:       objective iommu
+ * @dev:       client device
  * @da:                iommu device virtual address
  *
 * Frees the iommu virtually contiguous memory area starting at
  * @da, as obtained from 'omap_iommu_vmalloc()'.
  */
-void omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj,
+void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
                                                                const u32 da)
 {
+       struct omap_iommu *obj = dev_to_omap_iommu(dev);
        struct sg_table *sgt;
 
        sgt = unmap_vm_area(domain, obj, da, vfree,
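
The iovmm entry points follow the same device-based convention. A sketch
of the allocate/translate/free cycle as a client now drives it (IOMMU_FLAG
stands in for the caller's iovma flags; all names illustrative):

    u32 da;
    void *va;

    da = omap_iommu_vmalloc(domain, dev, 0, size, IOMMU_FLAG);
    if (IS_ERR_VALUE(da))
            return -ENOMEM;

    va = omap_da_to_va(dev, da);            /* CPU-visible view */
    /* ... fill or read the buffer ... */
    omap_iommu_vfree(domain, dev, da);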
index b818cacf420f89e4932ea02b18bf3ad3539ab829..d4c48ef227fb9379ed34ee4d09976cd86836658b 100644 (file)
 #include "isph3a.h"
 #include "isphist.h"
 
-/*
- * this is provided as an interim solution until omap3isp doesn't need
- * any omap-specific iommu API
- */
-#define to_iommu(dev)                                                  \
-       (struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))
-
 static unsigned int autoidle;
 module_param(autoidle, int, 0444);
 MODULE_PARM_DESC(autoidle, "Enable OMAP3ISP AUTOIDLE support");
@@ -1114,8 +1107,7 @@ isp_restore_context(struct isp_device *isp, struct isp_reg *reg_list)
 static void isp_save_ctx(struct isp_device *isp)
 {
        isp_save_context(isp, isp_reg_list);
-       if (isp->iommu)
-               omap_iommu_save_ctx(isp->iommu);
+       omap_iommu_save_ctx(isp->dev);
 }
 
 /*
@@ -1128,8 +1120,7 @@ static void isp_save_ctx(struct isp_device *isp)
 static void isp_restore_ctx(struct isp_device *isp)
 {
        isp_restore_context(isp, isp_reg_list);
-       if (isp->iommu)
-               omap_iommu_restore_ctx(isp->iommu);
+       omap_iommu_restore_ctx(isp->dev);
        omap3isp_ccdc_restore_context(isp);
        omap3isp_preview_restore_context(isp);
 }
@@ -1983,7 +1974,7 @@ static int isp_remove(struct platform_device *pdev)
        isp_cleanup_modules(isp);
 
        omap3isp_get(isp);
-       iommu_detach_device(isp->domain, isp->iommu_dev);
+       iommu_detach_device(isp->domain, &pdev->dev);
        iommu_domain_free(isp->domain);
        omap3isp_put(isp);
 
@@ -2131,17 +2122,6 @@ static int isp_probe(struct platform_device *pdev)
                }
        }
 
-       /* IOMMU */
-       isp->iommu_dev = omap_find_iommu_device("isp");
-       if (!isp->iommu_dev) {
-               dev_err(isp->dev, "omap_find_iommu_device failed\n");
-               ret = -ENODEV;
-               goto error_isp;
-       }
-
-       /* to be removed once iommu migration is complete */
-       isp->iommu = to_iommu(isp->iommu_dev);
-
        isp->domain = iommu_domain_alloc(pdev->dev.bus);
        if (!isp->domain) {
                dev_err(isp->dev, "can't alloc iommu domain\n");
@@ -2149,7 +2129,7 @@ static int isp_probe(struct platform_device *pdev)
                goto error_isp;
        }
 
-       ret = iommu_attach_device(isp->domain, isp->iommu_dev);
+       ret = iommu_attach_device(isp->domain, &pdev->dev);
        if (ret) {
                dev_err(&pdev->dev, "can't attach iommu device: %d\n", ret);
                goto free_domain;
@@ -2188,7 +2168,7 @@ error_modules:
 error_irq:
        free_irq(isp->irq_num, isp);
 detach_dev:
-       iommu_detach_device(isp->domain, isp->iommu_dev);
+       iommu_detach_device(isp->domain, &pdev->dev);
 free_domain:
        iommu_domain_free(isp->domain);
 error_isp:
index 705946ef4d6027ca6f2e966149868e7a80b64d0e..d96603eb0d17a7c1ca6d4f363b4c8f32089d7c03 100644 (file)
@@ -212,9 +212,7 @@ struct isp_device {
        unsigned int sbl_resources;
        unsigned int subclk_resources;
 
-       struct omap_iommu *iommu;
        struct iommu_domain *domain;
-       struct device *iommu_dev;
 
        struct isp_platform_callback platform_cb;
 };
index 54a4a3f22e2e4187c5651aaa42cf73dcf9141059..d341ba12593f0bc0a93fc1f66dc57a899b9aa1a7 100644 (file)
@@ -366,7 +366,7 @@ static void ccdc_lsc_free_request(struct isp_ccdc_device *ccdc,
                dma_unmap_sg(isp->dev, req->iovm->sgt->sgl,
                             req->iovm->sgt->nents, DMA_TO_DEVICE);
        if (req->table)
-               omap_iommu_vfree(isp->domain, isp->iommu, req->table);
+               omap_iommu_vfree(isp->domain, isp->dev, req->table);
        kfree(req);
 }
 
@@ -438,7 +438,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc,
 
                req->enable = 1;
 
-               req->table = omap_iommu_vmalloc(isp->domain, isp->iommu, 0,
+               req->table = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
                                        req->config.size, IOMMU_FLAG);
                if (IS_ERR_VALUE(req->table)) {
                        req->table = 0;
@@ -446,7 +446,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc,
                        goto done;
                }
 
-               req->iovm = omap_find_iovm_area(isp->iommu, req->table);
+               req->iovm = omap_find_iovm_area(isp->dev, req->table);
                if (req->iovm == NULL) {
                        ret = -ENOMEM;
                        goto done;
@@ -462,7 +462,7 @@ static int ccdc_lsc_config(struct isp_ccdc_device *ccdc,
                dma_sync_sg_for_cpu(isp->dev, req->iovm->sgt->sgl,
                                    req->iovm->sgt->nents, DMA_TO_DEVICE);
 
-               table = omap_da_to_va(isp->iommu, req->table);
+               table = omap_da_to_va(isp->dev, req->table);
                if (copy_from_user(table, config->lsc, req->config.size)) {
                        ret = -EFAULT;
                        goto done;
@@ -734,15 +734,15 @@ static int ccdc_config(struct isp_ccdc_device *ccdc,
                         * already done by omap_iommu_vmalloc().
                         */
                        size = ccdc->fpc.fpnum * 4;
-                       table_new = omap_iommu_vmalloc(isp->domain, isp->iommu,
+                       table_new = omap_iommu_vmalloc(isp->domain, isp->dev,
                                                        0, size, IOMMU_FLAG);
                        if (IS_ERR_VALUE(table_new))
                                return -ENOMEM;
 
-                       if (copy_from_user(omap_da_to_va(isp->iommu, table_new),
+                       if (copy_from_user(omap_da_to_va(isp->dev, table_new),
                                           (__force void __user *)
                                           ccdc->fpc.fpcaddr, size)) {
-                               omap_iommu_vfree(isp->domain, isp->iommu,
+                               omap_iommu_vfree(isp->domain, isp->dev,
                                                                table_new);
                                return -EFAULT;
                        }
@@ -753,7 +753,7 @@ static int ccdc_config(struct isp_ccdc_device *ccdc,
 
                ccdc_configure_fpc(ccdc);
                if (table_old != 0)
-                       omap_iommu_vfree(isp->domain, isp->iommu, table_old);
+                       omap_iommu_vfree(isp->domain, isp->dev, table_old);
        }
 
        return ccdc_lsc_config(ccdc, ccdc_struct);
@@ -2309,7 +2309,7 @@ void omap3isp_ccdc_cleanup(struct isp_device *isp)
        ccdc_lsc_free_queue(ccdc, &ccdc->lsc.free_queue);
 
        if (ccdc->fpc.fpcaddr != 0)
-               omap_iommu_vfree(isp->domain, isp->iommu, ccdc->fpc.fpcaddr);
+               omap_iommu_vfree(isp->domain, isp->dev, ccdc->fpc.fpcaddr);
 
        mutex_destroy(&ccdc->ioctl_lock);
 }
index bc0b2c7349b97894d62c6fcdb876eff8613634f7..11871ecc6d25c7257d6f960c97d805146e4bbd94 100644 (file)
@@ -366,7 +366,7 @@ static void isp_stat_bufs_free(struct ispstat *stat)
                                dma_unmap_sg(isp->dev, buf->iovm->sgt->sgl,
                                             buf->iovm->sgt->nents,
                                             DMA_FROM_DEVICE);
-                       omap_iommu_vfree(isp->domain, isp->iommu,
+                       omap_iommu_vfree(isp->domain, isp->dev,
                                                        buf->iommu_addr);
                } else {
                        if (!buf->virt_addr)
@@ -400,7 +400,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
                struct iovm_struct *iovm;
 
                WARN_ON(buf->dma_addr);
-               buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->iommu, 0,
+               buf->iommu_addr = omap_iommu_vmalloc(isp->domain, isp->dev, 0,
                                                        size, IOMMU_FLAG);
                if (IS_ERR((void *)buf->iommu_addr)) {
                        dev_err(stat->isp->dev,
@@ -410,7 +410,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
                        return -ENOMEM;
                }
 
-               iovm = omap_find_iovm_area(isp->iommu, buf->iommu_addr);
+               iovm = omap_find_iovm_area(isp->dev, buf->iommu_addr);
                if (!iovm ||
                    !dma_map_sg(isp->dev, iovm->sgt->sgl, iovm->sgt->nents,
                                DMA_FROM_DEVICE)) {
@@ -419,7 +419,7 @@ static int isp_stat_bufs_alloc_iommu(struct ispstat *stat, unsigned int size)
                }
                buf->iovm = iovm;
 
-               buf->virt_addr = omap_da_to_va(stat->isp->iommu,
+               buf->virt_addr = omap_da_to_va(stat->isp->dev,
                                          (u32)buf->iommu_addr);
                buf->empty = 1;
                dev_dbg(stat->isp->dev, "%s: buffer[%d] allocated."
index f2290578448c416bb809abe4d11d4146da6ead91..bd3aebafafa08c29502c5624623b3cf3c1830b74 100644 (file)
@@ -453,7 +453,7 @@ ispmmu_vmap(struct isp_device *isp, const struct scatterlist *sglist, int sglen)
        sgt->nents = sglen;
        sgt->orig_nents = sglen;
 
-       da = omap_iommu_vmap(isp->domain, isp->iommu, 0, sgt, IOMMU_FLAG);
+       da = omap_iommu_vmap(isp->domain, isp->dev, 0, sgt, IOMMU_FLAG);
        if (IS_ERR_VALUE(da))
                kfree(sgt);
 
@@ -469,7 +469,7 @@ static void ispmmu_vunmap(struct isp_device *isp, dma_addr_t da)
 {
        struct sg_table *sgt;
 
-       sgt = omap_iommu_vunmap(isp->domain, isp->iommu, (u32)da);
+       sgt = omap_iommu_vunmap(isp->domain, isp->dev, (u32)da);
        kfree(sgt);
 }
 
index b0dd08e6a9da1cc4f8ee78ebf53f1e4e5f1de2b2..9dd90b30f91a23703c420f5b60cf1fc2d31f55cd 100644 (file)
@@ -175,21 +175,22 @@ int pci_enable_pri(struct pci_dev *pdev, u32 reqs)
        u32 max_requests;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
        if (!pos)
                return -EINVAL;
 
-       pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-       pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF,  &status);
-       if ((control & PCI_PRI_ENABLE) || !(status & PCI_PRI_STATUS_STOPPED))
+       pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+       pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
+       if ((control & PCI_PRI_CTRL_ENABLE) ||
+           !(status & PCI_PRI_STATUS_STOPPED))
                return -EBUSY;
 
-       pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ_OFF, &max_requests);
+       pci_read_config_dword(pdev, pos + PCI_PRI_MAX_REQ, &max_requests);
        reqs = min(max_requests, reqs);
-       pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ_OFF, reqs);
+       pci_write_config_dword(pdev, pos + PCI_PRI_ALLOC_REQ, reqs);
 
-       control |= PCI_PRI_ENABLE;
-       pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+       control |= PCI_PRI_CTRL_ENABLE;
+       pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
        return 0;
 }
@@ -206,13 +207,13 @@ void pci_disable_pri(struct pci_dev *pdev)
        u16 control;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
        if (!pos)
                return;
 
-       pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-       control &= ~PCI_PRI_ENABLE;
-       pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+       pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+       control &= ~PCI_PRI_CTRL_ENABLE;
+       pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 }
 EXPORT_SYMBOL_GPL(pci_disable_pri);
 
@@ -227,13 +228,13 @@ bool pci_pri_enabled(struct pci_dev *pdev)
        u16 control;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
        if (!pos)
                return false;
 
-       pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
+       pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
 
-       return (control & PCI_PRI_ENABLE) ? true : false;
+       return (control & PCI_PRI_CTRL_ENABLE) ? true : false;
 }
 EXPORT_SYMBOL_GPL(pci_pri_enabled);
 
@@ -249,17 +250,17 @@ int pci_reset_pri(struct pci_dev *pdev)
        u16 control;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
        if (!pos)
                return -EINVAL;
 
-       pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-       if (control & PCI_PRI_ENABLE)
+       pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+       if (control & PCI_PRI_CTRL_ENABLE)
                return -EBUSY;
 
-       control |= PCI_PRI_RESET;
+       control |= PCI_PRI_CTRL_RESET;
 
-       pci_write_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, control);
+       pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
 
        return 0;
 }
@@ -282,14 +283,14 @@ bool pci_pri_stopped(struct pci_dev *pdev)
        u16 control, status;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
        if (!pos)
                return true;
 
-       pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-       pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF,  &status);
+       pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+       pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
 
-       if (control & PCI_PRI_ENABLE)
+       if (control & PCI_PRI_CTRL_ENABLE)
                return false;
 
        return (status & PCI_PRI_STATUS_STOPPED) ? true : false;
@@ -311,15 +312,15 @@ int pci_pri_status(struct pci_dev *pdev)
        u16 status, control;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PRI_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
        if (!pos)
                return -EINVAL;
 
-       pci_read_config_word(pdev, pos + PCI_PRI_CONTROL_OFF, &control);
-       pci_read_config_word(pdev, pos + PCI_PRI_STATUS_OFF,  &status);
+       pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+       pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
 
        /* Stopped bit is undefined when enable == 1, so clear it */
-       if (control & PCI_PRI_ENABLE)
+       if (control & PCI_PRI_CTRL_ENABLE)
                status &= ~PCI_PRI_STATUS_STOPPED;
 
        return status;
@@ -342,25 +343,25 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
        u16 control, supported;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
        if (!pos)
                return -EINVAL;
 
-       pci_read_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, &control);
-       pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF,     &supported);
+       pci_read_config_word(pdev, pos + PCI_PASID_CTRL, &control);
+       pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 
-       if (!(supported & PCI_PASID_ENABLE))
+       if (control & PCI_PASID_CTRL_ENABLE)
                return -EINVAL;
 
-       supported &= PCI_PASID_EXEC | PCI_PASID_PRIV;
+       supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
        /* User wants to enable anything unsupported? */
        if ((supported & features) != features)
                return -EINVAL;
 
-       control = PCI_PASID_ENABLE | features;
+       control = PCI_PASID_CTRL_ENABLE | features;
 
-       pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control);
+       pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 
        return 0;
 }
@@ -376,11 +377,11 @@ void pci_disable_pasid(struct pci_dev *pdev)
        u16 control = 0;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
        if (!pos)
                return;
 
-       pci_write_config_word(pdev, pos + PCI_PASID_CONTROL_OFF, control);
+       pci_write_config_word(pdev, pos + PCI_PASID_CTRL, control);
 }
 EXPORT_SYMBOL_GPL(pci_disable_pasid);
 
@@ -391,22 +392,21 @@ EXPORT_SYMBOL_GPL(pci_disable_pasid);
 * Returns a negative value when no PASID capability is present.
 * Otherwise it returns a bitmask with supported features. Current
  * features reported are:
- * PCI_PASID_ENABLE - PASID capability can be enabled
- * PCI_PASID_EXEC - Execute permission supported
- * PCI_PASID_PRIV - Priviledged mode supported
+ * PCI_PASID_CAP_EXEC - Execute permission supported
+ * PCI_PASID_CAP_PRIV - Privileged mode supported
  */
 int pci_pasid_features(struct pci_dev *pdev)
 {
        u16 supported;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
        if (!pos)
                return -EINVAL;
 
-       pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported);
+       pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 
-       supported &= PCI_PASID_ENABLE | PCI_PASID_EXEC | PCI_PASID_PRIV;
+       supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
        return supported;
 }
@@ -426,11 +426,11 @@ int pci_max_pasids(struct pci_dev *pdev)
        u16 supported;
        int pos;
 
-       pos = pci_find_ext_capability(pdev, PCI_PASID_CAP);
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
        if (!pos)
                return -EINVAL;
 
-       pci_read_config_word(pdev, pos + PCI_PASID_CAP_OFF, &supported);
+       pci_read_config_word(pdev, pos + PCI_PASID_CAP, &supported);
 
        supported = (supported & PASID_NUMBER_MASK) >> PASID_NUMBER_SHIFT;
 
index 838f571027b750e390090fb06dc032abda3db639..9a33fdde2d1639ae5722eef34f37771d01bd6594 100644 (file)
@@ -45,7 +45,6 @@ extern int pciehp_poll_time;
 extern int pciehp_debug;
 extern int pciehp_force;
 extern struct workqueue_struct *pciehp_wq;
-extern struct workqueue_struct *pciehp_ordered_wq;
 
 #define dbg(format, arg...)                                            \
 do {                                                                   \
index 7ac8358df8fdd1a6591f259bb0216f37ca44cf65..b8c99d35ac97af9aef83c5e46b9e9337bf7e9610 100644 (file)
@@ -43,7 +43,6 @@ int pciehp_poll_mode;
 int pciehp_poll_time;
 int pciehp_force;
 struct workqueue_struct *pciehp_wq;
-struct workqueue_struct *pciehp_ordered_wq;
 
 #define DRIVER_VERSION "0.4"
 #define DRIVER_AUTHOR  "Dan Zink <dan.zink@compaq.com>, Greg Kroah-Hartman <greg@kroah.com>, Dely Sy <dely.l.sy@intel.com>"
@@ -345,18 +344,11 @@ static int __init pcied_init(void)
        if (!pciehp_wq)
                return -ENOMEM;
 
-       pciehp_ordered_wq = alloc_ordered_workqueue("pciehp_ordered", 0);
-       if (!pciehp_ordered_wq) {
-               destroy_workqueue(pciehp_wq);
-               return -ENOMEM;
-       }
-
        pciehp_firmware_init();
        retval = pcie_port_service_register(&hpdriver_portdrv);
        dbg("pcie_port_service_register = %d\n", retval);
        info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
        if (retval) {
-               destroy_workqueue(pciehp_ordered_wq);
                destroy_workqueue(pciehp_wq);
                dbg("Failure to register service\n");
        }
@@ -366,9 +358,8 @@ static int __init pcied_init(void)
 static void __exit pcied_cleanup(void)
 {
        dbg("unload_pciehpd()\n");
-       destroy_workqueue(pciehp_ordered_wq);
-       destroy_workqueue(pciehp_wq);
        pcie_port_service_unregister(&hpdriver_portdrv);
+       destroy_workqueue(pciehp_wq);
        info(DRIVER_DESC " version: " DRIVER_VERSION " unloaded\n");
 }
 
index 085dbb5fc168be33de77d0ced5c411c807ffc2ef..27f44295a65738976b4be0f07f4cd33b04e2cc85 100644 (file)
@@ -344,7 +344,7 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
                kfree(info);
                goto out;
        }
-       queue_work(pciehp_ordered_wq, &info->work);
+       queue_work(pciehp_wq, &info->work);
  out:
        mutex_unlock(&p_slot->lock);
 }
@@ -439,7 +439,7 @@ static void handle_surprise_event(struct slot *p_slot)
        else
                p_slot->state = POWERON_STATE;
 
-       queue_work(pciehp_ordered_wq, &info->work);
+       queue_work(pciehp_wq, &info->work);
 }
 
 static void interrupt_event_handler(struct work_struct *work)
index 7b1414810ae3e0e0a5ccbcd9bec0f21051cf95db..bcdbb16436216886949ed44352d1c0ea4606f21a 100644 (file)
@@ -806,7 +806,6 @@ static void pcie_cleanup_slot(struct controller *ctrl)
        struct slot *slot = ctrl->slot;
        cancel_delayed_work(&slot->work);
        flush_workqueue(pciehp_wq);
-       flush_workqueue(pciehp_ordered_wq);
        kfree(slot);
 }
 
index 0e6d04d7ba4f7c0a2f8494f3e99aceaed1eecacb..337e16ab4a92ba3cde34b53b220b61d9191b4658 100644 (file)
@@ -323,6 +323,8 @@ static void free_msi_irqs(struct pci_dev *dev)
                        if (list_is_last(&entry->list, &dev->msi_list))
                                iounmap(entry->mask_base);
                }
+               kobject_del(&entry->kobj);
+               kobject_put(&entry->kobj);
                list_del(&entry->list);
                kfree(entry);
        }
@@ -403,6 +405,98 @@ void pci_restore_msi_state(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_restore_msi_state);
 
+
+#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
+#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)
+
+struct msi_attribute {
+       struct attribute        attr;
+       ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
+                       char *buf);
+       ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
+                        const char *buf, size_t count);
+};
+
+static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
+                            char *buf)
+{
+       return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
+}
+
+static ssize_t msi_irq_attr_show(struct kobject *kobj,
+                                struct attribute *attr, char *buf)
+{
+       struct msi_attribute *attribute = to_msi_attr(attr);
+       struct msi_desc *entry = to_msi_desc(kobj);
+
+       if (!attribute->show)
+               return -EIO;
+
+       return attribute->show(entry, attribute, buf);
+}
+
+static const struct sysfs_ops msi_irq_sysfs_ops = {
+       .show = msi_irq_attr_show,
+};
+
+static struct msi_attribute mode_attribute =
+       __ATTR(mode, S_IRUGO, show_msi_mode, NULL);
+
+
+struct attribute *msi_irq_default_attrs[] = {
+       &mode_attribute.attr,
+       NULL
+};
+
+void msi_kobj_release(struct kobject *kobj)
+{
+       struct msi_desc *entry = to_msi_desc(kobj);
+
+       pci_dev_put(entry->dev);
+}
+
+static struct kobj_type msi_irq_ktype = {
+       .release = msi_kobj_release,
+       .sysfs_ops = &msi_irq_sysfs_ops,
+       .default_attrs = msi_irq_default_attrs,
+};
+
+static int populate_msi_sysfs(struct pci_dev *pdev)
+{
+       struct msi_desc *entry;
+       struct kobject *kobj;
+       int ret;
+       int count = 0;
+
+       pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
+       if (!pdev->msi_kset)
+               return -ENOMEM;
+
+       list_for_each_entry(entry, &pdev->msi_list, list) {
+               kobj = &entry->kobj;
+               kobj->kset = pdev->msi_kset;
+               pci_dev_get(pdev);
+               ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
+                                    "%u", entry->irq);
+               if (ret)
+                       goto out_unroll;
+
+               count++;
+       }
+
+       return 0;
+
+out_unroll:
+       list_for_each_entry(entry, &pdev->msi_list, list) {
+               if (!count)
+                       break;
+               kobject_del(&entry->kobj);
+               kobject_put(&entry->kobj);
+               count--;
+       }
+       return ret;
+}
+
 /**
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
@@ -454,6 +548,13 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
                return ret;
        }
 
+       ret = populate_msi_sysfs(dev);
+       if (ret) {
+               msi_mask_irq(entry, mask, ~mask);
+               free_msi_irqs(dev);
+               return ret;
+       }
+
        /* Set MSI enabled bits  */
        pci_intx_for_msi(dev, 0);
        msi_set_enable(dev, pos, 1);
@@ -574,6 +675,12 @@ static int msix_capability_init(struct pci_dev *dev,
 
        msix_program_entries(dev, entries);
 
+       ret = populate_msi_sysfs(dev);
+       if (ret) {
+               ret = 0;
+               goto error;
+       }
+
        /* Set MSI-X enabled bits and unmask the function */
        pci_intx_for_msi(dev, 0);
        dev->msix_enabled = 1;
@@ -732,6 +839,8 @@ void pci_disable_msi(struct pci_dev *dev)
 
        pci_msi_shutdown(dev);
        free_msi_irqs(dev);
+       kset_unregister(dev->msi_kset);
+       dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msi);
 
@@ -830,6 +939,8 @@ void pci_disable_msix(struct pci_dev *dev)
 
        pci_msix_shutdown(dev);
        free_msi_irqs(dev);
+       kset_unregister(dev->msi_kset);
+       dev->msi_kset = NULL;
 }
 EXPORT_SYMBOL(pci_disable_msix);
 
@@ -870,5 +981,15 @@ EXPORT_SYMBOL(pci_msi_enabled);
 
 void pci_msi_init_pci_dev(struct pci_dev *dev)
 {
+       int pos;
        INIT_LIST_HEAD(&dev->msi_list);
+
+       /* Disable the msi hardware to avoid screaming interrupts
+        * during boot.  This is the power on reset default so
+        * usually this should be a noop.
+        */
+       pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+       if (pos)
+               msi_set_enable(dev, pos, 0);
+       msix_set_enable(dev, 0);
 }
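
The per-vector kobjects surface as
/sys/bus/pci/devices/<BDF>/msi_irqs/<irq>/mode. A small userspace sketch
that lists them (the BDF below is a placeholder):

    #include <dirent.h>
    #include <stdio.h>

    int main(void)
    {
            const char *base = "/sys/bus/pci/devices/0000:00:19.0/msi_irqs";
            char path[256], mode[16];
            struct dirent *de;
            DIR *dir = opendir(base);
            FILE *f;

            if (!dir)
                    return 1;               /* no MSI irqs exported */

            while ((de = readdir(dir))) {
                    if (de->d_name[0] == '.')
                            continue;
                    snprintf(path, sizeof(path), "%s/%s/mode", base, de->d_name);
                    f = fopen(path, "r");
                    if (!f)
                            continue;
                    if (fgets(mode, sizeof(mode), f))
                            printf("irq %s: %s", de->d_name, mode);
                    fclose(f);
            }
            closedir(dir);
            return 0;
    }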
index 4ecb6408b0d61e2128c26100bba3d6690402de5d..060fd22a1103856988d6866b64c4d8e8e3e95a89 100644 (file)
@@ -45,16 +45,20 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
 {
        struct pci_dev *pci_dev = context;
 
-       if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) {
+       if (event != ACPI_NOTIFY_DEVICE_WAKE || !pci_dev)
+               return;
+
+       if (!pci_dev->pm_cap || !pci_dev->pme_support
+            || pci_check_pme_status(pci_dev)) {
                if (pci_dev->pme_poll)
                        pci_dev->pme_poll = false;
 
                pci_wakeup_event(pci_dev);
-               pci_check_pme_status(pci_dev);
                pm_runtime_resume(&pci_dev->dev);
-               if (pci_dev->subordinate)
-                       pci_pme_wakeup_bus(pci_dev->subordinate);
        }
+
+       if (pci_dev->subordinate)
+               pci_pme_wakeup_bus(pci_dev->subordinate);
 }
 
 /**
@@ -395,7 +399,6 @@ static int __init acpi_pci_init(void)
 
        if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
                printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n");
-               pcie_clear_aspm();
                pcie_no_aspm();
        }
 
index cbfbab18be91d7ecdf9ebd3952e9b77aac590726..1cfbf228fbb1d7f6531ae5196ca7aa12d637df4d 100644 (file)
@@ -68,7 +68,7 @@ struct pcie_link_state {
        struct aspm_latency acceptable[8];
 };
 
-static int aspm_disabled, aspm_force, aspm_clear_state;
+static int aspm_disabled, aspm_force;
 static bool aspm_support_enabled = true;
 static DEFINE_MUTEX(aspm_lock);
 static LIST_HEAD(link_list);
@@ -500,9 +500,6 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev)
        int pos;
        u32 reg32;
 
-       if (aspm_clear_state)
-               return -EINVAL;
-
        /*
         * Some functions in a slot might not all be PCIe functions,
         * very strange. Disable ASPM for the whole slot
@@ -574,9 +571,6 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
            pdev->pcie_type != PCI_EXP_TYPE_DOWNSTREAM)
                return;
 
-       if (aspm_disabled && !aspm_clear_state)
-               return;
-
        /* VIA has a strange chipset, root port is under a bridge */
        if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT &&
            pdev->bus->self)
@@ -608,7 +602,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
         * the BIOS's expectation, we'll do so once pci_enable_device() is
         * called.
         */
-       if (aspm_policy != POLICY_POWERSAVE || aspm_clear_state) {
+       if (aspm_policy != POLICY_POWERSAVE) {
                pcie_config_aspm_path(link);
                pcie_set_clkpm(link, policy_to_clkpm_state(link));
        }
@@ -649,8 +643,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
        struct pci_dev *parent = pdev->bus->self;
        struct pcie_link_state *link, *root, *parent_link;
 
-       if ((aspm_disabled && !aspm_clear_state) || !pci_is_pcie(pdev) ||
-           !parent || !parent->link_state)
+       if (!pci_is_pcie(pdev) || !parent || !parent->link_state)
                return;
        if ((parent->pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
            (parent->pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
@@ -734,13 +727,18 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
  * pci_disable_link_state - disable pci device's link state, so the link will
  * never enter specific states
  */
-static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
+static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem,
+                                    bool force)
 {
        struct pci_dev *parent = pdev->bus->self;
        struct pcie_link_state *link;
 
-       if (aspm_disabled || !pci_is_pcie(pdev))
+       if (aspm_disabled && !force)
+               return;
+
+       if (!pci_is_pcie(pdev))
                return;
+
        if (pdev->pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
            pdev->pcie_type == PCI_EXP_TYPE_DOWNSTREAM)
                parent = pdev;
@@ -768,16 +766,31 @@ static void __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
 
 void pci_disable_link_state_locked(struct pci_dev *pdev, int state)
 {
-       __pci_disable_link_state(pdev, state, false);
+       __pci_disable_link_state(pdev, state, false, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state_locked);
 
 void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
-       __pci_disable_link_state(pdev, state, true);
+       __pci_disable_link_state(pdev, state, true, false);
 }
 EXPORT_SYMBOL(pci_disable_link_state);
 
+void pcie_clear_aspm(struct pci_bus *bus)
+{
+       struct pci_dev *child;
+
+       /*
+        * Clear any ASPM setup that the firmware has carried out on this bus
+        */
+       list_for_each_entry(child, &bus->devices, bus_list) {
+               __pci_disable_link_state(child, PCIE_LINK_STATE_L0S |
+                                        PCIE_LINK_STATE_L1 |
+                                        PCIE_LINK_STATE_CLKPM,
+                                        false, true);
+       }
+}
+
 static int pcie_aspm_set_policy(const char *val, struct kernel_param *kp)
 {
        int i;
@@ -935,6 +948,7 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev)
 static int __init pcie_aspm_disable(char *str)
 {
        if (!strcmp(str, "off")) {
+               aspm_policy = POLICY_DEFAULT;
                aspm_disabled = 1;
                aspm_support_enabled = false;
                printk(KERN_INFO "PCIe ASPM is disabled\n");
@@ -947,16 +961,18 @@ static int __init pcie_aspm_disable(char *str)
 
 __setup("pcie_aspm=", pcie_aspm_disable);
 
-void pcie_clear_aspm(void)
-{
-       if (!aspm_force)
-               aspm_clear_state = 1;
-}
-
 void pcie_no_aspm(void)
 {
-       if (!aspm_force)
+       /*
+        * Disabling ASPM is intended to prevent the kernel from modifying
+        * existing hardware state, not to clear existing state. To that end:
+        * (a) set policy to POLICY_DEFAULT in order to avoid changing state
+        * (b) prevent userspace from changing policy
+        */
+       if (!aspm_force) {
+               aspm_policy = POLICY_DEFAULT;
                aspm_disabled = 1;
+       }
 }
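
pcie_clear_aspm() is now bus-scoped: rather than a global flag consulted
during link init, a caller that knows firmware left ASPM misconfigured
clears it per root bus. A hedged sketch of such a caller (hypothetical;
the in-tree user is the ACPI root-bridge scan):

    static void example_kill_aspm(struct pci_bus *root_bus)
    {
            pcie_clear_aspm(root_bus);      /* undo firmware L0s/L1/clk-PM */
            pcie_no_aspm();                 /* and freeze policy afterwards */
    }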
 
 /**
index 6001b4da39ddc9ebd52e0f7691268abbe8ebb69d..627a3a42e4d8e3ba015ad32a6bf01f779c356943 100644 (file)
@@ -302,6 +302,10 @@ extern bool osc_sb_apei_support_acked;
                                OSC_PCI_EXPRESS_PME_CONTROL |           \
                                OSC_PCI_EXPRESS_AER_CONTROL |           \
                                OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL)
+
+#define OSC_PCI_NATIVE_HOTPLUG (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL |    \
+                               OSC_SHPC_NATIVE_HP_CONTROL)
+
 extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
                                             u32 *mask, u32 req);
 extern void acpi_early_init(void);
index a6863a2dec1f6883b6be0b3fd68ca7f6d8982d83..ef00610837d4f9e1b556028c151df0118b89677c 100644 (file)
 #ifndef _ASM_X86_AMD_IOMMU_H
 #define _ASM_X86_AMD_IOMMU_H
 
-#include <linux/irqreturn.h>
+#include <linux/types.h>
 
 #ifdef CONFIG_AMD_IOMMU
 
+struct task_struct;
+struct pci_dev;
+
 extern int amd_iommu_detect(void);
 
+
+/**
+ * amd_iommu_enable_device_erratum() - Enable erratum workaround for device
+ *                                    in the IOMMUv2 driver
+ * @pdev: The PCI device the workaround is necessary for
+ * @erratum: The erratum workaround to enable
+ *
+ * The function needs to be called before amd_iommu_init_device().
+ * Possible values for the erratum number are for now:
+ * - AMD_PRI_DEV_ERRATUM_ENABLE_RESET - Reset PRI capability when PRI
+ *                                     is enabled
+ * - AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE - Limit number of outstanding PRI
+ *                                      requests to one
+ */
+#define AMD_PRI_DEV_ERRATUM_ENABLE_RESET               0
+#define AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE              1
+
+extern void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum);
+
+/**
+ * amd_iommu_init_device() - Init device for use with IOMMUv2 driver
+ * @pdev: The PCI device to initialize
+ * @pasids: Number of PASIDs to support for this device
+ *
+ * This function does all setup for the device pdev so that it can be
+ * used with IOMMUv2.
+ * Returns 0 on success or negative value on error.
+ */
+extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
+
+/**
+ * amd_iommu_free_device() - Free all IOMMUv2 related device resources
+ *                          and disable IOMMUv2 usage for this device
+ * @pdev: The PCI device to disable IOMMUv2 usage for
+ */
+extern void amd_iommu_free_device(struct pci_dev *pdev);
+
+/**
+ * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device
+ * @pdev: The PCI device to bind the task to
+ * @pasid: The PASID on the device the task should be bound to
+ * @task: the task to bind
+ *
+ * The function returns 0 on success or a negative value on error.
+ */
+extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+                               struct task_struct *task);
+
+/**
+ * amd_iommu_unbind_pasid() - Unbind a PASID from its task on
+ *                           a device
+ * @pdev: The device of the PASID
+ * @pasid: The PASID to unbind
+ *
+ * When this function returns the device is no longer using the PASID
+ * and the PASID is no longer bound to its task.
+ */
+extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
+
+/**
+ * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed
+ *                                 PRI requests
+ * @pdev: The PCI device the call-back should be registered for
+ * @cb: The call-back function
+ *
+ * The IOMMUv2 driver invokes this call-back when it is unable to
+ * successfully handle a PRI request. The device driver can then decide
+ * which PRI response the device should see. Possible return values for
+ * the call-back are:
+ *
+ * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device
+ * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device
+ * - AMD_IOMMU_INV_PRI_RSP_FAIL    - Send Failure back to the device,
+ *                                  the device is required to disable
+ *                                  PRI when it receives this response
+ *
+ * The function returns 0 on success or negative value on error.
+ */
+#define AMD_IOMMU_INV_PRI_RSP_SUCCESS  0
+#define AMD_IOMMU_INV_PRI_RSP_INVALID  1
+#define AMD_IOMMU_INV_PRI_RSP_FAIL     2
+
+typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
+                                       int pasid,
+                                       unsigned long address,
+                                       u16 flags);
+
+extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
+                                       amd_iommu_invalid_ppr_cb cb);
+
+/**
+ * amd_iommu_device_info() - Get information about IOMMUv2 support of a
+ *                          PCI device
+ * @pdev: PCI device to query information from
+ * @info: A pointer to an amd_iommu_device_info structure which will contain
+ *       the information about the PCI device
+ *
+ * Returns 0 on success, negative value on error
+ */
+
+#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP     0x1    /* ATS feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP     0x2    /* PRI feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP   0x4    /* PASID context supported */
+#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP    0x8    /* Device may request execution
+                                                   on memory pages */
+#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP   0x10    /* Device may request
+                                                   super-user privileges */
+
+struct amd_iommu_device_info {
+       int max_pasids;
+       u32 flags;
+};
+
+extern int amd_iommu_device_info(struct pci_dev *pdev,
+                                struct amd_iommu_device_info *info);
+
+/**
+ * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating
+ *                                    a pasid context. This call-back is
+ *                                    invoked when the IOMMUv2 driver needs to
+ *                                    invalidate a PASID context, for example
+ *                                    because the task that is bound to that
+ *                                    context is about to exit.
+ *
+ * @pdev: The PCI device the call-back should be registered for
+ * @cb: The call-back function
+ */
+
+typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid);
+
+extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
+                                          amd_iommu_invalidate_ctx cb);
+
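
A hedged end-to-end sketch of the IOMMUv2 API declared above (driver,
PASID count and PASID number are all illustrative):

    #include <linux/amd-iommu.h>

    static int my_ppr_cb(struct pci_dev *pdev, int pasid,
                         unsigned long address, u16 flags)
    {
            /* tell the device the faulting request cannot be served */
            return AMD_IOMMU_INV_PRI_RSP_INVALID;
    }

    static int my_iommuv2_setup(struct pci_dev *pdev, struct task_struct *task)
    {
            int ret;

            ret = amd_iommu_init_device(pdev, 16);  /* support 16 PASIDs */
            if (ret)
                    return ret;

            ret = amd_iommu_set_invalid_ppr_cb(pdev, my_ppr_cb);
            if (ret)
                    goto out_free;

            ret = amd_iommu_bind_pasid(pdev, 1, task); /* PASID 1 -> task */
            if (ret)
                    goto out_free;

            return 0;

    out_free:
            amd_iommu_free_device(pdev);
            return ret;
    }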
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
index 432acc4c054df1134dddb9be501de5c27d84d926..d937580417ba668d343b30b1741d59139f7924b9 100644 (file)
@@ -48,19 +48,34 @@ struct iommu_domain {
 
 #ifdef CONFIG_IOMMU_API
 
+/**
+ * struct iommu_ops - iommu ops and capabilities
+ * @domain_init: init iommu domain
+ * @domain_destroy: destroy iommu domain
+ * @attach_dev: attach device to an iommu domain
+ * @detach_dev: detach device from an iommu domain
+ * @map: map a physically contiguous memory region to an iommu domain
+ * @unmap: unmap a physically contiguous memory region from an iommu domain
+ * @iova_to_phys: translate iova to physical address
+ * @domain_has_cap: domain capabilities query
+ * @device_group: find iommu group for a device
+ * @pgsize_bitmap: bitmap of supported page sizes
+ */
 struct iommu_ops {
        int (*domain_init)(struct iommu_domain *domain);
        void (*domain_destroy)(struct iommu_domain *domain);
        int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
        void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
        int (*map)(struct iommu_domain *domain, unsigned long iova,
-                  phys_addr_t paddr, int gfp_order, int prot);
-       int (*unmap)(struct iommu_domain *domain, unsigned long iova,
-                    int gfp_order);
+                  phys_addr_t paddr, size_t size, int prot);
+       size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
+                    size_t size);
        phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
                                    unsigned long iova);
        int (*domain_has_cap)(struct iommu_domain *domain,
                              unsigned long cap);
+       int (*device_group)(struct device *dev, unsigned int *groupid);
+       unsigned long pgsize_bitmap;
 };
 
 extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops);
@@ -72,15 +87,16 @@ extern int iommu_attach_device(struct iommu_domain *domain,
 extern void iommu_detach_device(struct iommu_domain *domain,
                                struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
-                    phys_addr_t paddr, int gfp_order, int prot);
-extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-                      int gfp_order);
+                    phys_addr_t paddr, size_t size, int prot);
+extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+                      size_t size);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
                                      unsigned long iova);
 extern int iommu_domain_has_cap(struct iommu_domain *domain,
                                unsigned long cap);
 extern void iommu_set_fault_handler(struct iommu_domain *domain,
                                        iommu_fault_handler_t handler);
+extern int iommu_device_group(struct device *dev, unsigned int *groupid);
 
 /**
  * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework
@@ -179,6 +195,11 @@ static inline void iommu_set_fault_handler(struct iommu_domain *domain,
 {
 }
 
+static inline int iommu_device_group(struct device *dev, unsigned int *groupid)
+{
+       return -ENODEV;
+}
+
 #endif /* CONFIG_IOMMU_API */
 
 #endif /* __LINUX_IOMMU_H */
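
iommu_device_group() lets callers discover which devices the hardware
cannot isolate from one another. A minimal sketch comparing two devices
(illustrative helper, not from this series):

    static int example_same_group(struct device *a, struct device *b)
    {
            unsigned int ga, gb;

            if (iommu_device_group(a, &ga) || iommu_device_group(b, &gb))
                    return -ENODEV;         /* no grouping information */

            return ga == gb;        /* nonzero: must be assigned together */
    }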
index 05acced439a38ec1faf822200740d032778e9022..ce93a341337d48a78b5449ea249aac04c3f8284d 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef LINUX_MSI_H
 #define LINUX_MSI_H
 
+#include <linux/kobject.h>
 #include <linux/list.h>
 
 struct msi_msg {
@@ -44,6 +45,8 @@ struct msi_desc {
 
        /* Last set MSI message */
        struct msi_msg msg;
+
+       struct kobject kobj;
 };
 
 /*
index 7cea7b6c14133628c5f543e729852bd3db085b3c..c8320144fe790cc3fb8dc5fe593092412a838bae 100644 (file)
@@ -29,7 +29,7 @@ extern void pcie_aspm_pm_state_change(struct pci_dev *pdev);
 extern void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
 extern void pci_disable_link_state(struct pci_dev *pdev, int state);
 extern void pci_disable_link_state_locked(struct pci_dev *pdev, int state);
-extern void pcie_clear_aspm(void);
+extern void pcie_clear_aspm(struct pci_bus *bus);
 extern void pcie_no_aspm(void);
 #else
 static inline void pcie_aspm_init_link_state(struct pci_dev *pdev)
@@ -47,7 +47,7 @@ static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
 static inline void pci_disable_link_state(struct pci_dev *pdev, int state)
 {
 }
-static inline void pcie_clear_aspm(void)
+static inline void pcie_clear_aspm(struct pci_bus *bus)
 {
 }
 static inline void pcie_no_aspm(void)
index 7cda65b5f79806005af9aada8831f8403a519db7..84225c756bd131ce422e926fa0a43a6ddc3dd459 100644 (file)
@@ -336,6 +336,7 @@ struct pci_dev {
        struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
 #ifdef CONFIG_PCI_MSI
        struct list_head msi_list;
+       struct kset *msi_kset;
 #endif
        struct pci_vpd *vpd;
 #ifdef CONFIG_PCI_ATS
index b5d9657f31004d64e53d9467646c1f268f4d51ab..28fe380cb19d59f83f507cfc4e841b7dbf4c288a 100644 (file)
 #define PCI_EXT_CAP_ID_ARI     14
 #define PCI_EXT_CAP_ID_ATS     15
 #define PCI_EXT_CAP_ID_SRIOV   16
+#define PCI_EXT_CAP_ID_PRI     19
 #define PCI_EXT_CAP_ID_LTR     24
+#define PCI_EXT_CAP_ID_PASID   27
 
 /* Advanced Error Reporting */
 #define PCI_ERR_UNCOR_STATUS   4       /* Uncorrectable Error Status */
 #define  PCI_ATS_MIN_STU       12      /* shift of minimum STU block */
 
 /* Page Request Interface */
-#define PCI_PRI_CAP            0x13    /* PRI capability ID */
-#define PCI_PRI_CONTROL_OFF    0x04    /* Offset of control register */
-#define PCI_PRI_STATUS_OFF     0x06    /* Offset of status register */
-#define PCI_PRI_ENABLE         0x0001  /* Enable mask */
-#define PCI_PRI_RESET          0x0002  /* Reset bit mask */
-#define PCI_PRI_STATUS_RF      0x0001  /* Request Failure */
-#define PCI_PRI_STATUS_UPRGI   0x0002  /* Unexpected PRG index */
-#define PCI_PRI_STATUS_STOPPED 0x0100  /* PRI Stopped */
-#define PCI_PRI_MAX_REQ_OFF    0x08    /* Cap offset for max reqs supported */
-#define PCI_PRI_ALLOC_REQ_OFF  0x0c    /* Cap offset for max reqs allowed */
+#define PCI_PRI_CTRL           0x04    /* PRI control register */
+#define  PCI_PRI_CTRL_ENABLE   0x01    /* Enable */
+#define  PCI_PRI_CTRL_RESET    0x02    /* Reset */
+#define PCI_PRI_STATUS         0x06    /* PRI status register */
+#define  PCI_PRI_STATUS_RF     0x001   /* Response Failure */
+#define  PCI_PRI_STATUS_UPRGI  0x002   /* Unexpected PRG index */
+#define  PCI_PRI_STATUS_STOPPED        0x100   /* PRI Stopped */
+#define PCI_PRI_MAX_REQ                0x08    /* PRI max reqs supported */
+#define PCI_PRI_ALLOC_REQ      0x0c    /* PRI max reqs allowed */
 
 /* PASID capability */
-#define PCI_PASID_CAP          0x1b    /* PASID capability ID */
-#define PCI_PASID_CAP_OFF      0x04    /* PASID feature register */
-#define PCI_PASID_CONTROL_OFF   0x06    /* PASID control register */
-#define PCI_PASID_ENABLE       0x01    /* Enable/Supported bit */
-#define PCI_PASID_EXEC         0x02    /* Exec permissions Enable/Supported */
-#define PCI_PASID_PRIV         0x04    /* Priviledge Mode Enable/Support */
+#define PCI_PASID_CAP          0x04    /* PASID feature register */
+#define  PCI_PASID_CAP_EXEC    0x02    /* Exec permissions Supported */
+#define  PCI_PASID_CAP_PRIV    0x04    /* Privilege Mode Supported */
+#define PCI_PASID_CTRL         0x06    /* PASID control register */
+#define  PCI_PASID_CTRL_ENABLE 0x01    /* Enable bit */
+#define  PCI_PASID_CTRL_EXEC   0x02    /* Exec permissions Enable */
+#define  PCI_PASID_CTRL_PRIV   0x04    /* Privilege Mode Enable */
 
 /* Single Root I/O Virtualization */
 #define PCI_SRIOV_CAP          0x04    /* SR-IOV Capabilities */
index 4e5f7b7f1d2be1828005e5081ab3af86d7f447f2..0fb448e6a1a33288694e78ae764540df7e7a80bb 100644 (file)
@@ -113,7 +113,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 
                /* Map into IO address space */
                r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
-                             get_order(page_size), flags);
+                             page_size, flags);
                if (r) {
                        printk(KERN_ERR "kvm_iommu_map_address:"
                               "iommu failed to map pfn=%llx\n", pfn);
@@ -293,15 +293,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 
        while (gfn < end_gfn) {
                unsigned long unmap_pages;
-               int order;
+               size_t size;
 
                /* Get physical address */
                phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
                pfn  = phys >> PAGE_SHIFT;
 
                /* Unmap address from IO address space */
-               order       = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
-               unmap_pages = 1ULL << order;
+               size       = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+               unmap_pages = 1ULL << get_order(size);
 
                /* Unpin all pages we just unmapped to not leak any memory */
                kvm_unpin_pages(kvm, pfn, unmap_pages);