[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-changelog] [xen master] VT-d: honor APEI firmware-first mode in XSA-59 workaround code



commit 1cc37ba8dbd89fb86dad3f6c78c3fba06019fe21
Author:     Jan Beulich <jbeulich@xxxxxxxx>
AuthorDate: Thu Jun 5 17:49:14 2014 +0200
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Thu Jun 5 17:49:14 2014 +0200

    VT-d: honor APEI firmware-first mode in XSA-59 workaround code
    
    When firmware-first mode is being indicated by firmware, we shouldn't
    be modifying AER registers - these are considered to be owned by
    firmware in that case. Violating this is being reported to result in
    SMI storms. While circumventing the workaround means re-exposing
    affected hosts to the XSA-59 issues, this in any event seems better
    than not booting at all. Respective messages are being issued to the
    log, so the situation can be diagnosed.
    
    The basic building blocks were taken from Linux 3.15-rc. Note that
    this includes a block of code enclosed in #ifdef CONFIG_X86_MCE - we
    don't define that symbol, and that code also wouldn't build without
    suitable machine check side code added; that should happen eventually,
    but isn't subject of this change.
    
    Reported-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Reported-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
    Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
    Tested-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
    Reviewed-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Acked-by: Yang Zhang <yang.z.zhang@xxxxxxxxx>
---
 xen/arch/x86/acpi/boot.c             |    2 +
 xen/drivers/acpi/apei/Makefile       |    1 +
 xen/drivers/acpi/apei/hest.c         |  200 ++++++++++++++++++++++++++++++++++
 xen/drivers/passthrough/pci.c        |  100 +++++++++++++++++
 xen/drivers/passthrough/vtd/quirks.c |   29 ++++--
 xen/include/acpi/actbl1.h            |    8 ++
 xen/include/acpi/apei.h              |    3 +
 xen/include/xen/acpi.h               |    1 +
 xen/include/xen/pci.h                |    2 +
 9 files changed, 338 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
index 63ae05a..903830b 100644
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -754,6 +754,8 @@ int __init acpi_boot_init(void)
 
        erst_init();
 
+       acpi_hest_init();
+
        acpi_table_parse(ACPI_SIG_BGRT, acpi_invalidate_bgrt);
 
        return 0;
diff --git a/xen/drivers/acpi/apei/Makefile b/xen/drivers/acpi/apei/Makefile
index af6ecb0..6ad7c64 100644
--- a/xen/drivers/acpi/apei/Makefile
+++ b/xen/drivers/acpi/apei/Makefile
@@ -1,3 +1,4 @@
 obj-y += erst.o
+obj-y += hest.o
 obj-y += apei-base.o
 obj-y += apei-io.o
diff --git a/xen/drivers/acpi/apei/hest.c b/xen/drivers/acpi/apei/hest.c
new file mode 100644
index 0000000..b8790a6
--- /dev/null
+++ b/xen/drivers/acpi/apei/hest.c
@@ -0,0 +1,200 @@
+/*
+ * APEI Hardware Error Souce Table support
+ *
+ * HEST describes error sources in detail; communicates operational
+ * parameters (i.e. severity levels, masking bits, and threshold
+ * values) to Linux as necessary. It also allows the BIOS to report
+ * non-standard error sources to Linux (for example, chipset-specific
+ * error registers).
+ *
+ * For more information about HEST, please refer to ACPI Specification
+ * version 4.0, section 17.3.2.
+ *
+ * Copyright 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@xxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <xen/kernel.h>
+#include <xen/mm.h>
+#include <xen/pfn.h>
+#include <acpi/acpi.h>
+#include <acpi/apei.h>
+
+#include "apei-internal.h"
+
+#define HEST_PFX "HEST: "
+
+static bool_t hest_disable;
+boolean_param("hest_disable", hest_disable);
+
+/* HEST table parsing */
+
+static struct acpi_table_hest *__read_mostly hest_tab;
+
+static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
+       [ACPI_HEST_TYPE_IA32_CHECK] = -1,       /* need further calculation */
+       [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
+       [ACPI_HEST_TYPE_IA32_NMI] = sizeof(struct acpi_hest_ia_nmi),
+       [ACPI_HEST_TYPE_AER_ROOT_PORT] = sizeof(struct acpi_hest_aer_root),
+       [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
+       [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
+       [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
+};
+
+static int hest_esrc_len(const struct acpi_hest_header *hest_hdr)
+{
+       u16 hest_type = hest_hdr->type;
+       int len;
+
+       if (hest_type >= ACPI_HEST_TYPE_RESERVED)
+               return 0;
+
+       len = hest_esrc_len_tab[hest_type];
+
+       if (hest_type == ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) {
+               const struct acpi_hest_ia_corrected *cmc =
+                       container_of(hest_hdr,
+                                    const struct acpi_hest_ia_corrected,
+                                    header);
+
+               len = sizeof(*cmc) + cmc->num_hardware_banks *
+                     sizeof(struct acpi_hest_ia_error_bank);
+       } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
+               const struct acpi_hest_ia_machine_check *mc =
+                       container_of(hest_hdr,
+                                    const struct acpi_hest_ia_machine_check,
+                                    header);
+
+               len = sizeof(*mc) + mc->num_hardware_banks *
+                     sizeof(struct acpi_hest_ia_error_bank);
+       }
+       BUG_ON(len == -1);
+
+       return len;
+};
+
+int apei_hest_parse(apei_hest_func_t func, void *data)
+{
+       struct acpi_hest_header *hest_hdr;
+       int i, rc, len;
+
+       if (hest_disable || !hest_tab)
+               return -EINVAL;
+
+       hest_hdr = (struct acpi_hest_header *)(hest_tab + 1);
+       for (i = 0; i < hest_tab->error_source_count; i++) {
+               len = hest_esrc_len(hest_hdr);
+               if (!len) {
+                       printk(XENLOG_WARNING HEST_PFX
+                              "Unknown or unused hardware error source "
+                              "type: %d for hardware error source: %d\n",
+                              hest_hdr->type, hest_hdr->source_id);
+                       return -EINVAL;
+               }
+               if ((void *)hest_hdr + len >
+                   (void *)hest_tab + hest_tab->header.length) {
+                       printk(XENLOG_WARNING HEST_PFX
+                              "Table contents overflow for hardware error 
source: %d\n",
+                              hest_hdr->source_id);
+                       return -EINVAL;
+               }
+
+               rc = func(hest_hdr, data);
+               if (rc)
+                       return rc;
+
+               hest_hdr = (void *)hest_hdr + len;
+       }
+
+       return 0;
+}
+
+/*
+ * Check if firmware advertises firmware first mode. We need FF bit to be set
+ * along with a set of MC banks which work in FF mode.
+ */
+static int __init hest_parse_cmc(const struct acpi_hest_header *hest_hdr,
+                                void *data)
+{
+#ifdef CONFIG_X86_MCE
+       unsigned int i;
+       const struct acpi_hest_ia_corrected *cmc;
+       const struct acpi_hest_ia_error_bank *mc_bank;
+
+       if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
+               return 0;
+
+       cmc = container_of(hest_hdr, const struct acpi_hest_ia_corrected, 
header);
+       if (!cmc->enabled)
+               return 0;
+
+       /*
+        * We expect HEST to provide a list of MC banks that report errors
+        * in firmware first mode. Otherwise, return non-zero value to
+        * indicate that we are done parsing HEST.
+        */
+       if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || 
!cmc->num_hardware_banks)
+               return 1;
+
+       printk(XENLOG_INFO HEST_PFX "Enabling Firmware First mode for corrected 
errors.\n");
+
+       mc_bank = (const struct acpi_hest_ia_error_bank *)(cmc + 1);
+       for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
+               mce_disable_bank(mc_bank->bank_number);
+#else
+# define acpi_disable_cmcff 1
+#endif
+
+       return 1;
+}
+
+void __init acpi_hest_init(void)
+{
+       acpi_status status;
+       acpi_physical_address hest_addr;
+       acpi_native_uint hest_len;
+
+       if (acpi_disabled)
+               return;
+
+       if (hest_disable) {
+               printk(XENLOG_INFO HEST_PFX "Table parsing disabled.\n");
+               return;
+       }
+
+       status = acpi_get_table_phys(ACPI_SIG_HEST, 0, &hest_addr, &hest_len);
+       if (status == AE_NOT_FOUND)
+               goto err;
+       if (ACPI_FAILURE(status)) {
+               printk(XENLOG_ERR HEST_PFX "Failed to get table, %s\n",
+                      acpi_format_exception(status));
+               goto err;
+       }
+       map_pages_to_xen((unsigned long)__va(hest_addr), PFN_DOWN(hest_addr),
+                        PFN_UP(hest_addr + hest_len) - PFN_DOWN(hest_addr),
+                        PAGE_HYPERVISOR);
+       hest_tab = __va(hest_addr);
+
+       if (!acpi_disable_cmcff)
+               apei_hest_parse(hest_parse_cmc, NULL);
+
+       printk(XENLOG_INFO HEST_PFX "Table parsing has been initialized\n");
+       return;
+err:
+       hest_disable = 1;
+}
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 43c1a81..1eba833 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1069,6 +1069,106 @@ void __hwdom_init setup_hwdom_pci_devices(
     spin_unlock(&pcidevs_lock);
 }
 
+#ifdef CONFIG_ACPI
+#include <acpi/acpi.h>
+#include <acpi/apei.h>
+
+static int hest_match_pci(const struct acpi_hest_aer_common *p,
+                          const struct pci_dev *pdev)
+{
+    return ACPI_HEST_SEGMENT(p->bus) == pdev->seg &&
+           ACPI_HEST_BUS(p->bus)     == pdev->bus &&
+           p->device                 == PCI_SLOT(pdev->devfn) &&
+           p->function               == PCI_FUNC(pdev->devfn);
+}
+
+static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr,
+                              const struct pci_dev *pdev)
+{
+    unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus,
+                                           PCI_SLOT(pdev->devfn),
+                                           PCI_FUNC(pdev->devfn),
+                                           PCI_CAP_ID_EXP);
+    u8 pcie = MASK_EXTR(pci_conf_read16(pdev->seg, pdev->bus,
+                                        PCI_SLOT(pdev->devfn),
+                                        PCI_FUNC(pdev->devfn),
+                                        pos + PCI_EXP_FLAGS),
+                        PCI_EXP_FLAGS_TYPE);
+
+    switch ( hest_hdr->type )
+    {
+    case ACPI_HEST_TYPE_AER_ROOT_PORT:
+        return pcie == PCI_EXP_TYPE_ROOT_PORT;
+    case ACPI_HEST_TYPE_AER_ENDPOINT:
+        return pcie == PCI_EXP_TYPE_ENDPOINT;
+    case ACPI_HEST_TYPE_AER_BRIDGE:
+        return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                               PCI_FUNC(pdev->devfn), PCI_CLASS_DEVICE) ==
+               PCI_CLASS_BRIDGE_PCI;
+    }
+
+    return 0;
+}
+
+struct aer_hest_parse_info {
+    const struct pci_dev *pdev;
+    bool_t firmware_first;
+};
+
+static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr)
+{
+    if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
+         hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
+         hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE )
+        return 1;
+    return 0;
+}
+
+static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data)
+{
+    struct aer_hest_parse_info *info = data;
+    const struct acpi_hest_aer_common *p;
+    bool_t ff;
+
+    if ( !hest_source_is_pcie_aer(hest_hdr) )
+        return 0;
+
+    p = (const struct acpi_hest_aer_common *)(hest_hdr + 1);
+    ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
+
+    /*
+     * If no specific device is supplied, determine whether
+     * FIRMWARE_FIRST is set for *any* PCIe device.
+     */
+    if ( !info->pdev )
+    {
+        info->firmware_first |= ff;
+        return 0;
+    }
+
+    /* Otherwise, check the specific device */
+    if ( p->flags & ACPI_HEST_GLOBAL ?
+         hest_match_type(hest_hdr, info->pdev) :
+         hest_match_pci(p, info->pdev) )
+    {
+        info->firmware_first = ff;
+        return 1;
+    }
+
+    return 0;
+}
+
+bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev)
+{
+    struct aer_hest_parse_info info = { .pdev = pdev };
+
+    return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                               PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) &&
+           apei_hest_parse(aer_hest_parse, &info) >= 0 &&
+           info.firmware_first;
+}
+#endif
+
 static int _dump_pci_devices(struct pci_seg *pseg, void *arg)
 {
     struct pci_dev *pdev;
diff --git a/xen/drivers/passthrough/vtd/quirks.c 
b/xen/drivers/passthrough/vtd/quirks.c
index fef12fb..c23c89e 100644
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -378,9 +378,11 @@ void pci_vtd_quirk(const struct pci_dev *pdev)
     int dev = PCI_SLOT(pdev->devfn);
     int func = PCI_FUNC(pdev->devfn);
     int pos;
-    u32 val;
+    bool_t ff;
+    u32 val, val2;
     u64 bar;
     paddr_t pa;
+    const char *action;
 
     if ( pci_conf_read16(seg, bus, dev, func, PCI_VENDOR_ID) !=
          PCI_VENDOR_ID_INTEL )
@@ -430,7 +432,10 @@ void pci_vtd_quirk(const struct pci_dev *pdev)
                 pos = pci_find_next_ext_capability(seg, bus, pdev->devfn, pos,
                                                    PCI_EXT_CAP_ID_VNDR);
             }
+            ff = 0;
         }
+        else
+            ff = pcie_aer_get_firmware_first(pdev);
         if ( !pos )
         {
             printk(XENLOG_WARNING "%04x:%02x:%02x.%u without AER 
capability?\n",
@@ -439,18 +444,26 @@ void pci_vtd_quirk(const struct pci_dev *pdev)
         }
 
         val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK);
-        pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
-                         val | PCI_ERR_UNC_UNSUP);
-        val = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
-        pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
-                         val | PCI_ERR_COR_ADV_NFAT);
+        val2 = pci_conf_read32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK);
+        if ( (val & PCI_ERR_UNC_UNSUP) && (val2 & PCI_ERR_COR_ADV_NFAT) )
+            action = "Found masked";
+        else if ( !ff )
+        {
+            pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_UNCOR_MASK,
+                             val | PCI_ERR_UNC_UNSUP);
+            pci_conf_write32(seg, bus, dev, func, pos + PCI_ERR_COR_MASK,
+                             val2 | PCI_ERR_COR_ADV_NFAT);
+            action = "Masked";
+        }
+        else
+            action = "Must not mask";
 
         /* XPUNCERRMSK Send Completion with Unsupported Request */
         val = pci_conf_read32(seg, bus, dev, func, 0x20c);
         pci_conf_write32(seg, bus, dev, func, 0x20c, val | (1 << 4));
 
-        printk(XENLOG_INFO "Masked UR signaling on %04x:%02x:%02x.%u\n",
-               seg, bus, dev, func);
+        printk(XENLOG_INFO "%s UR signaling on %04x:%02x:%02x.%u\n",
+               action, seg, bus, dev, func);
         break;
 
     case 0x100: case 0x104: case 0x108: /* Sandybridge */
diff --git a/xen/include/acpi/actbl1.h b/xen/include/acpi/actbl1.h
index 492be4e..9311e3a 100644
--- a/xen/include/acpi/actbl1.h
+++ b/xen/include/acpi/actbl1.h
@@ -445,6 +445,14 @@ struct acpi_hest_aer_common {
 #define ACPI_HEST_FIRMWARE_FIRST        (1)
 #define ACPI_HEST_GLOBAL                (1<<1)
 
+/*
+ * Macros to access the bus/segment numbers in Bus field above:
+ *  Bus number is encoded in bits 7:0
+ *  Segment number is encoded in bits 23:8
+ */
+#define ACPI_HEST_BUS(bus)              ((bus) & 0xFF)
+#define ACPI_HEST_SEGMENT(bus)          (((bus) >> 8) & 0xFFFF)
+
 /* Hardware Error Notification */
 
 struct acpi_hest_notify {
diff --git a/xen/include/acpi/apei.h b/xen/include/acpi/apei.h
index d228595..eab16e2 100644
--- a/xen/include/acpi/apei.h
+++ b/xen/include/acpi/apei.h
@@ -12,6 +12,9 @@
 
 #define FIX_APEI_RANGE_MAX 64
 
+typedef int (*apei_hest_func_t)(const struct acpi_hest_header *, void *);
+int apei_hest_parse(apei_hest_func_t, void *);
+
 int erst_write(const struct cper_record_header *record);
 ssize_t erst_get_record_count(void);
 int erst_get_next_record_id(u64 *record_id);
diff --git a/xen/include/xen/acpi.h b/xen/include/xen/acpi.h
index aedec65..3aeba4a 100644
--- a/xen/include/xen/acpi.h
+++ b/xen/include/xen/acpi.h
@@ -61,6 +61,7 @@ int acpi_boot_init (void);
 int acpi_boot_table_init (void);
 int acpi_numa_init (void);
 int erst_init(void);
+void acpi_hest_init(void);
 
 int acpi_table_init (void);
 int acpi_table_parse(char *id, acpi_table_handler handler);
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 39c9273..91520bc 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -144,6 +144,8 @@ int pci_find_next_ext_capability(int seg, int bus, int 
devfn, int pos, int cap);
 const char *parse_pci(const char *, unsigned int *seg, unsigned int *bus,
                       unsigned int *dev, unsigned int *func);
 
+bool_t pcie_aer_get_firmware_first(const struct pci_dev *);
+
 struct pirq;
 int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable);
 void msixtbl_pt_unregister(struct domain *, struct pirq *);
--
generated by git-patchbot for /home/xen/git/xen.git#master

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.