
Re: [Xen-devel] [PATCH 3 of 6 V6] hvmloader: Build IVRS table




> -----Original Message-----
> From: Wei Wang [mailto:wei.wang2@xxxxxxx]
> Sent: Thursday, March 08, 2012 10:50 PM
> To: Zhang, Xiantao
> Cc: Ian.Jackson@xxxxxxxxxxxxx; Ian.Campbell@xxxxxxxxxx; JBeulich@xxxxxxxx;
> keir@xxxxxxx; xen-devel@xxxxxxxxxxxxxxxxxxx
> Subject: Re: [Xen-devel] [PATCH 3 of 6 V6] hvmloader: Build IVRS table
> 
> On 03/08/2012 03:22 PM, Zhang, Xiantao wrote:
> > I think this IVRS table should be vendor-specific, and we should have a
> > mechanism that makes it work only for the AMD IOMMU.  This is because Intel
> > has similar support in the next generation of VT-d, and a DMAR table should
> > likewise be built when virtual VT-d is enabled for the guest.  I suggest this
> > table be built only when the guest runs on AMD platforms.
> > Thanks!
> > Xiantao
> 
> Hi Xiantao,
> Thanks for reviewing it. Actually, construct_ivrs() will invoke a hypercall,
> guest_iommu_set_base(), which fails on non-IOMMUv2 systems including
> VT-d, so the IVRS table can already be avoided on Intel systems. But I am also
> thinking that maybe we should let the user choose between different IOMMU
> hardware at the beginning, for example using guest_iommu={vtd, amd, 0} to
> distinguish the different IOMMU hardware?
Hi, Wei
        The term iommu should be vendor-neutral, and the option 
guest_iommu={0,1} is also fine for VT-d.  The guest's iommu depends on the 
host's capability, so we can't let the user choose options such as a virtual 
iommu_v2 on VT-d or a virtual VT-d on iommu_v2.  Basically, in hvmloader we 
should detect the platform's vendor and then build the corresponding ACPI 
table.  For the hypercall, we should review the parameters and let it work for 
both sides; it may have different implementations for AMD's and Intel's 
platforms in the hypervisor.
Thanks!
Xiantao
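
(For illustration only, a minimal sketch of the vendor dispatch described above;
it is not part of the posted patch. It assumes it sits in hvmloader's
acpi/build.c next to construct_ivrs() and reuses util.h helpers such as memcpy,
strcmp, strncmp and xenstore_read; cpuid_str(), host_is_amd() and the
construct_dmar() placeholder are hypothetical names that do not exist in the
tree.)

static void cpuid_str(uint32_t leaf, uint32_t *a, uint32_t *b,
                      uint32_t *c, uint32_t *d)
{
    asm volatile ( "cpuid"
                   : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                   : "0" (leaf) );
}

/* Hypothetical helper: true if the host CPU vendor string is AuthenticAMD. */
static int host_is_amd(void)
{
    uint32_t eax, ebx, ecx, edx;
    char vendor[13];

    cpuid_str(0, &eax, &ebx, &ecx, &edx);
    memcpy(&vendor[0], &ebx, 4);   /* "Auth" */
    memcpy(&vendor[4], &edx, 4);   /* "enti" */
    memcpy(&vendor[8], &ecx, 4);   /* "cAMD" */
    vendor[12] = '\0';

    return !strcmp(vendor, "AuthenticAMD");
}

/* Hypothetical dispatch called from construct_secondary_tables(). */
static unsigned long construct_guest_iommu_table(void)
{
    /* Same xenstore key the patch below already checks. */
    if ( strncmp(xenstore_read("guest_iommu", "1"), "1", 1) )
        return 0;                                  /* guest iommu disabled */

    if ( host_is_amd() )
        return (unsigned long)construct_ivrs();    /* AMD: build IVRS */

    /* Intel: a future construct_dmar() would build a DMAR table here. */
    return 0;
}

Keying the decision off the host CPU vendor keeps the guest_iommu option
vendor-neutral while still emitting only the table the underlying hardware
can actually back.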


 
> >> -----Original Message-----
> >> From: xen-devel-bounces@xxxxxxxxxxxxx [mailto:xen-devel-
> >> bounces@xxxxxxxxxxxxx] On Behalf Of Wei Wang
> >> Sent: Thursday, March 08, 2012 9:22 PM
> >> To: Ian.Jackson@xxxxxxxxxxxxx; Ian.Campbell@xxxxxxxxxx;
> >> JBeulich@xxxxxxxx; keir@xxxxxxx
> >> Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
> >> Subject: [Xen-devel] [PATCH 3 of 6 V6] hvmloader: Build IVRS table
> >>
> >> # HG changeset patch
> >> # User Wei Wang <wei.wang2@xxxxxxx>
> >> # Date 1331210217 -3600
> >> # Node ID d0611a8ee06d3f34de1c7c51da8571d9e1a668e1
> >> # Parent  e9d74ec1077472f9127c43903811ce3107fc038d
> >> hvmloader: Build IVRS table.
> >>
> >> There are 32 IVRS padding entries allocated at the beginning. If a
> >> passthrough device has been found on the qemu bus, a padding entry will
> >> be replaced by a real device entry. This patch has been tested with both
> >> rombios and seabios.
> >>
> >> Signed-off-by: Wei Wang <wei.wang2@xxxxxxx>
> >>
> >> diff -r e9d74ec10774 -r d0611a8ee06d tools/firmware/hvmloader/acpi/acpi2_0.h
> >> --- a/tools/firmware/hvmloader/acpi/acpi2_0.h      Thu Mar 08 13:36:54 2012 +0100
> >> +++ b/tools/firmware/hvmloader/acpi/acpi2_0.h      Thu Mar 08 13:36:57 2012 +0100
> >> @@ -389,6 +389,60 @@ struct acpi_20_madt_intsrcovr {
> >>  #define ACPI_2_0_WAET_REVISION 0x01
> >>  #define ACPI_1_0_FADT_REVISION 0x01
> >>
> >> +#define IVRS_SIGNATURE ASCII32('I','V','R','S')
> >> +#define IVRS_REVISION           1
> >> +#define IVRS_VASIZE             64
> >> +#define IVRS_PASIZE             52
> >> +#define IVRS_GVASIZE            64
> >> +
> >> +#define IVHD_BLOCK_TYPE         0x10
> >> +#define IVHD_FLAG_HTTUNEN       (1 << 0)
> >> +#define IVHD_FLAG_PASSPW        (1 << 1)
> >> +#define IVHD_FLAG_RESPASSPW     (1 << 2)
> >> +#define IVHD_FLAG_ISOC          (1 << 3)
> >> +#define IVHD_FLAG_IOTLBSUP      (1 << 4)
> >> +#define IVHD_FLAG_COHERENT      (1 << 5)
> >> +#define IVHD_FLAG_PREFSUP       (1 << 6)
> >> +#define IVHD_FLAG_PPRSUP        (1 << 7)
> >> +
> >> +#define IVHD_EFR_GTSUP          (1 << 2)
> >> +#define IVHD_EFR_IASUP          (1 << 5)
> >> +
> >> +#define IVHD_SELECT_4_BYTE      0x2
> >> +
> >> +struct ivrs_ivhd_block
> >> +{
> >> +    uint8_t    type;
> >> +    uint8_t    flags;
> >> +    uint16_t   length;
> >> +    uint16_t   devid;
> >> +    uint16_t   cap_offset;
> >> +    uint64_t   iommu_base_addr;
> >> +    uint16_t   pci_segment;
> >> +    uint16_t   iommu_info;
> >> +    uint32_t   reserved;
> >> +};
> >> +
> >> +/* IVHD 4-byte device entries */
> >> +struct ivrs_ivhd_device
> >> +{
> >> +   uint8_t  type;
> >> +   uint16_t dev_id;
> >> +   uint8_t  flags;
> >> +};
> >> +
> >> +#define PT_DEV_MAX_NR           32
> >> +#define IOMMU_CAP_OFFSET        0x40
> >> +struct acpi_40_ivrs
> >> +{
> >> +    struct acpi_header                      header;
> >> +    uint32_t                                iv_info;
> >> +    uint32_t                                reserved[2];
> >> +    struct ivrs_ivhd_block                  ivhd_block;
> >> +    struct ivrs_ivhd_device                 ivhd_device[PT_DEV_MAX_NR];
> >> +};
> >> +
> >> +
> >>   #pragma pack ()
> >>
> >>   struct acpi_config {
> >> diff -r e9d74ec10774 -r d0611a8ee06d tools/firmware/hvmloader/acpi/build.c
> >> --- a/tools/firmware/hvmloader/acpi/build.c        Thu Mar 08 13:36:54 2012 +0100
> >> +++ b/tools/firmware/hvmloader/acpi/build.c        Thu Mar 08 13:36:57 2012 +0100
> >> @@ -23,6 +23,8 @@
> >>   #include "ssdt_pm.h"
> >>   #include "../config.h"
> >>   #include "../util.h"
> >> +#include "../hypercall.h"
> >> +#include <xen/hvm/params.h>
> >>
> >>   #define align16(sz)        (((sz) + 15) & ~15)
> >>   #define fixed_strcpy(d, s) strncpy((d), (s), sizeof(d))
> >> @@ -198,6 +200,87 @@ static struct acpi_20_waet *construct_wa
> >>       return waet;
> >>   }
> >>
> >> +extern uint32_t ptdev_bdf[PT_DEV_MAX_NR];
> >> +extern uint32_t ptdev_nr;
> >> +extern uint32_t iommu_bdf;
> >> +
> >> +static struct acpi_40_ivrs* construct_ivrs(void)
> >> +{
> >> +    struct acpi_40_ivrs *ivrs;
> >> +    uint64_t mmio;
> >> +    struct ivrs_ivhd_block *ivhd;
> >> +    struct ivrs_ivhd_device *dev_entry;
> >> +    struct xen_hvm_param p;
> >> +
> >> +    if (ptdev_nr == 0 || iommu_bdf == 0) return NULL;
> >> +
> >> +    ivrs = mem_alloc(sizeof(*ivrs), 16);
> >> +    if (!ivrs)
> >> +    {
> >> +        printf("unable to build IVRS tables: out of memory\n");
> >> +        return NULL;
> >> +    }
> >> +    memset(ivrs, 0, sizeof(*ivrs));
> >> +
> >> +    /* initialize acpi header */
> >> +    ivrs->header.signature = IVRS_SIGNATURE;
> >> +    ivrs->header.revision = IVRS_REVISION;
> >> +    fixed_strcpy(ivrs->header.oem_id, ACPI_OEM_ID);
> >> +    fixed_strcpy(ivrs->header.oem_table_id, ACPI_OEM_TABLE_ID);
> >> +
> >> +    ivrs->header.oem_revision = ACPI_OEM_REVISION;
> >> +    ivrs->header.creator_id   = ACPI_CREATOR_ID;
> >> +    ivrs->header.creator_revision = ACPI_CREATOR_REVISION;
> >> +
> >> +    ivrs->header.length = sizeof(*ivrs);
> >> +
> >> +    /* initialize IVHD Block */
> >> +    ivhd = &ivrs->ivhd_block;
> >> +    ivrs->iv_info = (IVRS_VASIZE << 15) | (IVRS_PASIZE << 8) |
> >> +                    (IVRS_GVASIZE << 5);
> >> +
> >> +    ivhd->type          = IVHD_BLOCK_TYPE;
> >> +    ivhd->flags         = IVHD_FLAG_PPRSUP | IVHD_FLAG_IOTLBSUP;
> >> +    ivhd->devid         = iommu_bdf;
> >> +    ivhd->cap_offset    = IOMMU_CAP_OFFSET;
> >> +
> >> +    /* reserve 32K IOMMU MMIO space */
> >> +    mmio = virt_to_phys(mem_alloc(0x8000, 0x1000));
> >> +    if (!mmio)
> >> +    {
> >> +        printf("unable to reserve iommu mmio pages: out of memory\n");
> >> +        return NULL;
> >> +    }
> >> +
> >> +    p.domid = DOMID_SELF;
> >> +    p.index = HVM_PARAM_IOMMU_BASE;
> >> +    p.value = mmio;
> >> +
> >> +    /* Return non-zero if IOMMUv2 hardware is not available */
> >> +    if ( hypercall_hvm_op(HVMOP_set_param, &p) )
> >> +    {
> >> +        printf("unable to set iommu mmio base address\n");
> >> +        return NULL;
> >> +    }
> >> +
> >> +    ivhd->iommu_base_addr = mmio;
> >> +    ivhd->reserved = IVHD_EFR_IASUP | IVHD_EFR_GTSUP;
> >> +
> >> +    /* Build IVHD device entries */
> >> +    dev_entry = ivrs->ivhd_device;
> >> +    for ( int i = 0; i < ptdev_nr; i++ )
> >> +    {
> >> +        dev_entry[i].type   = IVHD_SELECT_4_BYTE;
> >> +        dev_entry[i].dev_id = ptdev_bdf[i];
> >> +        dev_entry[i].flags  = 0;
> >> +    }
> >> +
> >> +    ivhd->length = sizeof(*ivhd) + sizeof(*dev_entry) * PT_DEV_MAX_NR;
> >> +    set_checksum(ivrs, offsetof(struct acpi_header, checksum),
> >> +                 ivrs->header.length);
> >> +
> >> +    return ivrs;
> >> +}
> >> +
> >>   static int construct_secondary_tables(unsigned long *table_ptrs,
> >>                                         struct acpi_info *info)
> >>   {
> >> @@ -206,6 +289,7 @@ static int construct_secondary_tables(un
> >>       struct acpi_20_hpet *hpet;
> >>       struct acpi_20_waet *waet;
> >>       struct acpi_20_tcpa *tcpa;
> >> +    struct acpi_40_ivrs *ivrs;
> >>       unsigned char *ssdt;
> >>       static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001};
> >>       uint16_t *tis_hdr;
> >> @@ -293,6 +377,13 @@ static int construct_secondary_tables(un
> >>           }
> >>       }
> >>
> >> +    if ( !strncmp(xenstore_read("guest_iommu", "1"), "1", 1) )
> >> +    {
> >> +        ivrs = construct_ivrs();
> >> +        if ( ivrs != NULL )
> >> +            table_ptrs[nr_tables++] = (unsigned long)ivrs;
> >> +    }
> >> +
> >>       table_ptrs[nr_tables] = 0;
> >>       return nr_tables;
> >>   }
> >> diff -r e9d74ec10774 -r d0611a8ee06d tools/firmware/hvmloader/pci.c
> >> --- a/tools/firmware/hvmloader/pci.c       Thu Mar 08 13:36:54 2012 +0100
> >> +++ b/tools/firmware/hvmloader/pci.c       Thu Mar 08 13:36:57 2012 +0100
> >> @@ -34,11 +34,17 @@ unsigned long pci_mem_end = PCI_MEM_END;
> >>  enum virtual_vga virtual_vga = VGA_none;
> >>  unsigned long igd_opregion_pgbase = 0;
> >>
> >> +/* support up to 32 passthrough devices */
> >> +#define PT_DEV_MAX_NR           32
> >> +uint32_t ptdev_bdf[PT_DEV_MAX_NR];
> >> +uint32_t ptdev_nr;
> >> +uint32_t iommu_bdf = 0;
> >> +
> >>   void pci_setup(void)
> >>   {
> >>       uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
> >>       uint32_t vga_devfn = 256;
> >> -    uint16_t class, vendor_id, device_id;
> >> +    uint16_t class, vendor_id, device_id, sub_vendor_id;
> >>       unsigned int bar, pin, link, isa_irq;
> >>
> >>       /* Resources assignable to PCI devices via BARs. */
> >> @@ -72,12 +78,34 @@ void pci_setup(void)
> >>           class     = pci_readw(devfn, PCI_CLASS_DEVICE);
> >>           vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
> >>           device_id = pci_readw(devfn, PCI_DEVICE_ID);
> >> +        sub_vendor_id = pci_readw(devfn, PCI_SUBSYSTEM_VENDOR_ID);
> >> +
> >>           if ( (vendor_id == 0xffff) && (device_id == 0xffff) )
> >>               continue;
> >>
> >>           ASSERT((devfn != PCI_ISA_DEVFN) ||
> >>                  ((vendor_id == 0x8086) && (device_id == 0x7000)));
> >>
> >> +        /* Found amd iommu device. */
> >> +        if ( class == 0x0806 && vendor_id == 0x1022 )
> >> +        {
> >> +            iommu_bdf = devfn;
> >> +            continue;
> >> +        }
> >> +        /* IVRS: Detecting passthrough devices.
> >> +         * sub_vendor_id != citrix && sub_vendor_id != qemu */
> >> +        if ( sub_vendor_id != 0x5853 && sub_vendor_id != 0x1af4 )
> >> +        {
> >> +            /* found a passthru device */
> >> +            if ( ptdev_nr < PT_DEV_MAX_NR )
> >> +            {
> >> +                ptdev_bdf[ptdev_nr] = devfn;
> >> +                ptdev_nr++;
> >> +            }
> >> +            else
> >> +                printf("Number of passthru devices > PT_DEV_MAX_NR\n");
> >> +        }
> >> +
> >>           switch ( class )
> >>           {
> >>           case 0x0300:
> >>
> >>
> >> _______________________________________________
> >> Xen-devel mailing list
> >> Xen-devel@xxxxxxxxxxxxx
> >> http://lists.xen.org/xen-devel
> >
> 


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

