[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] Enable HVM guest VT-d device hotplug via a simple ACPI hotplug device model.
# HG changeset patch # User Keir Fraser <keir.fraser@xxxxxxxxxx> # Date 1203084797 0 # Node ID c6eeb71a85cffe917bc8d6738fd36e6694dba4d6 # Parent 29c03bc32d3e16103e9cc48260ab83317662a93e Enable HVM guest VT-d device hotplug via a simple ACPI hotplug device model. ** Currently only 2 virtual hotplug PCI slots (6~7) are created, so no more than 2 VT-d devices can be hotplugged, but this can easily be extended in the future. Three new commands are added: "xm pci-list domid" shows the currently assigned VT-d devices, like: VSlt domain bus slot func 0x6 0x0 0x02 0x00 0x0 "xm pci-detach" hot-removes the specified VT-d device by its virtual slot, like: xm pci-detach EdwinHVMDomainVtd 6 "xm pci-attach DomainID dom bus dev func [vslot]" hot-adds a new VT-d device in the given vslot. If no vslot is specified, a free slot will be picked. e.g. to insert '0000:03:00.0': xm pci-attach EdwinHVMDomainVtd 0 3 0 0 ** guest PCI hotplug Linux: please use 2.6.X and enable ACPI PCI hotplug (Bus options => PCI hotplug => ACPI PCI hotplug driver) Windows: 2000/XP/2003/Vista are all okay Signed-off-by: Zhai Edwin <edwin.zhai@xxxxxxxxx> --- tools/firmware/hvmloader/acpi/dsdt.asl | 117 ++++++++ tools/firmware/hvmloader/acpi/dsdt.c | 68 ++++ tools/firmware/hvmloader/acpi/static_tables.c | 2 tools/ioemu/hw/pass-through.c | 345 ++++++++++++++++++++++++- tools/ioemu/hw/pc.c | 6 tools/ioemu/hw/pci.c | 9 tools/ioemu/hw/piix4acpi.c | 354 ++++++++++++++++++++++++++ tools/ioemu/monitor.c | 6 tools/ioemu/vl.c | 24 + tools/ioemu/vl.h | 28 ++ tools/ioemu/xenstore.c | 112 +++++++- tools/libxc/xc_domain.c | 44 +++ tools/libxc/xenctrl.h | 13 tools/python/xen/xend/XendDomainInfo.py | 202 ++++++++++++++ tools/python/xen/xend/image.py | 31 +- tools/python/xen/xend/server/DevController.py | 8 tools/python/xen/xend/server/pciif.py | 48 +++ tools/python/xen/xm/main.py | 67 ++++ xen/arch/x86/domctl.c | 45 +++ xen/arch/x86/hvm/irq.c | 3 xen/arch/x86/hvm/vmx/vtd/intel-iommu.c | 4 xen/arch/x86/hvm/vmx/vtd/io.c | 61 ++++ 
xen/include/asm-x86/hvm/irq.h | 2 xen/include/asm-x86/iommu.h | 5 xen/include/public/domctl.h | 2 xen/include/public/hvm/ioreq.h | 5 26 files changed, 1555 insertions(+), 56 deletions(-) diff -r 29c03bc32d3e -r c6eeb71a85cf tools/firmware/hvmloader/acpi/dsdt.asl --- a/tools/firmware/hvmloader/acpi/dsdt.asl Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/firmware/hvmloader/acpi/dsdt.asl Fri Feb 15 14:13:17 2008 +0000 @@ -86,7 +86,7 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, Name (_UID, 0x00) Name (_ADR, 0x00) Name (_BBN, 0x00) - + Method (_CRS, 0, NotSerialized) { Name (PRT0, ResourceTemplate () @@ -720,6 +720,121 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, }) } } + + /****************************************************************** + * Each PCI hotplug slot needs at least two methods to handle + * the ACPI event: + * _EJ0: eject a device + * _STA: return a device's status, e.g. enabled or removed + * Other methods are optional: + * _PS0/3: put them here for debug purpose + * + * Eject button would generate a general-purpose event, then the + * control method for this event uses Notify() to inform OSPM which + * action happened and on which device. + * + * Pls. refer "6.3 Device Insertion, Removal, and Status Objects" + * in ACPI spec 3.0b for details. + * + * QEMU provides a simple hotplug controller with some I/O to + * handle the hotplug action and status, which is beyond the ACPI + * scope. 
+ */ + + Device (S1F0) + { + Name (_ADR, 0x00060000) /* Dev 6, Func 0 */ + Name (_SUN, 0x00000001) + + Method (_PS0, 0) + { + Store (0x80, \_GPE.DPT2) + } + + Method (_PS3, 0) + { + Store (0x83, \_GPE.DPT2) + } + + Method (_EJ0, 1) + { + Store (0x88, \_GPE.DPT2) + Store (0x1, \_GPE.PHP1) /* eject php slot 1*/ + } + + Method (_STA, 0) + { + Store (0x89, \_GPE.DPT2) + Return ( \_GPE.PHP1 ) /* IN status as the _STA */ + } + } + + Device (S2F0) + { + Name (_ADR, 0x00070000) /* Dev 7, Func 0 */ + Name (_SUN, 0x00000002) + + Method (_PS0, 0) + { + Store (0x90, \_GPE.DPT2) + } + + Method (_PS3, 0) + { + Store (0x93, \_GPE.DPT2) + } + + Method (_EJ0, 1) + { + Store (0x98, \_GPE.DPT2) + Store (0x1, \_GPE.PHP2) /* eject php slot 1*/ + } + + Method (_STA, 0) + { + Store (0x99, \_GPE.DPT2) + Return ( \_GPE.PHP2 ) /* IN status as the _STA */ + } + } + } + } + + Scope (\_GPE) + { + OperationRegion (PHP, SystemIO, 0x10c0, 0x03) + Field (PHP, ByteAcc, NoLock, Preserve) + { + PSTA, 8, /* hotplug controller status reg */ + PHP1, 8, /* hotplug slot 1 control reg */ + PHP2, 8 /* hotplug slot 2 control reg */ + } + OperationRegion (DG1, SystemIO, 0xb044, 0x04) + Field (DG1, ByteAcc, NoLock, Preserve) + { + DPT1, 8, + DPT2, 8 + } + Method (_L03, 0, NotSerialized) + { + /* detect slot and event(remove/add) */ + Name (SLT, 0x0) + Name (EVT, 0x0) + Store (PSTA, Local1) + ShiftRight (Local1, 0x4, SLT) + And (Local1, 0xf, EVT) + + /* debug */ + Store (SLT, DPT1) + Store (EVT, DPT2) + + If ( LEqual(SLT, 0x1) ) + { + Notify (\_SB.PCI0.S1F0, EVT) + } + ElseIf ( LEqual(SLT, 0x2) ) + { + Notify (\_SB.PCI0.S2F0, EVT) + } } } } diff -r 29c03bc32d3e -r c6eeb71a85cf tools/firmware/hvmloader/acpi/dsdt.c --- a/tools/firmware/hvmloader/acpi/dsdt.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/firmware/hvmloader/acpi/dsdt.c Fri Feb 15 14:13:17 2008 +0000 @@ -5,15 +5,15 @@ * Copyright (C) 2000 - 2006 Intel Corporation * Supports ACPI Specification Revision 3.0a * - * Compilation of "dsdt.asl" - Fri Feb 15 
12:48:58 2008 + * Compilation of "dsdt.asl" - Fri Feb 15 14:07:57 2008 * * C source code output * */ unsigned char AmlCode[] = { - 0x44,0x53,0x44,0x54,0x9C,0x0E,0x00,0x00, /* 00000000 "DSDT...." */ - 0x02,0xD5,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 "..Xen..." */ + 0x44,0x53,0x44,0x54,0x5A,0x10,0x00,0x00, /* 00000000 "DSDTZ..." */ + 0x02,0xCC,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 "..Xen..." */ 0x48,0x56,0x4D,0x00,0x00,0x00,0x00,0x00, /* 00000010 "HVM....." */ 0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ 0x07,0x07,0x06,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "... .PMB" */ @@ -29,7 +29,7 @@ unsigned char AmlCode[] = 0x07,0x0A,0x07,0x00,0x00,0x08,0x50,0x49, /* 00000070 "......PI" */ 0x43,0x44,0x00,0x14,0x0C,0x5F,0x50,0x49, /* 00000078 "CD..._PI" */ 0x43,0x01,0x70,0x68,0x50,0x49,0x43,0x44, /* 00000080 "C.phPICD" */ - 0x10,0x43,0xE1,0x5F,0x53,0x42,0x5F,0x5B, /* 00000088 ".C._SB_[" */ + 0x10,0x42,0xF1,0x5F,0x53,0x42,0x5F,0x5B, /* 00000088 ".B._SB_[" */ 0x80,0x42,0x49,0x4F,0x53,0x00,0x0C,0x00, /* 00000090 ".BIOS..." */ 0xA0,0x0E,0x00,0x0A,0x10,0x5B,0x81,0x21, /* 00000098 ".....[.!" */ 0x42,0x49,0x4F,0x53,0x01,0x55,0x41,0x52, /* 000000A0 "BIOS.UAR" */ @@ -45,7 +45,7 @@ unsigned char AmlCode[] = 0x00,0xFF,0xFF,0x09,0x00,0x00,0x00,0x00, /* 000000F0 "........" */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 000000F8 "........" */ 0x00,0x00,0x00,0x0A,0x00,0x00,0x00,0x00, /* 00000100 "........" 
*/ - 0x00,0x79,0x00,0x5B,0x82,0x4F,0xD8,0x50, /* 00000108 ".y.[.O.P" */ + 0x00,0x79,0x00,0x5B,0x82,0x4E,0xE8,0x50, /* 00000108 ".y.[.N.P" */ 0x43,0x49,0x30,0x08,0x5F,0x48,0x49,0x44, /* 00000110 "CI0._HID" */ 0x0C,0x41,0xD0,0x0A,0x03,0x08,0x5F,0x55, /* 00000118 ".A...._U" */ 0x49,0x44,0x00,0x08,0x5F,0x41,0x44,0x52, /* 00000120 "ID.._ADR" */ @@ -479,6 +479,62 @@ unsigned char AmlCode[] = 0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F, /* 00000E80 "TA....._" */ 0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47, /* 00000E88 "CRS....G" */ 0x01,0x78,0x03,0x78,0x03,0x08,0x08,0x22, /* 00000E90 ".x.x..."" */ - 0x80,0x00,0x79,0x00, + 0x80,0x00,0x79,0x00,0x5B,0x82,0x4D,0x07, /* 00000E98 "..y.[.M." */ + 0x53,0x31,0x46,0x30,0x08,0x5F,0x41,0x44, /* 00000EA0 "S1F0._AD" */ + 0x52,0x0C,0x00,0x00,0x06,0x00,0x08,0x5F, /* 00000EA8 "R......_" */ + 0x53,0x55,0x4E,0x01,0x14,0x13,0x5F,0x50, /* 00000EB0 "SUN..._P" */ + 0x53,0x30,0x00,0x70,0x0A,0x80,0x5C,0x2E, /* 00000EB8 "S0.p..\." */ + 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00000EC0 "_GPEDPT2" */ + 0x14,0x13,0x5F,0x50,0x53,0x33,0x00,0x70, /* 00000EC8 ".._PS3.p" */ + 0x0A,0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00000ED0 "..\._GPE" */ + 0x44,0x50,0x54,0x32,0x14,0x1F,0x5F,0x45, /* 00000ED8 "DPT2.._E" */ + 0x4A,0x30,0x01,0x70,0x0A,0x88,0x5C,0x2E, /* 00000EE0 "J0.p..\." */ + 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00000EE8 "_GPEDPT2" */ + 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00000EF0 "p.\._GPE" */ + 0x50,0x48,0x50,0x31,0x14,0x1E,0x5F,0x53, /* 00000EF8 "PHP1.._S" */ + 0x54,0x41,0x00,0x70,0x0A,0x89,0x5C,0x2E, /* 00000F00 "TA.p..\." 
*/ + 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00000F08 "_GPEDPT2" */ + 0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x50, /* 00000F10 ".\._GPEP" */ + 0x48,0x50,0x31,0x5B,0x82,0x4E,0x07,0x53, /* 00000F18 "HP1[.N.S" */ + 0x32,0x46,0x30,0x08,0x5F,0x41,0x44,0x52, /* 00000F20 "2F0._ADR" */ + 0x0C,0x00,0x00,0x07,0x00,0x08,0x5F,0x53, /* 00000F28 "......_S" */ + 0x55,0x4E,0x0A,0x02,0x14,0x13,0x5F,0x50, /* 00000F30 "UN...._P" */ + 0x53,0x30,0x00,0x70,0x0A,0x90,0x5C,0x2E, /* 00000F38 "S0.p..\." */ + 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00000F40 "_GPEDPT2" */ + 0x14,0x13,0x5F,0x50,0x53,0x33,0x00,0x70, /* 00000F48 ".._PS3.p" */ + 0x0A,0x93,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00000F50 "..\._GPE" */ + 0x44,0x50,0x54,0x32,0x14,0x1F,0x5F,0x45, /* 00000F58 "DPT2.._E" */ + 0x4A,0x30,0x01,0x70,0x0A,0x98,0x5C,0x2E, /* 00000F60 "J0.p..\." */ + 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00000F68 "_GPEDPT2" */ + 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00000F70 "p.\._GPE" */ + 0x50,0x48,0x50,0x32,0x14,0x1E,0x5F,0x53, /* 00000F78 "PHP2.._S" */ + 0x54,0x41,0x00,0x70,0x0A,0x99,0x5C,0x2E, /* 00000F80 "TA.p..\." */ + 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00000F88 "_GPEDPT2" */ + 0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x50, /* 00000F90 ".\._GPEP" */ + 0x48,0x50,0x32,0x10,0x4E,0x0B,0x5F,0x47, /* 00000F98 "HP2.N._G" */ + 0x50,0x45,0x5B,0x80,0x50,0x48,0x50,0x5F, /* 00000FA0 "PE[.PHP_" */ + 0x01,0x0B,0xC0,0x10,0x0A,0x03,0x5B,0x81, /* 00000FA8 "......[." */ + 0x15,0x50,0x48,0x50,0x5F,0x01,0x50,0x53, /* 00000FB0 ".PHP_.PS" */ + 0x54,0x41,0x08,0x50,0x48,0x50,0x31,0x08, /* 00000FB8 "TA.PHP1." */ + 0x50,0x48,0x50,0x32,0x08,0x5B,0x80,0x44, /* 00000FC0 "PHP2.[.D" */ + 0x47,0x31,0x5F,0x01,0x0B,0x44,0xB0,0x0A, /* 00000FC8 "G1_..D.." 
*/ + 0x04,0x5B,0x81,0x10,0x44,0x47,0x31,0x5F, /* 00000FD0 ".[..DG1_" */ + 0x01,0x44,0x50,0x54,0x31,0x08,0x44,0x50, /* 00000FD8 ".DPT1.DP" */ + 0x54,0x32,0x08,0x14,0x46,0x07,0x5F,0x4C, /* 00000FE0 "T2..F._L" */ + 0x30,0x33,0x00,0x08,0x53,0x4C,0x54,0x5F, /* 00000FE8 "03..SLT_" */ + 0x00,0x08,0x45,0x56,0x54,0x5F,0x00,0x70, /* 00000FF0 "..EVT_.p" */ + 0x50,0x53,0x54,0x41,0x61,0x7A,0x61,0x0A, /* 00000FF8 "PSTAaza." */ + 0x04,0x53,0x4C,0x54,0x5F,0x7B,0x61,0x0A, /* 00001000 ".SLT_{a." */ + 0x0F,0x45,0x56,0x54,0x5F,0x70,0x53,0x4C, /* 00001008 ".EVT_pSL" */ + 0x54,0x5F,0x44,0x50,0x54,0x31,0x70,0x45, /* 00001010 "T_DPT1pE" */ + 0x56,0x54,0x5F,0x44,0x50,0x54,0x32,0xA0, /* 00001018 "VT_DPT2." */ + 0x1B,0x93,0x53,0x4C,0x54,0x5F,0x01,0x86, /* 00001020 "..SLT_.." */ + 0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F,0x50, /* 00001028 "\/._SB_P" */ + 0x43,0x49,0x30,0x53,0x31,0x46,0x30,0x45, /* 00001030 "CI0S1F0E" */ + 0x56,0x54,0x5F,0xA1,0x1E,0xA0,0x1C,0x93, /* 00001038 "VT_....." */ + 0x53,0x4C,0x54,0x5F,0x0A,0x02,0x86,0x5C, /* 00001040 "SLT_...\" */ + 0x2F,0x03,0x5F,0x53,0x42,0x5F,0x50,0x43, /* 00001048 "/._SB_PC" */ + 0x49,0x30,0x53,0x32,0x46,0x30,0x45,0x56, /* 00001050 "I0S2F0EV" */ + 0x54,0x5F, }; int DsdtLen=sizeof(AmlCode); diff -r 29c03bc32d3e -r c6eeb71a85cf tools/firmware/hvmloader/acpi/static_tables.c --- a/tools/firmware/hvmloader/acpi/static_tables.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/firmware/hvmloader/acpi/static_tables.c Fri Feb 15 14:13:17 2008 +0000 @@ -59,9 +59,11 @@ struct acpi_20_fadt Fadt = { .pm1a_evt_blk = ACPI_PM1A_EVT_BLK_ADDRESS, .pm1a_cnt_blk = ACPI_PM1A_CNT_BLK_ADDRESS, .pm_tmr_blk = ACPI_PM_TMR_BLK_ADDRESS, + .gpe0_blk = ACPI_GPE0_BLK_ADDRESS, .pm1_evt_len = ACPI_PM1A_EVT_BLK_BIT_WIDTH / 8, .pm1_cnt_len = ACPI_PM1A_CNT_BLK_BIT_WIDTH / 8, .pm_tmr_len = ACPI_PM_TMR_BLK_BIT_WIDTH / 8, + .gpe0_blk_len = ACPI_GPE0_BLK_LEN, .p_lvl2_lat = 0x0fff, /* >100, means we do not support C2 state */ .p_lvl3_lat = 0x0fff, /* >1000, means we do not support C3 state */ diff 
-r 29c03bc32d3e -r c6eeb71a85cf tools/ioemu/hw/pass-through.c --- a/tools/ioemu/hw/pass-through.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/ioemu/hw/pass-through.c Fri Feb 15 14:13:17 2008 +0000 @@ -29,31 +29,159 @@ extern FILE *logfile; +struct php_dev { + struct pt_dev *pt_dev; + uint8_t valid; + uint8_t r_bus; + uint8_t r_dev; + uint8_t r_func; +}; +struct dpci_infos { + + struct php_dev php_devs[PHP_SLOT_LEN]; + + PCIBus *e_bus; + struct pci_access *pci_access; + +} dpci_infos; + static int token_value(char *token) { - token = strchr(token, 'x') + 1; return strtol(token, NULL, 16); } static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func) { - char *token; - - if ( !(*str) || !strchr(*str, ',') ) + char *token, *delim = ":.-"; + + if ( !(*str) || + ( !strchr(*str, ':') && !strchr(*str, '.')) ) return 0; - token = *str; - *seg = token_value(token); - token = strchr(token, ',') + 1; + token = strsep(str, delim); + *seg = token_value(token); + + token = strsep(str, delim); *bus = token_value(token); - token = strchr(token, ',') + 1; + + token = strsep(str, delim); *dev = token_value(token); - token = strchr(token, ',') + 1; + + token = strsep(str, delim); *func = token_value(token); - token = strchr(token, ','); - *str = token ? token + 1 : NULL; return 1; +} + +/* Insert a new pass-through device into a specific pci slot. 
+ * input dom:bus:dev.func@slot, chose free one if slot == 0 + * return -1: required slot not available + * 0: no free hotplug slots, but normal slot should okay + * >0: the new hotplug slot + */ +static int __insert_to_pci_slot(int bus, int dev, int func, int slot) +{ + int i, php_slot; + + /* preferred virt pci slot */ + if ( slot >= PHP_SLOT_START && slot < PHP_SLOT_END ) + { + php_slot = PCI_TO_PHP_SLOT(slot); + if ( !dpci_infos.php_devs[php_slot].valid ) + { + goto found; + } + else + return -1; + } + + if ( slot != 0 ) + return -1; + + /* slot == 0, pick up a free one */ + for ( i = 0; i < PHP_SLOT_LEN; i++ ) + { + if ( !dpci_infos.php_devs[i].valid ) + { + php_slot = i; + goto found; + } + } + + /* not found */ + return 0; + +found: + dpci_infos.php_devs[php_slot].valid = 1; + dpci_infos.php_devs[php_slot].r_bus = bus; + dpci_infos.php_devs[php_slot].r_dev = dev; + dpci_infos.php_devs[php_slot].r_func = func; + return PHP_TO_PCI_SLOT(php_slot); +} + +/* Insert a new pass-through device into a specific pci slot. 
+ * input dom:bus:dev.func@slot + */ +int insert_to_pci_slot(char *bdf_slt) +{ + int seg, bus, dev, func, slot; + char *bdf_str, *slt_str, *delim="@"; + + bdf_str = strsep(&bdf_slt, delim); + slt_str = bdf_slt; + slot = token_value(slt_str); + + if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func)) + { + return -1; + } + + return __insert_to_pci_slot(bus, dev, func, slot); + +} + +/* Test if a pci slot has a device + * 1: present + * 0: not present + * -1: invalide pci slot input + */ +int test_pci_slot(int slot) +{ + int php_slot; + + if ( slot < PHP_SLOT_START || slot >= PHP_SLOT_END ) + return -1; + + php_slot = PCI_TO_PHP_SLOT(slot); + if ( dpci_infos.php_devs[php_slot].valid ) + return 1; + else + return 0; +} + +/* find the pci slot for pass-through dev with specified BDF */ +int bdf_to_slot(char *bdf_str) +{ + int seg, bus, dev, func, i; + + if ( !next_bdf(&bdf_str, &seg, &bus, &dev, &func)) + { + return -1; + } + + /* locate the virtual pci slot for this VTd device */ + for ( i = 0; i < PHP_SLOT_LEN; i++ ) + { + if ( dpci_infos.php_devs[i].valid && + dpci_infos.php_devs[i].r_bus == bus && + dpci_infos.php_devs[i].r_dev == dev && + dpci_infos.php_devs[i].r_func == func ) + { + return PHP_TO_PCI_SLOT(i); + } + } + + return -1; } /* Being called each time a mmio region has been updated */ @@ -269,15 +397,64 @@ static int pt_register_regions(struct pt return 0; } +static int pt_unregister_regions(struct pt_dev *assigned_device) +{ + int i, type, ret; + uint32_t e_size; + PCIDevice *d = (PCIDevice*)assigned_device; + + for ( i = 0; i < PCI_NUM_REGIONS; i++ ) + { + e_size = assigned_device->bases[i].e_size; + if ( e_size == 0 ) + continue; + + type = d->io_regions[i].type; + + if ( type == PCI_ADDRESS_SPACE_MEM || + type == PCI_ADDRESS_SPACE_MEM_PREFETCH ) + { + ret = xc_domain_memory_mapping(xc_handle, domid, + assigned_device->bases[i].e_physbase >> XC_PAGE_SHIFT, + assigned_device->bases[i].access.maddr >> XC_PAGE_SHIFT, + (e_size+XC_PAGE_SIZE-1) >> 
XC_PAGE_SHIFT, + DPCI_REMOVE_MAPPING); + if ( ret != 0 ) + { + PT_LOG("Error: remove old mem mapping failed!\n"); + continue; + } + + } + else if ( type == PCI_ADDRESS_SPACE_IO ) + { + ret = xc_domain_ioport_mapping(xc_handle, domid, + assigned_device->bases[i].e_physbase, + assigned_device->bases[i].access.pio_base, + e_size, + DPCI_REMOVE_MAPPING); + if ( ret != 0 ) + { + PT_LOG("Error: remove old io mapping failed!\n"); + continue; + } + + } + + } + +} + struct pt_dev * register_real_device(PCIBus *e_bus, const char *e_dev_name, int e_devfn, uint8_t r_bus, uint8_t r_dev, uint8_t r_func, uint32_t machine_irq, struct pci_access *pci_access) { - int rc, i; + int rc = -1, i; struct pt_dev *assigned_device = NULL; struct pci_dev *pci_dev; uint8_t e_device, e_intx; struct pci_config_cf8 machine_bdf; + int free_pci_slot = -1; PT_LOG("Assigning real physical device %02x:%02x.%x ...\n", r_bus, r_dev, r_func); @@ -294,6 +471,15 @@ struct pt_dev * register_real_device(PCI { PT_LOG("Error: couldn't locate device in libpci structures\n"); return NULL; + } + + if ( e_devfn == PT_VIRT_DEVFN_AUTO ) { + /*indicate a static assignment(not hotplug), so find a free PCI hot plug slot */ + free_pci_slot = __insert_to_pci_slot(r_bus, r_dev, r_func, 0); + if ( free_pci_slot > 0 ) + e_devfn = free_pci_slot << 3; + else + PT_LOG("Error: no free virtual PCI hot plug slot, thus no live migration.\n"); } /* Register device */ @@ -306,7 +492,11 @@ struct pt_dev * register_real_device(PCI return NULL; } + if ( free_pci_slot > 0 ) + dpci_infos.php_devs[PCI_TO_PHP_SLOT(free_pci_slot)].pt_dev = assigned_device; + assigned_device->pci_dev = pci_dev; + /* Assign device */ machine_bdf.reg = 0; @@ -355,11 +545,96 @@ struct pt_dev * register_real_device(PCI return assigned_device; } +int unregister_real_device(int php_slot) +{ + struct php_dev *php_dev; + struct pci_dev *pci_dev; + uint8_t e_device, e_intx; + struct pt_dev *assigned_device = NULL; + uint32_t machine_irq; + uint32_t bdf = 0; + int rc 
= -1; + + if ( php_slot < 0 || php_slot >= PHP_SLOT_LEN ) + return -1; + + php_dev = &dpci_infos.php_devs[php_slot]; + assigned_device = php_dev->pt_dev; + + if ( !assigned_device || !php_dev->valid ) + return -1; + + pci_dev = assigned_device->pci_dev; + + /* hide pci dev from qemu */ + pci_hide_device((PCIDevice*)assigned_device); + + /* Unbind interrupt */ + e_device = (assigned_device->dev.devfn >> 3) & 0x1f; + e_intx = assigned_device->dev.config[0x3d]-1; + machine_irq = pci_dev->irq; + + if ( machine_irq != 0 ) { + rc = xc_domain_unbind_pt_irq(xc_handle, domid, machine_irq, PT_IRQ_TYPE_PCI, 0, + e_device, e_intx, 0); + if ( rc < 0 ) + { + /* TBD: unregister device in case of an error */ + PT_LOG("Error: Unbinding of interrupt failed! rc=%d\n", rc); + } + } + + /* unregister real device's MMIO/PIO BARs */ + pt_unregister_regions(assigned_device); + + /* deassign the dev to dom0 */ + bdf |= (pci_dev->bus & 0xff) << 16; + bdf |= (pci_dev->dev & 0x1f) << 11; + bdf |= (pci_dev->func & 0x1f) << 8; + if ( (rc = xc_deassign_device(xc_handle, domid, bdf)) != 0) + PT_LOG("Error: Revoking the device failed! 
rc=%d\n", rc); + + /* mark this slot as free */ + php_dev->valid = 0; + php_dev->pt_dev = NULL; + qemu_free(assigned_device); + + return 0; +} + +int power_on_php_slot(int php_slot) +{ + struct php_dev *php_dev = &dpci_infos.php_devs[php_slot]; + int pci_slot = php_slot + PHP_SLOT_START; + struct pt_dev *pt_dev; + pt_dev = + register_real_device(dpci_infos.e_bus, + "DIRECT PCI", + pci_slot << 3, + php_dev->r_bus, + php_dev->r_dev, + php_dev->r_func, + PT_MACHINE_IRQ_AUTO, + dpci_infos.pci_access); + + php_dev->pt_dev = pt_dev; + + return 0; + +} + +int power_off_php_slot(int php_slot) +{ + return unregister_real_device(php_slot); +} + int pt_init(PCIBus *e_bus, char *direct_pci) { - int seg, b, d, f; + int seg, b, d, f, php_slot = 0; struct pt_dev *pt_dev; struct pci_access *pci_access; + char *vslots; + char slot_str[8]; /* Initialize libpci */ pci_access = pci_alloc(); @@ -370,6 +645,19 @@ int pt_init(PCIBus *e_bus, char *direct_ } pci_init(pci_access); pci_scan_bus(pci_access); + + memset(&dpci_infos, 0, sizeof(struct dpci_infos)); + dpci_infos.pci_access = pci_access; + dpci_infos.e_bus = e_bus; + + if ( strlen(direct_pci) == 0 ) { + return 0; + } + + /* the virtual pci slots of all pass-through devs + * with hex format: xx;xx...; + */ + vslots = qemu_mallocz ( strlen(direct_pci) / 3 ); /* Assign given devices to guest */ while ( next_bdf(&direct_pci, &seg, &b, &d, &f) ) @@ -382,8 +670,37 @@ int pt_init(PCIBus *e_bus, char *direct_ PT_LOG("Error: Registration failed (%02x:%02x.%x)\n", b, d, f); return -1; } - } + + /* Record the virtual slot info */ + if ( php_slot < PHP_SLOT_LEN && + dpci_infos.php_devs[php_slot].pt_dev == pt_dev ) + { + sprintf(slot_str, "0x%x;", PHP_TO_PCI_SLOT(php_slot)); + } + else + sprintf(slot_str, "0x%x;", 0); + + strcat(vslots, slot_str); + php_slot++; + } + + /* Write virtual slots info to xenstore for Control panel use */ + xenstore_write_vslots(vslots); + + qemu_free(vslots); /* Success */ return 0; } + +void pt_uninit(void) +{ + 
struct pci_access *access; + + /* clean up the libpci */ + access = dpci_infos.pci_access; + if ( access ) { + pci_cleanup(access); + } + +} diff -r 29c03bc32d3e -r c6eeb71a85cf tools/ioemu/hw/pc.c --- a/tools/ioemu/hw/pc.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/ioemu/hw/pc.c Fri Feb 15 14:13:17 2008 +0000 @@ -945,8 +945,10 @@ static void pc_init1(uint64_t ram_size, } #ifdef CONFIG_PASSTHROUGH - /* Pass-through Initialization */ - if ( pci_enabled && direct_pci ) + /* Pass-through Initialization + * init libpci even direct_pci is null, as can hotplug a dev runtime + */ + if ( pci_enabled ) { rc = pt_init(pci_bus, direct_pci); if ( rc < 0 ) diff -r 29c03bc32d3e -r c6eeb71a85cf tools/ioemu/hw/pci.c --- a/tools/ioemu/hw/pci.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/ioemu/hw/pci.c Fri Feb 15 14:13:17 2008 +0000 @@ -107,7 +107,8 @@ PCIDevice *pci_register_device(PCIBus *b if (devfn < 0) { for(devfn = bus->devfn_min ; devfn < 256; devfn += 8) { - if (!bus->devices[devfn]) + if ( !bus->devices[devfn] && + !( devfn >= PHP_DEVFN_START && devfn < PHP_DEVFN_END ) ) goto found; } return NULL; @@ -130,6 +131,12 @@ PCIDevice *pci_register_device(PCIBus *b pci_dev->irq_index = pci_irq_index++; bus->devices[devfn] = pci_dev; return pci_dev; +} + +void pci_hide_device(PCIDevice *pci_dev) +{ + PCIBus *bus = pci_dev->bus; + bus->devices[pci_dev->devfn] = NULL; } void pci_register_io_region(PCIDevice *pci_dev, int region_num, diff -r 29c03bc32d3e -r c6eeb71a85cf tools/ioemu/hw/piix4acpi.c --- a/tools/ioemu/hw/piix4acpi.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/ioemu/hw/piix4acpi.c Fri Feb 15 14:13:17 2008 +0000 @@ -24,6 +24,7 @@ */ #include "vl.h" +#include <xen/hvm/ioreq.h> /* PM1a_CNT bits, as defined in the ACPI specification. 
*/ #define SCI_EN (1 << 0) @@ -36,6 +37,19 @@ #define SLP_TYP_S4 (6 << 10) #define SLP_TYP_S5 (7 << 10) +#define ACPI_DBG_IO_ADDR 0xb044 +#define ACPI_PHP_IO_ADDR 0x10c0 + +#define PHP_EVT_ADD 0x0 +#define PHP_EVT_REMOVE 0x3 + +#define ACPI_SCI_IRQ 9 + +/* The bit in GPE0_STS/EN to notify the pci hotplug event */ +#define ACPI_PHP_GPE_BIT 3 + +#define ACPI_PHP_SLOT_NUM PHP_SLOT_LEN + typedef struct AcpiDeviceState AcpiDeviceState; AcpiDeviceState *acpi_device_table; @@ -43,6 +57,27 @@ typedef struct PCIAcpiState { PCIDevice dev; uint16_t pm1_control; /* pm1a_ECNT_BLK */ } PCIAcpiState; + +typedef struct GPEState { + /* GPE0 block */ + uint8_t gpe0_sts[ACPI_GPE0_BLK_LEN / 2]; + uint8_t gpe0_en[ACPI_GPE0_BLK_LEN / 2]; + + /* SCI IRQ level */ + uint8_t sci_asserted; + +} GPEState; + +GPEState gpe_state; + +typedef struct PHPSlots { + struct { + uint8_t status; /* Apaptor stats */ + } slot[ACPI_PHP_SLOT_NUM]; + uint8_t plug_evt; /* slot|event slot:0-no event;1-1st. event:0-remove;1-add */ +} PHPSlots; + +PHPSlots php_slots; static void piix4acpi_save(QEMUFile *f, void *opaque) { @@ -140,6 +175,318 @@ static void acpi_map(PCIDevice *pci_dev, /* Word access */ register_ioport_write(addr + 4, 2, 2, acpiPm1Control_writew, d); register_ioport_read(addr + 4, 2, 2, acpiPm1Control_readw, d); +} + +static inline int test_bit(uint8_t *map, int bit) +{ + return ( map[bit / 8] & (1 << (bit % 8)) ); +} + +static inline void set_bit(uint8_t *map, int bit) +{ + map[bit / 8] |= (1 << (bit % 8)); +} + +static inline void clear_bit(uint8_t *map, int bit) +{ + map[bit / 8] &= ~(1 << (bit % 8)); +} + +extern FILE *logfile; +static void acpi_dbg_writel(void *opaque, uint32_t addr, uint32_t val) +{ +#if defined(DEBUG) + printf("ACPI: DBG: 0x%08x\n", val); +#endif + fprintf(logfile, "ACPI:debug: write addr=0x%x, val=0x%x.\n", addr, val); +} + +/* + * simple PCI hotplug controller IO + * ACPI_PHP_IO_ADDR + : + * 0 - the hotplug description: slot(|event(remove/add); + * 1 - 1st php slot 
ctr/sts reg + * 2 - 2nd php slot ctr/sts reg + * ...... + */ +static uint32_t acpi_php_readb(void *opaque, uint32_t addr) +{ + PHPSlots *hotplug_slots = opaque; + int num; + uint32_t val; + + switch (addr) + { + case ACPI_PHP_IO_ADDR: + val = hotplug_slots->plug_evt; + break; + default: + num = addr - ACPI_PHP_IO_ADDR - 1; + val = hotplug_slots->slot[num].status; + } + + fprintf(logfile, "ACPI PCI hotplug: read addr=0x%x, val=0x%x.\n", addr, val); + return val; +} + +static void acpi_php_writeb(void *opaque, uint32_t addr, uint32_t val) +{ + PHPSlots *hotplug_slots = opaque; + int php_slot; + fprintf(logfile, "ACPI PCI hotplug: write addr=0x%x, val=0x%x.\n", addr, val); + + switch (addr) + { + case ACPI_PHP_IO_ADDR: + break; + default: + php_slot = addr - ACPI_PHP_IO_ADDR - 1; + if ( val == 0x1 ) { /* Eject command */ + /* make _STA of the slot 0 */ + hotplug_slots->slot[php_slot].status = 0; + + /* clear the hotplug event */ + hotplug_slots->plug_evt = 0; + + /* power off the slot */ + power_off_php_slot(php_slot); + + /* signal the CP ACPI hot remove done. 
*/ + xenstore_record_dm_state("pci-removed"); + } + } +} + +static void pcislots_save(QEMUFile* f, void* opaque) +{ + PHPSlots *s = (PHPSlots*)opaque; + int i; + for ( i = 0; i < ACPI_PHP_SLOT_NUM; i++ ) { + qemu_put_8s( f, &s->slot[i].status); + } + qemu_put_8s(f, &s->plug_evt); +} + +static int pcislots_load(QEMUFile* f, void* opaque, int version_id) +{ + PHPSlots *s = (PHPSlots*)opaque; + int i; + if (version_id != 1) + return -EINVAL; + for ( i = 0; i < ACPI_PHP_SLOT_NUM; i++ ) { + qemu_get_8s( f, &s->slot[i].status); + } + qemu_get_8s(f, &s->plug_evt); + return 0; +} + +static void php_slots_init(void) +{ + PHPSlots *slots = &php_slots; + int i; + memset(slots, 0, sizeof(PHPSlots)); + + /* update the pci slot status */ + for ( i = 0; i < PHP_SLOT_LEN; i++ ) { + if ( test_pci_slot( PHP_TO_PCI_SLOT(i) ) == 1 ) + slots->slot[i].status = 0xf; + } + + + /* ACPI PCI hotplug controller */ + register_ioport_read(ACPI_PHP_IO_ADDR, ACPI_PHP_SLOT_NUM + 1, 1, acpi_php_readb, slots); + register_ioport_write(ACPI_PHP_IO_ADDR, ACPI_PHP_SLOT_NUM + 1, 1, acpi_php_writeb, slots); + register_savevm("pcislots", 0, 1, pcislots_save, pcislots_load, slots); +} + +/* GPEx_STS occupy 1st half of the block, while GPEx_EN 2nd half */ +static uint32_t gpe_sts_read(void *opaque, uint32_t addr) +{ + GPEState *s = opaque; + + return s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS]; +} + +/* write 1 to clear specific GPE bits */ +static void gpe_sts_write(void *opaque, uint32_t addr, uint32_t val) +{ + GPEState *s = opaque; + int hotplugged = 0; + + fprintf(logfile, "gpe_sts_write: addr=0x%x, val=0x%x.\n", addr, val); + + hotplugged = test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT); + s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS] &= ~val; + if ( s->sci_asserted && + hotplugged && + !test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT)) { + fprintf(logfile, "Clear the GPE0_STS bit for ACPI hotplug & deassert the IRQ.\n"); + pic_set_irq(ACPI_SCI_IRQ, 0); + } + +} + +static uint32_t gpe_en_read(void *opaque, uint32_t 
addr) +{ + GPEState *s = opaque; + + return s->gpe0_en[addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2)]; +} + +/* write 0 to clear en bit */ +static void gpe_en_write(void *opaque, uint32_t addr, uint32_t val) +{ + GPEState *s = opaque; + int reg_count; + + fprintf(logfile, "gpe_en_write: addr=0x%x, val=0x%x.\n", addr, val); + reg_count = addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2); + s->gpe0_en[reg_count] = val; + /* If disable GPE bit right after generating SCI on it, + * need deassert the intr to avoid redundant intrs + */ + if ( s->sci_asserted && + reg_count == (ACPI_PHP_GPE_BIT / 8) && + !(val & (1 << (ACPI_PHP_GPE_BIT % 8))) ) { + fprintf(logfile, "deassert due to disable GPE bit.\n"); + s->sci_asserted = 0; + pic_set_irq(ACPI_SCI_IRQ, 0); + } + +} + +static void gpe_save(QEMUFile* f, void* opaque) +{ + GPEState *s = (GPEState*)opaque; + int i; + + for ( i = 0; i < ACPI_GPE0_BLK_LEN / 2; i++ ) { + qemu_put_8s(f, &s->gpe0_sts[i]); + qemu_put_8s(f, &s->gpe0_en[i]); + } + + qemu_put_8s(f, &s->sci_asserted); + if ( s->sci_asserted ) { + fprintf(logfile, "gpe_save with sci asserted!\n"); + } +} + +static int gpe_load(QEMUFile* f, void* opaque, int version_id) +{ + GPEState *s = (GPEState*)opaque; + int i; + if (version_id != 1) + return -EINVAL; + + for ( i = 0; i < ACPI_GPE0_BLK_LEN / 2; i++ ) { + qemu_get_8s(f, &s->gpe0_sts[i]); + qemu_get_8s(f, &s->gpe0_en[i]); + } + + qemu_get_8s(f, &s->sci_asserted); + return 0; +} + +static void gpe_acpi_init(void) +{ + GPEState *s = &gpe_state; + memset(s, 0, sizeof(GPEState)); + + register_ioport_read(ACPI_GPE0_BLK_ADDRESS, + ACPI_GPE0_BLK_LEN / 2, + 1, + gpe_sts_read, + s); + register_ioport_read(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2, + ACPI_GPE0_BLK_LEN / 2, + 1, + gpe_en_read, + s); + + register_ioport_write(ACPI_GPE0_BLK_ADDRESS, + ACPI_GPE0_BLK_LEN / 2, + 1, + gpe_sts_write, + s); + register_ioport_write(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2, + ACPI_GPE0_BLK_LEN / 2, + 1, + 
gpe_en_write, + s); + + register_savevm("gpe", 0, 1, gpe_save, gpe_load, s); +} + +static void acpi_sci_intr(GPEState *s) +{ + if ( !test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT) && + test_bit(&s->gpe0_en[0], ACPI_PHP_GPE_BIT) ) { + + set_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT); + s->sci_asserted = 1; + pic_set_irq(ACPI_SCI_IRQ, 1); + fprintf(logfile, "generate a sci for PHP.\n"); + } +} + +void acpi_php_del(int pci_slot) +{ + GPEState *s = &gpe_state; + PHPSlots *hotplug_slots = &php_slots; + int php_slot = PCI_TO_PHP_SLOT(pci_slot); + + if ( pci_slot < PHP_SLOT_START || pci_slot >= PHP_SLOT_END ) { + fprintf(logfile, "not find the pci slot %d when hot remove.\n", pci_slot); + + return; + } + + /* update the php controller status */ + hotplug_slots->plug_evt = (((php_slot+1) << 4) | PHP_EVT_REMOVE); + + /* generate a SCI interrupt */ + acpi_sci_intr(s); +} + +void acpi_php_add(int pci_slot) +{ + GPEState *s = &gpe_state; + PHPSlots *hotplug_slots = &php_slots; + int php_slot = PCI_TO_PHP_SLOT(pci_slot); + char ret_str[30]; + + if ( pci_slot < PHP_SLOT_START || pci_slot >= PHP_SLOT_END ) { + fprintf(logfile, "hot add pci slot %d exceed.\n", pci_slot); + + if ( pci_slot == 0 ) + sprintf(ret_str, "no free hotplug slots"); + else if ( pci_slot == -1 ) + sprintf(ret_str, "wrong bdf or vslot"); + + if ( strlen(ret_str) > 0 ) + xenstore_record_dm("parameter", ret_str); + + return; + } + + /* update the php controller status */ + hotplug_slots->plug_evt = (((php_slot+1) << 4) | PHP_EVT_ADD); + + /* update the slot status as present */ + hotplug_slots->slot[php_slot].status = 0xf; + + /* power on the slot */ + power_on_php_slot(php_slot); + + /* tell Control panel which slot for the new pass-throgh dev */ + sprintf(ret_str, "0x%x", pci_slot); + xenstore_record_dm("parameter", ret_str); + + /* signal the CP ACPI hot insert done */ + xenstore_record_dm_state("pci-inserted"); + + /* generate a SCI interrupt */ + acpi_sci_intr(s); } /* PIIX4 acpi pci configuration space, func 2 
*/ @@ -181,5 +528,12 @@ void pci_piix4_acpi_init(PCIBus *bus, in acpi_map((PCIDevice *)d, 0, 0x1f40, 0x10, PCI_ADDRESS_SPACE_IO); + gpe_acpi_init(); + + php_slots_init(); + + /* for ACPI debug */ + register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, d); + register_savevm("piix4acpi", 0, 1, piix4acpi_save, piix4acpi_load, d); } diff -r 29c03bc32d3e -r c6eeb71a85cf tools/ioemu/monitor.c --- a/tools/ioemu/monitor.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/ioemu/monitor.c Fri Feb 15 14:13:17 2008 +0000 @@ -1280,6 +1280,12 @@ static term_cmd_t term_cmds[] = { "device", "add USB device (e.g. 'host:bus.addr' or 'host:vendor_id:product_id')" }, { "usb_del", "s", do_usb_del, "device", "remove USB device 'bus.addr'" }, +#ifdef CONFIG_PHP_DEBUG + { "pci_add", "s", do_pci_add, + "device", "insert PCI pass-through device by BDF,e.g. (dom, bus, dev, func) by hex '0x0, 0x3, 0x0, 0x0'" }, + { "pci_del", "s", do_pci_del, + "device", "remove PCI pass-through device by BDF,e.g. (dom, bus, dev, func) by hex '0x0, 0x3, 0x0, 0x0'" }, +#endif #ifndef CONFIG_DM { "cpu", "i", do_cpu_set, "index", "set the default CPU" }, diff -r 29c03bc32d3e -r c6eeb71a85cf tools/ioemu/vl.c --- a/tools/ioemu/vl.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/ioemu/vl.c Fri Feb 15 14:13:17 2008 +0000 @@ -4382,6 +4382,24 @@ void usb_info(void) } } +void do_pci_del(char *devname) +{ + int pci_slot; + pci_slot = bdf_to_slot(devname); + + acpi_php_del(pci_slot); +} + +void do_pci_add(char *devname) +{ + int pci_slot; + + pci_slot = insert_to_pci_slot(devname); + + acpi_php_add(pci_slot); +} + + /***********************************************************/ /* pid file */ @@ -7067,7 +7085,7 @@ int main(int argc, char **argv) #endif sigset_t set; char qemu_dm_logfilename[128]; - const char *direct_pci = NULL; + const char *direct_pci = direct_pci_str; #if !defined(__sun__) && !defined(CONFIG_STUBDOM) /* Maximise rlimits. Needed where default constraints are tight (*BSD). 
*/ @@ -7590,9 +7608,6 @@ int main(int argc, char **argv) case QEMU_OPTION_vncunused: vncunused++; break; - case QEMU_OPTION_pci: - direct_pci = optarg; - break; } } } @@ -7970,5 +7985,6 @@ int main(int argc, char **argv) main_loop(); quit_timers(); + pt_uninit(); return 0; } diff -r 29c03bc32d3e -r c6eeb71a85cf tools/ioemu/vl.h --- a/tools/ioemu/vl.h Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/ioemu/vl.h Fri Feb 15 14:13:17 2008 +0000 @@ -817,10 +817,14 @@ struct PCIDevice { int irq_state[4]; }; +extern char direct_pci_str[]; + PCIDevice *pci_register_device(PCIBus *bus, const char *name, int instance_size, int devfn, PCIConfigReadFunc *config_read, PCIConfigWriteFunc *config_write); + +void pci_hide_device(PCIDevice *pci_dev); void pci_register_io_region(PCIDevice *pci_dev, int region_num, uint32_t size, int type, @@ -849,6 +853,22 @@ void pci_info(void); void pci_info(void); PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint32_t id, pci_map_irq_fn map_irq, const char *name); + +/* PCI slot 6~7 support ACPI PCI hot plug */ +#define PHP_SLOT_START (6) +#define PHP_SLOT_END (8) +#define PHP_SLOT_LEN (PHP_SLOT_END - PHP_SLOT_START) +#define PHP_TO_PCI_SLOT(x) (x + PHP_SLOT_START) +#define PCI_TO_PHP_SLOT(x) (x - PHP_SLOT_START) +#define PHP_DEVFN_START (PHP_SLOT_START << 3) +#define PHP_DEVFN_END (PHP_SLOT_END << 3) + +int insert_to_pci_slot(char*); +int test_pci_slot(int); +int bdf_to_slot(char*); +int power_on_php_slot(int); +int power_off_php_slot(int); +void pt_uninit(void); /* prep_pci.c */ PCIBus *pci_prep_init(void); @@ -1120,6 +1140,9 @@ void tpm_tis_init(SetIRQFunc *set_irq, v /* piix4acpi.c */ extern void pci_piix4_acpi_init(PCIBus *bus, int devfn); +void acpi_php_add(int); +void acpi_php_del(int); + /* pc.c */ extern QEMUMachine pc_machine; @@ -1320,6 +1343,9 @@ void do_usb_del(const char *devname); void do_usb_del(const char *devname); void usb_info(void); +void do_pci_add(char *devname); +void do_pci_del(char *devname); + /* scsi-disk.c */ enum 
scsi_reason { SCSI_REASON_DONE, /* Command complete. */ @@ -1466,10 +1492,12 @@ void xenstore_parse_domain_config(int do void xenstore_parse_domain_config(int domid); int xenstore_fd(void); void xenstore_process_event(void *opaque); +void xenstore_record_dm(char *subpath, char *state); void xenstore_record_dm_state(char *state); void xenstore_check_new_media_present(int timeout); void xenstore_write_vncport(int vnc_display); void xenstore_read_vncpasswd(int domid, char *pwbuf, size_t pwbuflen); +void xenstore_write_vslots(char *vslots); int xenstore_domain_has_devtype(struct xs_handle *handle, const char *devtype); diff -r 29c03bc32d3e -r c6eeb71a85cf tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/ioemu/xenstore.c Fri Feb 15 14:13:17 2008 +0000 @@ -79,6 +79,8 @@ static void waitForDevice(char *fn) return; } +#define DIRECT_PCI_STR_LEN 160 +char direct_pci_str[DIRECT_PCI_STR_LEN]; void xenstore_parse_domain_config(int domid) { char **e = NULL; @@ -86,7 +88,7 @@ void xenstore_parse_domain_config(int do char *fpath = NULL, *bpath = NULL, *dev = NULL, *params = NULL, *type = NULL, *drv = NULL; int i, is_scsi, is_hdN = 0; - unsigned int len, num, hd_index; + unsigned int len, num, hd_index, pci_devid = 0; BlockDriverState *bs; for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++) @@ -250,6 +252,38 @@ void xenstore_parse_domain_config(int do fprintf(logfile, "Watching %s\n", buf); } + /* get the pci pass-through parameter */ + if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/num_devs", + domid, pci_devid) == -1) + goto out; + + free(params); + params = xs_read(xsh, XBT_NULL, buf, &len); + if (params == NULL) + goto out; + num = atoi(params); + + for ( i = 0; i < num; i++ ) { + if (pasprintf(&buf, "/local/domain/0/backend/pci/%u/%u/dev-%d", + domid, pci_devid, i) != -1) { + free(dev); + dev = xs_read(xsh, XBT_NULL, buf, &len); + + if ( strlen(dev) + strlen(direct_pci_str) > DIRECT_PCI_STR_LEN ) { + fprintf(stderr, "qemu: too 
many pci pass-through devices\n"); + memset(direct_pci_str, 0, DIRECT_PCI_STR_LEN); + goto out; + } + + /* append to direct_pci_str */ + if ( dev ) { + strcat(direct_pci_str, dev); + strcat(direct_pci_str, "-"); + } + } + } + + out: free(type); free(params); @@ -388,7 +422,7 @@ void xenstore_process_logdirty_event(voi /* Accept state change commands from the control tools */ static void xenstore_process_dm_command_event(void) { - char *path = NULL, *command = NULL; + char *path = NULL, *command = NULL, *par = NULL; unsigned int len; extern int suspend_requested; @@ -407,6 +441,34 @@ static void xenstore_process_dm_command_ } else if (!strncmp(command, "continue", len)) { fprintf(logfile, "dm-command: continue after state save\n"); suspend_requested = 0; + } else if (!strncmp(command, "pci-rem", len)) { + fprintf(logfile, "dm-command: hot remove pass-through pci dev \n"); + + if (pasprintf(&path, + "/local/domain/0/device-model/%u/parameter", domid) == -1) { + fprintf(logfile, "out of memory reading dm command parameter\n"); + goto out; + } + par = xs_read(xsh, XBT_NULL, path, &len); + if (!par) + goto out; + + do_pci_del(par); + free(par); + } else if (!strncmp(command, "pci-ins", len)) { + fprintf(logfile, "dm-command: hot insert pass-through pci dev \n"); + + if (pasprintf(&path, + "/local/domain/0/device-model/%u/parameter", domid) == -1) { + fprintf(logfile, "out of memory reading dm command parameter\n"); + goto out; + } + par = xs_read(xsh, XBT_NULL, path, &len); + if (!par) + goto out; + + do_pci_add(par); + free(par); } else { fprintf(logfile, "dm-command: unknown command\"%*s\"\n", len, command); } @@ -416,20 +478,25 @@ static void xenstore_process_dm_command_ free(command); } +void xenstore_record_dm(char *subpath, char *state) +{ + char *path = NULL; + + if (pasprintf(&path, + "/local/domain/0/device-model/%u/%s", domid, subpath) == -1) { + fprintf(logfile, "out of memory recording dm \n"); + goto out; + } + if (!xs_write(xsh, XBT_NULL, path, state, 
strlen(state))) + fprintf(logfile, "error recording dm \n"); + + out: + free(path); +} + void xenstore_record_dm_state(char *state) { - char *path = NULL; - - if (pasprintf(&path, - "/local/domain/0/device-model/%u/state", domid) == -1) { - fprintf(logfile, "out of memory recording dm state\n"); - goto out; - } - if (!xs_write(xsh, XBT_NULL, path, state, strlen(state))) - fprintf(logfile, "error recording dm state\n"); - - out: - free(path); + xenstore_record_dm("state", state); } void xenstore_process_event(void *opaque) @@ -520,6 +587,23 @@ void xenstore_write_vncport(int display) out: free(portstr); free(buf); +} + +void xenstore_write_vslots(char *vslots) +{ + char *path = NULL; + int pci_devid = 0; + + if (pasprintf(&path, + "/local/domain/0/backend/pci/%u/%u/vslots", domid, pci_devid) == -1) { + fprintf(logfile, "out of memory when updating vslots.\n"); + goto out; + } + if (!xs_write(xsh, XBT_NULL, path, vslots, strlen(vslots))) + fprintf(logfile, "error updating vslots \n"); + + out: + free(path); } void xenstore_read_vncpasswd(int domid, char *pwbuf, size_t pwbuflen) diff -r 29c03bc32d3e -r c6eeb71a85cf tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/libxc/xc_domain.c Fri Feb 15 14:13:17 2008 +0000 @@ -762,6 +762,20 @@ int xc_test_assign_device( return do_domctl(xc_handle, &domctl); } +int xc_deassign_device( + int xc_handle, + uint32_t domid, + uint32_t machine_bdf) +{ + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_deassign_device; + domctl.domain = domid; + domctl.u.assign_device.machine_bdf = machine_bdf; + + return do_domctl(xc_handle, &domctl); +} + /* Pass-through: binds machine irq to guests irq */ int xc_domain_bind_pt_irq( int xc_handle, @@ -797,6 +811,36 @@ int xc_domain_bind_pt_irq( return rc; } +int xc_domain_unbind_pt_irq( + int xc_handle, + uint32_t domid, + uint8_t machine_irq, + uint8_t irq_type, + uint8_t bus, + uint8_t device, + uint8_t intx, + uint8_t isa_irq) +{ + int rc; + 
xen_domctl_bind_pt_irq_t * bind; + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_unbind_pt_irq; + domctl.domain = (domid_t)domid; + + bind = &(domctl.u.bind_pt_irq); + bind->hvm_domid = domid; + bind->irq_type = irq_type; + bind->machine_irq = machine_irq; + bind->u.pci.bus = bus; + bind->u.pci.device = device; + bind->u.pci.intx = intx; + bind->u.isa.isa_irq = isa_irq; + + rc = do_domctl(xc_handle, &domctl); + return rc; +} + int xc_domain_bind_pt_pci_irq( int xc_handle, uint32_t domid, diff -r 29c03bc32d3e -r c6eeb71a85cf tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/libxc/xenctrl.h Fri Feb 15 14:13:17 2008 +0000 @@ -914,6 +914,10 @@ int xc_test_assign_device(int xc_handle, uint32_t domid, uint32_t machine_bdf); +int xc_deassign_device(int xc_handle, + uint32_t domid, + uint32_t machine_bdf); + int xc_domain_memory_mapping(int xc_handle, uint32_t domid, unsigned long first_gfn, @@ -937,6 +941,15 @@ int xc_domain_bind_pt_irq(int xc_handle, uint8_t intx, uint8_t isa_irq); +int xc_domain_unbind_pt_irq(int xc_handle, + uint32_t domid, + uint8_t machine_irq, + uint8_t irq_type, + uint8_t bus, + uint8_t device, + uint8_t intx, + uint8_t isa_irq); + int xc_domain_bind_pt_pci_irq(int xc_handle, uint32_t domid, uint8_t machine_irq, diff -r 29c03bc32d3e -r c6eeb71a85cf tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri Feb 15 14:13:17 2008 +0000 @@ -516,6 +516,131 @@ class XendDomainInfo: asserts.isCharConvertible(key) self.storeDom("control/sysrq", '%c' % key) + def sync_pcidev_info(self): + + if not self.info.is_hvm(): + return + + devid = '0' + dev_info = self._getDeviceInfo_pci(devid) + if dev_info is None: + return + + # get the virtual slot info from xenstore + dev_uuid = sxp.child_value(dev_info, 'uuid') + pci_conf = self.info['devices'][dev_uuid][1] + pci_devs = pci_conf['devs'] + + count = 0 + vslots = 
None + while vslots is None and count < 20: + vslots = xstransact.Read("/local/domain/0/backend/pci/%u/%s/vslots" + % (self.getDomid(), devid)) + time.sleep(0.1) + count += 1 + if vslots is None: + log.error("Device model didn't tell the vslots for PCI device") + return + + #delete last delim + if vslots[-1] == ";": + vslots = vslots[:-1] + + slot_list = vslots.split(';') + if len(slot_list) != len(pci_devs): + log.error("Device model's pci dev num dismatch") + return + + #update the vslot info + count = 0; + for x in pci_devs: + x['vslt'] = slot_list[count] + count += 1 + + + def pci_device_create(self, dev_config): + log.debug("XendDomainInfo.pci_device_create: %s" % scrub_password(dev_config)) + + if not self.info.is_hvm(): + raise VmError("only HVM guest support pci attach") + + #all the PCI devs share one conf node + devid = '0' + + dev_type = sxp.name(dev_config) + new_devs = sxp.child_value(dev_config, 'devs') + new_dev = new_devs[0] + dev_info = self._getDeviceInfo_pci(devid)#from self.info['devices'] + + #check conflict before trigger hotplug event + if dev_info is not None: + dev_uuid = sxp.child_value(dev_info, 'uuid') + pci_conf = self.info['devices'][dev_uuid][1] + pci_devs = pci_conf['devs'] + for x in pci_devs: + if (int(x['vslt'], 16) == int(new_dev['vslt'], 16) and + int(x['vslt'], 16) != 0 ): + raise VmError("vslot %s already have a device." 
% (new_dev['vslt'])) + + if (int(x['domain'], 16) == int(new_dev['domain'], 16) and + int(x['bus'], 16) == int(new_dev['bus'], 16) and + int(x['slot'], 16) == int(new_dev['slot'], 16) and + int(x['func'], 16) == int(new_dev['func'], 16) ): + raise VmError("device is already inserted") + + # Test whether the devices can be assigned with VT-d + pci_str = "%s, %s, %s, %s" % (new_dev['domain'], + new_dev['bus'], + new_dev['slot'], + new_dev['func']) + bdf = xc.test_assign_device(self.domid, pci_str) + if bdf != 0: + bus = (bdf >> 16) & 0xff + devfn = (bdf >> 8) & 0xff + dev = (devfn >> 3) & 0x1f + func = devfn & 0x7 + raise VmError("Fail to hot insert device(%x:%x.%x): maybe VT-d is " + "not enabled, or the device is not exist, or it " + "has already been assigned to other domain" + % (bus, dev, func)) + + bdf_str = "%s:%s:%s.%s@%s" % (new_dev['domain'], + new_dev['bus'], + new_dev['slot'], + new_dev['func'], + new_dev['vslt']) + self.image.signalDeviceModel('pci-ins', 'pci-inserted', bdf_str) + + # update the virtual pci slot + vslt = xstransact.Read("/local/domain/0/device-model/%i/parameter" + % self.getDomid()) + new_dev['vslt'] = vslt + + if dev_info is None: + # create a new one from scrach + dev_cfg_sxp = [dev_type, + ['dev', + ['domain', new_dev['domain']], + ['bus', new_dev['bus']], + ['slot', new_dev['slot']], + ['func', new_dev['func']], + ['vslt', new_dev['vslt']] + ]] + dev_uuid = self.info.device_add(dev_type, cfg_sxp = dev_cfg_sxp) + dev_config_dict = self.info['devices'][dev_uuid][1] + try: + dev_config_dict['devid'] = devid = \ + self._createDevice(dev_type, dev_config_dict) + self._waitForDevice(dev_type, devid) + except VmError, ex: + raise ex + else: + # update the pci config to add the new dev + pci_devs.extend(new_devs) + self._reconfigureDevice('pci', devid, pci_conf) + + return self.getDeviceController('pci').sxpr(devid) + def device_create(self, dev_config): """Create a new device. 
@@ -524,6 +649,11 @@ class XendDomainInfo: """ log.debug("XendDomainInfo.device_create: %s" % scrub_password(dev_config)) dev_type = sxp.name(dev_config) + + if dev_type == 'pci': + rc = self.pci_device_create(dev_config) + return rc + dev_uuid = self.info.device_add(dev_type, cfg_sxp = dev_config) dev_config_dict = self.info['devices'][dev_uuid][1] log.debug("XendDomainInfo.device_create: %s" % scrub_password(dev_config_dict)) @@ -584,9 +714,64 @@ class XendDomainInfo: for devclass in XendDevices.valid_devices(): self.getDeviceController(devclass).waitForDevices() + def destroyPCIDevice(self, vslot): + log.debug("destroyPCIDevice called %s", vslot) + + if not self.info.is_hvm(): + raise VmError("only HVM guest support pci detach") + + #all the PCI devs share one conf node + devid = '0' + vslot = int(vslot) + dev_info = self._getDeviceInfo_pci('0')#from self.info['devices'] + dev_uuid = sxp.child_value(dev_info, 'uuid') + + #delete the pci bdf config under the pci device + pci_conf = self.info['devices'][dev_uuid][1] + pci_len = len(pci_conf['devs']) + + #find the pass-through device with the virtual slot + devnum = 0 + for x in pci_conf['devs']: + if int(x['vslt'], 16) == vslot: + break + devnum += 1 + + if devnum >= pci_len: + raise VmError("Device @ vslot 0x%x doesn't exist." % (vslot)) + + if vslot == 0: + raise VmError("Device @ vslot 0x%x do not support hotplug." 
% (vslot)) + + bdf_str = "%s:%s:%s.%s" % (x['domain'], x['bus'], x['slot'], x['func']) + log.info("destroyPCIDevice:%s:%s!", x, bdf_str) + + self.image.signalDeviceModel('pci-rem', 'pci-removed', bdf_str) + + if pci_len > 1: + del pci_conf['devs'][devnum] + self._reconfigureDevice('pci', devid, pci_conf) + else: + self.getDeviceController('pci').destroyDevice(devid, True) + del self.info['devices'][dev_uuid] + platform = self.info['platform'] + orig_dev_num = len(platform['pci']) + + #need remove the pci config + #TODO:can use this to keep some info to ask high level management tools to hot insert a new passthrough dev after migration + if orig_dev_num != 0: +# platform['pci'] = ["%dDEVs" % orig_dev_num] + platform['pci'] = [] + + return 0 + def destroyDevice(self, deviceClass, devid, force = False, rm_cfg = False): log.debug("XendDomainInfo.destroyDevice: deviceClass = %s, device = %s", deviceClass, devid) + + if deviceClass == 'dpci': + rc = self.destroyPCIDevice(devid) + return rc if rm_cfg: # Convert devid to device number. A device number is @@ -647,6 +832,14 @@ class XendDomainInfo: return rc def getDeviceSxprs(self, deviceClass): + if deviceClass == 'pci': + dev_info = self._getDeviceInfo_pci('0')#from self.info['devices'] + if dev_info is None: + return [] + dev_uuid = sxp.child_value(dev_info, 'uuid') + pci_devs = self.info['devices'][dev_uuid][1]['devs'] + pci_len = len(pci_devs) + return pci_devs if self._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED, DOM_STATE_CRASHED): return self.getDeviceController(deviceClass).sxprs() else: @@ -683,6 +876,12 @@ class XendDomainInfo: if devid == dev: return dev_info + def _getDeviceInfo_pci(self, devid): + for dev_type, dev_info in self.info.all_devices_sxpr(): + if dev_type != 'pci': + continue + return dev_info + return None def setMemoryTarget(self, target): """Set the memory target of this domain. 
@@ -1542,6 +1741,9 @@ class XendDomainInfo: if self.image: self.image.createDeviceModel() + + #if have pass-through devs, need the virtual pci slots info from qemu + self.sync_pcidev_info() def _releaseDevices(self, suspend = False): """Release all domain's devices. Nothrow guarantee.""" diff -r 29c03bc32d3e -r c6eeb71a85cf tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/python/xen/xend/image.py Fri Feb 15 14:13:17 2008 +0000 @@ -300,23 +300,42 @@ class ImageHandler: self.vm.storeDom("image/device-model-pid", self.pid) log.info("device model pid: %d", self.pid) - def saveDeviceModel(self): + def signalDeviceModel(self, cmd, ret, par = None): if self.device_model is None: return - # Signal the device model to pause itself and save its state + # Signal the device model to for action + if cmd is '' or ret is '': + raise VmError('need valid command and result when signal device model') + + orig_state = xstransact.Read("/local/domain/0/device-model/%i/state" + % self.vm.getDomid()) + + if par is not None: + xstransact.Store("/local/domain/0/device-model/%i" + % self.vm.getDomid(), ('parameter', par)) + xstransact.Store("/local/domain/0/device-model/%i" - % self.vm.getDomid(), ('command', 'save')) + % self.vm.getDomid(), ('command', cmd)) # Wait for confirmation. Could do this with a watch but we'd # still end up spinning here waiting for the watch to fire. 
state = '' count = 0 - while state != 'paused': + while state != ret: state = xstransact.Read("/local/domain/0/device-model/%i/state" % self.vm.getDomid()) time.sleep(0.1) count += 1 if count > 100: - raise VmError('Timed out waiting for device model to save') + raise VmError('Timed out waiting for device model action') + + #resotre orig state + xstransact.Store("/local/domain/0/device-model/%i" + % self.vm.getDomid(), ('state', orig_state)) + log.info("signalDeviceModel:restore dm state to %s", orig_state) + + def saveDeviceModel(self): + # Signal the device model to pause itself and save its state + self.signalDeviceModel('save', 'paused') def resumeDeviceModel(self): if self.device_model is None: @@ -479,7 +498,7 @@ class HVMImageHandler(ImageHandler): dmargs = [ 'boot', 'fda', 'fdb', 'soundhw', 'localtime', 'serial', 'stdvga', 'isa', - 'acpi', 'usb', 'usbdevice', 'pci' ] + 'acpi', 'usb', 'usbdevice' ] for a in dmargs: v = vmConfig['platform'].get(a) diff -r 29c03bc32d3e -r c6eeb71a85cf tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/python/xen/xend/server/DevController.py Fri Feb 15 14:13:17 2008 +0000 @@ -412,6 +412,14 @@ class DevController: return result + def removeBackend(self, devid, *args): + frontpath = self.frontendPath(devid) + backpath = xstransact.Read(frontpath, "backend") + if backpath: + return xstransact.Remove(backpath, *args) + else: + raise VmError("Device %s not connected" % devid) + def readBackend(self, devid, *args): frontpath = self.frontendPath(devid) backpath = xstransact.Read(frontpath, "backend") diff -r 29c03bc32d3e -r c6eeb71a85cf tools/python/xen/xend/server/pciif.py --- a/tools/python/xen/xend/server/pciif.py Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/python/xen/xend/server/pciif.py Fri Feb 15 14:13:17 2008 +0000 @@ -18,6 +18,7 @@ import types +import time from xen.xend import sxp from xen.xend.XendError import VmError @@ -62,25 +63,62 @@ 
class PciController(DevController): back = {} pcidevid = 0 + vslots = "" for pci_config in config.get('devs', []): domain = parse_hex(pci_config.get('domain', 0)) bus = parse_hex(pci_config.get('bus', 0)) slot = parse_hex(pci_config.get('slot', 0)) func = parse_hex(pci_config.get('func', 0)) + + vslt = pci_config.get('vslt') + if vslt is not None: + vslots = vslots + vslt + ";" + self.setupDevice(domain, bus, slot, func) back['dev-%i' % pcidevid] = "%04x:%02x:%02x.%02x" % \ (domain, bus, slot, func) pcidevid += 1 + if vslots != "": + back['vslots'] = vslots + back['num_devs']=str(pcidevid) back['uuid'] = config.get('uuid','') return (0, back, {}) + + def reconfigureDevice(self, _, config): + """@see DevController.reconfigureDevice""" + #currently only support config changes by hot insert/remove pass-through dev + #delete all the devices in xenstore + (devid, new_back, new_front) = self.getDeviceDetails(config) + num_devs = self.readBackend(devid, 'num_devs') + for i in range(int(num_devs)): + self.removeBackend(devid, 'dev-%d' % i) + self.removeBackend(devid, 'num_devs') + + #create new devices config + num_devs = new_back['num_devs'] + for i in range(int(num_devs)): + dev_no = 'dev-%d' % i + self.writeBackend(devid, dev_no, new_back[dev_no]) + self.writeBackend(devid, 'num_devs', num_devs) + + if new_back['vslots'] is not None: + self.writeBackend(devid, 'vslots', new_back['vslots']) + + return new_back.get('uuid') def getDeviceConfiguration(self, devid, transaction = None): result = DevController.getDeviceConfiguration(self, devid, transaction) num_devs = self.readBackend(devid, 'num_devs') pci_devs = [] + vslots = self.readBackend(devid, 'vslots') + if vslots is not None: + if vslots[-1] == ";": + vslots = vslots[:-1] + slot_list = vslots.split(';') + for i in range(int(num_devs)): dev_config = self.readBackend(devid, 'dev-%d' % i) @@ -91,10 +129,16 @@ class PciController(DevController): if pci_match!=None: pci_dev_info = pci_match.groupdict() - 
pci_devs.append({'domain': '0x%(domain)s' % pci_dev_info, + dev_dict = {'domain': '0x%(domain)s' % pci_dev_info, 'bus': '0x%(bus)s' % pci_dev_info, 'slot': '0x%(slot)s' % pci_dev_info, - 'func': '0x%(func)s' % pci_dev_info}) + 'func': '0x%(func)s' % pci_dev_info} + + #append vslot info + if vslots is not None: + dev_dict['vslt'] = slot_list[i] + + pci_devs.append(dev_dict) result['devs'] = pci_devs result['uuid'] = self.readBackend(devid, 'uuid') diff -r 29c03bc32d3e -r c6eeb71a85cf tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Fri Feb 15 12:50:55 2008 +0000 +++ b/tools/python/xen/xm/main.py Fri Feb 15 14:13:17 2008 +0000 @@ -175,6 +175,12 @@ SUBCOMMAND_HELP = { 'vnet-delete' : ('<VnetId>', 'Delete a Vnet.'), 'vnet-list' : ('[-l|--long]', 'List Vnets.'), 'vtpm-list' : ('<Domain> [--long]', 'List virtual TPM devices.'), + 'pci-attach ' : ('<Domain> <dom> <bus> <slot> <func> [virtual slot]', + 'Insert a new pass-through pci device.'), + 'pci-detach ' : ('<Domain> <virtual slot>', + 'Remove a domain\'s pass-through pci device.'), + 'pci-list' : ('<Domain>', + 'List pass-through pci devices for a domain.'), # security @@ -335,6 +341,9 @@ device_commands = [ "network-detach", "network-list", "vtpm-list", + "pci-attach", + "pci-detach", + "pci-list", ] vnet_commands = [ @@ -2051,6 +2060,31 @@ def xm_vtpm_list(args): % ni) +def xm_pci_list(args): + (use_long, params) = arg_check_for_resource_list(args, "pci-list") + + dom = params[0] + + devs = server.xend.domain.getDeviceSxprs(dom, 'pci') + + if len(devs) == 0: + return + + has_vslt = devs[0].has_key('vslt') + if has_vslt: + hdr_str = 'VSlt domain bus slot func' + fmt_str = "%(vslt)-3s %(domain)-3s %(bus)-3s %(slot)-3s %(func)-3s " + else: + hdr_str = 'domain bus slot func' + fmt_str = "%(domain)-3s %(bus)-3s %(slot)-3s %(func)-3s " + hdr = 0 + + for x in devs: + if hdr == 0: + print (hdr_str) + hdr = 1 + print ( fmt_str % x ) + def parse_block_configuration(args): dom = args[0] @@ -2198,6 +2232,29 @@ def 
xm_network_attach(args): vif.append(vif_param) server.xend.domain.device_create(dom, vif) +def parse_pci_configuration(args): + dom = args[0] + + if len(args) == 6: + vslt = args[5] + else: + vslt = '0x0' #chose a free virtual PCI slot + + pci = ['pci', + ['devs', + [{'domain': "0x%x" % int(args[1], 16), + 'bus': "0x%x" % int(args[2], 16), + 'slot': "0x%x" % int(args[3], 16), + 'func': "0x%x" % int(args[4], 16), + 'vslt': "0x%x" % int(vslt, 16)}] + ]] + + return (dom, pci) + +def xm_pci_attach(args): + arg_check(args, 'xm_pci_attach', 5, 6) + (dom, pci) = parse_pci_configuration(args) + server.xend.domain.device_create(dom, pci) def detach(args, deviceClass): rm_cfg = True @@ -2262,6 +2319,12 @@ def xm_network_detach(args): arg_check(args, 'network-detach', 2, 3) detach(args, 'vif') + +def xm_pci_detach(args): + arg_check(args, 'xm_pci_detach', 2, 2) + dom = args[0] + dev = args[1] + server.xend.domain.destroyDevice(dom, 'dpci', dev) def xm_vnet_list(args): xenapi_unsupported() @@ -2452,6 +2515,10 @@ commands = { "vnet-delete": xm_vnet_delete, # vtpm "vtpm-list": xm_vtpm_list, + #pci + "pci-attach": xm_pci_attach, + "pci-detach": xm_pci_detach, + "pci-list": xm_pci_list, } ## The commands supported by a separate argument parser in xend.xm. 
diff -r 29c03bc32d3e -r c6eeb71a85cf xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Fri Feb 15 12:50:55 2008 +0000
+++ b/xen/arch/x86/domctl.c Fri Feb 15 14:13:17 2008 +0000
@@ -580,6 +580,43 @@ long arch_do_domctl(
     }
     break;
 
+    case XEN_DOMCTL_deassign_device:
+    {
+        /*
+         * Hand a passed-through PCI device back to dom0.  machine_bdf packs
+         * the bus in bits 23:16 and the devfn in bits 15:8.
+         */
+        struct domain *d;
+        u8 bus, devfn;
+
+        ret = -EINVAL;
+        if ( !iommu_enabled )
+            break;
+
+        if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) )
+        {
+            gdprintk(XENLOG_ERR,
+                "XEN_DOMCTL_deassign_device: get_domain_by_id() failed\n");
+            break;
+        }
+        bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
+        devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
+
+        if ( !device_assigned(bus, devfn) )
+        {
+            /* Drop the reference taken by get_domain_by_id() before bailing. */
+            put_domain(d);
+            break;
+        }
+
+        reassign_device_ownership(d, dom0, bus, devfn);
+        gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
+            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+        put_domain(d);
+        ret = 0;
+    }
+    break;
+
     case XEN_DOMCTL_bind_pt_irq:
     {
         struct domain * d;
@@ -593,6 +630,23 @@ long arch_do_domctl(
         ret = pt_irq_create_bind_vtd(d, bind);
         if ( ret < 0 )
             gdprintk(XENLOG_ERR, "pt_irq_create_bind failed!\n");
+        rcu_unlock_domain(d);
+    }
+    break;
+
+    case XEN_DOMCTL_unbind_pt_irq:
+    {
+        struct domain * d;
+        xen_domctl_bind_pt_irq_t * bind;
+
+        ret = -ESRCH;
+        if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
+            break;
+        bind = &(domctl->u.bind_pt_irq);
+        if ( iommu_enabled )
+            ret = pt_irq_destroy_bind_vtd(d, bind);
+        if ( ret < 0 )
+            gdprintk(XENLOG_ERR, "pt_irq_destroy_bind failed!\n");
         rcu_unlock_domain(d);
     }
     break;
diff -r 29c03bc32d3e -r c6eeb71a85cf xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c Fri Feb 15 12:50:55 2008 +0000
+++ b/xen/arch/x86/hvm/irq.c Fri Feb 15 14:13:17 2008 +0000
@@ -211,8 +211,7 @@ void hvm_set_pci_link_route(struct domai
         clear_bit(old_isa_irq, &hvm_irq->dpci->isairq_map);
 
     for ( i = 0; i < NR_LINK; i++ )
-        if ( test_bit(i, &hvm_irq->dpci->link_map) &&
-             hvm_irq->pci_link.route[i] )
+        if ( hvm_irq->dpci->link_cnt[i] && hvm_irq->pci_link.route[i] )
             set_bit(hvm_irq->pci_link.route[i], &hvm_irq->dpci->isairq_map);
 }
 
diff -r 29c03bc32d3e -r c6eeb71a85cf xen/arch/x86/hvm/vmx/vtd/intel-iommu.c
--- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c Fri Feb 15 12:50:55 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c Fri Feb 15 14:13:17 2008 +0000
@@ -1441,6 +1441,8 @@ void reassign_device_ownership(
             bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
             source->domain_id, target->domain_id);
 
+    pdev_flr(bus, devfn);
+
     for_each_pdev( source, pdev )
     {
         if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
@@ -1476,7 +1478,6 @@ void return_devices_to_dom0(struct domai
         dprintk(XENLOG_INFO VTDPREFIX,
                 "return_devices_to_dom0: bdf = %x:%x:%x\n",
                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
-        pdev_flr(pdev->bus, pdev->devfn);
         reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
     }
 
@@ -1941,7 +1942,6 @@ int intel_iommu_assign_device(struct dom
             "assign_device: bus = %x dev = %x func = %x\n",
             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 
-    pdev_flr(bus, devfn);
     reassign_device_ownership(dom0, d, bus, devfn);
 
     /* Setup rmrr identify mapping */
diff -r 29c03bc32d3e -r c6eeb71a85cf xen/arch/x86/hvm/vmx/vtd/io.c
--- a/xen/arch/x86/hvm/vmx/vtd/io.c Fri Feb 15 12:50:55 2008 +0000
+++ b/xen/arch/x86/hvm/vmx/vtd/io.c Fri Feb 15 14:13:17 2008 +0000
@@ -101,7 +101,7 @@ int pt_irq_create_bind_vtd(
     intx = pt_irq_bind->u.pci.intx;
     guest_gsi = hvm_pci_intx_gsi(device, intx);
     link = hvm_pci_intx_link(device, intx);
-    set_bit(link, hvm_irq_dpci->link_map);
+    hvm_irq_dpci->link_cnt[link]++;
 
     digl = xmalloc(struct dev_intx_gsi_link);
     if ( !digl )
@@ -134,6 +134,65 @@ int pt_irq_create_bind_vtd(
     gdprintk(XENLOG_INFO VTDPREFIX,
              "VT-d irq bind: m_irq = %x device = %x intx = %x\n",
              machine_gsi, device, intx);
+    return 0;
+}
+
+int pt_irq_destroy_bind_vtd(
+    struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
+{
+    struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+    uint32_t machine_gsi, guest_gsi;
+    uint32_t device, intx, link;
+    struct list_head *digl_list, *tmp;
+    struct dev_intx_gsi_link *digl;
+
+    if ( hvm_irq_dpci == NULL )
+        return 0;
+
+    machine_gsi = pt_irq_bind->machine_irq;
+    device = pt_irq_bind->u.pci.device;
+    intx = pt_irq_bind->u.pci.intx;
+    guest_gsi = hvm_pci_intx_gsi(device, intx);
+    link = hvm_pci_intx_link(device, intx);
+    hvm_irq_dpci->link_cnt[link]--;
+
+    gdprintk(XENLOG_INFO,
+        "pt_irq_destroy_bind_vtd: machine_gsi=%d, guest_gsi=%d, device=%d, intx=%d.\n",
+        machine_gsi, guest_gsi, device, intx);
+    memset(&hvm_irq_dpci->girq[guest_gsi], 0, sizeof(struct hvm_girq_dpci_mapping));
+
+    /* clear the mirq info */
+    if ( hvm_irq_dpci->mirq[machine_gsi].valid )
+    {
+
+        list_for_each_safe ( digl_list, tmp,
+            &hvm_irq_dpci->mirq[machine_gsi].digl_list )
+        {
+            digl = list_entry(digl_list,
+                struct dev_intx_gsi_link, list);
+            if ( digl->device == device &&
+                 digl->intx == intx &&
+                 digl->link == link &&
+                 digl->gsi == guest_gsi )
+            {
+                list_del(&digl->list);
+                xfree(digl);
+            }
+        }
+
+        if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
+        {
+            pirq_guest_unbind(d, machine_gsi);
+            kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]);
+            hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
+            hvm_irq_dpci->mirq[machine_gsi].valid = 0;
+        }
+    }
+
+    gdprintk(XENLOG_INFO,
+        "XEN_DOMCTL_irq_unmapping: m_irq = %x device = %x intx = %x\n",
+        machine_gsi, device, intx);
+
     return 0;
 }
 
diff -r 29c03bc32d3e -r c6eeb71a85cf xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h Fri Feb 15 12:50:55 2008 +0000
+++ b/xen/include/asm-x86/hvm/irq.h Fri Feb 15 14:13:17 2008 +0000
@@ -64,7 +64,7 @@ struct hvm_irq_dpci {
     /* Record of mapped ISA IRQs */
     DECLARE_BITMAP(isairq_map, NR_ISAIRQS);
     /* Record of mapped Links */
-    DECLARE_BITMAP(link_map, NR_LINK);
+    uint8_t link_cnt[NR_LINK];
     struct timer hvm_timer[NR_IRQS];
 };
 
diff -r 29c03bc32d3e -r c6eeb71a85cf xen/include/asm-x86/iommu.h
--- a/xen/include/asm-x86/iommu.h Fri Feb 15 12:50:55 2008 +0000
+++ b/xen/include/asm-x86/iommu.h Fri Feb 15 14:13:17 2008 +0000
@@ -74,6 +74,9 @@ void iommu_domain_destroy(struct domain
 void iommu_domain_destroy(struct domain *d);
 int device_assigned(u8 bus, u8 devfn);
 int assign_device(struct domain *d, u8 bus, u8 devfn);
+void reassign_device_ownership(struct domain *source,
+                               struct domain *target,
+                               u8 bus, u8 devfn);
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
 void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry);
@@ -83,6 +86,8 @@ int dpci_ioport_intercept(ioreq_t *p);
 int dpci_ioport_intercept(ioreq_t *p);
 int pt_irq_create_bind_vtd(struct domain *d,
                            xen_domctl_bind_pt_irq_t *pt_irq_bind);
+int pt_irq_destroy_bind_vtd(struct domain *d,
+                            xen_domctl_bind_pt_irq_t *pt_irq_bind);
 unsigned int io_apic_read_remap_rte(
     unsigned int apic, unsigned int reg);
 void io_apic_write_remap_rte(unsigned int apic,
diff -r 29c03bc32d3e -r c6eeb71a85cf xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Fri Feb 15 12:50:55 2008 +0000
+++ b/xen/include/public/domctl.h Fri Feb 15 14:13:17 2008 +0000
@@ -439,6 +439,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendt
 /* Assign PCI device to HVM guest. Sets up IOMMU structures. */
 #define XEN_DOMCTL_assign_device 37
 #define XEN_DOMCTL_test_assign_device 45
+#define XEN_DOMCTL_deassign_device 47
 struct xen_domctl_assign_device {
     uint32_t machine_bdf; /* machine PCI ID of assigned device */
 };
@@ -448,6 +449,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_assig
 
 /* Pass-through interrupts: bind real irq -> hvm devfn. */
 #define XEN_DOMCTL_bind_pt_irq 38
+#define XEN_DOMCTL_unbind_pt_irq 48
 typedef enum pt_irq_type_e {
     PT_IRQ_TYPE_PCI,
     PT_IRQ_TYPE_ISA
diff -r 29c03bc32d3e -r c6eeb71a85cf xen/include/public/hvm/ioreq.h
--- a/xen/include/public/hvm/ioreq.h Fri Feb 15 12:50:55 2008 +0000
+++ b/xen/include/public/hvm/ioreq.h Fri Feb 15 14:13:17 2008 +0000
@@ -118,6 +118,11 @@ struct buffered_piopage {
 #define ACPI_PM1A_EVT_BLK_ADDRESS 0x0000000000001f40
 #define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
 #define ACPI_PM_TMR_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08)
+
+#define ACPI_GPE0_BLK_ADDRESS (ACPI_PM_TMR_BLK_ADDRESS + 0x20)
+
+#define ACPI_GPE0_BLK_LEN 0x08
+
 #endif /* defined(__i386__) || defined(__x86_64__) */
 
 #endif /* _IOREQ_H_ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |