[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 08/28] ARM: GICv3 ITS: introduce host LPI array
On Mon, 30 Jan 2017, Andre Przywara wrote: > The number of LPIs on a host can be potentially huge (millions), > although in practise will be mostly reasonable. So prematurely allocating > an array of struct irq_desc's for each LPI is not an option. > However Xen itself does not care about LPIs, as every LPI will be injected > into a guest (Dom0 for now). > Create a dense data structure (8 Bytes) for each LPI which holds just > enough information to determine the virtual IRQ number and the VCPU into > which the LPI needs to be injected. > Also to not artificially limit the number of LPIs, we create a 2-level > table for holding those structures. > This patch introduces functions to initialize these tables and to > create, lookup and destroy entries for a given LPI. > We allocate and access LPI information in a way that does not require > a lock. > > Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx> > --- > xen/arch/arm/gic-v3-its.c | 80 ++++++++++++++++- > xen/arch/arm/gic-v3-lpi.c | 187 > ++++++++++++++++++++++++++++++++++++++- > xen/include/asm-arm/atomic.h | 6 +- > xen/include/asm-arm/gic.h | 5 ++ > xen/include/asm-arm/gic_v3_its.h | 9 ++ > 5 files changed, 282 insertions(+), 5 deletions(-) > > diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c > index 4a3a394..f073ab5 100644 > --- a/xen/arch/arm/gic-v3-its.c > +++ b/xen/arch/arm/gic-v3-its.c > @@ -83,6 +83,20 @@ static int its_send_cmd_sync(struct host_its *its, int cpu) > return its_send_command(its, cmd); > } > > +static int its_send_cmd_mapti(struct host_its *its, > + uint32_t deviceid, uint32_t eventid, > + uint32_t pintid, uint16_t icid) > +{ > + uint64_t cmd[4]; > + > + cmd[0] = GITS_CMD_MAPTI | ((uint64_t)deviceid << 32); > + cmd[1] = eventid | ((uint64_t)pintid << 32); > + cmd[2] = icid; > + cmd[3] = 0x00; > + > + return its_send_command(its, cmd); > +} > + > static int its_send_cmd_mapc(struct host_its *its, int collection_id, int > cpu) > { > uint64_t cmd[4]; > @@ -111,6 +125,19 @@ 
static int its_send_cmd_mapd(struct host_its *its, > uint32_t deviceid, > return its_send_command(its, cmd); > } > > +static int its_send_cmd_inv(struct host_its *its, > + uint32_t deviceid, uint32_t eventid) > +{ > + uint64_t cmd[4]; > + > + cmd[0] = GITS_CMD_INV | ((uint64_t)deviceid << 32); > + cmd[1] = eventid; > + cmd[2] = 0x00; > + cmd[3] = 0x00; > + > + return its_send_command(its, cmd); > +} > + > /* Set up the (1:1) collection mapping for the given host CPU. */ > int gicv3_its_setup_collection(int cpu) > { > @@ -359,13 +386,47 @@ int gicv3_its_init(struct host_its *hw_its) > > static void remove_mapped_guest_device(struct its_devices *dev) > { > + int i; > + > if ( dev->hw_its ) > its_send_cmd_mapd(dev->hw_its, dev->host_devid, 0, 0, false); > > + for ( i = 0; i < dev->eventids / 32; i++ ) > + gicv3_free_host_lpi_block(dev->hw_its, dev->host_lpis[i]); > + > xfree(dev->itt_addr); > + xfree(dev->host_lpis); > xfree(dev); > } > > +/* > + * On the host ITS @its, map @nr_events consecutive LPIs. > + * The mapping connects a device @devid and event @eventid pair to LPI @lpi, > + * increasing both @eventid and @lpi to cover the number of requested LPIs. 
> + */ > +int gicv3_its_map_host_events(struct host_its *its, > + int devid, int eventid, int lpi, > + int nr_events) > +{ > + int i, ret; > + > + for ( i = 0; i < nr_events; i++ ) > + { > + ret = its_send_cmd_mapti(its, devid, eventid + i, lpi + i, 0); > + if ( ret ) > + return ret; > + ret = its_send_cmd_inv(its, devid, eventid + i); > + if ( ret ) > + return ret; > + } > + > + ret = its_send_cmd_sync(its, 0); > + if ( ret ) > + return ret; > + > + return 0; > +} > + > int gicv3_its_map_guest_device(struct domain *d, int host_devid, > int guest_devid, int bits, bool valid) > { > @@ -373,7 +434,7 @@ int gicv3_its_map_guest_device(struct domain *d, int > host_devid, > struct its_devices *dev, *temp; > struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL; > struct host_its *hw_its; > - int ret; > + int ret, i; > > /* check for already existing mappings */ > spin_lock(&d->arch.vgic.its_devices_lock); > @@ -430,10 +491,19 @@ int gicv3_its_map_guest_device(struct domain *d, int > host_devid, > goto out_unlock; > } > > + dev->host_lpis = xzalloc_array(uint32_t, BIT(bits) / 32); > + if ( !dev->host_lpis ) > + { > + xfree(dev); > + xfree(itt_addr); > + return -ENOMEM; > + } > + > ret = its_send_cmd_mapd(hw_its, host_devid, bits - 1, > virt_to_maddr(itt_addr), true); > if ( ret ) > { > + xfree(dev->host_lpis); > xfree(itt_addr); > xfree(dev); > goto out_unlock; > @@ -450,6 +520,14 @@ int gicv3_its_map_guest_device(struct domain *d, int > host_devid, > > spin_unlock(&d->arch.vgic.its_devices_lock); > > + /* > + * Map all host LPIs within this device already. We can't afford to queue > + * any host ITS commands later on during the guest's runtime. 
> + */ > + for ( i = 0; i < BIT(bits) / 32; i++ ) > + dev->host_lpis[i] = gicv3_allocate_host_lpi_block(hw_its, d, > host_devid, > + i * 32); Not checking for errors > return 0; > > out_unlock: > diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c > index 5911b91..8f6e7f3 100644 > --- a/xen/arch/arm/gic-v3-lpi.c > +++ b/xen/arch/arm/gic-v3-lpi.c > @@ -18,16 +18,34 @@ > > #include <xen/config.h> > #include <xen/lib.h> > -#include <xen/mm.h> > +#include <xen/sched.h> > +#include <xen/err.h> > +#include <xen/sched.h> > #include <xen/sizes.h> > +#include <asm/atomic.h> > +#include <asm/domain.h> > +#include <asm/io.h> > #include <asm/gic.h> > #include <asm/gic_v3_defs.h> > #include <asm/gic_v3_its.h> > > +/* LPIs on the host always go to a guest, so no struct irq_desc for them. */ > +union host_lpi { > + uint64_t data; > + struct { > + uint32_t virt_lpi; > + uint16_t dom_id; > + uint16_t vcpu_id; > + }; > +}; > + > /* Global state */ > static struct { > uint8_t *lpi_property; > + union host_lpi **host_lpis; > unsigned int host_lpi_bits; > + /* Protects allocation and deallocation of host LPIs, but not the access > */ > + spinlock_t host_lpis_lock; > } lpi_data; > > /* Physical redistributor address */ > @@ -38,6 +56,19 @@ static DEFINE_PER_CPU(int, redist_id); > static DEFINE_PER_CPU(void *, pending_table); > > #define MAX_PHYS_LPIS (BIT_ULL(lpi_data.host_lpi_bits) - LPI_OFFSET) > +#define HOST_LPIS_PER_PAGE (PAGE_SIZE / sizeof(union host_lpi)) > + > +static union host_lpi *gic_get_host_lpi(uint32_t plpi) > +{ > + if ( !is_lpi(plpi) || plpi >= MAX_PHYS_LPIS + LPI_OFFSET ) > + return NULL; > + > + plpi -= LPI_OFFSET; > + if ( !lpi_data.host_lpis[plpi / HOST_LPIS_PER_PAGE] ) > + return NULL; > + > + return &lpi_data.host_lpis[plpi / HOST_LPIS_PER_PAGE][plpi % > HOST_LPIS_PER_PAGE]; > +} > > /* Stores this redistributor's physical address and ID in a per-CPU variable > */ > void gicv3_set_redist_address(paddr_t address, int redist_id) > @@ -130,15 +161,169 
@@ uint64_t gicv3_lpi_get_proptable(void) > static unsigned int max_lpi_bits = CONFIG_MAX_PHYS_LPI_BITS; > integer_param("max_lpi_bits", max_lpi_bits); > > +/* > + * Allocate the 2nd level array for host LPIs. This one holds pointers > + * to the page with the actual "union host_lpi" entries. Our LPI limit > + * avoids excessive memory usage. > + */ > int gicv3_lpi_init_host_lpis(unsigned int hw_lpi_bits) > { > + int nr_lpi_ptrs; > + > + BUILD_BUG_ON(sizeof(union host_lpi) > sizeof(unsigned long)); > + > lpi_data.host_lpi_bits = min(hw_lpi_bits, max_lpi_bits); > > + spin_lock_init(&lpi_data.host_lpis_lock); > + > + nr_lpi_ptrs = MAX_PHYS_LPIS / (PAGE_SIZE / sizeof(union host_lpi)); > + lpi_data.host_lpis = xzalloc_array(union host_lpi *, nr_lpi_ptrs); > + if ( !lpi_data.host_lpis ) > + return -ENOMEM; > + > printk("GICv3: using at most %lld LPIs on the host.\n", MAX_PHYS_LPIS); > > return 0; > } > > +#define LPI_BLOCK 32 Still missing some info on why LPI_BLOCK is 32 (you already provided most of it in past emails). But the patch series is improving quite well :-) > +/* Must be called with host_lpis_lock held. */ > +static int find_unused_host_lpi(int start, uint32_t *index) > +{ > + int chunk; > + uint32_t i = *index; > + > + for ( chunk = start; chunk < MAX_PHYS_LPIS / HOST_LPIS_PER_PAGE; chunk++ > ) > + { > + /* If we hit an unallocated chunk, use entry 0 in that one. */ > + if ( !lpi_data.host_lpis[chunk] ) > + { > + *index = 0; > + return chunk; > + } > + > + /* Find an unallocated entry in this chunk. */ > + for ( ; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK ) > + { > + if ( lpi_data.host_lpis[chunk][i].dom_id == INVALID_DOMID ) > + { > + *index = i; > + return chunk; > + } > + } > + i = 0; > + } > + > + return -1; > +} > + > +/* > + * Allocate a block of 32 LPIs on the given host ITS for device "devid", > + * starting with "eventid". Put them into the respective ITT by issuing a > + * MAPTI command for each of them. 
> + */ > +int gicv3_allocate_host_lpi_block(struct host_its *its, struct domain *d, > + uint32_t host_devid, uint32_t eventid) > +{ > + static uint32_t next_lpi = 0; > + uint32_t lpi, lpi_idx = next_lpi % HOST_LPIS_PER_PAGE; > + int chunk; > + int i; > + > + spin_lock(&lpi_data.host_lpis_lock); > + chunk = find_unused_host_lpi(next_lpi / HOST_LPIS_PER_PAGE, &lpi_idx); > + > + if ( chunk == - 1 ) /* rescan for a hole from the beginning */ > + { > + lpi_idx = 0; > + chunk = find_unused_host_lpi(0, &lpi_idx); > + if ( chunk == -1 ) > + { > + spin_unlock(&lpi_data.host_lpis_lock); > + return -ENOSPC; > + } > + } > + > + /* If we hit an unallocated chunk, we initialize it and use entry 0. */ > + if ( !lpi_data.host_lpis[chunk] ) > + { > + union host_lpi *new_chunk; > + > + new_chunk = alloc_xenheap_pages(0, 0); > + if ( !new_chunk ) > + { > + spin_unlock(&lpi_data.host_lpis_lock); > + return -ENOMEM; > + } > + > + for ( i = 0; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK ) > + new_chunk[i].dom_id = INVALID_DOMID; > + > + lpi_data.host_lpis[chunk] = new_chunk; > + lpi_idx = 0; > + } > + > + lpi = chunk * HOST_LPIS_PER_PAGE + lpi_idx; > + > + for ( i = 0; i < LPI_BLOCK; i++ ) > + { > + union host_lpi hlpi; > + > + /* > + * Mark this host LPI as belonging to the domain, but don't assign > + * any virtual LPI or a VCPU yet. > + */ > + hlpi.virt_lpi = INVALID_LPI; > + hlpi.dom_id = d->domain_id; > + hlpi.vcpu_id = INVALID_DOMID; > + write_u64_atomic(&lpi_data.host_lpis[chunk][lpi_idx + i].data, > + hlpi.data); > + > + /* > + * Enable this host LPI, so we don't have to do this during the > + * guest's runtime. > + */ > + lpi_data.lpi_property[lpi + i] |= LPI_PROP_ENABLED; > + } > + > + /* > + * We have allocated and initialized the host LPI entries, so it's safe > + * to drop the lock now. Access to the structures can be done > concurrently > + * as it involves only an atomic uint64_t access. 
> + */ > + spin_unlock(&lpi_data.host_lpis_lock); > + > + __flush_dcache_area(&lpi_data.lpi_property[lpi], LPI_BLOCK); > + > + gicv3_its_map_host_events(its, host_devid, eventid, lpi + LPI_OFFSET, > + LPI_BLOCK); > + > + next_lpi = lpi + LPI_BLOCK; > + return lpi + LPI_OFFSET; > +} > + > +int gicv3_free_host_lpi_block(struct host_its *its, uint32_t lpi) > +{ > + union host_lpi *hlpi, empty_lpi = { .dom_id = INVALID_DOMID }; > + int i; > + > + hlpi = gic_get_host_lpi(lpi); > + if ( !hlpi ) > + return -ENOENT; > + > + spin_lock(&lpi_data.host_lpis_lock); > + > + for ( i = 0; i < LPI_BLOCK; i++ ) > + write_u64_atomic(&hlpi[i].data, empty_lpi.data); > + > + /* TODO: Call a function in gic-v3-its.c to send DISCARDs */ > + > + spin_unlock(&lpi_data.host_lpis_lock); > + > + return 0; > +} > + > /* > * Local variables: > * mode: C > diff --git a/xen/include/asm-arm/atomic.h b/xen/include/asm-arm/atomic.h > index 22a5036..df9de6a 100644 > --- a/xen/include/asm-arm/atomic.h > +++ b/xen/include/asm-arm/atomic.h > @@ -53,9 +53,9 @@ build_atomic_write(write_u16_atomic, "h", WORD, uint16_t, > "r") > build_atomic_write(write_u32_atomic, "", WORD, uint32_t, "r") > build_atomic_write(write_int_atomic, "", WORD, int, "r") > > -#if 0 /* defined (CONFIG_ARM_64) */ > -build_atomic_read(read_u64_atomic, "x", uint64_t, "=r") > -build_atomic_write(write_u64_atomic, "x", uint64_t, "r") > +#if defined (CONFIG_ARM_64) > +build_atomic_read(read_u64_atomic, "", "", uint64_t, "=r") > +build_atomic_write(write_u64_atomic, "", "", uint64_t, "r") > #endif > > build_add_sized(add_u8_sized, "b", BYTE, uint8_t, "ri") > diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h > index 12bd155..7825575 100644 > --- a/xen/include/asm-arm/gic.h > +++ b/xen/include/asm-arm/gic.h > @@ -220,7 +220,12 @@ enum gic_version { > GIC_V3, > }; > > +#define INVALID_LPI 0 > #define LPI_OFFSET 8192 > +static inline bool is_lpi(unsigned int irq) > +{ > + return irq >= LPI_OFFSET; > +} > > extern enum 
gic_version gic_hw_version(void); > > diff --git a/xen/include/asm-arm/gic_v3_its.h > b/xen/include/asm-arm/gic_v3_its.h > index 9c5dcf3..0e6b06a 100644 > --- a/xen/include/asm-arm/gic_v3_its.h > +++ b/xen/include/asm-arm/gic_v3_its.h > @@ -97,6 +97,8 @@ > #define HOST_ITS_FLUSH_CMD_QUEUE (1U << 0) > #define HOST_ITS_USES_PTA (1U << 1) > > +#define INVALID_DOMID ((uint16_t)~0) > + > /* data structure for each hardware ITS */ > struct host_its { > struct list_head entry; > @@ -117,6 +119,7 @@ struct its_devices { > uint32_t guest_devid; > uint32_t host_devid; > uint32_t eventids; > + uint32_t *host_lpis; > }; > > extern struct list_head host_its_list; > @@ -149,6 +152,12 @@ int gicv3_its_setup_collection(int cpu); > */ > int gicv3_its_map_guest_device(struct domain *d, int host_devid, > int guest_devid, int bits, bool valid); > +int gicv3_its_map_host_events(struct host_its *its, > + int host_devid, int eventid, > + int lpi, int nrevents); > +int gicv3_allocate_host_lpi_block(struct host_its *its, struct domain *d, > + uint32_t host_devid, uint32_t eventid); > +int gicv3_free_host_lpi_block(struct host_its *its, uint32_t lpi); > > #else > > -- > 2.9.0 > _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support®.