|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 08/28] ARM: GICv3 ITS: introduce host LPI array
On Mon, 30 Jan 2017, Andre Przywara wrote:
> The number of LPIs on a host can be potentially huge (millions),
> although in practice it will be mostly reasonable. So prematurely allocating
> an array of struct irq_desc's for each LPI is not an option.
> However Xen itself does not care about LPIs, as every LPI will be injected
> into a guest (Dom0 for now).
> Create a dense data structure (8 Bytes) for each LPI which holds just
> enough information to determine the virtual IRQ number and the VCPU into
> which the LPI needs to be injected.
> Also to not artificially limit the number of LPIs, we create a 2-level
> table for holding those structures.
> This patch introduces functions to initialize these tables and to
> create, lookup and destroy entries for a given LPI.
> We allocate and access LPI information in a way that does not require
> a lock.
>
> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
> ---
> xen/arch/arm/gic-v3-its.c | 80 ++++++++++++++++-
> xen/arch/arm/gic-v3-lpi.c | 187
> ++++++++++++++++++++++++++++++++++++++-
> xen/include/asm-arm/atomic.h | 6 +-
> xen/include/asm-arm/gic.h | 5 ++
> xen/include/asm-arm/gic_v3_its.h | 9 ++
> 5 files changed, 282 insertions(+), 5 deletions(-)
>
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index 4a3a394..f073ab5 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -83,6 +83,20 @@ static int its_send_cmd_sync(struct host_its *its, int cpu)
> return its_send_command(its, cmd);
> }
>
> +static int its_send_cmd_mapti(struct host_its *its,
> + uint32_t deviceid, uint32_t eventid,
> + uint32_t pintid, uint16_t icid)
> +{
> + uint64_t cmd[4];
> +
> + cmd[0] = GITS_CMD_MAPTI | ((uint64_t)deviceid << 32);
> + cmd[1] = eventid | ((uint64_t)pintid << 32);
> + cmd[2] = icid;
> + cmd[3] = 0x00;
> +
> + return its_send_command(its, cmd);
> +}
> +
> static int its_send_cmd_mapc(struct host_its *its, int collection_id, int
> cpu)
> {
> uint64_t cmd[4];
> @@ -111,6 +125,19 @@ static int its_send_cmd_mapd(struct host_its *its,
> uint32_t deviceid,
> return its_send_command(its, cmd);
> }
>
> +static int its_send_cmd_inv(struct host_its *its,
> + uint32_t deviceid, uint32_t eventid)
> +{
> + uint64_t cmd[4];
> +
> + cmd[0] = GITS_CMD_INV | ((uint64_t)deviceid << 32);
> + cmd[1] = eventid;
> + cmd[2] = 0x00;
> + cmd[3] = 0x00;
> +
> + return its_send_command(its, cmd);
> +}
> +
> /* Set up the (1:1) collection mapping for the given host CPU. */
> int gicv3_its_setup_collection(int cpu)
> {
> @@ -359,13 +386,47 @@ int gicv3_its_init(struct host_its *hw_its)
>
> static void remove_mapped_guest_device(struct its_devices *dev)
> {
> + int i;
> +
> if ( dev->hw_its )
> its_send_cmd_mapd(dev->hw_its, dev->host_devid, 0, 0, false);
>
> + for ( i = 0; i < dev->eventids / 32; i++ )
> + gicv3_free_host_lpi_block(dev->hw_its, dev->host_lpis[i]);
> +
> xfree(dev->itt_addr);
> + xfree(dev->host_lpis);
> xfree(dev);
> }
>
> +/*
> + * On the host ITS @its, map @nr_events consecutive LPIs.
> + * The mapping connects a device @devid and event @eventid pair to LPI @lpi,
> + * increasing both @eventid and @lpi to cover the number of requested LPIs.
> + */
> +int gicv3_its_map_host_events(struct host_its *its,
> + int devid, int eventid, int lpi,
> + int nr_events)
> +{
> + int i, ret;
> +
> + for ( i = 0; i < nr_events; i++ )
> + {
> + ret = its_send_cmd_mapti(its, devid, eventid + i, lpi + i, 0);
> + if ( ret )
> + return ret;
> + ret = its_send_cmd_inv(its, devid, eventid + i);
> + if ( ret )
> + return ret;
> + }
> +
> + ret = its_send_cmd_sync(its, 0);
> + if ( ret )
> + return ret;
> +
> + return 0;
> +}
> +
> int gicv3_its_map_guest_device(struct domain *d, int host_devid,
> int guest_devid, int bits, bool valid)
> {
> @@ -373,7 +434,7 @@ int gicv3_its_map_guest_device(struct domain *d, int
> host_devid,
> struct its_devices *dev, *temp;
> struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL;
> struct host_its *hw_its;
> - int ret;
> + int ret, i;
>
> /* check for already existing mappings */
> spin_lock(&d->arch.vgic.its_devices_lock);
> @@ -430,10 +491,19 @@ int gicv3_its_map_guest_device(struct domain *d, int
> host_devid,
> goto out_unlock;
> }
>
> + dev->host_lpis = xzalloc_array(uint32_t, BIT(bits) / 32);
> + if ( !dev->host_lpis )
> + {
> + xfree(dev);
> + xfree(itt_addr);
> + return -ENOMEM;
> + }
> +
> ret = its_send_cmd_mapd(hw_its, host_devid, bits - 1,
> virt_to_maddr(itt_addr), true);
> if ( ret )
> {
> + xfree(dev->host_lpis);
> xfree(itt_addr);
> xfree(dev);
> goto out_unlock;
> @@ -450,6 +520,14 @@ int gicv3_its_map_guest_device(struct domain *d, int
> host_devid,
>
> spin_unlock(&d->arch.vgic.its_devices_lock);
>
> + /*
> + * Map all host LPIs within this device already. We can't afford to queue
> + * any host ITS commands later on during the guest's runtime.
> + */
> + for ( i = 0; i < BIT(bits) / 32; i++ )
> + dev->host_lpis[i] = gicv3_allocate_host_lpi_block(hw_its, d,
> host_devid,
> + i * 32);
The return value of gicv3_allocate_host_lpi_block() is not checked for
errors here. On failure, the previously allocated LPI blocks and the
device mapping should be unwound and an error returned to the caller.
> return 0;
>
> out_unlock:
> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
> index 5911b91..8f6e7f3 100644
> --- a/xen/arch/arm/gic-v3-lpi.c
> +++ b/xen/arch/arm/gic-v3-lpi.c
> @@ -18,16 +18,34 @@
>
> #include <xen/config.h>
> #include <xen/lib.h>
> -#include <xen/mm.h>
> +#include <xen/sched.h>
> +#include <xen/err.h>
> +#include <xen/sched.h>
> #include <xen/sizes.h>
> +#include <asm/atomic.h>
> +#include <asm/domain.h>
> +#include <asm/io.h>
> #include <asm/gic.h>
> #include <asm/gic_v3_defs.h>
> #include <asm/gic_v3_its.h>
>
> +/* LPIs on the host always go to a guest, so no struct irq_desc for them. */
> +union host_lpi {
> + uint64_t data;
> + struct {
> + uint32_t virt_lpi;
> + uint16_t dom_id;
> + uint16_t vcpu_id;
> + };
> +};
> +
> /* Global state */
> static struct {
> uint8_t *lpi_property;
> + union host_lpi **host_lpis;
> unsigned int host_lpi_bits;
> + /* Protects allocation and deallocation of host LPIs, but not the access
> */
> + spinlock_t host_lpis_lock;
> } lpi_data;
>
> /* Physical redistributor address */
> @@ -38,6 +56,19 @@ static DEFINE_PER_CPU(int, redist_id);
> static DEFINE_PER_CPU(void *, pending_table);
>
> #define MAX_PHYS_LPIS (BIT_ULL(lpi_data.host_lpi_bits) - LPI_OFFSET)
> +#define HOST_LPIS_PER_PAGE (PAGE_SIZE / sizeof(union host_lpi))
> +
> +static union host_lpi *gic_get_host_lpi(uint32_t plpi)
> +{
> + if ( !is_lpi(plpi) || plpi >= MAX_PHYS_LPIS + LPI_OFFSET )
> + return NULL;
> +
> + plpi -= LPI_OFFSET;
> + if ( !lpi_data.host_lpis[plpi / HOST_LPIS_PER_PAGE] )
> + return NULL;
> +
> + return &lpi_data.host_lpis[plpi / HOST_LPIS_PER_PAGE][plpi %
> HOST_LPIS_PER_PAGE];
> +}
>
> /* Stores this redistributor's physical address and ID in a per-CPU variable
> */
> void gicv3_set_redist_address(paddr_t address, int redist_id)
> @@ -130,15 +161,169 @@ uint64_t gicv3_lpi_get_proptable(void)
> static unsigned int max_lpi_bits = CONFIG_MAX_PHYS_LPI_BITS;
> integer_param("max_lpi_bits", max_lpi_bits);
>
> +/*
> + * Allocate the 2nd level array for host LPIs. This one holds pointers
> + * to the page with the actual "union host_lpi" entries. Our LPI limit
> + * avoids excessive memory usage.
> + */
> int gicv3_lpi_init_host_lpis(unsigned int hw_lpi_bits)
> {
> + int nr_lpi_ptrs;
> +
> + BUILD_BUG_ON(sizeof(union host_lpi) > sizeof(unsigned long));
> +
> lpi_data.host_lpi_bits = min(hw_lpi_bits, max_lpi_bits);
>
> + spin_lock_init(&lpi_data.host_lpis_lock);
> +
> + nr_lpi_ptrs = MAX_PHYS_LPIS / (PAGE_SIZE / sizeof(union host_lpi));
> + lpi_data.host_lpis = xzalloc_array(union host_lpi *, nr_lpi_ptrs);
> + if ( !lpi_data.host_lpis )
> + return -ENOMEM;
> +
> printk("GICv3: using at most %lld LPIs on the host.\n", MAX_PHYS_LPIS);
>
> return 0;
> }
>
> +#define LPI_BLOCK 32
A comment is still missing here explaining why LPI_BLOCK is 32 (you
already provided most of the rationale in past emails — please fold it
into the code). But the patch series is improving quite well :-)
> +/* Must be called with host_lpis_lock held. */
> +static int find_unused_host_lpi(int start, uint32_t *index)
> +{
> + int chunk;
> + uint32_t i = *index;
> +
> + for ( chunk = start; chunk < MAX_PHYS_LPIS / HOST_LPIS_PER_PAGE; chunk++
> )
> + {
> + /* If we hit an unallocated chunk, use entry 0 in that one. */
> + if ( !lpi_data.host_lpis[chunk] )
> + {
> + *index = 0;
> + return chunk;
> + }
> +
> + /* Find an unallocated entry in this chunk. */
> + for ( ; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK )
> + {
> + if ( lpi_data.host_lpis[chunk][i].dom_id == INVALID_DOMID )
> + {
> + *index = i;
> + return chunk;
> + }
> + }
> + i = 0;
> + }
> +
> + return -1;
> +}
> +
> +/*
> + * Allocate a block of 32 LPIs on the given host ITS for device "devid",
> + * starting with "eventid". Put them into the respective ITT by issuing a
> + * MAPTI command for each of them.
> + */
> +int gicv3_allocate_host_lpi_block(struct host_its *its, struct domain *d,
> + uint32_t host_devid, uint32_t eventid)
> +{
> + static uint32_t next_lpi = 0;
> + uint32_t lpi, lpi_idx = next_lpi % HOST_LPIS_PER_PAGE;
> + int chunk;
> + int i;
> +
> + spin_lock(&lpi_data.host_lpis_lock);
> + chunk = find_unused_host_lpi(next_lpi / HOST_LPIS_PER_PAGE, &lpi_idx);
> +
> + if ( chunk == - 1 ) /* rescan for a hole from the beginning */
> + {
> + lpi_idx = 0;
> + chunk = find_unused_host_lpi(0, &lpi_idx);
> + if ( chunk == -1 )
> + {
> + spin_unlock(&lpi_data.host_lpis_lock);
> + return -ENOSPC;
> + }
> + }
> +
> + /* If we hit an unallocated chunk, we initialize it and use entry 0. */
> + if ( !lpi_data.host_lpis[chunk] )
> + {
> + union host_lpi *new_chunk;
> +
> + new_chunk = alloc_xenheap_pages(0, 0);
> + if ( !new_chunk )
> + {
> + spin_unlock(&lpi_data.host_lpis_lock);
> + return -ENOMEM;
> + }
> +
> + for ( i = 0; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK )
> + new_chunk[i].dom_id = INVALID_DOMID;
> +
> + lpi_data.host_lpis[chunk] = new_chunk;
> + lpi_idx = 0;
> + }
> +
> + lpi = chunk * HOST_LPIS_PER_PAGE + lpi_idx;
> +
> + for ( i = 0; i < LPI_BLOCK; i++ )
> + {
> + union host_lpi hlpi;
> +
> + /*
> + * Mark this host LPI as belonging to the domain, but don't assign
> + * any virtual LPI or a VCPU yet.
> + */
> + hlpi.virt_lpi = INVALID_LPI;
> + hlpi.dom_id = d->domain_id;
> + hlpi.vcpu_id = INVALID_DOMID;
> + write_u64_atomic(&lpi_data.host_lpis[chunk][lpi_idx + i].data,
> + hlpi.data);
> +
> + /*
> + * Enable this host LPI, so we don't have to do this during the
> + * guest's runtime.
> + */
> + lpi_data.lpi_property[lpi + i] |= LPI_PROP_ENABLED;
> + }
> +
> + /*
> + * We have allocated and initialized the host LPI entries, so it's safe
> + * to drop the lock now. Access to the structures can be done
> concurrently
> + * as it involves only an atomic uint64_t access.
> + */
> + spin_unlock(&lpi_data.host_lpis_lock);
> +
> + __flush_dcache_area(&lpi_data.lpi_property[lpi], LPI_BLOCK);
> +
> + gicv3_its_map_host_events(its, host_devid, eventid, lpi + LPI_OFFSET,
> + LPI_BLOCK);
> +
> + next_lpi = lpi + LPI_BLOCK;
> + return lpi + LPI_OFFSET;
> +}
> +
> +int gicv3_free_host_lpi_block(struct host_its *its, uint32_t lpi)
> +{
> + union host_lpi *hlpi, empty_lpi = { .dom_id = INVALID_DOMID };
> + int i;
> +
> + hlpi = gic_get_host_lpi(lpi);
> + if ( !hlpi )
> + return -ENOENT;
> +
> + spin_lock(&lpi_data.host_lpis_lock);
> +
> + for ( i = 0; i < LPI_BLOCK; i++ )
> + write_u64_atomic(&hlpi[i].data, empty_lpi.data);
> +
> + /* TODO: Call a function in gic-v3-its.c to send DISCARDs */
> +
> + spin_unlock(&lpi_data.host_lpis_lock);
> +
> + return 0;
> +}
> +
> /*
> * Local variables:
> * mode: C
> diff --git a/xen/include/asm-arm/atomic.h b/xen/include/asm-arm/atomic.h
> index 22a5036..df9de6a 100644
> --- a/xen/include/asm-arm/atomic.h
> +++ b/xen/include/asm-arm/atomic.h
> @@ -53,9 +53,9 @@ build_atomic_write(write_u16_atomic, "h", WORD, uint16_t,
> "r")
> build_atomic_write(write_u32_atomic, "", WORD, uint32_t, "r")
> build_atomic_write(write_int_atomic, "", WORD, int, "r")
>
> -#if 0 /* defined (CONFIG_ARM_64) */
> -build_atomic_read(read_u64_atomic, "x", uint64_t, "=r")
> -build_atomic_write(write_u64_atomic, "x", uint64_t, "r")
> +#if defined (CONFIG_ARM_64)
> +build_atomic_read(read_u64_atomic, "", "", uint64_t, "=r")
> +build_atomic_write(write_u64_atomic, "", "", uint64_t, "r")
> #endif
>
> build_add_sized(add_u8_sized, "b", BYTE, uint8_t, "ri")
> diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
> index 12bd155..7825575 100644
> --- a/xen/include/asm-arm/gic.h
> +++ b/xen/include/asm-arm/gic.h
> @@ -220,7 +220,12 @@ enum gic_version {
> GIC_V3,
> };
>
> +#define INVALID_LPI 0
> #define LPI_OFFSET 8192
> +static inline bool is_lpi(unsigned int irq)
> +{
> + return irq >= LPI_OFFSET;
> +}
>
> extern enum gic_version gic_hw_version(void);
>
> diff --git a/xen/include/asm-arm/gic_v3_its.h
> b/xen/include/asm-arm/gic_v3_its.h
> index 9c5dcf3..0e6b06a 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -97,6 +97,8 @@
> #define HOST_ITS_FLUSH_CMD_QUEUE (1U << 0)
> #define HOST_ITS_USES_PTA (1U << 1)
>
> +#define INVALID_DOMID ((uint16_t)~0)
> +
> /* data structure for each hardware ITS */
> struct host_its {
> struct list_head entry;
> @@ -117,6 +119,7 @@ struct its_devices {
> uint32_t guest_devid;
> uint32_t host_devid;
> uint32_t eventids;
> + uint32_t *host_lpis;
> };
>
> extern struct list_head host_its_list;
> @@ -149,6 +152,12 @@ int gicv3_its_setup_collection(int cpu);
> */
> int gicv3_its_map_guest_device(struct domain *d, int host_devid,
> int guest_devid, int bits, bool valid);
> +int gicv3_its_map_host_events(struct host_its *its,
> + int host_devid, int eventid,
> + int lpi, int nrevents);
> +int gicv3_allocate_host_lpi_block(struct host_its *its, struct domain *d,
> + uint32_t host_devid, uint32_t eventid);
> +int gicv3_free_host_lpi_block(struct host_its *its, uint32_t lpi);
>
> #else
>
> --
> 2.9.0
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |