[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 03/28] ARM: GICv3: allocate LPI pending and property table



Hi Andre,

On 30/01/17 18:31, Andre Przywara wrote:
The ARM GICv3 provides a new kind of interrupt called LPIs.
The pending bits and the configuration data (priority, enable bits) for
those LPIs are stored in tables in normal memory, which software has to
provide to the hardware.
Allocate the required memory, initialize it and hand it over to each
redistributor. The maximum number of LPIs to be used can be adjusted with
the command line option "max_lpi_bits", which defaults to a compile time
constant exposed in Kconfig.

Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
---
 xen/arch/arm/Kconfig              |  15 +++++
 xen/arch/arm/Makefile             |   1 +
 xen/arch/arm/gic-v3-lpi.c         | 129 ++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/gic-v3.c             |  44 +++++++++++++
 xen/include/asm-arm/bitops.h      |   1 +
 xen/include/asm-arm/gic.h         |   2 +
 xen/include/asm-arm/gic_v3_defs.h |  52 ++++++++++++++-
 xen/include/asm-arm/gic_v3_its.h  |  22 ++++++-
 8 files changed, 264 insertions(+), 2 deletions(-)
 create mode 100644 xen/arch/arm/gic-v3-lpi.c

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index bf64c61..71734a1 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -49,6 +49,21 @@ config HAS_ITS
         bool "GICv3 ITS MSI controller support"
         depends on HAS_GICV3

+config MAX_PHYS_LPI_BITS
+        depends on HAS_ITS
+        int "Maximum bits for GICv3 host LPIs (14-32)"
+        range 14 32
+        default "20"
+        help
+          Specifies the maximum number of LPIs (in bits) Xen should take
+          care of. The host ITS may provide support for a very large number
+          of supported LPIs, for all of which we may not want to allocate
+          memory, so this number here allows to limit this.

I think the description is misleading, if a user wants 8K worth of LPIs by default, he would have to use 14 and not 13.

Furthermore, you provide both a runtime option (via command line) and build time option (via Kconfig). You don't express what is the differences between both and how there are supposed to co-exist.

Anyway, IHMO the command line option should be sufficient to allow override if necessary. So I would drop the Kconfig version.

+          Xen itself does not know how many LPIs domains will ever need
+          beforehand.
+          This can be overriden on the command line with the max_lpi_bits

s/overriden/overridden/

+          parameter.
+
 endmenu

 menu "ARM errata workaround via the alternative framework"
diff --git a/xen/arch/arm/Makefile b/xen/arch/arm/Makefile
index 5f4ff23..4ccf2eb 100644
--- a/xen/arch/arm/Makefile
+++ b/xen/arch/arm/Makefile
@@ -19,6 +19,7 @@ obj-y += gic.o
 obj-y += gic-v2.o
 obj-$(CONFIG_HAS_GICV3) += gic-v3.o
 obj-$(CONFIG_HAS_ITS) += gic-v3-its.o
+obj-$(CONFIG_HAS_ITS) += gic-v3-lpi.o
 obj-y += guestcopy.o
 obj-y += hvm.o
 obj-y += io.o
diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
new file mode 100644
index 0000000..e2fc901
--- /dev/null
+++ b/xen/arch/arm/gic-v3-lpi.c
@@ -0,0 +1,129 @@
+/*
+ * xen/arch/arm/gic-v3-lpi.c
+ *
+ * ARM GICv3 Locality-specific Peripheral Interrupts (LPI) support
+ *
+ * Copyright (C) 2016,2017 - ARM Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <xen/config.h>

xen/config.h is not necessary.

+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/sizes.h>
+#include <asm/gic.h>
+#include <asm/gic_v3_defs.h>
+#include <asm/gic_v3_its.h>
+
+/* Global state */
+static struct {
+    uint8_t *lpi_property;
+    unsigned int host_lpi_bits;

On the previous version, Stefano suggested to rename this to phys_lpi_bits + adding a comment as you store the number of bits.

However, looking at the usage the number of bits is only required during the initialization. Runtime code (such as gic_get_host_lpi) will use the number of LPIs (see gic_get_host_lpi) and therefore require extra instructions to compute the value.

So I would prefer if you store the number of LPIs here to optimize the common case.

Also, I find the naming "id_bits" confusing because you store the number of bits to encode the max LPI ID and not the number of bits to encode the number of LPI.

+} lpi_data;
+
+/* Pending table for each redistributor */
+static DEFINE_PER_CPU(void *, pending_table);
+
+#define MAX_PHYS_LPIS   (BIT_ULL(lpi_data.host_lpi_bits) - LPI_OFFSET)
+
+uint64_t gicv3_lpi_allocate_pendtable(void)
+{
+    uint64_t reg;
+    void *pendtable;
+
+    reg  = GIC_BASER_CACHE_RaWaWb << GICR_PENDBASER_INNER_CACHEABILITY_SHIFT;
+    reg |= GIC_BASER_CACHE_SameAsInner << 
GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT;
+    reg |= GIC_BASER_InnerShareable << GICR_PENDBASER_SHAREABILITY_SHIFT;
+
+    if ( !this_cpu(pending_table) )
+    {
+        /*
+         * The pending table holds one bit per LPI and even covers bits for
+         * interrupt IDs below 8192, so we allocate the full range.
+         * The GICv3 imposes a 64KB alignment requirement.
+         */
+        pendtable = _xmalloc(BIT_ULL(lpi_data.host_lpi_bits) / 8, SZ_64K);
+        if ( !pendtable )
+            return 0;
+
+        memset(pendtable, 0, BIT_ULL(lpi_data.host_lpi_bits) / 8);

You can use _zalloc to do the allocation and then memset to 0.

+        __flush_dcache_area(pendtable, BIT_ULL(lpi_data.host_lpi_bits) / 8);

Please use clean_and_invalidate_dcache_va_range.

+
+        this_cpu(pending_table) = pendtable;
+    }
+    else
+    {
+        pendtable = this_cpu(pending_table);
+    }

The {} are not necessary. Also, on the previous version it was mentioned this should be an error and then replace by a BUG_ON().

Please do the change.

+
+    reg |= GICR_PENDBASER_PTZ;
+
+    ASSERT(!(virt_to_maddr(pendtable) & ~GENMASK(51, 16)));

I don't understand the purpose of this ASSERT. The bits 15:0 should always be zero otherwise this would be a bug in the memory allocator. For bits 64:52, the architecture so far only support up to 52 bits.

By keeping this ASSERT, you will make our life more difficult to extend the number of physical address supported if ARM decides to bump it.
So please drop this ASSERT.

+    reg |= virt_to_maddr(pendtable);
+
+    return reg;
+}
+
+uint64_t gicv3_lpi_get_proptable(void)
+{
+    uint64_t reg;
+
+    reg  = GIC_BASER_CACHE_RaWaWb << GICR_PENDBASER_INNER_CACHEABILITY_SHIFT;
+    reg |= GIC_BASER_CACHE_SameAsInner << 
GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT;
+    reg |= GIC_BASER_InnerShareable << GICR_PENDBASER_SHAREABILITY_SHIFT;

You are using the shift defines from PENDBASER and not PROPBASER.

+
+    /*
+     * The property table is shared across all redistributors, so allocate
+     * this only once, but return the same value on subsequent calls.
+     */
+    if ( !lpi_data.lpi_property )
+    {
+        /* The property table holds one byte per LPI. */
+        void *table = alloc_xenheap_pages(lpi_data.host_lpi_bits - PAGE_SHIFT,
+                                          0);

The property table address has to be 4KB aligned right? If so, I would much prefer if you use _xmalloc(BIT_ULL(lpi_data.host_lpi_bits), SZ_4K) to avoid relying on PAGE_SIZE == 4KB.

Also, you will allocate more memory than necessary because the property table only covers the LPIs.

+
+        if ( !table )
+            return 0;
+
+        memset(table, GIC_PRI_IRQ | LPI_PROP_RES1, MAX_PHYS_LPIS);

You could combine both suggested _xmalloc and memset to 0 in a single call to _zalloc.

+        __flush_dcache_area(table, MAX_PHYS_LPIS);

Please use clean_and_invalidate_dcache_va_range.

+        lpi_data.lpi_property = table;
+    }
+
+    reg |= ((lpi_data.host_lpi_bits - 1) << 0);

Please avoid hardcoded shift.

+
+    ASSERT(!(virt_to_maddr(lpi_data.lpi_property) & ~GENMASK(51, 12)));
+    reg |= virt_to_maddr(lpi_data.lpi_property);
+
+    return reg;
+}
+
+static unsigned int max_lpi_bits = CONFIG_MAX_PHYS_LPI_BITS;
+integer_param("max_lpi_bits", max_lpi_bits);

Please document this new option in docs/misc/xen-command-line.markdown.

+
+int gicv3_lpi_init_host_lpis(unsigned int hw_lpi_bits)

Stefano suggested to rename this function to gicv3_lpi_init_phys_lpis and I agree with him here.

+{
+    lpi_data.host_lpi_bits = min(hw_lpi_bits, max_lpi_bits);
+
+    printk("GICv3: using at most %lld LPIs on the host.\n", MAX_PHYS_LPIS);

s/lld/llu/.

+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index 838dd11..fcb86c8 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -546,6 +546,9 @@ static void __init gicv3_dist_init(void)
     type = readl_relaxed(GICD + GICD_TYPER);
     nr_lines = 32 * ((type & GICD_TYPE_LINES) + 1);

+    if ( type & GICD_TYPE_LPIS )
+        gicv3_lpi_init_host_lpis(((type >> GICD_TYPE_ID_BITS_SHIFT) & 0x1f) + 
1);

A macro has been suggested on the previous version here to avoid the hardcoded 0x1f.

+
     printk("GICv3: %d lines, (IID %8.8x).\n",
            nr_lines, readl_relaxed(GICD + GICD_IIDR));

@@ -616,6 +619,33 @@ static int gicv3_enable_redist(void)
     return 0;
 }

+static int gicv3_rdist_init_lpis(void __iomem * rdist_base)

I think it would make sense to move this function in gicv3-lpi.c. So only one function rather than 2 would be exported.

+{
+    uint32_t reg;
+    uint64_t table_reg;
+
+    /* We don't support LPIs without an ITS. */
+    if ( list_empty(&host_its_list) )

See my comment on patch #2 regarding host_its_list.

+        return -ENODEV;
+
+    /* Make sure LPIs are disabled before setting up the tables. */
+    reg = readl_relaxed(rdist_base + GICR_CTLR);
+    if ( reg & GICR_CTLR_ENABLE_LPIS )
+        return -EBUSY;

Why don't you just disable LPIs here? AFAIK, it should just be
writel_relaxed(reg & ~GICR_CTLR_ENABLE_LPIS, GICR_CTLR);

+
+    table_reg = gicv3_lpi_allocate_pendtable();
+    if ( !table_reg )

From the spec, GICR_PENDBASER full of 0 is valid.

+        return -ENOMEM;
+    writeq_relaxed(table_reg, rdist_base + GICR_PENDBASER);

On the first version of this series, I mentioned that based on the spec (8.11.18 in ARM IHI 0069C) cacheability and shareability may not stick.

Whilst this may not (?) be a concern for the pending table, Xen will write in the property table to enable/disable LPIs. So we would need to know whether the cache needs to be cleaned after each access or not.

+
+    table_reg = gicv3_lpi_get_proptable();
+    if ( !table_reg )
+        return -ENOMEM;
+    writeq_relaxed(table_reg, rdist_base + GICR_PROPBASER);

See all my remarks above.

+
+    return 0;
+}
+
 static int __init gicv3_populate_rdist(void)
 {
     int i;
@@ -658,6 +688,20 @@ static int __init gicv3_populate_rdist(void)
             if ( (typer >> 32) == aff )
             {
                 this_cpu(rbase) = ptr;
+
+                if ( typer & GICR_TYPER_PLPIS )
+                {
+                    int ret;
+
+                    ret = gicv3_rdist_init_lpis(ptr);
+                    if ( ret && ret != -ENODEV )
+                    {
+                        printk("GICv3: CPU%d: Cannot initialize LPIs: %d\n",

CPU%u

+                               smp_processor_id(), ret);
+                        break;
+                    }
+                }
+
                 printk("GICv3: CPU%d: Found redistributor in region %d @%p\n",
                         smp_processor_id(), i, ptr);
                 return 0;
diff --git a/xen/include/asm-arm/bitops.h b/xen/include/asm-arm/bitops.h
index bda8898..1cbfb9e 100644
--- a/xen/include/asm-arm/bitops.h
+++ b/xen/include/asm-arm/bitops.h
@@ -24,6 +24,7 @@
 #define BIT(nr)                 (1UL << (nr))
 #define BIT_MASK(nr)            (1UL << ((nr) % BITS_PER_WORD))
 #define BIT_WORD(nr)            ((nr) / BITS_PER_WORD)
+#define BIT_ULL(nr)             (1ULL << (nr))
 #define BITS_PER_BYTE           8

 #define ADDR (*(volatile int *) addr)
diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
index 836a103..12bd155 100644
--- a/xen/include/asm-arm/gic.h
+++ b/xen/include/asm-arm/gic.h
@@ -220,6 +220,8 @@ enum gic_version {
     GIC_V3,
 };

+#define LPI_OFFSET      8192
+

It would make much sense to have this definition moved in irq.h close to NR_IRQS.

Also, I am a bit surprised that NR_IRQS & co has not been modified. Is there any reason for that?

Regards,

--
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.