Xen project Mailing List

Re: [Xen-devel] [PATCH 5/6] vpci: fix execution of long running operations

On Tue, Oct 09, 2018 at 11:42:35AM +0200, Roger Pau Monne wrote: > BAR map/unmap is a long running operation that needs to be preempted > in order to avoid overrunning the assigned vCPU time (or even > triggering the watchdog). > > Current logic for this preemption is wrong, and won't work at all for > AMD since only Intel makes use of hvm_io_pending (and even in that > case the current code is wrong). > > Instead move the code that performs the mapping/unmapping to > handle_hvm_io_completion and use do_softirq in order to execute the > pending softirqs while the {un}mapping takes place. Note that > do_softirq can also trigger a context switch to another vCPU, so > having pending vpci operations shouldn't prevent fair scheduling. > > When the {un}map operation is queued (as a result of a trapped PCI > access) a schedule softirq is raised in order to force a context > switch and the execution of handle_hvm_io_completion. The logic of this patch is wrong, and calling do_softirq in order to preempt can lead to stack overflow due to recursion because the callee vCPU is not blocked and can be scheduled when calling do_softirq, leading to a recursive execution of vpci_process_pending if the MMIO area to modify is big enough. Instead of running those operation on the vCPU context I think it's easier to switch to a task and instead pause the guest vCPU until the BARs are mapped and memory decoding is enabled. Below is the updated patch. ---8<--- From 5c9f8802c46594a39de776d2414210fb21127b78 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne <roger.pau@xxxxxxxxxx> Date: Wed, 10 Oct 2018 11:04:45 +0200 Subject: [PATCH 5/6] vpci: fix execution of long running operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BAR map/unmap is a long running operation that needs to be preempted in order to avoid overrunning the assigned vCPU time (or even triggering the watchdog). Current logic for this preemption is wrong, and won't work at all for AMD since only Intel makes use of hvm_io_pending (and even in that case the current code is wrong). Instead move the code that performs the mapping/unmapping to a tasklet and pause the guest vCPU until the {un}mapping is done and the memory decoding bit has been toggled. Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx> --- Cc: Paul Durrant <paul.durrant@xxxxxxxxxx> Cc: Jan Beulich <jbeulich@xxxxxxxx> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx> Cc: Wei Liu <wei.liu2@xxxxxxxxxx> Cc: George Dunlap <George.Dunlap@xxxxxxxxxxxxx> Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx> Cc: Julien Grall <julien.grall@xxxxxxx> Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx> Cc: Tim Deegan <tim@xxxxxxx> --- xen/arch/x86/hvm/ioreq.c | 3 -- xen/common/domain.c | 2 ++ xen/drivers/vpci/header.c | 70 +++++++++++++++++++++++---------------- xen/include/xen/vpci.h | 15 +++------ 4 files changed, 48 insertions(+), 42 deletions(-) diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c index 3569beaad5..31429265f8 100644 --- a/xen/arch/x86/hvm/ioreq.c +++ b/xen/arch/x86/hvm/ioreq.c @@ -85,9 +85,6 @@ bool hvm_io_pending(struct vcpu *v) struct hvm_ioreq_server *s; unsigned int id; - if ( has_vpci(d) && vpci_process_pending(v) ) - return true; - FOR_EACH_IOREQ_SERVER(d, id, s) { struct hvm_ioreq_vcpu *sv; diff --git a/xen/common/domain.c b/xen/common/domain.c index 65151e2ac4..54d2c26bd9 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -153,6 +153,8 @@ struct vcpu *vcpu_create( grant_table_init_vcpu(v); + vpci_init_vcpu(v); + if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) || !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) || !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) || diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c index 9234de9b26..4af85d3c02 100644 --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -118,39 +118,48 @@ static void modify_decoding(const struct pci_dev *pdev, bool map, bool rom_only) cmd); } -bool vpci_process_pending(struct vcpu *v) +static void vpci_process_pending(unsigned long data) { - if ( v->vpci.mem ) + struct vcpu *v = (void *)data; + struct map_data map_data = { + .d = v->domain, + .map = v->vpci.map, + }; + int rc; + + ASSERT(v->vpci.mem && atomic_read(&v->pause_count)); + + while ( (rc = rangeset_consume_ranges(v->vpci.mem, map_range, &map_data)) == + -ERESTART ) { - struct map_data data = { - .d = v->domain, - .map = v->vpci.map, - }; - int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data); - - if ( rc == -ERESTART ) - return true; - - spin_lock(&v->vpci.pdev->vpci->lock); - /* Disable memory decoding unconditionally on failure. */ - modify_decoding(v->vpci.pdev, !rc && v->vpci.map, - !rc && v->vpci.rom_only); - spin_unlock(&v->vpci.pdev->vpci->lock); - - rangeset_destroy(v->vpci.mem); - v->vpci.mem = NULL; - if ( rc ) - /* - * FIXME: in case of failure remove the device from the domain. - * Note that there might still be leftover mappings. While this is - * safe for Dom0, for DomUs the domain will likely need to be - * killed in order to avoid leaking stale p2m mappings on - * failure. - */ - vpci_remove_device(v->vpci.pdev); + tasklet_schedule(&v->vpci.task); + return; } - return false; + spin_lock(&v->vpci.pdev->vpci->lock); + /* Disable memory decoding unconditionally on failure. */ + modify_decoding(v->vpci.pdev, !rc && v->vpci.map, + !rc && v->vpci.rom_only); + spin_unlock(&v->vpci.pdev->vpci->lock); + + rangeset_destroy(v->vpci.mem); + v->vpci.mem = NULL; + if ( rc ) + /* + * FIXME: in case of failure remove the device from the domain. + * Note that there might still be leftover mappings. While this is + * safe for Dom0, for DomUs the domain will likely need to be + * killed in order to avoid leaking stale p2m mappings on + * failure. + */ + vpci_remove_device(v->vpci.pdev); + + vcpu_unpause(v); +} + +void vpci_init_vcpu(struct vcpu *v) +{ + tasklet_init(&v->vpci.task, vpci_process_pending, (unsigned long)v); } static int __init apply_map(struct domain *d, const struct pci_dev *pdev, @@ -183,6 +192,9 @@ static void defer_map(struct domain *d, struct pci_dev *pdev, curr->vpci.mem = mem; curr->vpci.map = map; curr->vpci.rom_only = rom_only; + /* Pause the vCPU and schedule the tasklet that will perform the mapping. */ + vcpu_pause_nosync(curr); + tasklet_schedule(&curr->vpci.task); } static int modify_bars(const struct pci_dev *pdev, bool map, bool rom_only) diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h index af2b8580ee..f97c48a8f1 100644 --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -49,11 +49,8 @@ uint32_t vpci_hw_read16(const struct pci_dev *pdev, unsigned int reg, uint32_t vpci_hw_read32(const struct pci_dev *pdev, unsigned int reg, void *data); -/* - * Check for pending vPCI operations on this vcpu. Returns true if the vcpu - * should not run. - */ -bool __must_check vpci_process_pending(struct vcpu *v); +/* Init per-vCPU vPCI data. */ +void vpci_init_vcpu(struct vcpu *v); struct vpci { /* List of vPCI handlers for a device. */ @@ -142,6 +139,8 @@ struct vpci { }; struct vpci_vcpu { + /* Per-vCPU tasklet to enqueue pending operations. */ + struct tasklet task; /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */ struct rangeset *mem; struct pci_dev *pdev; @@ -233,11 +232,7 @@ static inline void vpci_write(pci_sbdf_t sbdf, unsigned int reg, ASSERT_UNREACHABLE(); } -static inline bool vpci_process_pending(struct vcpu *v) -{ - ASSERT_UNREACHABLE(); - return false; -} +static inline void vpci_init_vcpu(struct vcpu *v) { } #endif #endif -- 2.19.0 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.