Xen project Mailing List

[qemu-xen staging] Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

Date: Thu, 29 Oct 2020 20:01:41 +0000

Delivery-date: Thu, 29 Oct 2020 20:01:47 +0000

List-id: "Change log for Mercurial $receive only$" <xen-changelog.lists.xenproject.org>

=== This changeset includes merge from high-traffic branch === Commits on that branch are not reported individually. commit 2033cc6efa98b831d7839e367aa7d5aa74d0750f Merge: d4a6bab1ecf23d86e20e2f08a144e4e48643a6e4 56908dc5041aa424fd1495b6c6beb78c539d93e1 Author: Peter Maydell <peter.maydell@xxxxxxxxxx> AuthorDate: Sat Jul 11 16:52:24 2020 +0100 Commit: Peter Maydell <peter.maydell@xxxxxxxxxx> CommitDate: Sat Jul 11 16:52:24 2020 +0100 Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging * Make checkpatch say 'qemu' instead of 'kernel' (Aleksandar) * Fix PSE guests with emulated NPT (Alexander B. #1) * Fix leak (Alexander B. #2) * HVF fixes (Roman, Cameron) * New Sapphire Rapids CPUID bits (Cathy) * cpus.c and softmmu/ cleanups (Claudio) * TAP driver tweaks (Daniel, Havard) * object-add bugfix and testcases (Eric A.) * Fix Coverity MIN_CONST and MAX_CONST (Eric B.) * "info lapic" improvement (Jan) * SSE fixes (Joseph) * "-msg guest-name" option (Mario) * support for AMD nested live migration (myself) * Small i386 TCG fixes (myself) * improved error reporting for Xen (myself) * fix "-cpu host -overcommit cpu-pm=on" (myself) * Add accel/Kconfig (Philippe) * iscsi sense handling fixes (Yongji) * Misc bugfixes # gpg: Signature made Sat 11 Jul 2020 00:33:41 BST # gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83 # gpg: issuer "pbonzini@xxxxxxxxxx" # gpg: Good signature from "Paolo Bonzini <bonzini@xxxxxxx>" [full] # gpg: aka "Paolo Bonzini <pbonzini@xxxxxxxxxx>" [full] # Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1 # Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83 * remotes/bonzini/tags/for-upstream: (47 commits) linux-headers: update again to 5.8 apic: Report current_count via 'info lapic' scripts: improve message when TAP based tests fail target/i386: Enable TSX Suspend Load Address Tracking feature target/i386: Add SERIALIZE cpu feature softmmu/vl: Remove the check for colons in -accel parameters cpu-throttle: new module, extracted from cpus.c softmmu: move softmmu only files from root pc: fix leak in pc_system_flash_cleanup_unused cpus: Move CPU code from exec.c to cpus-common.c target/i386: Correct the warning message of Intel PT checkpatch: Change occurences of 'kernel' to 'qemu' in user messages iscsi: return -EIO when sense fields are meaningless iscsi: handle check condition status in retry loop target/i386: sev: fail query-sev-capabilities if QEMU cannot use SEV target/i386: sev: provide proper error reporting for query-sev-capabilities KVM: x86: believe what KVM says about WAITPKG target/i386: implement undocumented "smsw r32" behavior target/i386: remove gen_io_end Makefile: simplify MINIKCONF rules ... Signed-off-by: Peter Maydell <peter.maydell@xxxxxxxxxx> Kconfig | 4 + Kconfig.host | 7 - MAINTAINERS | 29 +- Makefile | 12 +- Makefile.target | 7 +- accel/Kconfig | 9 + accel/stubs/tcg-stub.c | 7 + arch_init.c | 113 - balloon.c | 106 - block/iscsi.c | 22 +- cpus-common.c | 18 + cpus.c | 2329 --------------------- exec.c | 22 - hw/core/null-machine.c | 5 + hw/i386/pc_sysfw.c | 5 + hw/intc/apic.c | 18 - hw/intc/apic_common.c | 19 + include/hw/core/cpu.h | 37 - include/hw/i386/apic_internal.h | 1 + include/qemu/error-report.h | 2 + include/qemu/main-loop.h | 5 + include/qemu/osdep.h | 21 +- include/qom/object.h | 26 +- include/sysemu/cpu-throttle.h | 68 + include/sysemu/hvf.h | 2 +- include/sysemu/hw_accel.h | 13 + ioport.c | 299 --- linux-headers/asm-arm/unistd-common.h | 1 + linux-headers/asm-x86/kvm.h | 5 +- memory.c | 3250 ----------------------------- memory_mapping.c | 357 ---- migration/migration.c | 1 + migration/ram.c | 1 + qemu-options.hx | 12 +- qom/object.c | 21 +- qom/object_interfaces.c | 7 +- qtest.c | 820 -------- scripts/checkpatch.pl | 6 +- scripts/tap-driver.pl | 2 +- softmmu/Makefile.objs | 11 + softmmu/arch_init.c | 113 + softmmu/balloon.c | 106 + softmmu/cpu-throttle.c | 122 ++ softmmu/cpus.c | 2238 ++++++++++++++++++++ softmmu/ioport.c | 299 +++ softmmu/memory.c | 3250 +++++++++++++++++++++++++++++ softmmu/memory_mapping.c | 357 ++++ softmmu/qtest.c | 820 ++++++++ softmmu/vl.c | 14 +- target/i386/Makefile.objs | 1 + target/i386/cpu.c | 13 +- target/i386/cpu.h | 10 + target/i386/excp_helper.c | 4 +- target/i386/fpu_helper.c | 37 +- target/i386/gdbstub.c | 1 + target/i386/helper.c | 6 +- target/i386/helper.h | 1 + target/i386/hvf/hvf.c | 137 +- target/i386/hvf/vmx.h | 17 +- target/i386/kvm.c | 53 +- target/i386/kvm_i386.h | 1 + target/i386/machine.c | 31 +- target/i386/monitor.c | 10 +- target/i386/ops_sse.h | 28 +- target/i386/sev-stub.c | 3 +- target/i386/sev.c | 27 +- target/i386/sev_i386.h | 2 +- target/i386/svm.h | 1 + target/i386/svm_helper.c | 7 +- target/i386/tcg-stub.c | 25 + target/i386/translate.c | 36 +- tests/Makefile.include | 2 +- tests/qtest/qmp-cmd-test.c | 109 +- tests/tcg/i386/Makefile.target | 4 + tests/tcg/i386/test-i386-sse-exceptions.c | 813 ++++++++ ui/cocoa.m | 1 + util/qemu-error.c | 7 + 77 files changed, 8790 insertions(+), 7616 deletions(-) diff --git a/Kconfig b/Kconfig new file mode 100644 index 0000000000..bf694c42af --- /dev/null +++ b/Kconfig @@ -0,0 +1,4 @@ +source Kconfig.host +source backends/Kconfig +source accel/Kconfig +source hw/Kconfig diff --git a/Kconfig.host b/Kconfig.host index 55136e037d..a6d871c399 100644 --- a/Kconfig.host +++ b/Kconfig.host @@ -2,9 +2,6 @@ # down to Kconfig. See also MINIKCONF_ARGS in the Makefile: # these two need to be kept in sync. -config KVM - bool - config LINUX bool @@ -31,10 +28,6 @@ config VHOST_KERNEL bool select VHOST -config XEN - bool - select FSDEV_9P if VIRTFS - config VIRTFS bool diff --git a/MAINTAINERS b/MAINTAINERS index 6aa54f7f8f..361ae5c662 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -115,7 +115,7 @@ Overall TCG CPUs M: Richard Henderson <rth@xxxxxxxxxxx> R: Paolo Bonzini <pbonzini@xxxxxxxxxx> S: Maintained -F: cpus.c +F: softmmu/cpus.c F: cpus-common.c F: exec.c F: accel/tcg/ @@ -362,7 +362,7 @@ Overall KVM CPUs M: Paolo Bonzini <pbonzini@xxxxxxxxxx> L: kvm@xxxxxxxxxxxxxxx S: Supported -F: */kvm.* +F: */*/kvm* F: accel/kvm/ F: accel/stubs/kvm-stub.c F: include/hw/kvm/ @@ -416,8 +416,21 @@ S: Supported F: target/i386/kvm.c F: scripts/kvm/vmxcap +Guest CPU Cores (other accelerators) +------------------------------------ +Overall +M: Richard Henderson <rth@xxxxxxxxxxx> +R: Paolo Bonzini <pbonzini@xxxxxxxxxx> +S: Maintained +F: include/sysemu/accel.h +F: accel/accel.c +F: accel/Makefile.objs +F: accel/stubs/Makefile.objs + X86 HVF CPUs +M: Cameron Esfahani <dirty@xxxxxxxxx> M: Roman Bolshakov <r.bolshakov@xxxxxxxxx> +W: https://wiki.qemu.org/Features/HVF S: Maintained F: accel/stubs/hvf-stub.c F: target/i386/hvf/ @@ -465,6 +478,7 @@ M: Colin Xu <colin.xu@xxxxxxxxx> L: haxm-team@xxxxxxxxx W: https://github.com/intel/haxm/issues S: Maintained +F: accel/stubs/hax-stub.c F: include/sysemu/hax.h F: target/i386/hax-* @@ -1710,7 +1724,7 @@ M: David Hildenbrand <david@xxxxxxxxxx> S: Maintained F: hw/virtio/virtio-balloon*.c F: include/hw/virtio/virtio-balloon.h -F: balloon.c +F: softmmu/balloon.c F: include/sysemu/balloon.h virtio-9p @@ -2189,12 +2203,12 @@ Memory API M: Paolo Bonzini <pbonzini@xxxxxxxxxx> S: Supported F: include/exec/ioport.h -F: ioport.c F: include/exec/memop.h F: include/exec/memory.h F: include/exec/ram_addr.h F: include/exec/ramblock.h -F: memory.c +F: softmmu/ioport.c +F: softmmu/memory.c F: include/exec/memory-internal.h F: exec.c F: scripts/coccinelle/memory-region-housekeeping.cocci @@ -2226,13 +2240,14 @@ F: ui/cocoa.m Main loop M: Paolo Bonzini <pbonzini@xxxxxxxxxx> S: Maintained -F: cpus.c F: include/qemu/main-loop.h F: include/sysemu/runstate.h F: util/main-loop.c F: util/qemu-timer.c F: softmmu/vl.c F: softmmu/main.c +F: softmmu/cpus.c +F: softmmu/cpu-throttle.c F: qapi/run-state.json Human Monitor (HMP) @@ -2387,7 +2402,7 @@ M: Thomas Huth <thuth@xxxxxxxxxx> M: Laurent Vivier <lvivier@xxxxxxxxxx> R: Paolo Bonzini <pbonzini@xxxxxxxxxx> S: Maintained -F: qtest.c +F: softmmu/qtest.c F: accel/qtest.c F: tests/qtest/ X: tests/qtest/bios-tables-test-allowed-diff.h diff --git a/Makefile b/Makefile index b1b8a5a6d0..32345c610e 100644 --- a/Makefile +++ b/Makefile @@ -404,7 +404,8 @@ endif # This has to be kept in sync with Kconfig.host. MINIKCONF_ARGS = \ $(CONFIG_MINIKCONF_MODE) \ - $@ $*/config-devices.mak.d $< $(MINIKCONF_INPUTS) \ + $@ $*/config-devices.mak.d $< $(SRC_PATH)/Kconfig \ + CONFIG_TCG=$(CONFIG_TCG) \ CONFIG_KVM=$(CONFIG_KVM) \ CONFIG_SPICE=$(CONFIG_SPICE) \ CONFIG_IVSHMEM=$(CONFIG_IVSHMEM) \ @@ -418,12 +419,11 @@ MINIKCONF_ARGS = \ CONFIG_LINUX=$(CONFIG_LINUX) \ CONFIG_PVRDMA=$(CONFIG_PVRDMA) -MINIKCONF_INPUTS = $(SRC_PATH)/Kconfig.host $(SRC_PATH)/backends/Kconfig $(SRC_PATH)/hw/Kconfig -MINIKCONF_DEPS = $(MINIKCONF_INPUTS) $(wildcard $(SRC_PATH)/hw/*/Kconfig) -MINIKCONF = $(PYTHON) $(SRC_PATH)/scripts/minikconf.py \ +MINIKCONF = $(PYTHON) $(SRC_PATH)/scripts/minikconf.py -$(SUBDIR_DEVICES_MAK): %/config-devices.mak: default-configs/%.mak $(MINIKCONF_DEPS) $(BUILD_DIR)/config-host.mak - $(call quiet-command, $(MINIKCONF) $(MINIKCONF_ARGS) > $@.tmp, "GEN", "$@.tmp") +$(SUBDIR_DEVICES_MAK): %/config-devices.mak: default-configs/%.mak $(SRC_PATH)/Kconfig $(BUILD_DIR)/config-host.mak + $(call quiet-command, $(MINIKCONF) $(MINIKCONF_ARGS) \ + > $@.tmp, "GEN", "$@.tmp") $(call quiet-command, if test -f $@; then \ if cmp -s $@.old $@; then \ mv $@.tmp $@; \ diff --git a/Makefile.target b/Makefile.target index 02bd9d7117..ffa2657269 100644 --- a/Makefile.target +++ b/Makefile.target @@ -152,16 +152,13 @@ endif #CONFIG_BSD_USER ######################################################### # System emulator target ifdef CONFIG_SOFTMMU -obj-y += arch_init.o cpus.o gdbstub.o balloon.o ioport.o -obj-y += qtest.o +obj-y += softmmu/ +obj-y += gdbstub.o obj-y += dump/ obj-y += hw/ obj-y += monitor/ obj-y += qapi/ -obj-y += memory.o -obj-y += memory_mapping.o obj-y += migration/ram.o -obj-y += softmmu/ LIBS := $(libs_softmmu) $(LIBS) # Hardware support diff --git a/accel/Kconfig b/accel/Kconfig new file mode 100644 index 0000000000..2ad94a3839 --- /dev/null +++ b/accel/Kconfig @@ -0,0 +1,9 @@ +config TCG + bool + +config KVM + bool + +config XEN + bool + select FSDEV_9P if VIRTFS diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c index 677191a69c..e4bbf997aa 100644 --- a/accel/stubs/tcg-stub.c +++ b/accel/stubs/tcg-stub.c @@ -22,3 +22,10 @@ void tb_flush(CPUState *cpu) void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) { } + +void *probe_access(CPUArchState *env, target_ulong addr, int size, + MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) +{ + /* Handled by hardware accelerator. */ + g_assert_not_reached(); +} diff --git a/arch_init.c b/arch_init.c deleted file mode 100644 index 8afea4748b..0000000000 --- a/arch_init.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -#include "qemu/osdep.h" -#include "cpu.h" -#include "sysemu/sysemu.h" -#include "sysemu/arch_init.h" -#include "hw/pci/pci.h" -#include "hw/audio/soundhw.h" -#include "qapi/error.h" -#include "qemu/config-file.h" -#include "qemu/error-report.h" -#include "hw/acpi/acpi.h" -#include "qemu/help_option.h" - -#ifdef TARGET_SPARC -int graphic_width = 1024; -int graphic_height = 768; -int graphic_depth = 8; -#elif defined(TARGET_M68K) -int graphic_width = 800; -int graphic_height = 600; -int graphic_depth = 8; -#else -int graphic_width = 800; -int graphic_height = 600; -int graphic_depth = 32; -#endif - - -#if defined(TARGET_ALPHA) -#define QEMU_ARCH QEMU_ARCH_ALPHA -#elif defined(TARGET_ARM) -#define QEMU_ARCH QEMU_ARCH_ARM -#elif defined(TARGET_CRIS) -#define QEMU_ARCH QEMU_ARCH_CRIS -#elif defined(TARGET_HPPA) -#define QEMU_ARCH QEMU_ARCH_HPPA -#elif defined(TARGET_I386) -#define QEMU_ARCH QEMU_ARCH_I386 -#elif defined(TARGET_LM32) -#define QEMU_ARCH QEMU_ARCH_LM32 -#elif defined(TARGET_M68K) -#define QEMU_ARCH QEMU_ARCH_M68K -#elif defined(TARGET_MICROBLAZE) -#define QEMU_ARCH QEMU_ARCH_MICROBLAZE -#elif defined(TARGET_MIPS) -#define QEMU_ARCH QEMU_ARCH_MIPS -#elif defined(TARGET_MOXIE) -#define QEMU_ARCH QEMU_ARCH_MOXIE -#elif defined(TARGET_NIOS2) -#define QEMU_ARCH QEMU_ARCH_NIOS2 -#elif defined(TARGET_OPENRISC) -#define QEMU_ARCH QEMU_ARCH_OPENRISC -#elif defined(TARGET_PPC) -#define QEMU_ARCH QEMU_ARCH_PPC -#elif defined(TARGET_RISCV) -#define QEMU_ARCH QEMU_ARCH_RISCV -#elif defined(TARGET_RX) -#define QEMU_ARCH QEMU_ARCH_RX -#elif defined(TARGET_S390X) -#define QEMU_ARCH QEMU_ARCH_S390X -#elif defined(TARGET_SH4) -#define QEMU_ARCH QEMU_ARCH_SH4 -#elif defined(TARGET_SPARC) -#define QEMU_ARCH QEMU_ARCH_SPARC -#elif defined(TARGET_TRICORE) -#define QEMU_ARCH QEMU_ARCH_TRICORE -#elif defined(TARGET_UNICORE32) -#define QEMU_ARCH QEMU_ARCH_UNICORE32 -#elif defined(TARGET_XTENSA) -#define QEMU_ARCH QEMU_ARCH_XTENSA -#endif - -const uint32_t arch_type = QEMU_ARCH; - -int kvm_available(void) -{ -#ifdef CONFIG_KVM - return 1; -#else - return 0; -#endif -} - -int xen_available(void) -{ -#ifdef CONFIG_XEN - return 1; -#else - return 0; -#endif -} diff --git a/balloon.c b/balloon.c deleted file mode 100644 index 354408c6ea..0000000000 --- a/balloon.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Generic Balloon handlers and management - * - * Copyright (c) 2003-2008 Fabrice Bellard - * Copyright (C) 2011 Red Hat, Inc. - * Copyright (C) 2011 Amit Shah <amit.shah@xxxxxxxxxx> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu/atomic.h" -#include "sysemu/kvm.h" -#include "sysemu/balloon.h" -#include "trace-root.h" -#include "qapi/error.h" -#include "qapi/qapi-commands-misc.h" -#include "qapi/qmp/qerror.h" - -static QEMUBalloonEvent *balloon_event_fn; -static QEMUBalloonStatus *balloon_stat_fn; -static void *balloon_opaque; - -static bool have_balloon(Error **errp) -{ - if (kvm_enabled() && !kvm_has_sync_mmu()) { - error_set(errp, ERROR_CLASS_KVM_MISSING_CAP, - "Using KVM without synchronous MMU, balloon unavailable"); - return false; - } - if (!balloon_event_fn) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE, - "No balloon device has been activated"); - return false; - } - return true; -} - -int qemu_add_balloon_handler(QEMUBalloonEvent *event_func, - QEMUBalloonStatus *stat_func, void *opaque) -{ - if (balloon_event_fn || balloon_stat_fn || balloon_opaque) { - /* We're already registered one balloon handler. How many can - * a guest really have? - */ - return -1; - } - balloon_event_fn = event_func; - balloon_stat_fn = stat_func; - balloon_opaque = opaque; - return 0; -} - -void qemu_remove_balloon_handler(void *opaque) -{ - if (balloon_opaque != opaque) { - return; - } - balloon_event_fn = NULL; - balloon_stat_fn = NULL; - balloon_opaque = NULL; -} - -BalloonInfo *qmp_query_balloon(Error **errp) -{ - BalloonInfo *info; - - if (!have_balloon(errp)) { - return NULL; - } - - info = g_malloc0(sizeof(*info)); - balloon_stat_fn(balloon_opaque, info); - return info; -} - -void qmp_balloon(int64_t target, Error **errp) -{ - if (!have_balloon(errp)) { - return; - } - - if (target <= 0) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size"); - return; - } - - trace_balloon_event(balloon_opaque, target); - balloon_event_fn(balloon_opaque, target); -} diff --git a/block/iscsi.c b/block/iscsi.c index 6c2e353e1a..bd2122a3a4 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -241,9 +241,11 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, iTask->status = status; iTask->do_retry = 0; + iTask->err_code = 0; iTask->task = task; if (status != SCSI_STATUS_GOOD) { + iTask->err_code = -EIO; if (iTask->retries++ < ISCSI_CMD_RETRIES) { if (status == SCSI_STATUS_BUSY || status == SCSI_STATUS_TIMEOUT || @@ -266,16 +268,16 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, timer_mod(&iTask->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time); iTask->do_retry = 1; - } - } else if (status == SCSI_STATUS_CHECK_CONDITION) { - int error = iscsi_translate_sense(&task->sense); - if (error == EAGAIN) { - error_report("iSCSI CheckCondition: %s", - iscsi_get_error(iscsi)); - iTask->do_retry = 1; - } else { - iTask->err_code = -error; - iTask->err_str = g_strdup(iscsi_get_error(iscsi)); + } else if (status == SCSI_STATUS_CHECK_CONDITION) { + int error = iscsi_translate_sense(&task->sense); + if (error == EAGAIN) { + error_report("iSCSI CheckCondition: %s", + iscsi_get_error(iscsi)); + iTask->do_retry = 1; + } else { + iTask->err_code = -error; + iTask->err_str = g_strdup(iscsi_get_error(iscsi)); + } } } } diff --git a/cpus-common.c b/cpus-common.c index 8f5512b3d7..34044f4e4c 100644 --- a/cpus-common.c +++ b/cpus-common.c @@ -72,6 +72,8 @@ static int cpu_get_free_index(void) return max_cpu_index; } +CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); + void cpu_list_add(CPUState *cpu) { QEMU_LOCK_GUARD(&qemu_cpu_list_lock); @@ -96,6 +98,22 @@ void cpu_list_remove(CPUState *cpu) cpu->cpu_index = UNASSIGNED_CPU_INDEX; } +CPUState *qemu_get_cpu(int index) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) { + if (cpu->cpu_index == index) { + return cpu; + } + } + + return NULL; +} + +/* current CPU in the current thread. It is only valid inside cpu_exec() */ +__thread CPUState *current_cpu; + struct qemu_work_item { QSIMPLEQ_ENTRY(qemu_work_item) node; run_on_cpu_func func; diff --git a/cpus.c b/cpus.c deleted file mode 100644 index 41d1c5099f..0000000000 --- a/cpus.c +++ /dev/null @@ -1,2329 +0,0 @@ -/* - * QEMU System Emulator - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "qemu/config-file.h" -#include "qemu/cutils.h" -#include "migration/vmstate.h" -#include "monitor/monitor.h" -#include "qapi/error.h" -#include "qapi/qapi-commands-misc.h" -#include "qapi/qapi-events-run-state.h" -#include "qapi/qmp/qerror.h" -#include "qemu/error-report.h" -#include "qemu/qemu-print.h" -#include "sysemu/tcg.h" -#include "sysemu/block-backend.h" -#include "exec/gdbstub.h" -#include "sysemu/dma.h" -#include "sysemu/hw_accel.h" -#include "sysemu/kvm.h" -#include "sysemu/hax.h" -#include "sysemu/hvf.h" -#include "sysemu/whpx.h" -#include "exec/exec-all.h" - -#include "qemu/thread.h" -#include "qemu/plugin.h" -#include "sysemu/cpus.h" -#include "sysemu/qtest.h" -#include "qemu/main-loop.h" -#include "qemu/option.h" -#include "qemu/bitmap.h" -#include "qemu/seqlock.h" -#include "qemu/guest-random.h" -#include "tcg/tcg.h" -#include "hw/nmi.h" -#include "sysemu/replay.h" -#include "sysemu/runstate.h" -#include "hw/boards.h" -#include "hw/hw.h" - -#ifdef CONFIG_LINUX - -#include <sys/prctl.h> - -#ifndef PR_MCE_KILL -#define PR_MCE_KILL 33 -#endif - -#ifndef PR_MCE_KILL_SET -#define PR_MCE_KILL_SET 1 -#endif - -#ifndef PR_MCE_KILL_EARLY -#define PR_MCE_KILL_EARLY 1 -#endif - -#endif /* CONFIG_LINUX */ - -static QemuMutex qemu_global_mutex; - -int64_t max_delay; -int64_t max_advance; - -/* vcpu throttling controls */ -static QEMUTimer *throttle_timer; -static unsigned int throttle_percentage; - -#define CPU_THROTTLE_PCT_MIN 1 -#define CPU_THROTTLE_PCT_MAX 99 -#define CPU_THROTTLE_TIMESLICE_NS 10000000 - -bool cpu_is_stopped(CPUState *cpu) -{ - return cpu->stopped || !runstate_is_running(); -} - -static inline bool cpu_work_list_empty(CPUState *cpu) -{ - bool ret; - - qemu_mutex_lock(&cpu->work_mutex); - ret = QSIMPLEQ_EMPTY(&cpu->work_list); - qemu_mutex_unlock(&cpu->work_mutex); - return ret; -} - -static bool cpu_thread_is_idle(CPUState *cpu) -{ - if (cpu->stop || !cpu_work_list_empty(cpu)) { - return false; - } - if (cpu_is_stopped(cpu)) { - return true; - } - if (!cpu->halted || cpu_has_work(cpu) || - kvm_halt_in_kernel()) { - return false; - } - return true; -} - -static bool all_cpu_threads_idle(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - if (!cpu_thread_is_idle(cpu)) { - return false; - } - } - return true; -} - -/***********************************************************/ -/* guest cycle counter */ - -/* Protected by TimersState seqlock */ - -static bool icount_sleep = true; -/* Arbitrarily pick 1MIPS as the minimum allowable speed. */ -#define MAX_ICOUNT_SHIFT 10 - -typedef struct TimersState { - /* Protected by BQL. */ - int64_t cpu_ticks_prev; - int64_t cpu_ticks_offset; - - /* Protect fields that can be respectively read outside the - * BQL, and written from multiple threads. - */ - QemuSeqLock vm_clock_seqlock; - QemuSpin vm_clock_lock; - - int16_t cpu_ticks_enabled; - - /* Conversion factor from emulated instructions to virtual clock ticks. */ - int16_t icount_time_shift; - - /* Compensate for varying guest execution speed. */ - int64_t qemu_icount_bias; - - int64_t vm_clock_warp_start; - int64_t cpu_clock_offset; - - /* Only written by TCG thread */ - int64_t qemu_icount; - - /* for adjusting icount */ - QEMUTimer *icount_rt_timer; - QEMUTimer *icount_vm_timer; - QEMUTimer *icount_warp_timer; -} TimersState; - -static TimersState timers_state; -bool mttcg_enabled; - - -/* The current number of executed instructions is based on what we - * originally budgeted minus the current state of the decrementing - * icount counters in extra/u16.low. - */ -static int64_t cpu_get_icount_executed(CPUState *cpu) -{ - return (cpu->icount_budget - - (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra)); -} - -/* - * Update the global shared timer_state.qemu_icount to take into - * account executed instructions. This is done by the TCG vCPU - * thread so the main-loop can see time has moved forward. - */ -static void cpu_update_icount_locked(CPUState *cpu) -{ - int64_t executed = cpu_get_icount_executed(cpu); - cpu->icount_budget -= executed; - - atomic_set_i64(&timers_state.qemu_icount, - timers_state.qemu_icount + executed); -} - -/* - * Update the global shared timer_state.qemu_icount to take into - * account executed instructions. This is done by the TCG vCPU - * thread so the main-loop can see time has moved forward. - */ -void cpu_update_icount(CPUState *cpu) -{ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - cpu_update_icount_locked(cpu); - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); -} - -static int64_t cpu_get_icount_raw_locked(void) -{ - CPUState *cpu = current_cpu; - - if (cpu && cpu->running) { - if (!cpu->can_do_io) { - error_report("Bad icount read"); - exit(1); - } - /* Take into account what has run */ - cpu_update_icount_locked(cpu); - } - /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ - return atomic_read_i64(&timers_state.qemu_icount); -} - -static int64_t cpu_get_icount_locked(void) -{ - int64_t icount = cpu_get_icount_raw_locked(); - return atomic_read_i64(&timers_state.qemu_icount_bias) + - cpu_icount_to_ns(icount); -} - -int64_t cpu_get_icount_raw(void) -{ - int64_t icount; - unsigned start; - - do { - start = seqlock_read_begin(&timers_state.vm_clock_seqlock); - icount = cpu_get_icount_raw_locked(); - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); - - return icount; -} - -/* Return the virtual CPU time, based on the instruction counter. */ -int64_t cpu_get_icount(void) -{ - int64_t icount; - unsigned start; - - do { - start = seqlock_read_begin(&timers_state.vm_clock_seqlock); - icount = cpu_get_icount_locked(); - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); - - return icount; -} - -int64_t cpu_icount_to_ns(int64_t icount) -{ - return icount << atomic_read(&timers_state.icount_time_shift); -} - -static int64_t cpu_get_ticks_locked(void) -{ - int64_t ticks = timers_state.cpu_ticks_offset; - if (timers_state.cpu_ticks_enabled) { - ticks += cpu_get_host_ticks(); - } - - if (timers_state.cpu_ticks_prev > ticks) { - /* Non increasing ticks may happen if the host uses software suspend. */ - timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks; - ticks = timers_state.cpu_ticks_prev; - } - - timers_state.cpu_ticks_prev = ticks; - return ticks; -} - -/* return the time elapsed in VM between vm_start and vm_stop. Unless - * icount is active, cpu_get_ticks() uses units of the host CPU cycle - * counter. - */ -int64_t cpu_get_ticks(void) -{ - int64_t ticks; - - if (use_icount) { - return cpu_get_icount(); - } - - qemu_spin_lock(&timers_state.vm_clock_lock); - ticks = cpu_get_ticks_locked(); - qemu_spin_unlock(&timers_state.vm_clock_lock); - return ticks; -} - -static int64_t cpu_get_clock_locked(void) -{ - int64_t time; - - time = timers_state.cpu_clock_offset; - if (timers_state.cpu_ticks_enabled) { - time += get_clock(); - } - - return time; -} - -/* Return the monotonic time elapsed in VM, i.e., - * the time between vm_start and vm_stop - */ -int64_t cpu_get_clock(void) -{ - int64_t ti; - unsigned start; - - do { - start = seqlock_read_begin(&timers_state.vm_clock_seqlock); - ti = cpu_get_clock_locked(); - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); - - return ti; -} - -/* enable cpu_get_ticks() - * Caller must hold BQL which serves as mutex for vm_clock_seqlock. - */ -void cpu_enable_ticks(void) -{ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (!timers_state.cpu_ticks_enabled) { - timers_state.cpu_ticks_offset -= cpu_get_host_ticks(); - timers_state.cpu_clock_offset -= get_clock(); - timers_state.cpu_ticks_enabled = 1; - } - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); -} - -/* disable cpu_get_ticks() : the clock is stopped. You must not call - * cpu_get_ticks() after that. - * Caller must hold BQL which serves as mutex for vm_clock_seqlock. - */ -void cpu_disable_ticks(void) -{ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (timers_state.cpu_ticks_enabled) { - timers_state.cpu_ticks_offset += cpu_get_host_ticks(); - timers_state.cpu_clock_offset = cpu_get_clock_locked(); - timers_state.cpu_ticks_enabled = 0; - } - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); -} - -/* Correlation between real and virtual time is always going to be - fairly approximate, so ignore small variation. - When the guest is idle real and virtual time will be aligned in - the IO wait loop. */ -#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) - -static void icount_adjust(void) -{ - int64_t cur_time; - int64_t cur_icount; - int64_t delta; - - /* Protected by TimersState mutex. */ - static int64_t last_delta; - - /* If the VM is not running, then do nothing. */ - if (!runstate_is_running()) { - return; - } - - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, - cpu_get_clock_locked()); - cur_icount = cpu_get_icount_locked(); - - delta = cur_icount - cur_time; - /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ - if (delta > 0 - && last_delta + ICOUNT_WOBBLE < delta * 2 - && timers_state.icount_time_shift > 0) { - /* The guest is getting too far ahead. Slow time down. */ - atomic_set(&timers_state.icount_time_shift, - timers_state.icount_time_shift - 1); - } - if (delta < 0 - && last_delta - ICOUNT_WOBBLE > delta * 2 - && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { - /* The guest is getting too far behind. Speed time up. */ - atomic_set(&timers_state.icount_time_shift, - timers_state.icount_time_shift + 1); - } - last_delta = delta; - atomic_set_i64(&timers_state.qemu_icount_bias, - cur_icount - (timers_state.qemu_icount - << timers_state.icount_time_shift)); - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); -} - -static void icount_adjust_rt(void *opaque) -{ - timer_mod(timers_state.icount_rt_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); - icount_adjust(); -} - -static void icount_adjust_vm(void *opaque) -{ - timer_mod(timers_state.icount_vm_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - NANOSECONDS_PER_SECOND / 10); - icount_adjust(); -} - -static int64_t qemu_icount_round(int64_t count) -{ - int shift = atomic_read(&timers_state.icount_time_shift); - return (count + (1 << shift) - 1) >> shift; -} - -static void icount_warp_rt(void) -{ - unsigned seq; - int64_t warp_start; - - /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start - * changes from -1 to another value, so the race here is okay. - */ - do { - seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); - warp_start = timers_state.vm_clock_warp_start; - } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); - - if (warp_start == -1) { - return; - } - - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (runstate_is_running()) { - int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, - cpu_get_clock_locked()); - int64_t warp_delta; - - warp_delta = clock - timers_state.vm_clock_warp_start; - if (use_icount == 2) { - /* - * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too - * far ahead of real time. - */ - int64_t cur_icount = cpu_get_icount_locked(); - int64_t delta = clock - cur_icount; - warp_delta = MIN(warp_delta, delta); - } - atomic_set_i64(&timers_state.qemu_icount_bias, - timers_state.qemu_icount_bias + warp_delta); - } - timers_state.vm_clock_warp_start = -1; - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - - if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - } -} - -static void icount_timer_cb(void *opaque) -{ - /* No need for a checkpoint because the timer already synchronizes - * with CHECKPOINT_CLOCK_VIRTUAL_RT. - */ - icount_warp_rt(); -} - -void qtest_clock_warp(int64_t dest) -{ - int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - AioContext *aio_context; - assert(qtest_enabled()); - aio_context = qemu_get_aio_context(); - while (clock < dest) { - int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - QEMU_TIMER_ATTR_ALL); - int64_t warp = qemu_soonest_timeout(dest - clock, deadline); - - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - atomic_set_i64(&timers_state.qemu_icount_bias, - timers_state.qemu_icount_bias + warp); - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - - qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); - timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]); - clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - } - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); -} - -void qemu_start_warp_timer(void) -{ - int64_t clock; - int64_t deadline; - - if (!use_icount) { - return; - } - - /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers - * do not fire, so computing the deadline does not make sense. - */ - if (!runstate_is_running()) { - return; - } - - if (replay_mode != REPLAY_MODE_PLAY) { - if (!all_cpu_threads_idle()) { - return; - } - - if (qtest_enabled()) { - /* When testing, qtest commands advance icount. */ - return; - } - - replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); - } else { - /* warp clock deterministically in record/replay mode */ - if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { - /* vCPU is sleeping and warp can't be started. - It is probably a race condition: notification sent - to vCPU was processed in advance and vCPU went to sleep. - Therefore we have to wake it up for doing someting. */ - if (replay_has_checkpoint()) { - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - } - return; - } - } - - /* We want to use the earliest deadline from ALL vm_clocks */ - clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); - deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - ~QEMU_TIMER_ATTR_EXTERNAL); - if (deadline < 0) { - static bool notified; - if (!icount_sleep && !notified) { - warn_report("icount sleep disabled and no active timers"); - notified = true; - } - return; - } - - if (deadline > 0) { - /* - * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to - * sleep. Otherwise, the CPU might be waiting for a future timer - * interrupt to wake it up, but the interrupt never comes because - * the vCPU isn't running any insns and thus doesn't advance the - * QEMU_CLOCK_VIRTUAL. - */ - if (!icount_sleep) { - /* - * We never let VCPUs sleep in no sleep icount mode. - * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance - * to the next QEMU_CLOCK_VIRTUAL event and notify it. - * It is useful when we want a deterministic execution time, - * isolated from host latencies. - */ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - atomic_set_i64(&timers_state.qemu_icount_bias, - timers_state.qemu_icount_bias + deadline); - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - } else { - /* - * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some - * "real" time, (related to the time left until the next event) has - * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. - * This avoids that the warps are visible externally; for example, - * you will not be sending network packets continuously instead of - * every 100ms. - */ - seqlock_write_lock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - if (timers_state.vm_clock_warp_start == -1 - || timers_state.vm_clock_warp_start > clock) { - timers_state.vm_clock_warp_start = clock; - } - seqlock_write_unlock(&timers_state.vm_clock_seqlock, - &timers_state.vm_clock_lock); - timer_mod_anticipate(timers_state.icount_warp_timer, - clock + deadline); - } - } else if (deadline == 0) { - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - } -} - -static void qemu_account_warp_timer(void) -{ - if (!use_icount || !icount_sleep) { - return; - } - - /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers - * do not fire, so computing the deadline does not make sense. - */ - if (!runstate_is_running()) { - return; - } - - /* warp clock deterministically in record/replay mode */ - if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { - return; - } - - timer_del(timers_state.icount_warp_timer); - icount_warp_rt(); -} - -static bool icount_state_needed(void *opaque) -{ - return use_icount; -} - -static bool warp_timer_state_needed(void *opaque) -{ - TimersState *s = opaque; - return s->icount_warp_timer != NULL; -} - -static bool adjust_timers_state_needed(void *opaque) -{ - TimersState *s = opaque; - return s->icount_rt_timer != NULL; -} - -static bool shift_state_needed(void *opaque) -{ - return use_icount == 2; -} - -/* - * Subsection for warp timer migration is optional, because may not be created - */ -static const VMStateDescription icount_vmstate_warp_timer = { - .name = "timer/icount/warp_timer", - .version_id = 1, - .minimum_version_id = 1, - .needed = warp_timer_state_needed, - .fields = (VMStateField[]) { - VMSTATE_INT64(vm_clock_warp_start, TimersState), - VMSTATE_TIMER_PTR(icount_warp_timer, TimersState), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription icount_vmstate_adjust_timers = { - .name = "timer/icount/timers", - .version_id = 1, - .minimum_version_id = 1, - .needed = adjust_timers_state_needed, - .fields = (VMStateField[]) { - VMSTATE_TIMER_PTR(icount_rt_timer, TimersState), - VMSTATE_TIMER_PTR(icount_vm_timer, TimersState), - VMSTATE_END_OF_LIST() - } -}; - -static const VMStateDescription icount_vmstate_shift = { - .name = "timer/icount/shift", - .version_id = 1, - .minimum_version_id = 1, - .needed = shift_state_needed, - .fields = (VMStateField[]) { - VMSTATE_INT16(icount_time_shift, TimersState), - VMSTATE_END_OF_LIST() - } -}; - -/* - * This is a subsection for icount migration. - */ -static const VMStateDescription icount_vmstate_timers = { - .name = "timer/icount", - .version_id = 1, - .minimum_version_id = 1, - .needed = icount_state_needed, - .fields = (VMStateField[]) { - VMSTATE_INT64(qemu_icount_bias, TimersState), - VMSTATE_INT64(qemu_icount, TimersState), - VMSTATE_END_OF_LIST() - }, - .subsections = (const VMStateDescription*[]) { - &icount_vmstate_warp_timer, - &icount_vmstate_adjust_timers, - &icount_vmstate_shift, - NULL - } -}; - -static const VMStateDescription vmstate_timers = { - .name = "timer", - .version_id = 2, - .minimum_version_id = 1, - .fields = (VMStateField[]) { - VMSTATE_INT64(cpu_ticks_offset, TimersState), - VMSTATE_UNUSED(8), - VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2), - VMSTATE_END_OF_LIST() - }, - .subsections = (const VMStateDescription*[]) { - &icount_vmstate_timers, - NULL - } -}; - -static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) -{ - double pct; - double throttle_ratio; - int64_t sleeptime_ns, endtime_ns; - - if (!cpu_throttle_get_percentage()) { - return; - } - - pct = (double)cpu_throttle_get_percentage()/100; - throttle_ratio = pct / (1 - pct); - /* Add 1ns to fix double's rounding error (like 0.9999999...) */ - sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1); - endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns; - while (sleeptime_ns > 0 && !cpu->stop) { - if (sleeptime_ns > SCALE_MS) { - qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex, - sleeptime_ns / SCALE_MS); - } else { - qemu_mutex_unlock_iothread(); - g_usleep(sleeptime_ns / SCALE_US); - qemu_mutex_lock_iothread(); - } - sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - } - atomic_set(&cpu->throttle_thread_scheduled, 0); -} - -static void cpu_throttle_timer_tick(void *opaque) -{ - CPUState *cpu; - double pct; - - /* Stop the timer if needed */ - if (!cpu_throttle_get_percentage()) { - return; - } - CPU_FOREACH(cpu) { - if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) { - async_run_on_cpu(cpu, cpu_throttle_thread, - RUN_ON_CPU_NULL); - } - } - - pct = (double)cpu_throttle_get_percentage()/100; - timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + - CPU_THROTTLE_TIMESLICE_NS / (1-pct)); -} - -void cpu_throttle_set(int new_throttle_pct) -{ - /* Ensure throttle percentage is within valid range */ - new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX); - new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN); - - atomic_set(&throttle_percentage, new_throttle_pct); - - timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + - CPU_THROTTLE_TIMESLICE_NS); -} - -void cpu_throttle_stop(void) -{ - atomic_set(&throttle_percentage, 0); -} - -bool cpu_throttle_active(void) -{ - return (cpu_throttle_get_percentage() != 0); -} - -int cpu_throttle_get_percentage(void) -{ - return atomic_read(&throttle_percentage); -} - -void cpu_ticks_init(void) -{ - seqlock_init(&timers_state.vm_clock_seqlock); - qemu_spin_init(&timers_state.vm_clock_lock); - vmstate_register(NULL, 0, &vmstate_timers, &timers_state); - throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, - cpu_throttle_timer_tick, NULL); -} - -void configure_icount(QemuOpts *opts, Error **errp) -{ - const char *option = qemu_opt_get(opts, "shift"); - bool sleep = qemu_opt_get_bool(opts, "sleep", true); - bool align = qemu_opt_get_bool(opts, "align", false); - long time_shift = -1; - - if (!option) { - if (qemu_opt_get(opts, "align") != NULL) { - error_setg(errp, "Please specify shift option when using align"); - } - return; - } - - if (align && !sleep) { - error_setg(errp, "align=on and sleep=off are incompatible"); - return; - } - - if (strcmp(option, "auto") != 0) { - if (qemu_strtol(option, NULL, 0, &time_shift) < 0 - || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { - error_setg(errp, "icount: Invalid shift value"); - return; - } - } else if (icount_align_option) { - error_setg(errp, "shift=auto and align=on are incompatible"); - return; - } else if (!icount_sleep) { - error_setg(errp, "shift=auto and sleep=off are incompatible"); - return; - } - - icount_sleep = sleep; - if (icount_sleep) { - timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, - icount_timer_cb, NULL); - } - - icount_align_option = align; - - if (time_shift >= 0) { - timers_state.icount_time_shift = time_shift; - use_icount = 1; - return; - } - - use_icount = 2; - - /* 125MIPS seems a reasonable initial guess at the guest speed. - It will be corrected fairly quickly anyway. */ - timers_state.icount_time_shift = 3; - - /* Have both realtime and virtual time triggers for speed adjustment. - The realtime trigger catches emulated time passing too slowly, - the virtual time trigger catches emulated time passing too fast. - Realtime triggers occur even when idle, so use them less frequently - than VM triggers. */ - timers_state.vm_clock_warp_start = -1; - timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, - icount_adjust_rt, NULL); - timer_mod(timers_state.icount_rt_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); - timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - icount_adjust_vm, NULL); - timer_mod(timers_state.icount_vm_timer, - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - NANOSECONDS_PER_SECOND / 10); -} - -/***********************************************************/ -/* TCG vCPU kick timer - * - * The kick timer is responsible for moving single threaded vCPU - * emulation on to the next vCPU. If more than one vCPU is running a - * timer event with force a cpu->exit so the next vCPU can get - * scheduled. - * - * The timer is removed if all vCPUs are idle and restarted again once - * idleness is complete. - */ - -static QEMUTimer *tcg_kick_vcpu_timer; -static CPUState *tcg_current_rr_cpu; - -#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10) - -static inline int64_t qemu_tcg_next_kick(void) -{ - return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD; -} - -/* Kick the currently round-robin scheduled vCPU to next */ -static void qemu_cpu_kick_rr_next_cpu(void) -{ - CPUState *cpu; - do { - cpu = atomic_mb_read(&tcg_current_rr_cpu); - if (cpu) { - cpu_exit(cpu); - } - } while (cpu != atomic_mb_read(&tcg_current_rr_cpu)); -} - -/* Kick all RR vCPUs */ -static void qemu_cpu_kick_rr_cpus(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_exit(cpu); - }; -} - -static void do_nothing(CPUState *cpu, run_on_cpu_data unused) -{ -} - -void qemu_timer_notify_cb(void *opaque, QEMUClockType type) -{ - if (!use_icount || type != QEMU_CLOCK_VIRTUAL) { - qemu_notify_event(); - return; - } - - if (qemu_in_vcpu_thread()) { - /* A CPU is currently running; kick it back out to the - * tcg_cpu_exec() loop so it will recalculate its - * icount deadline immediately. - */ - qemu_cpu_kick(current_cpu); - } else if (first_cpu) { - /* qemu_cpu_kick is not enough to kick a halted CPU out of - * qemu_tcg_wait_io_event. async_run_on_cpu, instead, - * causes cpu_thread_is_idle to return false. This way, - * handle_icount_deadline can run. - * If we have no CPUs at all for some reason, we don't - * need to do anything. - */ - async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL); - } -} - -static void kick_tcg_thread(void *opaque) -{ - timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); - qemu_cpu_kick_rr_next_cpu(); -} - -static void start_tcg_kick_timer(void) -{ - assert(!mttcg_enabled); - if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) { - tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, - kick_tcg_thread, NULL); - } - if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) { - timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick()); - } -} - -static void stop_tcg_kick_timer(void) -{ - assert(!mttcg_enabled); - if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) { - timer_del(tcg_kick_vcpu_timer); - } -} - -/***********************************************************/ -void hw_error(const char *fmt, ...) -{ - va_list ap; - CPUState *cpu; - - va_start(ap, fmt); - fprintf(stderr, "qemu: hardware error: "); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - CPU_FOREACH(cpu) { - fprintf(stderr, "CPU #%d:\n", cpu->cpu_index); - cpu_dump_state(cpu, stderr, CPU_DUMP_FPU); - } - va_end(ap); - abort(); -} - -void cpu_synchronize_all_states(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_state(cpu); - /* TODO: move to cpu_synchronize_state() */ - if (hvf_enabled()) { - hvf_cpu_synchronize_state(cpu); - } - } -} - -void cpu_synchronize_all_post_reset(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_post_reset(cpu); - /* TODO: move to cpu_synchronize_post_reset() */ - if (hvf_enabled()) { - hvf_cpu_synchronize_post_reset(cpu); - } - } -} - -void cpu_synchronize_all_post_init(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_post_init(cpu); - /* TODO: move to cpu_synchronize_post_init() */ - if (hvf_enabled()) { - hvf_cpu_synchronize_post_init(cpu); - } - } -} - -void cpu_synchronize_all_pre_loadvm(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - cpu_synchronize_pre_loadvm(cpu); - } -} - -static int do_vm_stop(RunState state, bool send_stop) -{ - int ret = 0; - - if (runstate_is_running()) { - runstate_set(state); - cpu_disable_ticks(); - pause_all_vcpus(); - vm_state_notify(0, state); - if (send_stop) { - qapi_event_send_stop(); - } - } - - bdrv_drain_all(); - ret = bdrv_flush_all(); - - return ret; -} - -/* Special vm_stop() variant for terminating the process. Historically clients - * did not expect a QMP STOP event and so we need to retain compatibility. - */ -int vm_shutdown(void) -{ - return do_vm_stop(RUN_STATE_SHUTDOWN, false); -} - -static bool cpu_can_run(CPUState *cpu) -{ - if (cpu->stop) { - return false; - } - if (cpu_is_stopped(cpu)) { - return false; - } - return true; -} - -static void cpu_handle_guest_debug(CPUState *cpu) -{ - gdb_set_stop_cpu(cpu); - qemu_system_debug_request(); - cpu->stopped = true; -} - -#ifdef CONFIG_LINUX -static void sigbus_reraise(void) -{ - sigset_t set; - struct sigaction action; - - memset(&action, 0, sizeof(action)); - action.sa_handler = SIG_DFL; - if (!sigaction(SIGBUS, &action, NULL)) { - raise(SIGBUS); - sigemptyset(&set); - sigaddset(&set, SIGBUS); - pthread_sigmask(SIG_UNBLOCK, &set, NULL); - } - perror("Failed to re-raise SIGBUS!\n"); - abort(); -} - -static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) -{ - if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) { - sigbus_reraise(); - } - - if (current_cpu) { - /* Called asynchronously in VCPU thread. */ - if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) { - sigbus_reraise(); - } - } else { - /* Called synchronously (via signalfd) in main thread. */ - if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { - sigbus_reraise(); - } - } -} - -static void qemu_init_sigbus(void) -{ - struct sigaction action; - - memset(&action, 0, sizeof(action)); - action.sa_flags = SA_SIGINFO; - action.sa_sigaction = sigbus_handler; - sigaction(SIGBUS, &action, NULL); - - prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); -} -#else /* !CONFIG_LINUX */ -static void qemu_init_sigbus(void) -{ -} -#endif /* !CONFIG_LINUX */ - -static QemuThread io_thread; - -/* cpu creation */ -static QemuCond qemu_cpu_cond; -/* system init */ -static QemuCond qemu_pause_cond; - -void qemu_init_cpu_loop(void) -{ - qemu_init_sigbus(); - qemu_cond_init(&qemu_cpu_cond); - qemu_cond_init(&qemu_pause_cond); - qemu_mutex_init(&qemu_global_mutex); - - qemu_thread_get_self(&io_thread); -} - -void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) -{ - do_run_on_cpu(cpu, func, data, &qemu_global_mutex); -} - -static void qemu_kvm_destroy_vcpu(CPUState *cpu) -{ - if (kvm_destroy_vcpu(cpu) < 0) { - error_report("kvm_destroy_vcpu failed"); - exit(EXIT_FAILURE); - } -} - -static void qemu_tcg_destroy_vcpu(CPUState *cpu) -{ -} - -static void qemu_cpu_stop(CPUState *cpu, bool exit) -{ - g_assert(qemu_cpu_is_self(cpu)); - cpu->stop = false; - cpu->stopped = true; - if (exit) { - cpu_exit(cpu); - } - qemu_cond_broadcast(&qemu_pause_cond); -} - -static void qemu_wait_io_event_common(CPUState *cpu) -{ - atomic_mb_set(&cpu->thread_kicked, false); - if (cpu->stop) { - qemu_cpu_stop(cpu, false); - } - process_queued_cpu_work(cpu); -} - -static void qemu_tcg_rr_wait_io_event(void) -{ - CPUState *cpu; - - while (all_cpu_threads_idle()) { - stop_tcg_kick_timer(); - qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex); - } - - start_tcg_kick_timer(); - - CPU_FOREACH(cpu) { - qemu_wait_io_event_common(cpu); - } -} - -static void qemu_wait_io_event(CPUState *cpu) -{ - bool slept = false; - - while (cpu_thread_is_idle(cpu)) { - if (!slept) { - slept = true; - qemu_plugin_vcpu_idle_cb(cpu); - } - qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); - } - if (slept) { - qemu_plugin_vcpu_resume_cb(cpu); - } - -#ifdef _WIN32 - /* Eat dummy APC queued by qemu_cpu_kick_thread. */ - if (!tcg_enabled()) { - SleepEx(0, TRUE); - } -#endif - qemu_wait_io_event_common(cpu); -} - -static void *qemu_kvm_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - int r; - - rcu_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - cpu->thread_id = qemu_get_thread_id(); - cpu->can_do_io = 1; - current_cpu = cpu; - - r = kvm_init_vcpu(cpu); - if (r < 0) { - error_report("kvm_init_vcpu failed: %s", strerror(-r)); - exit(1); - } - - kvm_init_cpu_signals(cpu); - - /* signal CPU creation */ - cpu->created = true; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - if (cpu_can_run(cpu)) { - r = kvm_cpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - } - } - qemu_wait_io_event(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - - qemu_kvm_destroy_vcpu(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -} - -static void *qemu_dummy_cpu_thread_fn(void *arg) -{ -#ifdef _WIN32 - error_report("qtest is not supported under Windows"); - exit(1); -#else - CPUState *cpu = arg; - sigset_t waitset; - int r; - - rcu_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - cpu->thread_id = qemu_get_thread_id(); - cpu->can_do_io = 1; - current_cpu = cpu; - - sigemptyset(&waitset); - sigaddset(&waitset, SIG_IPI); - - /* signal CPU creation */ - cpu->created = true; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - qemu_mutex_unlock_iothread(); - do { - int sig; - r = sigwait(&waitset, &sig); - } while (r == -1 && (errno == EAGAIN || errno == EINTR)); - if (r == -1) { - perror("sigwait"); - exit(1); - } - qemu_mutex_lock_iothread(); - qemu_wait_io_event(cpu); - } while (!cpu->unplug); - - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -#endif -} - -static int64_t tcg_get_icount_limit(void) -{ - int64_t deadline; - - if (replay_mode != REPLAY_MODE_PLAY) { - /* - * Include all the timers, because they may need an attention. - * Too long CPU execution may create unnecessary delay in UI. - */ - deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - QEMU_TIMER_ATTR_ALL); - /* Check realtime timers, because they help with input processing */ - deadline = qemu_soonest_timeout(deadline, - qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME, - QEMU_TIMER_ATTR_ALL)); - - /* Maintain prior (possibly buggy) behaviour where if no deadline - * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than - * INT32_MAX nanoseconds ahead, we still use INT32_MAX - * nanoseconds. - */ - if ((deadline < 0) || (deadline > INT32_MAX)) { - deadline = INT32_MAX; - } - - return qemu_icount_round(deadline); - } else { - return replay_get_instructions(); - } -} - -static void notify_aio_contexts(void) -{ - /* Wake up other AioContexts. */ - qemu_clock_notify(QEMU_CLOCK_VIRTUAL); - qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL); -} - -static void handle_icount_deadline(void) -{ - assert(qemu_in_vcpu_thread()); - if (use_icount) { - int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, - QEMU_TIMER_ATTR_ALL); - - if (deadline == 0) { - notify_aio_contexts(); - } - } -} - -static void prepare_icount_for_run(CPUState *cpu) -{ - if (use_icount) { - int insns_left; - - /* These should always be cleared by process_icount_data after - * each vCPU execution. However u16.high can be raised - * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt - */ - g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0); - g_assert(cpu->icount_extra == 0); - - cpu->icount_budget = tcg_get_icount_limit(); - insns_left = MIN(0xffff, cpu->icount_budget); - cpu_neg(cpu)->icount_decr.u16.low = insns_left; - cpu->icount_extra = cpu->icount_budget - insns_left; - - replay_mutex_lock(); - - if (cpu->icount_budget == 0 && replay_has_checkpoint()) { - notify_aio_contexts(); - } - } -} - -static void process_icount_data(CPUState *cpu) -{ - if (use_icount) { - /* Account for executed instructions */ - cpu_update_icount(cpu); - - /* Reset the counters */ - cpu_neg(cpu)->icount_decr.u16.low = 0; - cpu->icount_extra = 0; - cpu->icount_budget = 0; - - replay_account_executed_instructions(); - - replay_mutex_unlock(); - } -} - - -static int tcg_cpu_exec(CPUState *cpu) -{ - int ret; -#ifdef CONFIG_PROFILER - int64_t ti; -#endif - - assert(tcg_enabled()); -#ifdef CONFIG_PROFILER - ti = profile_getclock(); -#endif - cpu_exec_start(cpu); - ret = cpu_exec(cpu); - cpu_exec_end(cpu); -#ifdef CONFIG_PROFILER - atomic_set(&tcg_ctx->prof.cpu_exec_time, - tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); -#endif - return ret; -} - -/* Destroy any remaining vCPUs which have been unplugged and have - * finished running - */ -static void deal_with_unplugged_cpus(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - if (cpu->unplug && !cpu_can_run(cpu)) { - qemu_tcg_destroy_vcpu(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - break; - } - } -} - -/* Single-threaded TCG - * - * In the single-threaded case each vCPU is simulated in turn. If - * there is more than a single vCPU we create a simple timer to kick - * the vCPU and ensure we don't get stuck in a tight loop in one vCPU. - * This is done explicitly rather than relying on side-effects - * elsewhere. - */ - -static void *qemu_tcg_rr_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - - assert(tcg_enabled()); - rcu_register_thread(); - tcg_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - - cpu->thread_id = qemu_get_thread_id(); - cpu->created = true; - cpu->can_do_io = 1; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - /* wait for initial kick-off after machine start */ - while (first_cpu->stopped) { - qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex); - - /* process any pending work */ - CPU_FOREACH(cpu) { - current_cpu = cpu; - qemu_wait_io_event_common(cpu); - } - } - - start_tcg_kick_timer(); - - cpu = first_cpu; - - /* process any pending work */ - cpu->exit_request = 1; - - while (1) { - qemu_mutex_unlock_iothread(); - replay_mutex_lock(); - qemu_mutex_lock_iothread(); - /* Account partial waits to QEMU_CLOCK_VIRTUAL. */ - qemu_account_warp_timer(); - - /* Run the timers here. This is much more efficient than - * waking up the I/O thread and waiting for completion. - */ - handle_icount_deadline(); - - replay_mutex_unlock(); - - if (!cpu) { - cpu = first_cpu; - } - - while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) { - - atomic_mb_set(&tcg_current_rr_cpu, cpu); - current_cpu = cpu; - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, - (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0); - - if (cpu_can_run(cpu)) { - int r; - - qemu_mutex_unlock_iothread(); - prepare_icount_for_run(cpu); - - r = tcg_cpu_exec(cpu); - - process_icount_data(cpu); - qemu_mutex_lock_iothread(); - - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - break; - } else if (r == EXCP_ATOMIC) { - qemu_mutex_unlock_iothread(); - cpu_exec_step_atomic(cpu); - qemu_mutex_lock_iothread(); - break; - } - } else if (cpu->stop) { - if (cpu->unplug) { - cpu = CPU_NEXT(cpu); - } - break; - } - - cpu = CPU_NEXT(cpu); - } /* while (cpu && !cpu->exit_request).. */ - - /* Does not need atomic_mb_set because a spurious wakeup is okay. */ - atomic_set(&tcg_current_rr_cpu, NULL); - - if (cpu && cpu->exit_request) { - atomic_mb_set(&cpu->exit_request, 0); - } - - if (use_icount && all_cpu_threads_idle()) { - /* - * When all cpus are sleeping (e.g in WFI), to avoid a deadlock - * in the main_loop, wake it up in order to start the warp timer. - */ - qemu_notify_event(); - } - - qemu_tcg_rr_wait_io_event(); - deal_with_unplugged_cpus(); - } - - rcu_unregister_thread(); - return NULL; -} - -static void *qemu_hax_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - int r; - - rcu_register_thread(); - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - - cpu->thread_id = qemu_get_thread_id(); - cpu->created = true; - current_cpu = cpu; - - hax_init_vcpu(cpu); - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - if (cpu_can_run(cpu)) { - r = hax_smp_cpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - } - } - - qemu_wait_io_event(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - rcu_unregister_thread(); - return NULL; -} - -/* The HVF-specific vCPU thread function. This one should only run when the host - * CPU supports the VMX "unrestricted guest" feature. */ -static void *qemu_hvf_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - - int r; - - assert(hvf_enabled()); - - rcu_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - - cpu->thread_id = qemu_get_thread_id(); - cpu->can_do_io = 1; - current_cpu = cpu; - - hvf_init_vcpu(cpu); - - /* signal CPU creation */ - cpu->created = true; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - if (cpu_can_run(cpu)) { - r = hvf_vcpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - } - } - qemu_wait_io_event(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - - hvf_vcpu_destroy(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -} - -static void *qemu_whpx_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - int r; - - rcu_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - cpu->thread_id = qemu_get_thread_id(); - current_cpu = cpu; - - r = whpx_init_vcpu(cpu); - if (r < 0) { - fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r)); - exit(1); - } - - /* signal CPU creation */ - cpu->created = true; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - do { - if (cpu_can_run(cpu)) { - r = whpx_vcpu_exec(cpu); - if (r == EXCP_DEBUG) { - cpu_handle_guest_debug(cpu); - } - } - while (cpu_thread_is_idle(cpu)) { - qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); - } - qemu_wait_io_event_common(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - - whpx_destroy_vcpu(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -} - -#ifdef _WIN32 -static void CALLBACK dummy_apc_func(ULONG_PTR unused) -{ -} -#endif - -/* Multi-threaded TCG - * - * In the multi-threaded case each vCPU has its own thread. The TLS - * variable current_cpu can be used deep in the code to find the - * current CPUState for a given thread. - */ - -static void *qemu_tcg_cpu_thread_fn(void *arg) -{ - CPUState *cpu = arg; - - assert(tcg_enabled()); - g_assert(!use_icount); - - rcu_register_thread(); - tcg_register_thread(); - - qemu_mutex_lock_iothread(); - qemu_thread_get_self(cpu->thread); - - cpu->thread_id = qemu_get_thread_id(); - cpu->created = true; - cpu->can_do_io = 1; - current_cpu = cpu; - qemu_cond_signal(&qemu_cpu_cond); - qemu_guest_random_seed_thread_part2(cpu->random_seed); - - /* process any pending work */ - cpu->exit_request = 1; - - do { - if (cpu_can_run(cpu)) { - int r; - qemu_mutex_unlock_iothread(); - r = tcg_cpu_exec(cpu); - qemu_mutex_lock_iothread(); - switch (r) { - case EXCP_DEBUG: - cpu_handle_guest_debug(cpu); - break; - case EXCP_HALTED: - /* during start-up the vCPU is reset and the thread is - * kicked several times. If we don't ensure we go back - * to sleep in the halted state we won't cleanly - * start-up when the vCPU is enabled. - * - * cpu->halted should ensure we sleep in wait_io_event - */ - g_assert(cpu->halted); - break; - case EXCP_ATOMIC: - qemu_mutex_unlock_iothread(); - cpu_exec_step_atomic(cpu); - qemu_mutex_lock_iothread(); - default: - /* Ignore everything else? */ - break; - } - } - - atomic_mb_set(&cpu->exit_request, 0); - qemu_wait_io_event(cpu); - } while (!cpu->unplug || cpu_can_run(cpu)); - - qemu_tcg_destroy_vcpu(cpu); - cpu->created = false; - qemu_cond_signal(&qemu_cpu_cond); - qemu_mutex_unlock_iothread(); - rcu_unregister_thread(); - return NULL; -} - -static void qemu_cpu_kick_thread(CPUState *cpu) -{ -#ifndef _WIN32 - int err; - - if (cpu->thread_kicked) { - return; - } - cpu->thread_kicked = true; - err = pthread_kill(cpu->thread->thread, SIG_IPI); - if (err && err != ESRCH) { - fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); - exit(1); - } -#else /* _WIN32 */ - if (!qemu_cpu_is_self(cpu)) { - if (whpx_enabled()) { - whpx_vcpu_kick(cpu); - } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) { - fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n", - __func__, GetLastError()); - exit(1); - } - } -#endif -} - -void qemu_cpu_kick(CPUState *cpu) -{ - qemu_cond_broadcast(cpu->halt_cond); - if (tcg_enabled()) { - if (qemu_tcg_mttcg_enabled()) { - cpu_exit(cpu); - } else { - qemu_cpu_kick_rr_cpus(); - } - } else { - if (hax_enabled()) { - /* - * FIXME: race condition with the exit_request check in - * hax_vcpu_hax_exec - */ - cpu->exit_request = 1; - } - qemu_cpu_kick_thread(cpu); - } -} - -void qemu_cpu_kick_self(void) -{ - assert(current_cpu); - qemu_cpu_kick_thread(current_cpu); -} - -bool qemu_cpu_is_self(CPUState *cpu) -{ - return qemu_thread_is_self(cpu->thread); -} - -bool qemu_in_vcpu_thread(void) -{ - return current_cpu && qemu_cpu_is_self(current_cpu); -} - -static __thread bool iothread_locked = false; - -bool qemu_mutex_iothread_locked(void) -{ - return iothread_locked; -} - -/* - * The BQL is taken from so many places that it is worth profiling the - * callers directly, instead of funneling them all through a single function. - */ -void qemu_mutex_lock_iothread_impl(const char *file, int line) -{ - QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func); - - g_assert(!qemu_mutex_iothread_locked()); - bql_lock(&qemu_global_mutex, file, line); - iothread_locked = true; -} - -void qemu_mutex_unlock_iothread(void) -{ - g_assert(qemu_mutex_iothread_locked()); - iothread_locked = false; - qemu_mutex_unlock(&qemu_global_mutex); -} - -void qemu_cond_wait_iothread(QemuCond *cond) -{ - qemu_cond_wait(cond, &qemu_global_mutex); -} - -static bool all_vcpus_paused(void) -{ - CPUState *cpu; - - CPU_FOREACH(cpu) { - if (!cpu->stopped) { - return false; - } - } - - return true; -} - -void pause_all_vcpus(void) -{ - CPUState *cpu; - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); - CPU_FOREACH(cpu) { - if (qemu_cpu_is_self(cpu)) { - qemu_cpu_stop(cpu, true); - } else { - cpu->stop = true; - qemu_cpu_kick(cpu); - } - } - - /* We need to drop the replay_lock so any vCPU threads woken up - * can finish their replay tasks - */ - replay_mutex_unlock(); - - while (!all_vcpus_paused()) { - qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); - CPU_FOREACH(cpu) { - qemu_cpu_kick(cpu); - } - } - - qemu_mutex_unlock_iothread(); - replay_mutex_lock(); - qemu_mutex_lock_iothread(); -} - -void cpu_resume(CPUState *cpu) -{ - cpu->stop = false; - cpu->stopped = false; - qemu_cpu_kick(cpu); -} - -void resume_all_vcpus(void) -{ - CPUState *cpu; - - if (!runstate_is_running()) { - return; - } - - qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); - CPU_FOREACH(cpu) { - cpu_resume(cpu); - } -} - -void cpu_remove_sync(CPUState *cpu) -{ - cpu->stop = true; - cpu->unplug = true; - qemu_cpu_kick(cpu); - qemu_mutex_unlock_iothread(); - qemu_thread_join(cpu->thread); - qemu_mutex_lock_iothread(); -} - -/* For temporary buffers for forming a name */ -#define VCPU_THREAD_NAME_SIZE 16 - -static void qemu_tcg_init_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - static QemuCond *single_tcg_halt_cond; - static QemuThread *single_tcg_cpu_thread; - static int tcg_region_inited; - - assert(tcg_enabled()); - /* - * Initialize TCG regions--once. Now is a good time, because: - * (1) TCG's init context, prologue and target globals have been set up. - * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the - * -accel flag is processed, so the check doesn't work then). - */ - if (!tcg_region_inited) { - tcg_region_inited = 1; - tcg_region_init(); - } - - if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) { - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - - if (qemu_tcg_mttcg_enabled()) { - /* create a thread per vCPU with TCG (MTTCG) */ - parallel_cpus = true; - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG", - cpu->cpu_index); - - qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); - - } else { - /* share a single thread for all cpus with TCG */ - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG"); - qemu_thread_create(cpu->thread, thread_name, - qemu_tcg_rr_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); - - single_tcg_halt_cond = cpu->halt_cond; - single_tcg_cpu_thread = cpu->thread; - } -#ifdef _WIN32 - cpu->hThread = qemu_thread_get_handle(cpu->thread); -#endif - } else { - /* For non-MTTCG cases we share the thread */ - cpu->thread = single_tcg_cpu_thread; - cpu->halt_cond = single_tcg_halt_cond; - cpu->thread_id = first_cpu->thread_id; - cpu->can_do_io = 1; - cpu->created = true; - } -} - -static void qemu_hax_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); -#ifdef _WIN32 - cpu->hThread = qemu_thread_get_handle(cpu->thread); -#endif -} - -static void qemu_kvm_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); -} - -static void qemu_hvf_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - /* HVF currently does not support TCG, and only runs in - * unrestricted-guest mode. */ - assert(hvf_enabled()); - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); -} - -static void qemu_whpx_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn, - cpu, QEMU_THREAD_JOINABLE); -#ifdef _WIN32 - cpu->hThread = qemu_thread_get_handle(cpu->thread); -#endif -} - -static void qemu_dummy_start_vcpu(CPUState *cpu) -{ - char thread_name[VCPU_THREAD_NAME_SIZE]; - - cpu->thread = g_malloc0(sizeof(QemuThread)); - cpu->halt_cond = g_malloc0(sizeof(QemuCond)); - qemu_cond_init(cpu->halt_cond); - snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY", - cpu->cpu_index); - qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu, - QEMU_THREAD_JOINABLE); -} - -void qemu_init_vcpu(CPUState *cpu) -{ - MachineState *ms = MACHINE(qdev_get_machine()); - - cpu->nr_cores = ms->smp.cores; - cpu->nr_threads = ms->smp.threads; - cpu->stopped = true; - cpu->random_seed = qemu_guest_random_seed_thread_part1(); - - if (!cpu->as) { - /* If the target cpu hasn't set up any address spaces itself, - * give it the default one. - */ - cpu->num_ases = 1; - cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory); - } - - if (kvm_enabled()) { - qemu_kvm_start_vcpu(cpu); - } else if (hax_enabled()) { - qemu_hax_start_vcpu(cpu); - } else if (hvf_enabled()) { - qemu_hvf_start_vcpu(cpu); - } else if (tcg_enabled()) { - qemu_tcg_init_vcpu(cpu); - } else if (whpx_enabled()) { - qemu_whpx_start_vcpu(cpu); - } else { - qemu_dummy_start_vcpu(cpu); - } - - while (!cpu->created) { - qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); - } -} - -void cpu_stop_current(void) -{ - if (current_cpu) { - current_cpu->stop = true; - cpu_exit(current_cpu); - } -} - -int vm_stop(RunState state) -{ - if (qemu_in_vcpu_thread()) { - qemu_system_vmstop_request_prepare(); - qemu_system_vmstop_request(state); - /* - * FIXME: should not return to device code in case - * vm_stop() has been requested. - */ - cpu_stop_current(); - return 0; - } - - return do_vm_stop(state, true); -} - -/** - * Prepare for (re)starting the VM. - * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already - * running or in case of an error condition), 0 otherwise. - */ -int vm_prepare_start(void) -{ - RunState requested; - - qemu_vmstop_requested(&requested); - if (runstate_is_running() && requested == RUN_STATE__MAX) { - return -1; - } - - /* Ensure that a STOP/RESUME pair of events is emitted if a - * vmstop request was pending. The BLOCK_IO_ERROR event, for - * example, according to documentation is always followed by - * the STOP event. - */ - if (runstate_is_running()) { - qapi_event_send_stop(); - qapi_event_send_resume(); - return -1; - } - - /* We are sending this now, but the CPUs will be resumed shortly later */ - qapi_event_send_resume(); - - cpu_enable_ticks(); - runstate_set(RUN_STATE_RUNNING); - vm_state_notify(1, RUN_STATE_RUNNING); - return 0; -} - -void vm_start(void) -{ - if (!vm_prepare_start()) { - resume_all_vcpus(); - } -} - -/* does a state transition even if the VM is already stopped, - current state is forgotten forever */ -int vm_stop_force_state(RunState state) -{ - if (runstate_is_running()) { - return vm_stop(state); - } else { - runstate_set(state); - - bdrv_drain_all(); - /* Make sure to return an error if the flush in a previous vm_stop() - * failed. */ - return bdrv_flush_all(); - } -} - -void list_cpus(const char *optarg) -{ - /* XXX: implement xxx_cpu_list for targets that still miss it */ -#if defined(cpu_list) - cpu_list(); -#endif -} - -void qmp_memsave(int64_t addr, int64_t size, const char *filename, - bool has_cpu, int64_t cpu_index, Error **errp) -{ - FILE *f; - uint32_t l; - CPUState *cpu; - uint8_t buf[1024]; - int64_t orig_addr = addr, orig_size = size; - - if (!has_cpu) { - cpu_index = 0; - } - - cpu = qemu_get_cpu(cpu_index); - if (cpu == NULL) { - error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index", - "a CPU number"); - return; - } - - f = fopen(filename, "wb"); - if (!f) { - error_setg_file_open(errp, errno, filename); - return; - } - - while (size != 0) { - l = sizeof(buf); - if (l > size) - l = size; - if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) { - error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64 - " specified", orig_addr, orig_size); - goto exit; - } - if (fwrite(buf, 1, l, f) != l) { - error_setg(errp, QERR_IO_ERROR); - goto exit; - } - addr += l; - size -= l; - } - -exit: - fclose(f); -} - -void qmp_pmemsave(int64_t addr, int64_t size, const char *filename, - Error **errp) -{ - FILE *f; - uint32_t l; - uint8_t buf[1024]; - - f = fopen(filename, "wb"); - if (!f) { - error_setg_file_open(errp, errno, filename); - return; - } - - while (size != 0) { - l = sizeof(buf); - if (l > size) - l = size; - cpu_physical_memory_read(addr, buf, l); - if (fwrite(buf, 1, l, f) != l) { - error_setg(errp, QERR_IO_ERROR); - goto exit; - } - addr += l; - size -= l; - } - -exit: - fclose(f); -} - -void qmp_inject_nmi(Error **errp) -{ - nmi_monitor_handle(monitor_get_cpu_index(), errp); -} - -void dump_drift_info(void) -{ - if (!use_icount) { - return; - }

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.