x86/HVM: fix direct PCI port I/O emulation retry and error handling dpci_ioport_{read,write}() guest memory access failure handling should be modelled after process_portio_intercept()'s (and others): Upon encountering an error on other than the first iteration, the count successfully handled needs to be stored and X86EMUL_OKAY returned, in order for the generic instruction emulator to update register state correctly before reporting failure or retrying (both of which would only happen after re-invoking emulation). Further we leverage (and slightly extend, due to the above mentioned need to return X86EMUL_OKAY) the "large MMIO" retry model. Note that there is still a special case not explicitly taken care of here: While the first retry on the last iteration of a "rep ins" correctly recovers the already read data, an eventual subsequent retry is being handled by the pre-existing mmio-large logic (through hvmemul_do_io() storing the [recovered] data [again], also taking into consideration that the emulator converts a single iteration "ins" to ->read_io() plus ->write()). Also fix an off-by-one in the mmio-large-read logic, and slightly simplify the copying of the data. Signed-off-by: Jan Beulich --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -173,6 +173,13 @@ static int hvmemul_do_io( (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion; vio->io_size = size; + /* + * When retrying a repeated string instruction, force exit to guest after + * completion of the retried iteration to allow handling of interrupts. + */ + if ( vio->mmio_retrying ) + *reps = 1; + p->dir = dir; p->data_is_ptr = value_is_ptr; p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO; @@ -202,8 +209,14 @@ static int hvmemul_do_io( case X86EMUL_RETRY: *reps = p->count; p->state = STATE_IORESP_READY; - hvm_io_assist(); - vio->io_state = HVMIO_none; + if ( !vio->mmio_retry ) + { + hvm_io_assist(); + vio->io_state = HVMIO_none; + } + else + /* Defer hvm_io_assist() invocation to hvm_do_resume(). */ + vio->io_state = HVMIO_handle_mmio_awaiting_completion; break; case X86EMUL_UNHANDLEABLE: rc = X86EMUL_RETRY; @@ -249,10 +262,9 @@ static int hvmemul_do_io( if ( bytes == 0 ) pa = vio->mmio_large_read_pa = addr; if ( (addr == (pa + bytes)) && - ((bytes + size) < - sizeof(vio->mmio_large_read)) ) + ((bytes + size) <= sizeof(vio->mmio_large_read)) ) { - memcpy(&vio->mmio_large_read[addr - pa], p_data, size); + memcpy(&vio->mmio_large_read[bytes], p_data, size); vio->mmio_large_read_bytes += size; } } @@ -1151,9 +1163,13 @@ int hvm_emulate_one( ? sizeof(hvmemul_ctxt->insn_buf) : 0; hvmemul_ctxt->exn_pending = 0; + vio->mmio_retrying = vio->mmio_retry; + vio->mmio_retry = 0; rc = x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops); + if ( rc == X86EMUL_OKAY && vio->mmio_retry ) + rc = X86EMUL_RETRY; if ( rc != X86EMUL_RETRY ) vio->mmio_large_read_bytes = vio->mmio_large_write_bytes = 0; --- a/xen/arch/x86/hvm/io.c +++ b/xen/arch/x86/hvm/io.c @@ -294,12 +294,21 @@ void hvm_io_assist(void) static int dpci_ioport_read(uint32_t mport, ioreq_t *p) { - int i, step = p->df ? -p->size : p->size; + struct hvm_vcpu_io *vio = ¤t->arch.hvm_vcpu.hvm_io; + int rc = X86EMUL_OKAY, i, step = p->df ? -p->size : p->size; uint32_t data = 0; for ( i = 0; i < p->count; i++ ) { - switch ( p->size ) + if ( vio->mmio_retrying ) + { + if ( vio->mmio_large_read_bytes != p->size ) + return X86EMUL_UNHANDLEABLE; + memcpy(&data, vio->mmio_large_read, p->size); + vio->mmio_large_read_bytes = 0; + vio->mmio_retrying = 0; + } + else switch ( p->size ) { case 1: data = inb(mport); @@ -316,22 +325,51 @@ static int dpci_ioport_read(uint32_t mpo if ( p->data_is_ptr ) { - int ret; - ret = hvm_copy_to_guest_phys(p->data + step * i, &data, p->size); - if ( (ret == HVMCOPY_gfn_paged_out) || - (ret == HVMCOPY_gfn_shared) ) - return X86EMUL_RETRY; + switch ( hvm_copy_to_guest_phys(p->data + step * i, + &data, p->size) ) + { + case HVMCOPY_okay: + break; + case HVMCOPY_gfn_paged_out: + case HVMCOPY_gfn_shared: + rc = X86EMUL_RETRY; + break; + case HVMCOPY_bad_gfn_to_mfn: + /* Drop the write as real hardware would. */ + continue; + case HVMCOPY_bad_gva_to_gfn: + ASSERT(0); + /* fall through */ + default: + rc = X86EMUL_UNHANDLEABLE; + break; + } + if ( rc != X86EMUL_OKAY) + break; } else p->data = data; } - return X86EMUL_OKAY; + if ( rc == X86EMUL_RETRY ) + { + vio->mmio_retry = 1; + vio->mmio_large_read_bytes = p->size; + memcpy(vio->mmio_large_read, &data, p->size); + } + + if ( i != 0 ) + { + p->count = i; + rc = X86EMUL_OKAY; + } + + return rc; } static int dpci_ioport_write(uint32_t mport, ioreq_t *p) { - int i, step = p->df ? -p->size : p->size; + int rc = X86EMUL_OKAY, i, step = p->df ? -p->size : p->size; uint32_t data; for ( i = 0; i < p->count; i++ ) @@ -346,7 +384,8 @@ static int dpci_ioport_write(uint32_t mp break; case HVMCOPY_gfn_paged_out: case HVMCOPY_gfn_shared: - return X86EMUL_RETRY; + rc = X86EMUL_RETRY; + break; case HVMCOPY_bad_gfn_to_mfn: data = ~0; break; @@ -354,8 +393,11 @@ static int dpci_ioport_write(uint32_t mp ASSERT(0); /* fall through */ default: - return X86EMUL_UNHANDLEABLE; + rc = X86EMUL_UNHANDLEABLE; + break; } + if ( rc != X86EMUL_OKAY) + break; } switch ( p->size ) @@ -374,7 +416,16 @@ static int dpci_ioport_write(uint32_t mp } } - return X86EMUL_OKAY; + if ( rc == X86EMUL_RETRY ) + current->arch.hvm_vcpu.hvm_io.mmio_retry = 1; + + if ( i != 0 ) + { + p->count = i; + rc = X86EMUL_OKAY; + } + + return rc; } int dpci_ioport_intercept(ioreq_t *p) --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -66,6 +66,11 @@ struct hvm_vcpu_io { /* We may write up to m256 as a number of device-model transactions. */ unsigned int mmio_large_write_bytes; paddr_t mmio_large_write_pa; + /* + * For string instruction emulation we need to be able to signal a + * necessary retry through other than function return codes. + */ + bool_t mmio_retry, mmio_retrying; }; #define VMCX_EADDR (~0ULL)