[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Alex Williamson <alex.williamson@xxxxxx> # Date 1187282853 21600 # Node ID 778985f246a01b054378cb551069b6455fc1159a # Parent b5dbf184df6c643ab524945c3de207c59eaf08c5 # Parent 256160ff19b74057f5819af20d68e6e3388c80e3 merge with xen-unstable.hg --- tools/libfsimage/fat/fsys_fat.c | 6 tools/libxc/xc_domain_restore.c | 37 tools/libxc/xc_evtchn.c | 7 tools/libxc/xc_linux.c | 30 tools/libxc/xc_solaris.c | 30 tools/libxc/xenctrl.h | 134 +- tools/python/xen/util/acmpolicy.py | 21 tools/python/xen/util/security.py | 64 + tools/python/xen/xend/XendCheckpoint.py | 3 tools/python/xen/xend/XendDomainInfo.py | 110 ++ tools/python/xen/xend/server/DevController.py | 71 + tools/python/xen/xend/server/blkif.py | 18 tools/python/xen/xm/main.py | 5 tools/xm-test/configure.ac | 7 tools/xm-test/lib/XmTestLib/NetConfig.py | 14 tools/xm-test/lib/XmTestLib/XenDevice.py | 2 tools/xm-test/lib/XmTestLib/config.py.in | 1 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c | 3 xen/acm/acm_policy.c | 12 xen/arch/x86/acpi/boot.c | 15 xen/arch/x86/hvm/hvm.c | 185 +++- xen/arch/x86/hvm/svm/svm.c | 538 ++--------- xen/arch/x86/hvm/svm/vmcb.c | 20 xen/arch/x86/hvm/vioapic.c | 4 xen/arch/x86/hvm/vmx/vmcs.c | 82 + xen/arch/x86/hvm/vmx/vmx.c | 588 +++---------- xen/arch/x86/hvm/vmx/x86_32/exits.S | 2 xen/arch/x86/hvm/vmx/x86_64/exits.S | 2 xen/arch/x86/mm.c | 4 xen/arch/x86/mm/hap/guest_walk.c | 2 xen/arch/x86/mm/hap/hap.c | 43 xen/arch/x86/mm/shadow/common.c | 4 xen/arch/x86/mm/shadow/multi.c | 27 xen/arch/x86/physdev.c | 19 xen/arch/x86/setup.c | 25 xen/arch/x86/string.c | 22 xen/arch/x86/traps.c | 28 xen/arch/x86/x86_32/asm-offsets.c | 2 xen/arch/x86/x86_32/traps.c | 7 xen/arch/x86/x86_64/asm-offsets.c | 2 xen/arch/x86/x86_64/compat/traps.c | 21 xen/arch/x86/x86_64/traps.c | 4 xen/common/domctl.c | 8 xen/common/page_alloc.c | 9 xen/common/xencomm.c | 203 ++-- xen/drivers/char/console.c | 14 xen/drivers/video/Makefile | 12 xen/drivers/video/vesa.c | 307 ++++++ xen/drivers/video/vga.c | 112 +- xen/include/asm-x86/hvm/hvm.h | 76 - xen/include/asm-x86/hvm/support.h | 4 xen/include/asm-x86/hvm/svm/asid.h | 14 xen/include/asm-x86/hvm/svm/vmcb.h | 5 xen/include/asm-x86/hvm/vcpu.h | 13 xen/include/asm-x86/hvm/vmx/vmcs.h | 6 xen/include/asm-x86/hvm/vmx/vmx.h | 4 xen/include/public/arch-x86/xen-x86_32.h | 36 xen/include/xen/vga.h | 8 xen/include/xen/xencomm.h | 15 59 files changed, 1607 insertions(+), 1460 deletions(-) diff -r b5dbf184df6c -r 778985f246a0 tools/libfsimage/fat/fsys_fat.c --- a/tools/libfsimage/fat/fsys_fat.c Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/libfsimage/fat/fsys_fat.c Thu Aug 16 10:47:33 2007 -0600 @@ -228,15 +228,15 @@ fat_read (fsi_file_t *ffi, char *buf, in if (!devread (ffi, sector, 0, FAT_CACHE_SIZE, (char*) FAT_BUF)) return 0; } - next_cluster = * (unsigned long *) (FAT_BUF + (cached_pos >> 1)); + next_cluster = ((__u16 *) (FAT_BUF + (cached_pos >> 1)))[0]; if (FAT_SUPER->fat_size == 3) { if (cached_pos & 1) next_cluster >>= 4; next_cluster &= 0xFFF; } - else if (FAT_SUPER->fat_size == 4) - next_cluster &= 0xFFFF; + else if (FAT_SUPER->fat_size > 4) + next_cluster |= ((__u16 *) (FAT_BUF + (cached_pos >> 1)))[1] << 16; if (next_cluster >= FAT_SUPER->clust_eof_marker) return ret; diff -r b5dbf184df6c -r 778985f246a0 tools/libxc/xc_domain_restore.c --- a/tools/libxc/xc_domain_restore.c Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/libxc/xc_domain_restore.c Thu Aug 16 10:47:33 2007 -0600 @@ -272,7 +272,8 @@ int xc_domain_restore(int xc_handle, int /* The new domain's 
shared-info frame number. */ unsigned long shared_info_frame; unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ - shared_info_t *shared_info = (shared_info_t *)shared_info_page; + shared_info_t *old_shared_info = (shared_info_t *)shared_info_page; + shared_info_t *new_shared_info; /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; @@ -285,9 +286,6 @@ int xc_domain_restore(int xc_handle, int /* Types of the pfns in the current region */ unsigned long region_pfn_type[MAX_BATCH_SIZE]; - - /* A temporary mapping, and a copy, of one frame of guest memory. */ - unsigned long *page = NULL; /* A copy of the pfn-to-mfn table frame list. */ xen_pfn_t *p2m_frame_list = NULL; @@ -1084,17 +1082,30 @@ int xc_domain_restore(int xc_handle, int goto out; } + /* Restore contents of shared-info page. No checking needed. */ + new_shared_info = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); + + /* restore saved vcpu_info and arch specific info */ + memcpy(&new_shared_info->vcpu_info, + &old_shared_info->vcpu_info, + sizeof(new_shared_info->vcpu_info)); + memcpy(&new_shared_info->arch, + &old_shared_info->arch, + sizeof(new_shared_info->arch)); + /* clear any pending events and the selector */ - memset(&(shared_info->evtchn_pending[0]), 0, - sizeof (shared_info->evtchn_pending)); + memset(&(new_shared_info->evtchn_pending[0]), 0, + sizeof (new_shared_info->evtchn_pending)); for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - shared_info->vcpu_info[i].evtchn_pending_sel = 0; - - /* Copy saved contents of shared-info page. No checking needed. */ - page = xc_map_foreign_range( - xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); - memcpy(page, shared_info, PAGE_SIZE); - munmap(page, PAGE_SIZE); + new_shared_info->vcpu_info[i].evtchn_pending_sel = 0; + + /* mask event channels */ + memset(&(new_shared_info->evtchn_mask[0]), 0xff, + sizeof (new_shared_info->evtchn_mask)); + + /* leave wallclock time. set by hypervisor */ + munmap(new_shared_info, PAGE_SIZE); /* Uncanonicalise the pfn-to-mfn table frame-number list. 
*/ for ( i = 0; i < P2M_FL_ENTRIES; i++ ) diff -r b5dbf184df6c -r 778985f246a0 tools/libxc/xc_evtchn.c --- a/tools/libxc/xc_evtchn.c Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/libxc/xc_evtchn.c Thu Aug 16 10:47:33 2007 -0600 @@ -33,9 +33,10 @@ static int do_evtchn_op(int xc_handle, i } -int xc_evtchn_alloc_unbound(int xc_handle, - uint32_t dom, - uint32_t remote_dom) +evtchn_port_or_error_t +xc_evtchn_alloc_unbound(int xc_handle, + uint32_t dom, + uint32_t remote_dom) { int rc; struct evtchn_alloc_unbound arg = { diff -r b5dbf184df6c -r 778985f246a0 tools/libxc/xc_linux.c --- a/tools/libxc/xc_linux.c Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/libxc/xc_linux.c Thu Aug 16 10:47:33 2007 -0600 @@ -254,7 +254,8 @@ int xc_evtchn_notify(int xce_handle, evt return ioctl(xce_handle, IOCTL_EVTCHN_NOTIFY, ¬ify); } -evtchn_port_t xc_evtchn_bind_unbound_port(int xce_handle, int domid) +evtchn_port_or_error_t +xc_evtchn_bind_unbound_port(int xce_handle, int domid) { struct ioctl_evtchn_bind_unbound_port bind; @@ -263,8 +264,9 @@ evtchn_port_t xc_evtchn_bind_unbound_por return ioctl(xce_handle, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind); } -evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid, - evtchn_port_t remote_port) +evtchn_port_or_error_t +xc_evtchn_bind_interdomain(int xce_handle, int domid, + evtchn_port_t remote_port) { struct ioctl_evtchn_bind_interdomain bind; @@ -274,6 +276,16 @@ evtchn_port_t xc_evtchn_bind_interdomain return ioctl(xce_handle, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); } +evtchn_port_or_error_t +xc_evtchn_bind_virq(int xce_handle, unsigned int virq) +{ + struct ioctl_evtchn_bind_virq bind; + + bind.virq = virq; + + return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind); +} + int xc_evtchn_unbind(int xce_handle, evtchn_port_t port) { struct ioctl_evtchn_unbind unbind; @@ -281,15 +293,6 @@ int xc_evtchn_unbind(int xce_handle, evt unbind.port = port; return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind); -} - -evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq) -{ - struct ioctl_evtchn_bind_virq bind; - - bind.virq = virq; - - return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind); } static int dorw(int fd, char *data, size_t size, int do_write) @@ -317,7 +320,8 @@ static int dorw(int fd, char *data, size return 0; } -evtchn_port_t xc_evtchn_pending(int xce_handle) +evtchn_port_or_error_t +xc_evtchn_pending(int xce_handle) { evtchn_port_t port; diff -r b5dbf184df6c -r 778985f246a0 tools/libxc/xc_solaris.c --- a/tools/libxc/xc_solaris.c Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/libxc/xc_solaris.c Thu Aug 16 10:47:33 2007 -0600 @@ -165,7 +165,8 @@ int xc_evtchn_notify(int xce_handle, evt return ioctl(xce_handle, IOCTL_EVTCHN_NOTIFY, ¬ify); } -evtchn_port_t xc_evtchn_bind_unbound_port(int xce_handle, int domid) +evtchn_port_or_error_t +xc_evtchn_bind_unbound_port(int xce_handle, int domid) { struct ioctl_evtchn_bind_unbound_port bind; @@ -174,8 +175,9 @@ evtchn_port_t xc_evtchn_bind_unbound_por return ioctl(xce_handle, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind); } -evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid, - evtchn_port_t remote_port) +evtchn_port_or_error_t +xc_evtchn_bind_interdomain(int xce_handle, int domid, + evtchn_port_t remote_port) { struct ioctl_evtchn_bind_interdomain bind; @@ -185,6 +187,16 @@ evtchn_port_t xc_evtchn_bind_interdomain return ioctl(xce_handle, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind); } +evtchn_port_or_error_t +xc_evtchn_bind_virq(int xce_handle, unsigned int virq) +{ + struct ioctl_evtchn_bind_virq bind; + 
+ bind.virq = virq; + + return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind); +} + int xc_evtchn_unbind(int xce_handle, evtchn_port_t port) { struct ioctl_evtchn_unbind unbind; @@ -192,15 +204,6 @@ int xc_evtchn_unbind(int xce_handle, evt unbind.port = port; return ioctl(xce_handle, IOCTL_EVTCHN_UNBIND, &unbind); -} - -evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq) -{ - struct ioctl_evtchn_bind_virq bind; - - bind.virq = virq; - - return ioctl(xce_handle, IOCTL_EVTCHN_BIND_VIRQ, &bind); } static int dorw(int fd, char *data, size_t size, int do_write) @@ -228,7 +231,8 @@ static int dorw(int fd, char *data, size return 0; } -evtchn_port_t xc_evtchn_pending(int xce_handle) +evtchn_port_or_error_t +xc_evtchn_pending(int xce_handle) { evtchn_port_t port; diff -r b5dbf184df6c -r 778985f246a0 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/libxc/xenctrl.h Thu Aug 16 10:47:33 2007 -0600 @@ -449,6 +449,9 @@ int xc_domain_setdebugging(int xc_handle * EVENT CHANNEL FUNCTIONS */ +/* A port identifier is guaranteed to fit in 31 bits. */ +typedef int evtchn_port_or_error_t; + /** * This function allocates an unbound port. Ports are named endpoints used for * interdomain communication. This function is most useful in opening a @@ -463,12 +466,77 @@ int xc_domain_setdebugging(int xc_handle * @parm remote_dom the ID of the domain who will later bind * @return allocated port (in @dom) on success, -1 on failure */ -int xc_evtchn_alloc_unbound(int xc_handle, - uint32_t dom, - uint32_t remote_dom); +evtchn_port_or_error_t +xc_evtchn_alloc_unbound(int xc_handle, + uint32_t dom, + uint32_t remote_dom); int xc_evtchn_reset(int xc_handle, uint32_t dom); + +/* + * Return a handle to the event channel driver, or -1 on failure, in which case + * errno will be set appropriately. + */ +int xc_evtchn_open(void); + +/* + * Close a handle previously allocated with xc_evtchn_open(). + */ +int xc_evtchn_close(int xce_handle); + +/* + * Return an fd that can be select()ed on for further calls to + * xc_evtchn_pending(). + */ +int xc_evtchn_fd(int xce_handle); + +/* + * Notify the given event channel. Returns -1 on failure, in which case + * errno will be set appropriately. + */ +int xc_evtchn_notify(int xce_handle, evtchn_port_t port); + +/* + * Returns a new event port awaiting interdomain connection from the given + * domain ID, or -1 on failure, in which case errno will be set appropriately. + */ +evtchn_port_or_error_t +xc_evtchn_bind_unbound_port(int xce_handle, int domid); + +/* + * Returns a new event port bound to the remote port for the given domain ID, + * or -1 on failure, in which case errno will be set appropriately. + */ +evtchn_port_or_error_t +xc_evtchn_bind_interdomain(int xce_handle, int domid, + evtchn_port_t remote_port); + +/* + * Bind an event channel to the given VIRQ. Returns the event channel bound to + * the VIRQ, or -1 on failure, in which case errno will be set appropriately. + */ +evtchn_port_or_error_t +xc_evtchn_bind_virq(int xce_handle, unsigned int virq); + +/* + * Unbind the given event channel. Returns -1 on failure, in which case errno + * will be set appropriately. + */ +int xc_evtchn_unbind(int xce_handle, evtchn_port_t port); + +/* + * Return the next event channel to become pending, or -1 on failure, in which + * case errno will be set appropriately. + */ +evtchn_port_or_error_t +xc_evtchn_pending(int xce_handle); + +/* + * Unmask the given event channel. 
Returns -1 on failure, in which case errno + * will be set appropriately. + */ +int xc_evtchn_unmask(int xce_handle, evtchn_port_t port); int xc_physdev_pci_access_modify(int xc_handle, uint32_t domid, @@ -699,66 +767,6 @@ int xc_version(int xc_handle, int cmd, v int xc_acm_op(int xc_handle, int cmd, void *arg, unsigned long arg_size); -/* - * Return a handle to the event channel driver, or -1 on failure, in which case - * errno will be set appropriately. - */ -int xc_evtchn_open(void); - -/* - * Close a handle previously allocated with xc_evtchn_open(). - */ -int xc_evtchn_close(int xce_handle); - -/* - * Return an fd that can be select()ed on for further calls to - * xc_evtchn_pending(). - */ -int xc_evtchn_fd(int xce_handle); - -/* - * Notify the given event channel. Returns -1 on failure, in which case - * errno will be set appropriately. - */ -int xc_evtchn_notify(int xce_handle, evtchn_port_t port); - -/* - * Returns a new event port awaiting interdomain connection from the given - * domain ID, or -1 on failure, in which case errno will be set appropriately. - */ -evtchn_port_t xc_evtchn_bind_unbound_port(int xce_handle, int domid); - -/* - * Returns a new event port bound to the remote port for the given domain ID, - * or -1 on failure, in which case errno will be set appropriately. - */ -evtchn_port_t xc_evtchn_bind_interdomain(int xce_handle, int domid, - evtchn_port_t remote_port); - -/* - * Unbind the given event channel. Returns -1 on failure, in which case errno - * will be set appropriately. - */ -int xc_evtchn_unbind(int xce_handle, evtchn_port_t port); - -/* - * Bind an event channel to the given VIRQ. Returns the event channel bound to - * the VIRQ, or -1 on failure, in which case errno will be set appropriately. - */ -evtchn_port_t xc_evtchn_bind_virq(int xce_handle, unsigned int virq); - -/* - * Return the next event channel to become pending, or -1 on failure, in which - * case errno will be set appropriately. - */ -evtchn_port_t xc_evtchn_pending(int xce_handle); - -/* - * Unmask the given event channel. Returns -1 on failure, in which case errno - * will be set appropriately. 
- */ -int xc_evtchn_unmask(int xce_handle, evtchn_port_t port); - /************************** * GRANT TABLE OPERATIONS * **************************/ diff -r b5dbf184df6c -r 778985f246a0 tools/python/xen/util/acmpolicy.py --- a/tools/python/xen/util/acmpolicy.py Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/python/xen/util/acmpolicy.py Thu Aug 16 10:47:33 2007 -0600 @@ -191,20 +191,21 @@ class ACMPolicy(XSPolicy): acmpol_old.policy_get_virtualmachinelabel_names_sorted() del_array = "" chg_array = "" + for o in oldvmnames: if o not in newvmnames: - old_idx = oldvmnames.index(o) + 1 # for _NULL_LABEL_ + old_idx = oldvmnames.index(o) if vmlabel_map.has_key(o): #not a deletion, but a renaming new = vmlabel_map[o] - new_idx = newvmnames.index(new) + 1 # for _NULL_LABEL_ + new_idx = newvmnames.index(new) chg_array += struct.pack("ii", old_idx, new_idx) else: del_array += struct.pack("i", old_idx) for v in newvmnames: if v in oldvmnames: - old_idx = oldvmnames.index(v) + 1 # for _NULL_LABEL_ - new_idx = newvmnames.index(v) + 1 # for _NULL_LABEL_ + old_idx = oldvmnames.index(v) + new_idx = newvmnames.index(v) if old_idx != new_idx: chg_array += struct.pack("ii", old_idx, new_idx) @@ -348,7 +349,7 @@ class ACMPolicy(XSPolicy): ssidref = xsconstants.INVALID_SSIDREF names = self.policy_get_virtualmachinelabel_names_sorted() try: - vmidx = names.index(vm_label) + 1 # for _NULL_LABEL_ + vmidx = names.index(vm_label) ssidref = (vmidx << 16) | vmidx except: pass @@ -618,6 +619,9 @@ class ACMPolicy(XSPolicy): vmnames.remove(bootstrap) vmnames.sort() vmnames.insert(0, bootstrap) + if ACM_LABEL_UNLABELED in vmnames: + vmnames.remove(ACM_LABEL_UNLABELED) + vmnames.insert(0, ACM_LABEL_UNLABELED) return vmnames def policy_get_virtualmachinelabel_names_sorted(self): @@ -625,7 +629,10 @@ class ACMPolicy(XSPolicy): label will be the first one in that list, followed by an alphabetically sorted list of VM label names """ vmnames = self.policy_get_virtualmachinelabel_names() - return self.policy_sort_virtualmachinelabel_names(vmnames) + res = self.policy_sort_virtualmachinelabel_names(vmnames) + if res[0] != ACM_LABEL_UNLABELED: + res.insert(0, ACM_LABEL_UNLABELED) + return res def policy_get_virtualmachinelabels(self): """ Get a list of all virtual machine labels in this policy """ @@ -906,7 +913,7 @@ class ACMPolicy(XSPolicy): allvmtypes = self.policy_get_virtualmachinelabel_names_sorted() except: return None - return allvmtypes[chwall_ref-1] # skip _NULL_LABEL_ + return allvmtypes[chwall_ref] def policy_get_domain_label_formatted(self, domid): label = self.policy_get_domain_label(domid) diff -r b5dbf184df6c -r 778985f246a0 tools/python/xen/util/security.py --- a/tools/python/xen/util/security.py Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/python/xen/util/security.py Thu Aug 16 10:47:33 2007 -0600 @@ -838,13 +838,28 @@ def set_resource_label_xapi(resource, re def is_resource_in_use(resource): - """ Investigate all running domains whether they use this device """ + """ + Domain-0 'owns' resources of type 'VLAN', the rest are owned by + the guests. 
+ """ from xen.xend import XendDomain - dominfos = XendDomain.instance().list('all') lst = [] - for dominfo in dominfos: - if is_resource_in_use_by_dom(dominfo, resource): - lst.append(dominfo) + if resource.startswith('vlan'): + from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance + curpol = XSPolicyAdminInstance().get_loaded_policy() + policytype, label, policy = get_res_label(resource) + if curpol and \ + policytype == xsconstants.ACM_POLICY_ID and \ + policy == curpol.get_name() and \ + label in curpol.policy_get_resourcelabel_names(): + # VLAN is in use. + lst.append(XendDomain.instance(). + get_vm_by_uuid(XendDomain.DOM0_UUID)) + else: + dominfos = XendDomain.instance().list('all') + for dominfo in dominfos: + if is_resource_in_use_by_dom(dominfo, resource): + lst.append(dominfo) return lst def devices_equal(res1, res2, mustexist=True): @@ -892,6 +907,10 @@ def get_domain_resources(dominfo): if sec_lab: resources[typ].append(sec_lab) else: + # !!! This should really get the label of the domain + # or at least a resource label that has the same STE type + # as the domain has + from xen.util.acmpolicy import ACM_LABEL_UNLABELED resources[typ].append("%s:%s:%s" % (xsconstants.ACM_POLICY_ID, active_policy, @@ -924,7 +943,8 @@ def resources_compatible_with_vmlabel(xs def __resources_compatible_with_vmlabel(xspol, dominfo, vmlabel, - access_control): + access_control, + is_policy_update=False): """ Check whether the resources' labels are compatible with the given VM label. The access_control parameter provides a @@ -955,15 +975,23 @@ def __resources_compatible_with_vmlabel( elif key in [ 'vif' ]: for xapi_label in value: label = xapi_label.split(":") - if not collect_labels(reslabels, label, polname): - return False + from xen.util.acmpolicy import ACM_LABEL_UNLABELED + if not (is_policy_update and \ + label[2] == ACM_LABEL_UNLABELED): + if not collect_labels(reslabels, label, polname): + return False else: log.error("Unhandled device type: %s" % key) return False # Check that all resource labes have a common STE type with the # vmlabel - rc = xspol.policy_check_vmlabel_against_reslabels(vmlabel, reslabels) + if len(reslabels) > 0: + rc = xspol.policy_check_vmlabel_against_reslabels(vmlabel, reslabels) + else: + rc = True + log.info("vmlabel=%s, reslabels=%s, rc=%s" % + (vmlabel, reslabels, str(rc))) return rc; def set_resource_label(resource, policytype, policyref, reslabel, \ @@ -1234,11 +1262,12 @@ def change_acm_policy(bin_pol, del_array compatible = __resources_compatible_with_vmlabel(new_acmpol, dominfo, new_vmlabel, - access_control) + access_control, + is_policy_update=True) log.info("Domain %s with new label '%s' can access its " "resources? : %s" % (name, new_vmlabel, str(compatible))) - log.info("VM labels in new domain: %s" % + log.info("VM labels in new policy: %s" % new_acmpol.policy_get_virtualmachinelabel_names()) if not compatible: return (-xsconstants.XSERR_RESOURCE_ACCESS, "") @@ -1252,11 +1281,16 @@ def change_acm_policy(bin_pol, del_array sec_lab, new_seclab = labels if sec_lab != new_seclab: log.info("Updating domain %s to new label '%s'." % \ - (sec_lab, new_seclab)) + (dominfo.getName(), new_seclab)) # This better be working! 
- dominfo.set_security_label(new_seclab, - sec_lab, - new_acmpol) + res = dominfo.set_security_label(new_seclab, + sec_lab, + new_acmpol, + cur_acmpol) + if res[0] != xsconstants.XSERR_SUCCESS: + log.info("ERROR: Could not chg label on domain %s: %s" % + (dominfo.getName(), + xsconstants.xserr2string(-int(res[0])))) finally: log.info("----------------------------------------------") mapfile_unlock() diff -r b5dbf184df6c -r 778985f246a0 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/python/xen/xend/XendCheckpoint.py Thu Aug 16 10:47:33 2007 -0600 @@ -98,6 +98,9 @@ def save(fd, dominfo, network, live, dst log.info("Domain %d suspended.", dominfo.getDomid()) dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3, domain_name) + if hvm: + dominfo.image.saveDeviceModel() + tochild.write("done\n") tochild.flush() log.debug('Written done') diff -r b5dbf184df6c -r 778985f246a0 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Aug 16 10:47:33 2007 -0600 @@ -558,9 +558,64 @@ class XendDomainInfo: for devclass in XendDevices.valid_devices(): self.getDeviceController(devclass).waitForDevices() - def destroyDevice(self, deviceClass, devid, force = False): - log.debug("dev = %s", devid) - return self.getDeviceController(deviceClass).destroyDevice(devid, force) + def destroyDevice(self, deviceClass, devid, force = False, rm_cfg = False): + log.debug("XendDomainInfo.destroyDevice: deviceClass = %s, device = %s", + deviceClass, devid) + + if rm_cfg: + # Convert devid to device number. A device number is + # needed to remove its configuration. + dev = self.getDeviceController(deviceClass).convertToDeviceNumber(devid) + + # Save current sxprs. A device number and a backend + # path are needed to remove its configuration but sxprs + # do not have those after calling destroyDevice. + sxprs = self.getDeviceSxprs(deviceClass) + + rc = None + if self.domid is not None: + rc = self.getDeviceController(deviceClass).destroyDevice(devid, force) + if not force and rm_cfg: + # The backend path, other than the device itself, + # has to be passed because its accompanied frontend + # path may be void until its removal is actually + # issued. It is probable because destroyDevice is + # issued first. 
+ for dev_num, dev_info in sxprs: + dev_num = int(dev_num) + if dev_num == dev: + for x in dev_info: + if x[0] == 'backend': + backend = x[1] + break + break + self._waitForDevice_destroy(deviceClass, devid, backend) + + if rm_cfg: + if deviceClass == 'vif': + if self.domid is not None: + for dev_num, dev_info in sxprs: + dev_num = int(dev_num) + if dev_num == dev: + for x in dev_info: + if x[0] == 'mac': + mac = x[1] + break + break + dev_info = self.getDeviceInfo_vif(mac) + else: + _, dev_info = sxprs[dev] + else: # 'vbd' or 'tap' + dev_info = self.getDeviceInfo_vbd(dev) + if dev_info is None: + return rc + + dev_uuid = sxp.child_value(dev_info, 'uuid') + del self.info['devices'][dev_uuid] + self.info['%s_refs' % deviceClass].remove(dev_uuid) + xen.xend.XendDomain.instance().managed_config_save(self) + + return rc def getDeviceSxprs(self, deviceClass): if self._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): @@ -573,6 +628,23 @@ class XendDomainInfo: sxprs.append([dev_num, dev_info]) dev_num += 1 return sxprs + + def getDeviceInfo_vif(self, mac): + for dev_type, dev_info in self.info.all_devices_sxpr(): + if dev_type != 'vif': + continue + if mac == sxp.child_value(dev_info, 'mac'): + return dev_info + + def getDeviceInfo_vbd(self, devid): + for dev_type, dev_info in self.info.all_devices_sxpr(): + if dev_type != 'vbd' and dev_type != 'tap': + continue + dev = sxp.child_value(dev_info, 'dev') + dev = dev.split(':')[0] + dev = self.getDeviceController(dev_type).convertToDeviceNumber(dev) + if devid == dev: + return dev_info def setMemoryTarget(self, target): @@ -1112,8 +1184,6 @@ class XendDomainInfo: self._clearRestart() if reason == 'suspend': - if self._stateGet() != DOM_STATE_SUSPENDED: - self.image.saveDeviceModel() self._stateSet(DOM_STATE_SUSPENDED) # Don't destroy the domain. XendCheckpoint will do # this once it has finished. However, stop watching @@ -1321,6 +1391,10 @@ class XendDomainInfo: deviceClass, config = self.info['devices'].get(dev_uuid) self._waitForDevice(deviceClass, config['devid']) + def _waitForDevice_destroy(self, deviceClass, devid, backpath): + return self.getDeviceController(deviceClass).waitForDevice_destroy( + devid, backpath) + def _reconfigureDevice(self, deviceClass, devid, devconfig): return self.getDeviceController(deviceClass).reconfigureDevice( devid, devconfig) @@ -2187,11 +2261,18 @@ class XendDomainInfo: return self.metrics.get_uuid(); - def get_security_label(self): + def get_security_label(self, xspol=None): + """ + Get the security label of a domain + @param xspol The policy to use when converting the ssid into + a label; only to be passed during the updating + of the policy + """ domid = self.getDomid() - from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance - xspol = XSPolicyAdminInstance().get_loaded_policy() + if not xspol: + from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance + xspol = XSPolicyAdminInstance().get_loaded_policy() if domid == 0: if xspol: @@ -2202,7 +2283,8 @@ class XendDomainInfo: label = self.info.get('security_label', '') return label - def set_security_label(self, seclab, old_seclab, xspol=None): + def set_security_label(self, seclab, old_seclab, xspol=None, + xspol_old=None): """ Set the security label of a domain from its old to a new value. @@ -2213,6 +2295,8 @@ class XendDomainInfo: @param xspol An optional policy under which this update should be done. If not given, then the current active policy is used. 
+ @param xspol_old The old policy; only to be passed during + the updating of a policy @return Returns return code, a string with errors from the hypervisor's operation, old label of the domain @@ -2223,6 +2307,7 @@ class XendDomainInfo: new_ssidref = 0 domid = self.getDomid() res_labels = None + is_policy_update = (xspol_old != None) from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance from xen.util import xsconstants @@ -2276,13 +2361,16 @@ class XendDomainInfo: # Check that all used resources are accessible under the # new label - if not security.resources_compatible_with_vmlabel(xspol, + if not is_policy_update and \ + not security.resources_compatible_with_vmlabel(xspol, self, label): return (-xsconstants.XSERR_BAD_LABEL, "", "", 0) #Check label against expected one. - old_label = self.get_security_label() + old_label = self.get_security_label(xspol_old) if old_label != old_seclab: + log.info("old_label != old_seclab: %s != %s" % + (old_label, old_seclab)) return (-xsconstants.XSERR_BAD_LABEL, "", "", 0) # relabel domain in the hypervisor diff -r b5dbf184df6c -r 778985f246a0 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/python/xen/xend/server/DevController.py Thu Aug 16 10:47:33 2007 -0600 @@ -28,17 +28,19 @@ from xen.xend.xenstore.xswatch import xs import os -DEVICE_CREATE_TIMEOUT = 100 +DEVICE_CREATE_TIMEOUT = 100 +DEVICE_DESTROY_TIMEOUT = 100 HOTPLUG_STATUS_NODE = "hotplug-status" HOTPLUG_ERROR_NODE = "hotplug-error" HOTPLUG_STATUS_ERROR = "error" HOTPLUG_STATUS_BUSY = "busy" -Connected = 1 -Error = 2 -Missing = 3 -Timeout = 4 -Busy = 5 +Connected = 1 +Error = 2 +Missing = 3 +Timeout = 4 +Busy = 5 +Disconnected = 6 xenbusState = { 'Unknown' : 0, @@ -185,6 +187,18 @@ class DevController: (devid, self.deviceClass, err)) + def waitForDevice_destroy(self, devid, backpath): + log.debug("Waiting for %s - destroyDevice.", devid) + + if not self.hotplug: + return + + status = self.waitForBackend_destroy(backpath) + + if status == Timeout: + raise VmError("Device %s (%s) could not be disconnected. " % + (devid, self.deviceClass)) + def reconfigureDevice(self, devid, config): """Reconfigure the specified device. @@ -209,12 +223,7 @@ class DevController: here. """ - try: - dev = int(devid) - except ValueError: - # Does devid contain devicetype/deviceid? - # Propogate exception if unable to find an integer devid - dev = int(type(devid) is str and devid.split('/')[-1] or None) + dev = self.convertToDeviceNumber(devid) # Modify online status /before/ updating state (latter is watched by # drivers, so this ordering avoids a race). @@ -282,6 +291,15 @@ class DevController: config_dict = self.getDeviceConfiguration(devid) all_configs[devid] = config_dict return all_configs + + + def convertToDeviceNumber(self, devid): + try: + return int(devid) + except ValueError: + # Does devid contain devicetype/deviceid? + # Propogate exception if unable to find an integer devid + return int(type(devid) is str and devid.split('/')[-1] or None) ## protected: @@ -513,6 +531,19 @@ class DevController: return (Missing, None) + def waitForBackend_destroy(self, backpath): + + statusPath = backpath + '/' + HOTPLUG_STATUS_NODE + ev = Event() + result = { 'status': Timeout } + + xswatch(statusPath, deviceDestroyCallback, ev, result) + + ev.wait(DEVICE_DESTROY_TIMEOUT) + + return result['status'] + + def backendPath(self, backdom, devid): """Construct backend path given the backend domain and device id. 
@@ -561,3 +592,19 @@ def hotplugStatusCallback(statusPath, ev ev.set() return 0 + + +def deviceDestroyCallback(statusPath, ev, result): + log.debug("deviceDestroyCallback %s.", statusPath) + + status = xstransact.Read(statusPath) + + if status is None: + result['status'] = Disconnected + else: + return 1 + + log.debug("deviceDestroyCallback %d.", result['status']) + + ev.set() + return 0 diff -r b5dbf184df6c -r 778985f246a0 tools/python/xen/xend/server/blkif.py --- a/tools/python/xen/xend/server/blkif.py Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/python/xen/xend/server/blkif.py Thu Aug 16 10:47:33 2007 -0600 @@ -165,11 +165,23 @@ class BlkifController(DevController): try: DevController.destroyDevice(self, devid, force) except ValueError: - devid_end = type(devid) is str and devid.split('/')[-1] or None + dev = self.convertToDeviceNumber(devid) for i in self.deviceIDs(): - d = self.readBackend(i, 'dev') - if d == devid or (devid_end and d == devid_end): + if i == dev: DevController.destroyDevice(self, i, force) return raise VmError("Device %s not connected" % devid) + + def convertToDeviceNumber(self, devid): + try: + dev = int(devid) + except ValueError: + if type(devid) is not str: + raise VmError("devid %s is wrong type" % str(devid)) + try: + dev = devid.split('/')[-1] + dev = int(dev) + except ValueError: + dev = blkif.blkdev_name_to_number(dev) + return dev diff -r b5dbf184df6c -r 778985f246a0 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/python/xen/xm/main.py Thu Aug 16 10:47:33 2007 -0600 @@ -876,7 +876,7 @@ def parse_doms_info(info): if len(tmp) != 3: seclabel = "" else: - seclabel = tmp[2] + seclabel = security_label parsed_info['seclabel'] = seclabel if serverType == SERVER_XEN_API: @@ -2186,6 +2186,7 @@ def xm_network_attach(args): def detach(args, deviceClass): + rm_cfg = True dom = args[0] dev = args[1] try: @@ -2196,7 +2197,7 @@ def detach(args, deviceClass): except IndexError: force = None - server.xend.domain.destroyDevice(dom, deviceClass, dev, force) + server.xend.domain.destroyDevice(dom, deviceClass, dev, force, rm_cfg) def xm_block_detach(args): diff -r b5dbf184df6c -r 778985f246a0 tools/xm-test/configure.ac --- a/tools/xm-test/configure.ac Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/xm-test/configure.ac Thu Aug 16 10:47:33 2007 -0600 @@ -85,6 +85,13 @@ AC_SUBST(NETWORK_ADDRESS) AC_SUBST(NETWORK_ADDRESS) AC_SUBST(NETMASK) +DOM0_INTF="vif0.0" +AC_ARG_WITH(dom0-intf, + [ --with-dom0-intf=intf Set dom0 interface name [[default="vif0.0"]]], + [ DOM0_INTF="$withval" ]) + +AC_SUBST(DOM0_INTF) + AC_ARG_WITH(hvm-kernel, [[ --with-hvm-kernel=kernel Use this kernel for hvm disk.img testing]], HVMKERNEL=$withval, diff -r b5dbf184df6c -r 778985f246a0 tools/xm-test/lib/XmTestLib/NetConfig.py --- a/tools/xm-test/lib/XmTestLib/NetConfig.py Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/xm-test/lib/XmTestLib/NetConfig.py Thu Aug 16 10:47:33 2007 -0600 @@ -104,8 +104,8 @@ class NetConfig: if self.network == "169.254.0.0": checkZeroconfAddresses() - # Clean out any aliases in the network range for vif0.0. If - # an alias exists, a test xendevice add command could fail. + # Clean out any aliases in the network range for dom0's interface. + # If an alias exists, a test xendevice add command could fail. 
if NETWORK_IP_RANGE != "dhcp": self.__cleanDom0Aliases() @@ -139,20 +139,22 @@ class NetConfig: def __cleanDom0Aliases(self): # Remove any aliases within the supplied network IP range on dom0 - scmd = 'ip addr show dev vif0.0' + scmd = 'ip addr show dev %s' % (DOM0_INTF) status, output = traceCommand(scmd) if status: - raise NetworkError("Failed to show vif0.0 aliases: %d" % status) + raise NetworkError("Failed to show %s aliases: %d" % + (DOM0_INTF, status)) lines = output.split("\n") for line in lines: ip = re.search('(\d+\.\d+\.\d+\.\d+)', line) if ip and self.isIPInRange(ip.group(1)) == True: - dcmd = 'ip addr del %s dev vif0.0' % ip.group(1) + dcmd = 'ip addr del %s dev %s' % (ip.group(1), DOM0_INTF) dstatus, doutput = traceCommand(dcmd) if dstatus: - raise NetworkError("Failed to remove vif0.0 aliases: %d" % status) + raise NetworkError("Failed to remove %s aliases: %d" % + (DOM0_INTF, status)) def getNetEnv(self): return self.netenv diff -r b5dbf184df6c -r 778985f246a0 tools/xm-test/lib/XmTestLib/XenDevice.py --- a/tools/xm-test/lib/XmTestLib/XenDevice.py Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/xm-test/lib/XmTestLib/XenDevice.py Thu Aug 16 10:47:33 2007 -0600 @@ -214,7 +214,7 @@ class XenNetDevice(XenDevice): def removeDevice(self): self.releaseNetDevIP() - def addDom0AliasCmd(self, dev="vif0.0"): + def addDom0AliasCmd(self, dev=DOM0_INTF): # Method to add start and remove dom0 alias cmds acmd = 'ip addr add %s dev %s' % (self.dom0_alias_ip, dev) rcmd = 'ip addr del %s dev %s' % (self.dom0_alias_ip, dev) diff -r b5dbf184df6c -r 778985f246a0 tools/xm-test/lib/XmTestLib/config.py.in --- a/tools/xm-test/lib/XmTestLib/config.py.in Thu Aug 16 10:03:26 2007 -0600 +++ b/tools/xm-test/lib/XmTestLib/config.py.in Thu Aug 16 10:47:33 2007 -0600 @@ -4,3 +4,4 @@ NETWORK_IP_RANGE = "@NET_IP_RANGE@" NETWORK_IP_RANGE = "@NET_IP_RANGE@" NETWORK = "@NETWORK_ADDRESS@" NETMASK = "@NETMASK@" +DOM0_INTF = "@DOM0_INTF@" diff -r b5dbf184df6c -r 778985f246a0 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c --- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Thu Aug 16 10:03:26 2007 -0600 +++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Thu Aug 16 10:47:33 2007 -0600 @@ -1,5 +1,6 @@ #include <linux/config.h> -#include <linux/stop_machine.h> +#include <linux/cpumask.h> +#include <linux/preempt.h> #include <xen/evtchn.h> #include <xen/gnttab.h> #include <xen/xenbus.h> diff -r b5dbf184df6c -r 778985f246a0 xen/acm/acm_policy.c --- a/xen/acm/acm_policy.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/acm/acm_policy.c Thu Aug 16 10:47:33 2007 -0600 @@ -710,12 +710,12 @@ acm_change_policy(struct acm_change_poli goto acm_chg_policy_exit; } - if ( copy_from_guest((u8 *)dels.array, + if ( copy_from_guest(dels.array, chgpolicy->del_array, - chgpolicy->delarray_size) || - copy_from_guest((u8 *)ssidmap.array, + dels.num_items) || + copy_from_guest(ssidmap.array, chgpolicy->chg_array, - chgpolicy->chgarray_size) || + ssidmap.num_items) || copy_from_guest(binpolicy, chgpolicy->policy_pushcache, chgpolicy->policy_pushcache_size )) @@ -844,9 +844,9 @@ acm_relabel_domains(struct acm_relabel_d memset(errors.array, 0x0, sizeof(uint32_t) * errors.num_items); } - if ( copy_from_guest((u8 *)relabels.array, + if ( copy_from_guest(relabels.array, relabel->relabel_map, - relabel->relabel_map_size) ) + relabels.num_items) ) { rc = -EFAULT; goto acm_relabel_doms_exit; diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/acpi/boot.c --- a/xen/arch/x86/acpi/boot.c Thu Aug 16 10:03:26 2007 -0600 +++ 
b/xen/arch/x86/acpi/boot.c Thu Aug 16 10:47:33 2007 -0600 @@ -423,7 +423,7 @@ acpi_fadt_parse_sleep_info(struct fadt_d goto bad; if (strncmp(facs->signature, "FACS", 4)) { - printk(KERN_ERR PREFIX "Invalid FACS signature %s\n", + printk(KERN_ERR PREFIX "Invalid FACS signature %.4s\n", facs->signature); goto bad; } @@ -451,12 +451,13 @@ acpi_fadt_parse_sleep_info(struct fadt_d acpi_sinfo.vector_width = 64; } - printk (KERN_INFO PREFIX - "ACPI SLEEP INFO: pm1x_cnt[%x,%x], pm1x_evt[%x,%x]\n" - " wakeup_vec[%"PRIx64"], vec_size[%x]\n", - acpi_sinfo.pm1a_cnt, acpi_sinfo.pm1b_cnt, - acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_cnt, - acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width); + printk(KERN_INFO PREFIX + "ACPI SLEEP INFO: pm1x_cnt[%x,%x], pm1x_evt[%x,%x]\n", + acpi_sinfo.pm1a_cnt, acpi_sinfo.pm1b_cnt, + acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_cnt); + printk(KERN_INFO PREFIX + " wakeup_vec[%"PRIx64"], vec_size[%x]\n", + acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width); return; bad: memset(&acpi_sinfo, 0, sizeof(acpi_sinfo)); diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/hvm/hvm.c Thu Aug 16 10:47:33 2007 -0600 @@ -76,13 +76,6 @@ void hvm_enable(struct hvm_function_tabl hvm_enabled = 1; } -void hvm_stts(struct vcpu *v) -{ - /* FPU state already dirty? Then no need to setup_fpu() lazily. */ - if ( !v->fpu_dirtied ) - hvm_funcs.stts(v); -} - void hvm_set_guest_time(struct vcpu *v, u64 gtime) { u64 host_tsc; @@ -112,7 +105,8 @@ void hvm_do_resume(struct vcpu *v) { ioreq_t *p; - hvm_stts(v); + if ( !v->fpu_dirtied ) + hvm_funcs.stts(v); pt_thaw_time(v); @@ -520,6 +514,174 @@ void hvm_triple_fault(void) domain_shutdown(v->domain, SHUTDOWN_reboot); } +int hvm_set_cr0(unsigned long value) +{ + struct vcpu *v = current; + unsigned long mfn, old_base_mfn, old_value = v->arch.hvm_vcpu.guest_cr[0]; + + HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value); + + if ( (u32)value != value ) + { + HVM_DBG_LOG(DBG_LEVEL_1, + "Guest attempts to set upper 32 bits in CR0: %lx", + value); + hvm_inject_exception(TRAP_gp_fault, 0, 0); + return 0; + } + + value &= ~HVM_CR0_GUEST_RESERVED_BITS; + + /* ET is reserved and should be always be 1. */ + value |= X86_CR0_ET; + + if ( (value & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG ) + { + hvm_inject_exception(TRAP_gp_fault, 0, 0); + return 0; + } + + if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) ) + { + if ( v->arch.hvm_vcpu.guest_efer & EFER_LME ) + { + if ( !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) ) + { + HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable"); + hvm_inject_exception(TRAP_gp_fault, 0, 0); + return 0; + } + HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode"); + v->arch.hvm_vcpu.guest_efer |= EFER_LMA; + hvm_update_guest_efer(v); + } + + if ( !paging_mode_hap(v->domain) ) + { + /* The guest CR3 must be pointing to the guest physical. */ + mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT); + if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) + { + gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", + v->arch.hvm_vcpu.guest_cr[3], mfn); + domain_crash(v->domain); + return 0; + } + + /* Now arch.guest_table points to machine physical. 
*/ + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + v->arch.guest_table = pagetable_from_pfn(mfn); + if ( old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); + + HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", + v->arch.hvm_vcpu.guest_cr[3], mfn); + } + } + else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) ) + { + /* When CR0.PG is cleared, LMA is cleared immediately. */ + if ( hvm_long_mode_enabled(v) ) + { + v->arch.hvm_vcpu.guest_efer &= ~EFER_LMA; + hvm_update_guest_efer(v); + } + + if ( !paging_mode_hap(v->domain) ) + { + put_page(mfn_to_page(get_mfn_from_gpfn( + v->arch.hvm_vcpu.guest_cr[3] >> PAGE_SHIFT))); + v->arch.guest_table = pagetable_null(); + } + } + + v->arch.hvm_vcpu.guest_cr[0] = value; + hvm_update_guest_cr(v, 0); + + if ( (value ^ old_value) & X86_CR0_PG ) + paging_update_paging_modes(v); + + return 1; +} + +int hvm_set_cr3(unsigned long value) +{ + unsigned long old_base_mfn, mfn; + struct vcpu *v = current; + + if ( paging_mode_hap(v->domain) || !hvm_paging_enabled(v) ) + { + /* Nothing to do. */ + } + else if ( value == v->arch.hvm_vcpu.guest_cr[3] ) + { + /* Shadow-mode TLB flush. Invalidate the shadow. */ + mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); + if ( mfn != pagetable_get_pfn(v->arch.guest_table) ) + goto bad_cr3; + } + else + { + /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */ + HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); + mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); + if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) + goto bad_cr3; + + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + v->arch.guest_table = pagetable_from_pfn(mfn); + + if ( old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); + + HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); + } + + v->arch.hvm_vcpu.guest_cr[3] = value; + paging_update_cr3(v); + return 1; + + bad_cr3: + gdprintk(XENLOG_ERR, "Invalid CR3\n"); + domain_crash(v->domain); + return 0; +} + +int hvm_set_cr4(unsigned long value) +{ + struct vcpu *v = current; + unsigned long old_cr; + + if ( value & HVM_CR4_GUEST_RESERVED_BITS ) + { + HVM_DBG_LOG(DBG_LEVEL_1, + "Guest attempts to set reserved bit in CR4: %lx", + value); + goto gpf; + } + + if ( !(value & X86_CR4_PAE) && hvm_long_mode_enabled(v) ) + { + HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while " + "EFER.LMA is set"); + goto gpf; + } + + old_cr = v->arch.hvm_vcpu.guest_cr[4]; + v->arch.hvm_vcpu.guest_cr[4] = value; + hvm_update_guest_cr(v, 4); + + /* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */ + if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) + paging_update_paging_modes(v); + + return 1; + + gpf: + hvm_inject_exception(TRAP_gp_fault, 0, 0); + return 0; +} + /* * __hvm_copy(): * @buf = hypervisor buffer @@ -668,7 +830,6 @@ static hvm_hypercall_t *hvm_hypercall32_ static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = { HYPERCALL(memory_op), [ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op, - HYPERCALL(multicall), HYPERCALL(xen_version), HYPERCALL(grant_table_op), HYPERCALL(event_channel_op), @@ -811,12 +972,6 @@ int hvm_do_hypercall(struct cpu_user_reg return (this_cpu(hc_preempted) ? HVM_HCALL_preempted : flush ? 
HVM_HCALL_invalidate : HVM_HCALL_completed); -} - -void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3) -{ - v->arch.hvm_vcpu.hw_cr3 = guest_cr3; - hvm_funcs.update_guest_cr3(v); } static void hvm_latch_shinfo_size(struct domain *d) diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Aug 16 10:47:33 2007 -0600 @@ -59,8 +59,9 @@ int inst_copy_from_guest(unsigned char * int inst_len); asmlinkage void do_IRQ(struct cpu_user_regs *); -static int svm_reset_to_realmode(struct vcpu *v, - struct cpu_user_regs *regs); +static int svm_reset_to_realmode( + struct vcpu *v, struct cpu_user_regs *regs); +static void svm_update_guest_cr(struct vcpu *v, unsigned int cr); /* va of hardware host save area */ static void *hsa[NR_CPUS] __read_mostly; @@ -78,7 +79,7 @@ static void svm_inject_exception( struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; if ( trap == TRAP_page_fault ) - HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_svm.cpu_cr2, error_code); + HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vcpu.guest_cr[2], error_code); else HVMTRACE_2D(INJ_EXC, v, trap, error_code); @@ -97,55 +98,14 @@ static void svm_cpu_down(void) write_efer(read_efer() & ~EFER_SVME); } +static int svm_lme_is_set(struct vcpu *v) +{ #ifdef __x86_64__ - -static int svm_lme_is_set(struct vcpu *v) -{ - u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer; + u64 guest_efer = v->arch.hvm_vcpu.guest_efer; return guest_efer & EFER_LME; -} - -static int svm_long_mode_enabled(struct vcpu *v) -{ - u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer; - return guest_efer & EFER_LMA; -} - -#else /* __i386__ */ - -static int svm_lme_is_set(struct vcpu *v) -{ return 0; } -static int svm_long_mode_enabled(struct vcpu *v) -{ return 0; } - +#else + return 0; #endif - -static int svm_cr4_pae_is_set(struct vcpu *v) -{ - unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4; - return guest_cr4 & X86_CR4_PAE; -} - -static int svm_paging_enabled(struct vcpu *v) -{ - unsigned long guest_cr0 = v->arch.hvm_svm.cpu_shadow_cr0; - return (guest_cr0 & X86_CR0_PE) && (guest_cr0 & X86_CR0_PG); -} - -static int svm_pae_enabled(struct vcpu *v) -{ - unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4; - return svm_paging_enabled(v) && (guest_cr4 & X86_CR4_PAE); -} - -static int svm_nx_enabled(struct vcpu *v) -{ - return v->arch.hvm_svm.cpu_shadow_efer & EFER_NX; -} - -static int svm_pgbit_test(struct vcpu *v) -{ - return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG; } static void svm_store_cpu_guest_regs( @@ -165,10 +125,10 @@ static void svm_store_cpu_guest_regs( if ( crs != NULL ) { /* Returning the guest's regs */ - crs[0] = v->arch.hvm_svm.cpu_shadow_cr0; - crs[2] = v->arch.hvm_svm.cpu_cr2; - crs[3] = v->arch.hvm_svm.cpu_cr3; - crs[4] = v->arch.hvm_svm.cpu_shadow_cr4; + crs[0] = v->arch.hvm_vcpu.guest_cr[0]; + crs[2] = v->arch.hvm_vcpu.guest_cr[2]; + crs[3] = v->arch.hvm_vcpu.guest_cr[3]; + crs[4] = v->arch.hvm_vcpu.guest_cr[4]; } } @@ -202,7 +162,8 @@ static enum handler_return long_mode_do_ if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) ) { /* EFER.LME transition from 0 to 1. 
*/ - if ( svm_paging_enabled(v) || !svm_cr4_pae_is_set(v) ) + if ( hvm_paging_enabled(v) || + !(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE) ) { gdprintk(XENLOG_WARNING, "Trying to set LME bit when " "in paging mode or PAE bit is not set\n"); @@ -212,7 +173,7 @@ static enum handler_return long_mode_do_ else if ( !(msr_content & EFER_LME) && svm_lme_is_set(v) ) { /* EFER.LME transistion from 1 to 0. */ - if ( svm_paging_enabled(v) ) + if ( hvm_paging_enabled(v) ) { gdprintk(XENLOG_WARNING, "Trying to clear EFER.LME while paging enabled\n"); @@ -220,9 +181,9 @@ static enum handler_return long_mode_do_ } } - v->arch.hvm_svm.cpu_shadow_efer = msr_content; + v->arch.hvm_vcpu.guest_efer = msr_content; vmcb->efer = msr_content | EFER_SVME; - if ( !svm_paging_enabled(v) ) + if ( !hvm_paging_enabled(v) ) vmcb->efer &= ~(EFER_LME | EFER_LMA); break; @@ -297,10 +258,10 @@ int svm_vmcb_save(struct vcpu *v, struct c->rsp = vmcb->rsp; c->rflags = vmcb->rflags; - c->cr0 = v->arch.hvm_svm.cpu_shadow_cr0; - c->cr2 = v->arch.hvm_svm.cpu_cr2; - c->cr3 = v->arch.hvm_svm.cpu_cr3; - c->cr4 = v->arch.hvm_svm.cpu_shadow_cr4; + c->cr0 = v->arch.hvm_vcpu.guest_cr[0]; + c->cr2 = v->arch.hvm_vcpu.guest_cr[2]; + c->cr3 = v->arch.hvm_vcpu.guest_cr[3]; + c->cr4 = v->arch.hvm_vcpu.guest_cr[4]; #ifdef HVM_DEBUG_SUSPEND printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n", @@ -383,58 +344,31 @@ int svm_vmcb_restore(struct vcpu *v, str vmcb->rsp = c->rsp; vmcb->rflags = c->rflags; - v->arch.hvm_svm.cpu_shadow_cr0 = c->cr0; - vmcb->cr0 = c->cr0 | X86_CR0_WP | X86_CR0_ET | X86_CR0_PG; - - v->arch.hvm_svm.cpu_cr2 = c->cr2; + v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET; + v->arch.hvm_vcpu.guest_cr[2] = c->cr2; + v->arch.hvm_vcpu.guest_cr[3] = c->cr3; + v->arch.hvm_vcpu.guest_cr[4] = c->cr4; + svm_update_guest_cr(v, 0); + svm_update_guest_cr(v, 2); + svm_update_guest_cr(v, 4); #ifdef HVM_DEBUG_SUSPEND printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n", - __func__, - c->cr3, - c->cr0, - c->cr4); + __func__, c->cr3, c->cr0, c->cr4); #endif - if ( !svm_paging_enabled(v) ) - { - printk("%s: paging not enabled.\n", __func__); - goto skip_cr3; - } - - if ( c->cr3 == v->arch.hvm_svm.cpu_cr3 ) - { - /* - * This is simple TLB flush, implying the guest has - * removed some translation or changed page attributes. - * We simply invalidate the shadow. - */ - mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); - if ( mfn != pagetable_get_pfn(v->arch.guest_table) ) - goto bad_cr3; - } - else - { - /* - * If different, make a shadow. Check if the PDBR is valid - * first. 
- */ - HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3); + if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) ) + { + HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 = %"PRIx64, c->cr3); mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) goto bad_cr3; - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_from_pfn(mfn); - if (old_base_mfn) + if ( old_base_mfn ) put_page(mfn_to_page(old_base_mfn)); - v->arch.hvm_svm.cpu_cr3 = c->cr3; - } - - skip_cr3: - vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK; - v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4; - + } + vmcb->idtr.limit = c->idtr_limit; vmcb->idtr.base = c->idtr_base; @@ -488,10 +422,6 @@ int svm_vmcb_restore(struct vcpu *v, str if ( paging_mode_hap(v->domain) ) { - vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0; - vmcb->cr4 = (v->arch.hvm_svm.cpu_shadow_cr4 | - (HVM_CR4_HOST_MASK & ~X86_CR4_PAE)); - vmcb->cr3 = c->cr3; vmcb->np_enable = 1; vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */ vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table); @@ -521,7 +451,6 @@ int svm_vmcb_restore(struct vcpu *v, str } paging_update_paging_modes(v); - svm_asid_g_update_paging(v); return 0; @@ -540,7 +469,7 @@ static void svm_save_cpu_state(struct vc data->msr_star = vmcb->star; data->msr_cstar = vmcb->cstar; data->msr_syscall_mask = vmcb->sfmask; - data->msr_efer = v->arch.hvm_svm.cpu_shadow_efer; + data->msr_efer = v->arch.hvm_vcpu.guest_efer; data->msr_flags = -1ULL; data->tsc = hvm_get_guest_time(v); @@ -556,7 +485,7 @@ static void svm_load_cpu_state(struct vc vmcb->star = data->msr_star; vmcb->cstar = data->msr_cstar; vmcb->sfmask = data->msr_syscall_mask; - v->arch.hvm_svm.cpu_shadow_efer = data->msr_efer; + v->arch.hvm_vcpu.guest_efer = data->msr_efer; vmcb->efer = data->msr_efer | EFER_SVME; /* VMCB's EFER.LME isn't set unless we're actually in long mode * (see long_mode_do_msr_write()) */ @@ -605,11 +534,11 @@ static int svm_guest_x86_mode(struct vcp { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - if ( unlikely(!(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PE)) ) + if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) ) return 0; if ( unlikely(vmcb->rflags & X86_EFLAGS_VM) ) return 1; - if ( svm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) ) + if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) ) return 8; return (likely(vmcb->cs.attr.fields.db) ? 4 : 2); } @@ -619,9 +548,45 @@ static void svm_update_host_cr3(struct v /* SVM doesn't have a HOST_CR3 equivalent to update. 
*/ } -static void svm_update_guest_cr3(struct vcpu *v) -{ - v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; +static void svm_update_guest_cr(struct vcpu *v, unsigned int cr) +{ + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + + switch ( cr ) + { + case 0: + vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0]; + if ( !paging_mode_hap(v->domain) ) + vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP; + break; + case 2: + vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2]; + break; + case 3: + vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3]; + svm_asid_inv_asid(v); + break; + case 4: + vmcb->cr4 = HVM_CR4_HOST_MASK; + if ( paging_mode_hap(v->domain) ) + vmcb->cr4 &= ~X86_CR4_PAE; + vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4]; + break; + default: + BUG(); + } +} + +static void svm_update_guest_efer(struct vcpu *v) +{ +#ifdef __x86_64__ + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + + if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA ) + vmcb->efer |= EFER_LME | EFER_LMA; + else + vmcb->efer &= ~(EFER_LME | EFER_LMA); +#endif } static void svm_flush_guest_tlbs(void) @@ -639,24 +604,6 @@ static void svm_update_vtpr(struct vcpu vmcb->vintr.fields.tpr = value & 0x0f; } -static unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num) -{ - switch ( num ) - { - case 0: - return v->arch.hvm_svm.cpu_shadow_cr0; - case 2: - return v->arch.hvm_svm.cpu_cr2; - case 3: - return v->arch.hvm_svm.cpu_cr3; - case 4: - return v->arch.hvm_svm.cpu_shadow_cr4; - default: - BUG(); - } - return 0; /* dummy */ -} - static void svm_sync_vmcb(struct vcpu *v) { struct arch_svm_struct *arch_svm = &v->arch.hvm_svm; @@ -674,7 +621,7 @@ static unsigned long svm_get_segment_bas static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - int long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v); + int long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v); switch ( seg ) { @@ -748,7 +695,7 @@ static void svm_stts(struct vcpu *v) * then this is not necessary: no FPU activity can occur until the guest * clears CR0.TS, and we will initialise the FPU when that happens. 
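[Interrupting the hunk briefly: the svm.c changes here replace the per-vendor update_guest_cr3 hook with update_guest_cr/update_guest_efer, with the CRx values now cached in the common v->arch.hvm_vcpu.guest_cr[] array. Condensed from the hvm.c hunk earlier in this changeset, the common commit path is shown below; hvm_update_guest_cr is presumably a thin wrapper around hvm_funcs.update_guest_cr (its declaration lives in the asm-x86/hvm/hvm.h part of the diffstat, which is not quoted here).]

    /* Common code caches the new value, then asks the vendor layer
     * (e.g. svm_update_guest_cr above) to propagate it into the
     * VMCB/VMCS; a PG flip additionally refreshes the paging mode. */
    v->arch.hvm_vcpu.guest_cr[0] = value;
    hvm_update_guest_cr(v, 0);
    if ( (value ^ old_value) & X86_CR0_PG )
        paging_update_paging_modes(v);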
*/ - if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) ) + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) { v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device; vmcb->cr0 |= X86_CR0_TS; @@ -949,7 +896,7 @@ static void svm_hvm_inject_exception( { struct vcpu *v = current; if ( trapnr == TRAP_page_fault ) - v->arch.hvm_svm.vmcb->cr2 = v->arch.hvm_svm.cpu_cr2 = cr2; + v->arch.hvm_svm.vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2] = cr2; svm_inject_exception(v, trapnr, (errcode != -1), errcode); } @@ -970,17 +917,13 @@ static struct hvm_function_table svm_fun .load_cpu_guest_regs = svm_load_cpu_guest_regs, .save_cpu_ctxt = svm_save_vmcb_ctxt, .load_cpu_ctxt = svm_load_vmcb_ctxt, - .paging_enabled = svm_paging_enabled, - .long_mode_enabled = svm_long_mode_enabled, - .pae_enabled = svm_pae_enabled, - .nx_enabled = svm_nx_enabled, .interrupts_enabled = svm_interrupts_enabled, .guest_x86_mode = svm_guest_x86_mode, - .get_guest_ctrl_reg = svm_get_ctrl_reg, .get_segment_base = svm_get_segment_base, .get_segment_register = svm_get_segment_register, .update_host_cr3 = svm_update_host_cr3, - .update_guest_cr3 = svm_update_guest_cr3, + .update_guest_cr = svm_update_guest_cr, + .update_guest_efer = svm_update_guest_efer, .flush_guest_tlbs = svm_flush_guest_tlbs, .update_vtpr = svm_update_vtpr, .stts = svm_stts, @@ -1075,7 +1018,7 @@ static void svm_do_no_device_fault(struc setup_fpu(v); vmcb->exception_intercepts &= ~(1U << TRAP_no_device); - if ( !(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_TS) ) + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) vmcb->cr0 &= ~X86_CR0_TS; } @@ -1347,7 +1290,7 @@ static int svm_get_io_address( struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; /* If we're in long mode, don't check the segment presence & limit */ - long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v); + long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v); /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit. * l field combined with EFER_LMA says whether it's 16 or 64 bit. @@ -1650,31 +1593,11 @@ static int svm_set_cr0(unsigned long val static int svm_set_cr0(unsigned long value) { struct vcpu *v = current; - unsigned long mfn, old_value = v->arch.hvm_svm.cpu_shadow_cr0; - struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - unsigned long old_base_mfn; - - HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value); - - if ( (u32)value != value ) - { - HVM_DBG_LOG(DBG_LEVEL_1, - "Guest attempts to set upper 32 bits in CR0: %lx", - value); - svm_inject_exception(v, TRAP_gp_fault, 1, 0); + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; + int rc = hvm_set_cr0(value); + + if ( rc == 0 ) return 0; - } - - value &= ~HVM_CR0_GUEST_RESERVED_BITS; - - /* ET is reserved and should be always be 1. */ - value |= X86_CR0_ET; - - if ( (value & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG ) - { - svm_inject_exception(v, TRAP_gp_fault, 1, 0); - return 0; - } /* TS cleared? Then initialise FPU now. 
*/ if ( !(value & X86_CR0_TS) ) @@ -1683,76 +1606,9 @@ static int svm_set_cr0(unsigned long val vmcb->exception_intercepts &= ~(1U << TRAP_no_device); } - if ( (value & X86_CR0_PG) && !(old_value & X86_CR0_PG) ) - { - if ( svm_lme_is_set(v) ) - { - if ( !svm_cr4_pae_is_set(v) ) - { - HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable"); - svm_inject_exception(v, TRAP_gp_fault, 1, 0); - return 0; - } - HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode"); - v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA; - vmcb->efer |= EFER_LMA | EFER_LME; - } - - if ( !paging_mode_hap(v->domain) ) - { - /* The guest CR3 must be pointing to the guest physical. */ - mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT); - if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) - { - gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", - v->arch.hvm_svm.cpu_cr3, mfn); - domain_crash(v->domain); - return 0; - } - - /* Now arch.guest_table points to machine physical. */ - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_from_pfn(mfn); - if ( old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - - HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", - v->arch.hvm_vmx.cpu_cr3, mfn); - } - } - else if ( !(value & X86_CR0_PG) && (old_value & X86_CR0_PG) ) - { - /* When CR0.PG is cleared, LMA is cleared immediately. */ - if ( svm_long_mode_enabled(v) ) - { - vmcb->efer &= ~(EFER_LME | EFER_LMA); - v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA; - } - - if ( !paging_mode_hap(v->domain) && v->arch.hvm_svm.cpu_cr3 ) - { - put_page(mfn_to_page(get_mfn_from_gpfn( - v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT))); - v->arch.guest_table = pagetable_null(); - } - } - - vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0 = value; - if ( !paging_mode_hap(v->domain) ) - vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP; - - if ( (value ^ old_value) & X86_CR0_PG ) - { - paging_update_paging_modes(v); - svm_asid_g_update_paging(v); - } - return 1; } -/* - * Read from control registers. CR0 and CR4 are read from the shadow. - */ static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs) { unsigned long value = 0; @@ -1763,16 +1619,16 @@ static void mov_from_cr(int cr, int gp, switch ( cr ) { case 0: - value = v->arch.hvm_svm.cpu_shadow_cr0; + value = v->arch.hvm_vcpu.guest_cr[0]; break; case 2: value = vmcb->cr2; break; case 3: - value = (unsigned long)v->arch.hvm_svm.cpu_cr3; + value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3]; break; case 4: - value = (unsigned long)v->arch.hvm_svm.cpu_shadow_cr4; + value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4]; break; case 8: value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI); @@ -1791,13 +1647,9 @@ static void mov_from_cr(int cr, int gp, HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx", cr, value); } - -/* - * Write to control registers - */ static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs) { - unsigned long value, old_cr, old_base_mfn, mfn; + unsigned long value; struct vcpu *v = current; struct vlapic *vlapic = vcpu_vlapic(v); struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; @@ -1815,131 +1667,10 @@ static int mov_to_cr(int gpreg, int cr, return svm_set_cr0(value); case 3: - if ( paging_mode_hap(v->domain) ) - { - vmcb->cr3 = v->arch.hvm_svm.cpu_cr3 = value; - break; - } - - /* If paging is not enabled yet, simply copy the value to CR3. */ - if ( !svm_paging_enabled(v) ) - { - v->arch.hvm_svm.cpu_cr3 = value; - break; - } - - /* We make a new one if the shadow does not exist. 
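/*
 * The per-vendor CR3 handling removed below is condensed into a common
 * hvm_set_cr3(); an outline reconstructed from the removed code (sketch
 * only -- the actual common implementation is in hvm.c, outside this
 * hunk):
 */
int hvm_set_cr3_sketch(unsigned long value)    /* hypothetical name */
{
    struct vcpu *v = current;
    unsigned long mfn, old_base_mfn;

    if ( hvm_paging_enabled(v) && (value != v->arch.hvm_vcpu.guest_cr[3]) )
    {
        /* New base: validate the PDBR and retarget arch.guest_table. */
        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
        {
            domain_crash(v->domain);
            return 0;
        }
        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
        v->arch.guest_table = pagetable_from_pfn(mfn);
        if ( old_base_mfn )
            put_page(mfn_to_page(old_base_mfn));
    }

    /* Writing back the same value is an architectural TLB flush. */
    v->arch.hvm_vcpu.guest_cr[3] = value;
    paging_update_cr3(v);
    return 1;
}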
*/ - if ( value == v->arch.hvm_svm.cpu_cr3 ) - { - /* - * This is simple TLB flush, implying the guest has - * removed some translation or changed page attributes. - * We simply invalidate the shadow. - */ - mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); - if ( mfn != pagetable_get_pfn(v->arch.guest_table) ) - goto bad_cr3; - paging_update_cr3(v); - /* signal paging update to ASID handler */ - svm_asid_g_mov_to_cr3 (v); - } - else - { - /* - * If different, make a shadow. Check if the PDBR is valid - * first. - */ - HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); - mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); - if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) - goto bad_cr3; - - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_from_pfn(mfn); - - if ( old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - - v->arch.hvm_svm.cpu_cr3 = value; - update_cr3(v); - HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); - /* signal paging update to ASID handler */ - svm_asid_g_mov_to_cr3 (v); - } - break; - - case 4: /* CR4 */ - if ( value & HVM_CR4_GUEST_RESERVED_BITS ) - { - HVM_DBG_LOG(DBG_LEVEL_1, - "Guest attempts to set reserved bit in CR4: %lx", - value); - svm_inject_exception(v, TRAP_gp_fault, 1, 0); - break; - } - - if ( paging_mode_hap(v->domain) ) - { - v->arch.hvm_svm.cpu_shadow_cr4 = value; - vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); - paging_update_paging_modes(v); - /* signal paging update to ASID handler */ - svm_asid_g_update_paging (v); - break; - } - - old_cr = v->arch.hvm_svm.cpu_shadow_cr4; - if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) ) - { - if ( svm_pgbit_test(v) ) - { -#if CONFIG_PAGING_LEVELS >= 3 - /* The guest is a 32-bit PAE guest. */ - unsigned long mfn, old_base_mfn; - mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT); - if ( !mfn_valid(mfn) || - !get_page(mfn_to_page(mfn), v->domain) ) - goto bad_cr3; - - /* - * Now arch.guest_table points to machine physical. - */ - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_from_pfn(mfn); - if ( old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - paging_update_paging_modes(v); - /* signal paging update to ASID handler */ - svm_asid_g_update_paging (v); - - HVM_DBG_LOG(DBG_LEVEL_VMMU, - "Update CR3 value = %lx, mfn = %lx", - v->arch.hvm_svm.cpu_cr3, mfn); -#endif - } - } - else if ( !(value & X86_CR4_PAE) ) - { - if ( svm_long_mode_enabled(v) ) - { - svm_inject_exception(v, TRAP_gp_fault, 1, 0); - } - } - - v->arch.hvm_svm.cpu_shadow_cr4 = value; - vmcb->cr4 = value | HVM_CR4_HOST_MASK; - - /* - * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates - * all TLB entries except global entries. 
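/*
 * Sketch of the consequence spelled out above: only PSE/PGE/PAE
 * transitions require a paging-mode refresh, so the common CR4 path can
 * reduce to (assumed shape):
 */
unsigned long old_cr4 = v->arch.hvm_vcpu.guest_cr[4];
v->arch.hvm_vcpu.guest_cr[4] = value;
hvm_update_guest_cr(v, 4);
if ( (old_cr4 ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) )
    paging_update_paging_modes(v);    /* performs the implied TLB flush */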
- */ - if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) - { - paging_update_paging_modes(v); - /* signal paging update to ASID handler */ - svm_asid_g_update_paging (v); - } - break; + return hvm_set_cr3(value); + + case 4: + return hvm_set_cr4(value); case 8: vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4)); @@ -1953,19 +1684,11 @@ static int mov_to_cr(int gpreg, int cr, } return 1; - - bad_cr3: - gdprintk(XENLOG_ERR, "Invalid CR3\n"); - domain_crash(v->domain); - return 0; -} - - -#define ARR_SIZE(x) (sizeof(x) / sizeof(x[0])) - - -static int svm_cr_access(struct vcpu *v, unsigned int cr, unsigned int type, - struct cpu_user_regs *regs) +} + +static void svm_cr_access( + struct vcpu *v, unsigned int cr, unsigned int type, + struct cpu_user_regs *regs) { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; int inst_len = 0; @@ -1990,12 +1713,12 @@ static int svm_cr_access(struct vcpu *v, if ( type == TYPE_MOV_TO_CR ) { inst_len = __get_instruction_length_from_list( - v, list_a, ARR_SIZE(list_a), &buffer[index], &match); + v, list_a, ARRAY_SIZE(list_a), &buffer[index], &match); } else /* type == TYPE_MOV_FROM_CR */ { inst_len = __get_instruction_length_from_list( - v, list_b, ARR_SIZE(list_b), &buffer[index], &match); + v, list_b, ARRAY_SIZE(list_b), &buffer[index], &match); } ASSERT(inst_len > 0); @@ -2008,7 +1731,8 @@ static int svm_cr_access(struct vcpu *v, HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip); - switch (match) + switch ( match ) + { case INSTR_MOV2CR: gpreg = decode_src_reg(prefix, buffer[index+2]); @@ -2025,18 +1749,18 @@ static int svm_cr_access(struct vcpu *v, setup_fpu(current); vmcb->exception_intercepts &= ~(1U << TRAP_no_device); vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */ - v->arch.hvm_svm.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */ + v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */ break; case INSTR_LMSW: gpreg = decode_src_reg(prefix, buffer[index+2]); value = get_reg(gpreg, regs, vmcb) & 0xF; - value = (v->arch.hvm_svm.cpu_shadow_cr0 & ~0xF) | value; + value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value; result = svm_set_cr0(value); break; case INSTR_SMSW: - value = v->arch.hvm_svm.cpu_shadow_cr0 & 0xFFFF; + value = v->arch.hvm_vcpu.guest_cr[0] & 0xFFFF; modrm = buffer[index+2]; addr_size = svm_guest_x86_mode(v); if ( addr_size < 2 ) @@ -2099,9 +1823,8 @@ static int svm_cr_access(struct vcpu *v, ASSERT(inst_len); - __update_guest_eip(vmcb, inst_len); - - return result; + if ( result ) + __update_guest_eip(vmcb, inst_len); } static void svm_do_msr_access( @@ -2129,7 +1852,7 @@ static void svm_do_msr_access( break; case MSR_EFER: - msr_content = v->arch.hvm_svm.cpu_shadow_efer; + msr_content = v->arch.hvm_vcpu.guest_efer; break; case MSR_K8_MC4_MISC: /* Threshold register */ @@ -2319,8 +2042,7 @@ void svm_handle_invlpg(const short invlp HVMTRACE_3D(INVLPG, v, (invlpga?1:0), g_vaddr, (invlpga?regs->ecx:0)); paging_invlpg(v, g_vaddr); - /* signal invplg to ASID handler */ - svm_asid_g_invlpg (v, g_vaddr); + svm_asid_g_invlpg(v, g_vaddr); } @@ -2335,29 +2057,23 @@ static int svm_reset_to_realmode(struct { struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - /* clear the vmcb and user regs */ memset(regs, 0, sizeof(struct cpu_user_regs)); - - /* VMCB State */ - vmcb->cr0 = X86_CR0_ET | X86_CR0_PG | X86_CR0_WP; - v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET; - - vmcb->cr2 = 0; + + v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET; + svm_update_guest_cr(v, 0); + + v->arch.hvm_vcpu.guest_cr[2] = 0; + svm_update_guest_cr(v, 2); + + 
v->arch.hvm_vcpu.guest_cr[4] = 0; + svm_update_guest_cr(v, 4); + vmcb->efer = EFER_SVME; - - vmcb->cr4 = HVM_CR4_HOST_MASK; - v->arch.hvm_svm.cpu_shadow_cr4 = 0; - - if ( paging_mode_hap(v->domain) ) { - vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0; - vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 | - (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); - } /* This will jump to ROMBIOS */ vmcb->rip = 0xFFF0; - /* setup the segment registers and all their hidden states */ + /* Set up the segment registers and all their hidden states. */ vmcb->cs.sel = 0xF000; vmcb->cs.attr.bytes = 0x089b; vmcb->cs.limit = 0xffff; @@ -2483,7 +2199,7 @@ asmlinkage void svm_vmexit_handler(struc unsigned long va; va = vmcb->exitinfo2; regs->error_code = vmcb->exitinfo1; - HVM_DBG_LOG(DBG_LEVEL_VMMU, + HVM_DBG_LOG(DBG_LEVEL_VMMU, "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx", (unsigned long)regs->eax, (unsigned long)regs->ebx, (unsigned long)regs->ecx, (unsigned long)regs->edx, @@ -2495,7 +2211,7 @@ asmlinkage void svm_vmexit_handler(struc break; } - v->arch.hvm_svm.cpu_cr2 = vmcb->cr2 = va; + v->arch.hvm_vcpu.guest_cr[2] = vmcb->cr2 = va; svm_inject_exception(v, TRAP_page_fault, 1, regs->error_code); break; } diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/hvm/svm/vmcb.c --- a/xen/arch/x86/hvm/svm/vmcb.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/hvm/svm/vmcb.c Thu Aug 16 10:47:33 2007 -0600 @@ -111,7 +111,7 @@ static int construct_vmcb(struct vcpu *v svm_segment_attributes_t attrib; /* TLB control, and ASID assigment. */ - svm_asid_init_vcpu (v); + svm_asid_init_vcpu(v); vmcb->general1_intercepts = GENERAL1_INTERCEPT_INTR | GENERAL1_INTERCEPT_NMI | @@ -216,27 +216,19 @@ static int construct_vmcb(struct vcpu *v vmcb->tr.base = 0; vmcb->tr.limit = 0xff; - /* Guest CR0. */ - vmcb->cr0 = read_cr0(); - arch_svm->cpu_shadow_cr0 = vmcb->cr0 & ~(X86_CR0_PG | X86_CR0_TS); - vmcb->cr0 |= X86_CR0_WP; - - /* Guest CR4. */ - arch_svm->cpu_shadow_cr4 = - read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE); - vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK; + v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_TS; + hvm_update_guest_cr(v, 0); + + v->arch.hvm_vcpu.guest_cr[4] = 0; + hvm_update_guest_cr(v, 4); paging_update_paging_modes(v); - vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; if ( paging_mode_hap(v->domain) ) { - vmcb->cr0 = arch_svm->cpu_shadow_cr0; vmcb->np_enable = 1; /* enable nested paging */ vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */ vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table); - vmcb->cr4 = arch_svm->cpu_shadow_cr4 = - (HVM_CR4_HOST_MASK & ~X86_CR4_PAE); vmcb->exception_intercepts = HVM_TRAP_MASK; /* No point in intercepting CR3/4 reads, because the hardware diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/hvm/vioapic.c --- a/xen/arch/x86/hvm/vioapic.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/hvm/vioapic.c Thu Aug 16 10:47:33 2007 -0600 @@ -43,10 +43,6 @@ /* HACK: Route IRQ0 only to VCPU0 to prevent time jumps. 
*/ #define IRQ0_SPECIAL_ROUTING 1 -#if defined(__ia64__) -#define opt_hvm_debug_level opt_vmx_debug_level -#endif - static void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq); static unsigned long vioapic_read_indirect(struct hvm_hw_vioapic *vioapic, diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Aug 16 10:47:33 2007 -0600 @@ -315,34 +315,69 @@ void vmx_cpu_down(void) local_irq_restore(flags); } +struct foreign_vmcs { + struct vcpu *v; + unsigned int count; +}; +static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs); + void vmx_vmcs_enter(struct vcpu *v) { + struct foreign_vmcs *fv; + /* * NB. We must *always* run an HVM VCPU on its own VMCS, except for * vmx_vmcs_enter/exit critical regions. */ - if ( v == current ) + if ( likely(v == current) ) return; - vcpu_pause(v); - spin_lock(&v->arch.hvm_vmx.vmcs_lock); - - vmx_clear_vmcs(v); - vmx_load_vmcs(v); + fv = &this_cpu(foreign_vmcs); + + if ( fv->v == v ) + { + BUG_ON(fv->count == 0); + } + else + { + BUG_ON(fv->v != NULL); + BUG_ON(fv->count != 0); + + vcpu_pause(v); + spin_lock(&v->arch.hvm_vmx.vmcs_lock); + + vmx_clear_vmcs(v); + vmx_load_vmcs(v); + + fv->v = v; + } + + fv->count++; } void vmx_vmcs_exit(struct vcpu *v) { - if ( v == current ) + struct foreign_vmcs *fv; + + if ( likely(v == current) ) return; - /* Don't confuse vmx_do_resume (for @v or @current!) */ - vmx_clear_vmcs(v); - if ( is_hvm_vcpu(current) ) - vmx_load_vmcs(current); - - spin_unlock(&v->arch.hvm_vmx.vmcs_lock); - vcpu_unpause(v); + fv = &this_cpu(foreign_vmcs); + BUG_ON(fv->v != v); + BUG_ON(fv->count == 0); + + if ( --fv->count == 0 ) + { + /* Don't confuse vmx_do_resume (for @v or @current!) */ + vmx_clear_vmcs(v); + if ( is_hvm_vcpu(current) ) + vmx_load_vmcs(current); + + spin_unlock(&v->arch.hvm_vmx.vmcs_lock); + vcpu_unpause(v); + + fv->v = NULL; + } } struct xgt_desc { @@ -380,7 +415,6 @@ static void vmx_set_host_env(struct vcpu static void construct_vmcs(struct vcpu *v) { - unsigned long cr0, cr4; union vmcs_arbytes arbytes; vmx_vmcs_enter(v); @@ -504,19 +538,11 @@ static void construct_vmcs(struct vcpu * __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault)); - /* Guest CR0. */ - cr0 = read_cr0(); - v->arch.hvm_vmx.cpu_cr0 = cr0; - __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0); - v->arch.hvm_vmx.cpu_shadow_cr0 = cr0 & ~(X86_CR0_PG | X86_CR0_TS); - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0); - - /* Guest CR4. 
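/*
 * Usage sketch for the reference-counted foreign_vmcs added in the
 * vmx_vmcs_enter()/vmx_vmcs_exit() hunk above: enter/exit pairs on a
 * foreign vcpu may now nest on one physical CPU:
 */
vmx_vmcs_enter(v);              /* v != current: pause v, load its VMCS */
vmx_vmcs_enter(v);              /* nested call: bumps count, no reload  */
__vmwrite(GUEST_RSP, c->rsp);   /* v's VMCS is guaranteed current here  */
vmx_vmcs_exit(v);               /* count 2 -> 1: VMCS stays loaded      */
vmx_vmcs_exit(v);               /* count 1 -> 0: reload current's VMCS,
                                 * unpause v                            */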
*/ - cr4 = read_cr4(); - __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE); - v->arch.hvm_vmx.cpu_shadow_cr4 = - cr4 & ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE); - __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4); + v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET; + hvm_update_guest_cr(v, 0); + + v->arch.hvm_vcpu.guest_cr[4] = 0; + hvm_update_guest_cr(v, 4); if ( cpu_has_vmx_tpr_shadow ) { diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Aug 16 10:47:33 2007 -0600 @@ -61,6 +61,8 @@ static int vmx_alloc_vlapic_mapping(str static int vmx_alloc_vlapic_mapping(struct domain *d); static void vmx_free_vlapic_mapping(struct domain *d); static void vmx_install_vlapic_mapping(struct vcpu *v); +static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr); +static void vmx_update_guest_efer(struct vcpu *v); static int vmx_domain_initialise(struct domain *d) { @@ -100,62 +102,7 @@ static void vmx_vcpu_destroy(struct vcpu vmx_destroy_vmcs(v); } -static int vmx_paging_enabled(struct vcpu *v) -{ - unsigned long cr0 = v->arch.hvm_vmx.cpu_shadow_cr0; - return (cr0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG); -} - -static int vmx_pgbit_test(struct vcpu *v) -{ - unsigned long cr0 = v->arch.hvm_vmx.cpu_shadow_cr0; - return cr0 & X86_CR0_PG; -} - -static int vmx_pae_enabled(struct vcpu *v) -{ - unsigned long cr4 = v->arch.hvm_vmx.cpu_shadow_cr4; - return vmx_paging_enabled(v) && (cr4 & X86_CR4_PAE); -} - -static int vmx_nx_enabled(struct vcpu *v) -{ - return v->arch.hvm_vmx.efer & EFER_NX; -} - #ifdef __x86_64__ - -static int vmx_lme_is_set(struct vcpu *v) -{ - return v->arch.hvm_vmx.efer & EFER_LME; -} - -static int vmx_long_mode_enabled(struct vcpu *v) -{ - return v->arch.hvm_vmx.efer & EFER_LMA; -} - -static void vmx_enable_long_mode(struct vcpu *v) -{ - unsigned long vm_entry_value; - - vm_entry_value = __vmread(VM_ENTRY_CONTROLS); - vm_entry_value |= VM_ENTRY_IA32E_MODE; - __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); - - v->arch.hvm_vmx.efer |= EFER_LMA; -} - -static void vmx_disable_long_mode(struct vcpu *v) -{ - unsigned long vm_entry_value; - - vm_entry_value = __vmread(VM_ENTRY_CONTROLS); - vm_entry_value &= ~VM_ENTRY_IA32E_MODE; - __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); - - v->arch.hvm_vmx.efer &= ~EFER_LMA; -} static DEFINE_PER_CPU(struct vmx_msr_state, host_msr_state); @@ -190,7 +137,7 @@ static enum handler_return long_mode_do_ switch ( ecx ) { case MSR_EFER: - msr_content = v->arch.hvm_vmx.efer; + msr_content = v->arch.hvm_vcpu.guest_efer; break; case MSR_FS_BASE: @@ -204,7 +151,7 @@ static enum handler_return long_mode_do_ case MSR_SHADOW_GS_BASE: msr_content = v->arch.hvm_vmx.shadow_gs; check_long_mode: - if ( !(vmx_long_mode_enabled(v)) ) + if ( !(hvm_long_mode_enabled(v)) ) { vmx_inject_hw_exception(v, TRAP_gp_fault, 0); return HNDL_exception_raised; @@ -263,9 +210,9 @@ static enum handler_return long_mode_do_ } if ( (msr_content & EFER_LME) - && !(v->arch.hvm_vmx.efer & EFER_LME) ) + && !(v->arch.hvm_vcpu.guest_efer & EFER_LME) ) { - if ( unlikely(vmx_paging_enabled(v)) ) + if ( unlikely(hvm_paging_enabled(v)) ) { gdprintk(XENLOG_WARNING, "Trying to set EFER.LME with paging enabled\n"); @@ -273,9 +220,9 @@ static enum handler_return long_mode_do_ } } else if ( !(msr_content & EFER_LME) - && (v->arch.hvm_vmx.efer & EFER_LME) ) + && (v->arch.hvm_vcpu.guest_efer & EFER_LME) ) { - if ( unlikely(vmx_paging_enabled(v)) ) + if ( 
unlikely(hvm_paging_enabled(v)) ) { gdprintk(XENLOG_WARNING, "Trying to clear EFER.LME with paging enabled\n"); @@ -283,17 +230,17 @@ static enum handler_return long_mode_do_ } } - if ( (msr_content ^ v->arch.hvm_vmx.efer) & (EFER_NX|EFER_SCE) ) + if ( (msr_content ^ v->arch.hvm_vcpu.guest_efer) & (EFER_NX|EFER_SCE) ) write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) | (msr_content & (EFER_NX|EFER_SCE))); - v->arch.hvm_vmx.efer = msr_content; + v->arch.hvm_vcpu.guest_efer = msr_content; break; case MSR_FS_BASE: case MSR_GS_BASE: case MSR_SHADOW_GS_BASE: - if ( !vmx_long_mode_enabled(v) ) + if ( !hvm_long_mode_enabled(v) ) goto gp_fault; if ( !is_canonical_address(msr_content) ) @@ -394,26 +341,17 @@ static void vmx_restore_guest_msrs(struc clear_bit(i, &guest_flags); } - if ( (v->arch.hvm_vmx.efer ^ read_efer()) & (EFER_NX | EFER_SCE) ) + if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & (EFER_NX | EFER_SCE) ) { HVM_DBG_LOG(DBG_LEVEL_2, "restore guest's EFER with value %lx", - v->arch.hvm_vmx.efer); + v->arch.hvm_vcpu.guest_efer); write_efer((read_efer() & ~(EFER_NX | EFER_SCE)) | - (v->arch.hvm_vmx.efer & (EFER_NX | EFER_SCE))); + (v->arch.hvm_vcpu.guest_efer & (EFER_NX | EFER_SCE))); } } #else /* __i386__ */ - -static int vmx_lme_is_set(struct vcpu *v) -{ return 0; } -static int vmx_long_mode_enabled(struct vcpu *v) -{ return 0; } -static void vmx_enable_long_mode(struct vcpu *v) -{ BUG(); } -static void vmx_disable_long_mode(struct vcpu *v) -{ BUG(); } #define vmx_save_host_msrs() ((void)0) @@ -427,13 +365,13 @@ static void vmx_restore_host_msrs(void) static void vmx_restore_guest_msrs(struct vcpu *v) { - if ( (v->arch.hvm_vmx.efer ^ read_efer()) & EFER_NX ) + if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & EFER_NX ) { HVM_DBG_LOG(DBG_LEVEL_2, "restore guest's EFER with value %lx", - v->arch.hvm_vmx.efer); + v->arch.hvm_vcpu.guest_efer); write_efer((read_efer() & ~EFER_NX) | - (v->arch.hvm_vmx.efer & EFER_NX)); + (v->arch.hvm_vcpu.guest_efer & EFER_NX)); } } @@ -444,7 +382,7 @@ static enum handler_return long_mode_do_ switch ( regs->ecx ) { case MSR_EFER: - msr_content = v->arch.hvm_vmx.efer; + msr_content = v->arch.hvm_vcpu.guest_efer; break; default: @@ -475,10 +413,10 @@ static enum handler_return long_mode_do_ return HNDL_exception_raised; } - if ( (msr_content ^ v->arch.hvm_vmx.efer) & EFER_NX ) + if ( (msr_content ^ v->arch.hvm_vcpu.guest_efer) & EFER_NX ) write_efer((read_efer() & ~EFER_NX) | (msr_content & EFER_NX)); - v->arch.hvm_vmx.efer = msr_content; + v->arch.hvm_vcpu.guest_efer = msr_content; break; default: @@ -501,12 +439,12 @@ static int vmx_guest_x86_mode(struct vcp ASSERT(v == current); - if ( unlikely(!(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_PE)) ) + if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) ) return 0; if ( unlikely(__vmread(GUEST_RFLAGS) & X86_EFLAGS_VM) ) return 1; cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES); - if ( vmx_long_mode_enabled(v) && + if ( hvm_long_mode_enabled(v) && likely(cs_ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) ) return 8; return (likely(cs_ar_bytes & X86_SEG_AR_DEF_OP_SIZE) ? 
4 : 2); @@ -551,12 +489,12 @@ void vmx_vmcs_save(struct vcpu *v, struc c->rsp = __vmread(GUEST_RSP); c->rflags = __vmread(GUEST_RFLAGS); - c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0; - c->cr2 = v->arch.hvm_vmx.cpu_cr2; - c->cr3 = v->arch.hvm_vmx.cpu_cr3; - c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4; - - c->msr_efer = v->arch.hvm_vmx.efer; + c->cr0 = v->arch.hvm_vcpu.guest_cr[0]; + c->cr2 = v->arch.hvm_vcpu.guest_cr[2]; + c->cr3 = v->arch.hvm_vcpu.guest_cr[3]; + c->cr4 = v->arch.hvm_vcpu.guest_cr[4]; + + c->msr_efer = v->arch.hvm_vcpu.guest_efer; #ifdef HVM_DEBUG_SUSPEND printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n", @@ -635,51 +573,33 @@ int vmx_vmcs_restore(struct vcpu *v, str __vmwrite(GUEST_RSP, c->rsp); __vmwrite(GUEST_RFLAGS, c->rflags); - v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG | - X86_CR0_NE | X86_CR0_WP | X86_CR0_ET); - __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0); - v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0; - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0); - - v->arch.hvm_vmx.cpu_cr2 = c->cr2; - - v->arch.hvm_vmx.efer = c->msr_efer; + v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET; + v->arch.hvm_vcpu.guest_cr[2] = c->cr2; + v->arch.hvm_vcpu.guest_cr[3] = c->cr3; + v->arch.hvm_vcpu.guest_cr[4] = c->cr4; + vmx_update_guest_cr(v, 0); + vmx_update_guest_cr(v, 2); + vmx_update_guest_cr(v, 4); #ifdef HVM_DEBUG_SUSPEND printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n", __func__, c->cr3, c->cr0, c->cr4); #endif - if ( !vmx_paging_enabled(v) ) - { - HVM_DBG_LOG(DBG_LEVEL_VMMU, "%s: paging not enabled.", __func__); - goto skip_cr3; - } - - HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 = %"PRIx64, c->cr3); - /* current!=vcpu as not called by arch_vmx_do_launch */ - mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); - if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) - { - gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64".\n", c->cr3); - vmx_vmcs_exit(v); - return -EINVAL; - } - - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_from_pfn(mfn); - if ( old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - - skip_cr3: - v->arch.hvm_vmx.cpu_cr3 = c->cr3; - - if ( vmx_long_mode_enabled(v) ) - vmx_enable_long_mode(v); - - __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK)); - v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4; - __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4); + if ( hvm_paging_enabled(v) ) + { + HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 = %"PRIx64, c->cr3); + mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); + if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) + goto bad_cr3; + old_base_mfn = pagetable_get_pfn(v->arch.guest_table); + v->arch.guest_table = pagetable_from_pfn(mfn); + if ( old_base_mfn ) + put_page(mfn_to_page(old_base_mfn)); + } + + v->arch.hvm_vcpu.guest_efer = c->msr_efer; + vmx_update_guest_efer(v); __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit); __vmwrite(GUEST_IDTR_BASE, c->idtr_base); @@ -760,6 +680,11 @@ int vmx_vmcs_restore(struct vcpu *v, str } return 0; + + bad_cr3: + gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3); + vmx_vmcs_exit(v); + return -EINVAL; } #if defined(__x86_64__) && defined(HVM_DEBUG_SUSPEND) @@ -884,10 +809,10 @@ static void vmx_store_cpu_guest_regs( if ( crs != NULL ) { - crs[0] = v->arch.hvm_vmx.cpu_shadow_cr0; - crs[2] = v->arch.hvm_vmx.cpu_cr2; - crs[3] = v->arch.hvm_vmx.cpu_cr3; - crs[4] = v->arch.hvm_vmx.cpu_shadow_cr4; + crs[0] = v->arch.hvm_vcpu.guest_cr[0]; + crs[2] = v->arch.hvm_vcpu.guest_cr[2]; + 
crs[3] = v->arch.hvm_vcpu.guest_cr[3]; + crs[4] = v->arch.hvm_vcpu.guest_cr[4]; } vmx_vmcs_exit(v); @@ -928,24 +853,6 @@ static void vmx_load_cpu_guest_regs(stru vmx_vmcs_exit(v); } -static unsigned long vmx_get_ctrl_reg(struct vcpu *v, unsigned int num) -{ - switch ( num ) - { - case 0: - return v->arch.hvm_vmx.cpu_cr0; - case 2: - return v->arch.hvm_vmx.cpu_cr2; - case 3: - return v->arch.hvm_vmx.cpu_cr3; - case 4: - return v->arch.hvm_vmx.cpu_shadow_cr4; - default: - BUG(); - } - return 0; /* dummy */ -} - static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg) { unsigned long base = 0; @@ -953,7 +860,7 @@ static unsigned long vmx_get_segment_bas ASSERT(v == current); - if ( vmx_long_mode_enabled(v) && + if ( hvm_long_mode_enabled(v) && (__vmread(GUEST_CS_AR_BYTES) & X86_SEG_AR_CS_LM_ACTIVE) ) long_mode = 1; @@ -1045,6 +952,9 @@ static void vmx_get_segment_register(str } reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00); + /* Unusable flag is folded into Present flag. */ + if ( attr & (1u<<16) ) + reg->attr.fields.p = 0; } /* Make sure that xen intercepts any FP accesses from current */ @@ -1059,10 +969,10 @@ static void vmx_stts(struct vcpu *v) * then this is not necessary: no FPU activity can occur until the guest * clears CR0.TS, and we will initialise the FPU when that happens. */ - if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) ) - { - v->arch.hvm_vmx.cpu_cr0 |= X86_CR0_TS; - __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0); + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) + { + v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_TS; + __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); __vm_set_bit(EXCEPTION_BITMAP, TRAP_no_device); } } @@ -1135,12 +1045,58 @@ static void vmx_update_host_cr3(struct v vmx_vmcs_exit(v); } -static void vmx_update_guest_cr3(struct vcpu *v) +static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) { ASSERT((v == current) || !vcpu_runnable(v)); + vmx_vmcs_enter(v); - __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3); + + switch ( cr ) + { + case 0: + v->arch.hvm_vcpu.hw_cr[0] = + v->arch.hvm_vcpu.guest_cr[0] | + X86_CR0_PE | X86_CR0_NE | X86_CR0_PG | X86_CR0_WP; + __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); + break; + case 2: + /* CR2 is updated in exit stub. 
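/*
 * Sketch of why case 2 can be a no-op: CR2 is not a VMCS field, so the
 * page-fault exit handler records the guest value and the entry stubs
 * (see the exits.S hunks below) reload the real register, roughly:
 */
v->arch.hvm_vcpu.guest_cr[2] = exit_qualification;   /* on #PF vmexit */
asm volatile ( "mov %0, %%cr2" : : "r" (v->arch.hvm_vcpu.guest_cr[2]) );
                                                     /* before vmentry */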
*/ + break; + case 3: + __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]); + break; + case 4: + v->arch.hvm_vcpu.hw_cr[4] = + v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK; + __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]); + __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]); + break; + default: + BUG(); + } + vmx_vmcs_exit(v); +} + +static void vmx_update_guest_efer(struct vcpu *v) +{ +#ifdef __x86_64__ + unsigned long vm_entry_value; + + ASSERT((v == current) || !vcpu_runnable(v)); + + vmx_vmcs_enter(v); + + vm_entry_value = __vmread(VM_ENTRY_CONTROLS); + if ( v->arch.hvm_vcpu.guest_efer & EFER_LMA ) + vm_entry_value |= VM_ENTRY_IA32E_MODE; + else + vm_entry_value &= ~VM_ENTRY_IA32E_MODE; + __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); + + vmx_vmcs_exit(v); +#endif } static void vmx_flush_guest_tlbs(void) @@ -1156,7 +1112,7 @@ static void vmx_inject_exception( struct vcpu *v = current; vmx_inject_hw_exception(v, trapnr, errcode); if ( trapnr == TRAP_page_fault ) - v->arch.hvm_vmx.cpu_cr2 = cr2; + v->arch.hvm_vcpu.guest_cr[2] = cr2; } static void vmx_update_vtpr(struct vcpu *v, unsigned long value) @@ -1200,17 +1156,13 @@ static struct hvm_function_table vmx_fun .load_cpu_guest_regs = vmx_load_cpu_guest_regs, .save_cpu_ctxt = vmx_save_vmcs_ctxt, .load_cpu_ctxt = vmx_load_vmcs_ctxt, - .paging_enabled = vmx_paging_enabled, - .long_mode_enabled = vmx_long_mode_enabled, - .pae_enabled = vmx_pae_enabled, - .nx_enabled = vmx_nx_enabled, .interrupts_enabled = vmx_interrupts_enabled, .guest_x86_mode = vmx_guest_x86_mode, - .get_guest_ctrl_reg = vmx_get_ctrl_reg, .get_segment_base = vmx_get_segment_base, .get_segment_register = vmx_get_segment_register, .update_host_cr3 = vmx_update_host_cr3, - .update_guest_cr3 = vmx_update_guest_cr3, + .update_guest_cr = vmx_update_guest_cr, + .update_guest_efer = vmx_update_guest_efer, .flush_guest_tlbs = vmx_flush_guest_tlbs, .update_vtpr = vmx_update_vtpr, .stts = vmx_stts, @@ -1315,10 +1267,10 @@ static void vmx_do_no_device_fault(void) __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device); /* Disable TS in guest CR0 unless the guest wants the exception too. */ - if ( !(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_TS) ) - { - v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS; - __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0); + if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) ) + { + v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS; + __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); } } @@ -1773,7 +1725,7 @@ static void vmx_do_str_pio(unsigned long sign = regs->eflags & X86_EFLAGS_DF ? 
-1 : 1; ar_bytes = __vmread(GUEST_CS_AR_BYTES); - if ( vmx_long_mode_enabled(current) && + if ( hvm_long_mode_enabled(current) && (ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) ) long_mode = 1; addr = __vmread(GUEST_LINEAR_ADDRESS); @@ -1900,9 +1852,9 @@ static void vmx_world_save(struct vcpu * c->esp = __vmread(GUEST_RSP); c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF; - c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0; - c->cr3 = v->arch.hvm_vmx.cpu_cr3; - c->cr4 = v->arch.hvm_vmx.cpu_shadow_cr4; + c->cr0 = v->arch.hvm_vcpu.guest_cr[0]; + c->cr3 = v->arch.hvm_vcpu.guest_cr[3]; + c->cr4 = v->arch.hvm_vcpu.guest_cr[4]; c->idtr_limit = __vmread(GUEST_IDTR_LIMIT); c->idtr_base = __vmread(GUEST_IDTR_BASE); @@ -1959,30 +1911,15 @@ static int vmx_world_restore(struct vcpu __vmwrite(GUEST_RSP, c->esp); __vmwrite(GUEST_RFLAGS, c->eflags); - v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0; - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0); - - if ( !vmx_paging_enabled(v) ) - goto skip_cr3; - - if ( c->cr3 == v->arch.hvm_vmx.cpu_cr3 ) - { - /* - * This is simple TLB flush, implying the guest has - * removed some translation or changed page attributes. - * We simply invalidate the shadow. - */ - mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT); - if ( mfn != pagetable_get_pfn(v->arch.guest_table) ) - goto bad_cr3; - } - else - { - /* - * If different, make a shadow. Check if the PDBR is valid - * first. - */ - HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %x", c->cr3); + v->arch.hvm_vcpu.guest_cr[0] = c->cr0; + v->arch.hvm_vcpu.guest_cr[3] = c->cr3; + v->arch.hvm_vcpu.guest_cr[4] = c->cr4; + vmx_update_guest_cr(v, 0); + vmx_update_guest_cr(v, 4); + + if ( hvm_paging_enabled(v) ) + { + HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 = %x", c->cr3); mfn = get_mfn_from_gpfn(c->cr3 >> PAGE_SHIFT); if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) goto bad_cr3; @@ -1990,18 +1927,7 @@ static int vmx_world_restore(struct vcpu v->arch.guest_table = pagetable_from_pfn(mfn); if ( old_base_mfn ) put_page(mfn_to_page(old_base_mfn)); - v->arch.hvm_vmx.cpu_cr3 = c->cr3; - } - - skip_cr3: - if ( !vmx_paging_enabled(v) ) - HVM_DBG_LOG(DBG_LEVEL_VMMU, "switching to vmxassist. use phys table"); - else - HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3); - - __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK)); - v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4; - __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4); + } __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit); __vmwrite(GUEST_IDTR_BASE, c->idtr_base); @@ -2149,108 +2075,17 @@ static int vmx_set_cr0(unsigned long val static int vmx_set_cr0(unsigned long value) { struct vcpu *v = current; - unsigned long mfn; unsigned long eip; - int paging_enabled; - unsigned long old_cr0; - unsigned long old_base_mfn; - - HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value); - - if ( (u32)value != value ) - { - HVM_DBG_LOG(DBG_LEVEL_1, - "Guest attempts to set upper 32 bits in CR0: %lx", - value); - vmx_inject_hw_exception(v, TRAP_gp_fault, 0); + int rc = hvm_set_cr0(value); + + if ( rc == 0 ) return 0; - } - - value &= ~HVM_CR0_GUEST_RESERVED_BITS; - - /* ET is reserved and should be always be 1. */ - value |= X86_CR0_ET; - - if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG ) - { - vmx_inject_hw_exception(v, TRAP_gp_fault, 0); - return 0; - } /* TS cleared? Then initialise FPU now. 
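/*
 * The validation deleted from both vendors is what the common
 * hvm_set_cr0() now performs; a condensed sketch reconstructed from the
 * removed code ("gp_fault" standing for inject-#GP-and-return-0):
 */
if ( (u32)value != value )                        /* bits 63:32 set */
    goto gp_fault;
value &= ~HVM_CR0_GUEST_RESERVED_BITS;
value |= X86_CR0_ET;                              /* ET is hardwired to 1 */
if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PG )
    goto gp_fault;                                /* PG requires PE */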
*/ if ( !(value & X86_CR0_TS) ) { setup_fpu(v); __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device); - } - - old_cr0 = v->arch.hvm_vmx.cpu_shadow_cr0; - paging_enabled = old_cr0 & X86_CR0_PG; - - v->arch.hvm_vmx.cpu_cr0 = (value | X86_CR0_PE | X86_CR0_PG - | X86_CR0_NE | X86_CR0_WP); - __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0); - - v->arch.hvm_vmx.cpu_shadow_cr0 = value; - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0); - - /* Trying to enable paging. */ - if ( (value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled ) - { - if ( vmx_lme_is_set(v) && !vmx_long_mode_enabled(v) ) - { - if ( !(v->arch.hvm_vmx.cpu_shadow_cr4 & X86_CR4_PAE) ) - { - HVM_DBG_LOG(DBG_LEVEL_1, "Guest enabled paging " - "with EFER.LME set but not CR4.PAE"); - vmx_inject_hw_exception(v, TRAP_gp_fault, 0); - return 0; - } - - HVM_DBG_LOG(DBG_LEVEL_1, "Enabling long mode"); - vmx_enable_long_mode(v); - } - - /* - * The guest CR3 must be pointing to the guest physical. - */ - mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT); - if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) - { - gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", - v->arch.hvm_vmx.cpu_cr3, mfn); - domain_crash(v->domain); - return 0; - } - - /* - * Now arch.guest_table points to machine physical. - */ - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_from_pfn(mfn); - if ( old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - - HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", - v->arch.hvm_vmx.cpu_cr3, mfn); - - paging_update_paging_modes(v); - } - - /* Trying to disable paging. */ - if ( ((value & (X86_CR0_PE | X86_CR0_PG)) != (X86_CR0_PE | X86_CR0_PG)) && - paging_enabled ) - { - /* When CR0.PG is cleared, LMA is cleared immediately. */ - if ( vmx_long_mode_enabled(v) ) - vmx_disable_long_mode(v); - - if ( v->arch.hvm_vmx.cpu_cr3 ) - { - put_page(mfn_to_page(get_mfn_from_gpfn( - v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT))); - v->arch.guest_table = pagetable_null(); - } } /* @@ -2258,14 +2093,8 @@ static int vmx_set_cr0(unsigned long val * real-mode by performing a world switch to VMXAssist whenever * a partition disables the CR0.PE bit. */ - if ( (value & X86_CR0_PE) == 0 ) - { - if ( value & X86_CR0_PG ) - { - vmx_inject_hw_exception(v, TRAP_gp_fault, 0); - return 0; - } - + if ( !(value & X86_CR0_PE) ) + { if ( vmx_assist(v, VMX_ASSIST_INVOKE) ) { eip = __vmread(GUEST_RIP); @@ -2286,8 +2115,6 @@ static int vmx_set_cr0(unsigned long val return 0; /* do not update eip! */ } } - else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) - paging_update_paging_modes(v); return 1; } @@ -2316,12 +2143,9 @@ static int vmx_set_cr0(unsigned long val CASE_ ## T ## ET_REG(R15, r15) #endif -/* - * Write to control registers - */ static int mov_to_cr(int gp, int cr, struct cpu_user_regs *regs) { - unsigned long value, old_cr, old_base_mfn, mfn; + unsigned long value; struct vcpu *v = current; struct vlapic *vlapic = vcpu_vlapic(v); @@ -2353,108 +2177,10 @@ static int mov_to_cr(int gp, int cr, str return vmx_set_cr0(value); case 3: - /* - * If paging is not enabled yet, simply copy the value to CR3. - */ - if ( !vmx_paging_enabled(v) ) - { - v->arch.hvm_vmx.cpu_cr3 = value; - break; - } - - /* - * We make a new one if the shadow does not exist. - */ - if ( value == v->arch.hvm_vmx.cpu_cr3 ) { - /* - * This is simple TLB flush, implying the guest has - * removed some translation or changed page attributes. - * We simply invalidate the shadow. 
- */ - mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); - if ( mfn != pagetable_get_pfn(v->arch.guest_table) ) - goto bad_cr3; - paging_update_cr3(v); - } else { - /* - * If different, make a shadow. Check if the PDBR is valid - * first. - */ - HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); - mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); - if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) - goto bad_cr3; - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_from_pfn(mfn); - if ( old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - v->arch.hvm_vmx.cpu_cr3 = value; - update_cr3(v); - HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); - } - break; - - case 4: /* CR4 */ - old_cr = v->arch.hvm_vmx.cpu_shadow_cr4; - - if ( value & HVM_CR4_GUEST_RESERVED_BITS ) - { - HVM_DBG_LOG(DBG_LEVEL_1, - "Guest attempts to set reserved bit in CR4: %lx", - value); - vmx_inject_hw_exception(v, TRAP_gp_fault, 0); - return 0; - } - - if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) ) - { - if ( vmx_pgbit_test(v) ) - { -#if CONFIG_PAGING_LEVELS >= 3 - /* The guest is a 32-bit PAE guest. */ - unsigned long mfn, old_base_mfn; - mfn = get_mfn_from_gpfn(v->arch.hvm_vmx.cpu_cr3 >> PAGE_SHIFT); - if ( !mfn_valid(mfn) || - !get_page(mfn_to_page(mfn), v->domain) ) - goto bad_cr3; - - /* - * Now arch.guest_table points to machine physical. - */ - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); - v->arch.guest_table = pagetable_from_pfn(mfn); - if ( old_base_mfn ) - put_page(mfn_to_page(old_base_mfn)); - - HVM_DBG_LOG(DBG_LEVEL_VMMU, - "Update CR3 value = %lx, mfn = %lx", - v->arch.hvm_vmx.cpu_cr3, mfn); -#endif - } - } - else if ( !(value & X86_CR4_PAE) ) - { - if ( unlikely(vmx_long_mode_enabled(v)) ) - { - HVM_DBG_LOG(DBG_LEVEL_1, "Guest cleared CR4.PAE while " - "EFER.LMA is set"); - vmx_inject_hw_exception(v, TRAP_gp_fault, 0); - return 0; - } - } - - __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK); - v->arch.hvm_vmx.cpu_shadow_cr4 = value; - __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4); - - /* - * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates - * all TLB entries except global entries. 
- */ - if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) - paging_update_paging_modes(v); - - break; + return hvm_set_cr3(value); + + case 4: + return hvm_set_cr4(value); case 8: vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4)); @@ -2462,14 +2188,11 @@ static int mov_to_cr(int gp, int cr, str default: gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr); - domain_crash(v->domain); - return 0; + goto exit_and_crash; } return 1; - bad_cr3: - gdprintk(XENLOG_ERR, "Invalid CR3\n"); exit_and_crash: domain_crash(v->domain); return 0; @@ -2487,7 +2210,7 @@ static void mov_from_cr(int cr, int gp, switch ( cr ) { case 3: - value = (unsigned long)v->arch.hvm_vmx.cpu_cr3; + value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3]; break; case 8: value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI); @@ -2530,7 +2253,8 @@ static int vmx_cr_access(unsigned long e unsigned long value; struct vcpu *v = current; - switch ( exit_qualification & CONTROL_REG_ACCESS_TYPE ) { + switch ( exit_qualification & CONTROL_REG_ACCESS_TYPE ) + { case TYPE_MOV_TO_CR: gp = exit_qualification & CONTROL_REG_ACCESS_REG; cr = exit_qualification & CONTROL_REG_ACCESS_NUM; @@ -2545,14 +2269,14 @@ static int vmx_cr_access(unsigned long e setup_fpu(v); __vm_clear_bit(EXCEPTION_BITMAP, TRAP_no_device); - v->arch.hvm_vmx.cpu_cr0 &= ~X86_CR0_TS; /* clear TS */ - __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0); - - v->arch.hvm_vmx.cpu_shadow_cr0 &= ~X86_CR0_TS; /* clear TS */ - __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0); + v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS; /* clear TS */ + __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]); + + v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */ + __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]); break; case TYPE_LMSW: - value = v->arch.hvm_vmx.cpu_shadow_cr0; + value = v->arch.hvm_vcpu.guest_cr[0]; value = (value & ~0xF) | (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF); return vmx_set_cr0(value); @@ -2943,7 +2667,7 @@ asmlinkage void vmx_vmexit_handler(struc break; } - v->arch.hvm_vmx.cpu_cr2 = exit_qualification; + v->arch.hvm_vcpu.guest_cr[2] = exit_qualification; vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code); break; case TRAP_nmi: diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/hvm/vmx/x86_32/exits.S --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/x86_32/exits.S Thu Aug 16 10:47:33 2007 -0600 @@ -74,7 +74,7 @@ ENTRY(vmx_asm_do_vmentry) jnz vmx_process_softirqs call vmx_intr_assist - movl VCPU_vmx_cr2(%ebx),%eax + movl VCPU_hvm_guest_cr2(%ebx),%eax movl %eax,%cr2 call vmx_trace_vmentry diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/hvm/vmx/x86_64/exits.S --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/hvm/vmx/x86_64/exits.S Thu Aug 16 10:47:33 2007 -0600 @@ -88,7 +88,7 @@ ENTRY(vmx_asm_do_vmentry) jnz vmx_process_softirqs call vmx_intr_assist - movq VCPU_vmx_cr2(%rbx),%rax + movq VCPU_hvm_guest_cr2(%rbx),%rax movq %rax,%cr2 call vmx_trace_vmentry diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/mm.c Thu Aug 16 10:47:33 2007 -0600 @@ -394,8 +394,8 @@ void write_ptbase(struct vcpu *v) write_cr3(v->arch.cr3); } -/* Should be called after CR3 is updated. - * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. +/* + * Should be called after CR3 is updated. 
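/*
 * Sketch of the invariant this changeset establishes around that update:
 * guest_cr[3] holds what the guest wrote, hw_cr[3] what hardware loads.
 * Condensed from the hap.c and shadow/multi.c hunks below:
 */
if ( paging_mode_hap(v->domain) )
    v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
else  /* shadow: top-level shadow table (PAE uses the l3table instead) */
    v->arch.hvm_vcpu.hw_cr[3] =
        pagetable_get_paddr(v->arch.shadow_table[0]);
hvm_update_guest_cr(v, 3);    /* push hw_cr[3] into the VMCS/VMCB */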
* * Uses values found in vcpu->arch.(guest_table and guest_table_user), and * for HVM guests, arch.monitor_table and hvm's guest CR3. diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/mm/hap/guest_walk.c --- a/xen/arch/x86/mm/hap/guest_walk.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/mm/hap/guest_walk.c Thu Aug 16 10:47:33 2007 -0600 @@ -62,7 +62,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)( struct vcpu *v, unsigned long gva) { - unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3); + unsigned long gcr3 = v->arch.hvm_vcpu.guest_cr[3]; int mode = GUEST_PAGING_LEVELS; int lev, index; paddr_t gpa = 0; diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/mm/hap/hap.c --- a/xen/arch/x86/mm/hap/hap.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/mm/hap/hap.c Thu Aug 16 10:47:33 2007 -0600 @@ -603,47 +603,36 @@ static int hap_invlpg(struct vcpu *v, un return 0; } -/* - * HAP guests do not need to take any action on CR3 writes (they are still - * intercepted, so that Xen's copy of the guest's CR3 can be kept in sync.) - */ static void hap_update_cr3(struct vcpu *v, int do_locking) { + v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3]; + hvm_update_guest_cr(v, 3); } static void hap_update_paging_modes(struct vcpu *v) { - struct domain *d; - - d = v->domain; + struct domain *d = v->domain; + hap_lock(d); - /* update guest paging mode. Note that we rely on hvm functions to detect - * guest's paging mode. So, make sure the shadow registers (CR0, CR4, EFER) - * reflect guest's status correctly. - */ - if ( hvm_paging_enabled(v) ) - { - if ( hvm_long_mode_enabled(v) ) - v->arch.paging.mode = &hap_paging_long_mode; - else if ( hvm_pae_enabled(v) ) - v->arch.paging.mode = &hap_paging_pae_mode; - else - v->arch.paging.mode = &hap_paging_protected_mode; - } - else - { - v->arch.paging.mode = &hap_paging_real_mode; - } - - v->arch.paging.translate_enabled = !!hvm_paging_enabled(v); + v->arch.paging.mode = + !hvm_paging_enabled(v) ? &hap_paging_real_mode : + hvm_long_mode_enabled(v) ? &hap_paging_long_mode : + hvm_pae_enabled(v) ? &hap_paging_pae_mode : + &hap_paging_protected_mode; + + v->arch.paging.translate_enabled = hvm_paging_enabled(v); if ( pagetable_is_null(v->arch.monitor_table) ) { mfn_t mmfn = hap_make_monitor_table(v); v->arch.monitor_table = pagetable_from_mfn(mmfn); make_cr3(v, mfn_x(mmfn)); - } + hvm_update_host_cr3(v); + } + + /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */ + hap_update_cr3(v, 0); hap_unlock(d); } diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/mm/shadow/common.c Thu Aug 16 10:47:33 2007 -0600 @@ -2266,7 +2266,7 @@ static void sh_update_paging_modes(struc ASSERT(shadow_mode_translate(d)); ASSERT(shadow_mode_external(d)); - v->arch.paging.translate_enabled = !!hvm_paging_enabled(v); + v->arch.paging.translate_enabled = hvm_paging_enabled(v); if ( !v->arch.paging.translate_enabled ) { /* Set v->arch.guest_table to use the p2m map, and choose @@ -2347,7 +2347,7 @@ static void sh_update_paging_modes(struc SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u " "(was g=%u s=%u)\n", d->domain_id, v->vcpu_id, - is_hvm_domain(d) ? !!hvm_paging_enabled(v) : 1, + is_hvm_domain(d) ? hvm_paging_enabled(v) : 1, v->arch.paging.mode->guest_levels, v->arch.paging.mode->shadow.shadow_levels, old_mode ? 
old_mode->guest_levels : 0, diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Aug 16 10:47:33 2007 -0600 @@ -175,7 +175,7 @@ guest_supports_superpages(struct vcpu *v /* The _PAGE_PSE bit must be honoured in HVM guests, whenever * CR4.PSE is set or the guest is in PAE or long mode */ return (is_hvm_vcpu(v) && (GUEST_PAGING_LEVELS != 2 - || (hvm_get_guest_ctrl_reg(v, 4) & X86_CR4_PSE))); + || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE))); } static inline int @@ -3483,7 +3483,7 @@ sh_update_cr3(struct vcpu *v, int do_loc * Paravirtual guests should set v->arch.guest_table (and guest_table_user, * if appropriate). * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works; - * this function will call hvm_update_guest_cr3() to tell them where the + * this function will call hvm_update_guest_cr(v, 3) to tell them where the * shadow tables are. * If do_locking != 0, assume we are being called from outside the * shadow code, and must take and release the shadow lock; otherwise @@ -3525,7 +3525,7 @@ sh_update_cr3(struct vcpu *v, int do_loc // Is paging enabled on this vcpu? if ( paging_vcpu_mode_translate(v) ) { - gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3))); + gfn = _gfn(paddr_to_pfn(v->arch.hvm_vcpu.guest_cr[3])); gmfn = vcpu_gfn_to_mfn(v, gfn); ASSERT(mfn_valid(gmfn)); ASSERT(pagetable_get_pfn(v->arch.guest_table) == mfn_x(gmfn)); @@ -3576,11 +3576,11 @@ sh_update_cr3(struct vcpu *v, int do_loc if ( shadow_mode_external(d) && paging_vcpu_mode_translate(v) ) /* Paging enabled: find where in the page the l3 table is */ - guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3)); - else - /* Paging disabled or PV: l3 is at the start of a page */ - guest_idx = 0; - + guest_idx = guest_index((void *)v->arch.hvm_vcpu.guest_cr[3]); + else + /* Paging disabled or PV: l3 is at the start of a page */ + guest_idx = 0; + // Ignore the low 2 bits of guest_idx -- they are really just // cache control. guest_idx &= ~3; @@ -3718,18 +3718,21 @@ sh_update_cr3(struct vcpu *v, int do_loc /// - /// v->arch.hvm_vcpu.hw_cr3 + /// v->arch.hvm_vcpu.hw_cr[3] /// if ( shadow_mode_external(d) ) { ASSERT(is_hvm_domain(d)); #if SHADOW_PAGING_LEVELS == 3 /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */ - hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.paging.shadow.l3table)); + v->arch.hvm_vcpu.hw_cr[3] = + virt_to_maddr(&v->arch.paging.shadow.l3table); #else /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ - hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.shadow_table[0])); -#endif + v->arch.hvm_vcpu.hw_cr[3] = + pagetable_get_paddr(v->arch.shadow_table[0]); +#endif + hvm_update_guest_cr(v, 3); } /* Fix up the linear pagetable mappings */ diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/physdev.c --- a/xen/arch/x86/physdev.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/physdev.c Thu Aug 16 10:47:33 2007 -0600 @@ -28,6 +28,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H { int irq; ret_t ret; + struct vcpu *v = current; switch ( cmd ) { @@ -36,13 +37,13 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H ret = -EFAULT; if ( copy_from_guest(&eoi, arg, 1) != 0 ) break; - ret = pirq_guest_eoi(current->domain, eoi.irq); + ret = pirq_guest_eoi(v->domain, eoi.irq); break; } /* Legacy since 0x00030202. 
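/*
 * The recurring cleanup in this physdev hunk, shown once (sketch): hoist
 * the `current` macro into a local so later uses are plain pointer reads
 * rather than repeated expansions of the per-CPU lookup:
 */
struct vcpu *v = current;                   /* evaluate once at entry */
ret = pirq_guest_eoi(v->domain, eoi.irq);   /* was current->domain    */
ret = pirq_guest_unmask(v->domain);         /* likewise               */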
*/ case PHYSDEVOP_IRQ_UNMASK_NOTIFY: { - ret = pirq_guest_unmask(current->domain); + ret = pirq_guest_unmask(v->domain); break; } @@ -70,7 +71,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( copy_from_guest(&apic, arg, 1) != 0 ) break; ret = -EPERM; - if ( !IS_PRIV(current->domain) ) + if ( !IS_PRIV(v->domain) ) break; ret = ioapic_guest_read(apic.apic_physbase, apic.reg, &apic.value); if ( copy_to_guest(arg, &apic, 1) != 0 ) @@ -84,7 +85,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( copy_from_guest(&apic, arg, 1) != 0 ) break; ret = -EPERM; - if ( !IS_PRIV(current->domain) ) + if ( !IS_PRIV(v->domain) ) break; ret = ioapic_guest_write(apic.apic_physbase, apic.reg, apic.value); break; @@ -98,7 +99,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H break; ret = -EPERM; - if ( !IS_PRIV(current->domain) ) + if ( !IS_PRIV(v->domain) ) break; irq = irq_op.irq; @@ -120,7 +121,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( set_iopl.iopl > 3 ) break; ret = 0; - current->arch.iopl = set_iopl.iopl; + v->arch.iopl = set_iopl.iopl; break; } @@ -135,11 +136,11 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H break; ret = 0; #ifndef COMPAT - current->arch.iobmp = set_iobitmap.bitmap; + v->arch.iobmp = set_iobitmap.bitmap; #else - guest_from_compat_handle(current->arch.iobmp, set_iobitmap.bitmap); + guest_from_compat_handle(v->arch.iobmp, set_iobitmap.bitmap); #endif - current->arch.iobmp_limit = set_iobitmap.nr_ports; + v->arch.iobmp_limit = set_iobitmap.nr_ports; break; } diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/setup.c Thu Aug 16 10:47:33 2007 -0600 @@ -106,6 +106,8 @@ extern void trap_init(void); extern void trap_init(void); extern void early_time_init(void); extern void early_cpu_init(void); +extern void vesa_init(void); +extern void vesa_mtrr_init(void); struct tss_struct init_tss[NR_CPUS]; @@ -282,9 +284,28 @@ static void __init srat_detect_node(int printk(KERN_INFO "CPU %d APIC %d -> Node %d\n", cpu, apicid, node); } +/* + * Ensure a given physical memory range is present in the bootstrap mappings. + * Use superpage mappings to ensure that pagetable memory needn't be allocated. 
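/*
 * Worked example of the rounding used below (L2_PAGETABLE_SHIFT is 21,
 * i.e. 2MiB superpages, on x86-64):
 */
unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;   /* 0x1fffff */
start = 0x00345678UL & ~mask;             /* rounds down to 0x00200000 */
end   = (0x00789abcUL + mask) & ~mask;    /* rounds up   to 0x00800000 */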
+ */ +static void __init bootstrap_map(unsigned long start, unsigned long end) +{ + unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1; + start = start & ~mask; + end = (end + mask) & ~mask; + if ( end > BOOTSTRAP_DIRECTMAP_END ) + panic("Cannot access memory beyond end of " + "bootstrap direct-map area\n"); + map_pages_to_xen( + (unsigned long)maddr_to_bootstrap_virt(start), + start >> PAGE_SHIFT, (end-start) >> PAGE_SHIFT, PAGE_HYPERVISOR); +} + static void __init move_memory( unsigned long dst, unsigned long src_start, unsigned long src_end) { + bootstrap_map(src_start, src_end); + bootstrap_map(dst, dst + src_end - src_start); memmove(maddr_to_bootstrap_virt(dst), maddr_to_bootstrap_virt(src_start), src_end - src_start); @@ -882,6 +903,7 @@ void __init __start_xen(unsigned long mb #ifdef __x86_64__ init_xenheap_pages(xen_phys_start, __pa(&_start)); nr_pages += (__pa(&_start) - xen_phys_start) >> PAGE_SHIFT; + vesa_init(); #endif xenheap_phys_start = xen_phys_start; printk("Xen heap: %luMB (%lukB)\n", @@ -947,6 +969,9 @@ void __init __start_xen(unsigned long mb set_in_cr4(X86_CR4_OSFXSR); if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT); +#ifdef CONFIG_X86_64 + vesa_mtrr_init(); +#endif if ( opt_nosmp ) max_cpus = 0; diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/string.c --- a/xen/arch/x86/string.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/string.c Thu Aug 16 10:47:33 2007 -0600 @@ -11,10 +11,18 @@ #undef memcpy void *memcpy(void *dest, const void *src, size_t n) { - int d0, d1, d2; + long d0, d1, d2; __asm__ __volatile__ ( - " rep ; movsl ; " +#ifdef __i386__ + " rep movsl ; " +#else + " rep movsq ; " + " testb $4,%b4 ; " + " je 0f ; " + " movsl ; " + "0: ; " +#endif " testb $2,%b4 ; " " je 1f ; " " movsw ; " @@ -23,7 +31,7 @@ void *memcpy(void *dest, const void *src " movsb ; " "2: " : "=&c" (d0), "=&D" (d1), "=&S" (d2) - : "0" (n/4), "q" (n), "1" (dest), "2" (src) + : "0" (n/sizeof(long)), "q" (n), "1" (dest), "2" (src) : "memory"); return dest; @@ -32,10 +40,10 @@ void *memcpy(void *dest, const void *src #undef memset void *memset(void *s, int c, size_t n) { - int d0, d1; + long d0, d1; __asm__ __volatile__ ( - "rep ; stosb" + "rep stosb" : "=&c" (d0), "=&D" (d1) : "a" (c), "1" (s), "0" (n) : "memory"); @@ -46,14 +54,14 @@ void *memset(void *s, int c, size_t n) #undef memmove void *memmove(void *dest, const void *src, size_t n) { - int d0, d1, d2; + long d0, d1, d2; if ( dest < src ) return memcpy(dest, src, n); __asm__ __volatile__ ( " std ; " - " rep ; movsb ; " + " rep movsb ; " " cld " : "=&c" (d0), "=&S" (d1), "=&D" (d2) : "0" (n), "1" (n-1+(const char *)src), "2" (n-1+(char *)dest) diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/traps.c Thu Aug 16 10:47:33 2007 -0600 @@ -1219,7 +1219,7 @@ static int emulate_privileged_op(struct unsigned long code_base, code_limit; char io_emul_stub[16]; void (*io_emul)(struct cpu_user_regs *) __attribute__((__regparm__(1))); - u32 l, h; + u32 l, h, eax, edx; if ( !read_descriptor(regs->cs, v, regs, &code_base, &code_limit, &ar, @@ -1696,43 +1696,43 @@ static int emulate_privileged_op(struct break; case 0x30: /* WRMSR */ + eax = regs->eax; + edx = regs->edx; + res = ((u64)edx << 32) | eax; switch ( regs->ecx ) { #ifdef CONFIG_X86_64 case MSR_FS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) ) + if ( wrmsr_safe(MSR_FS_BASE, eax, edx) ) goto fail; - v->arch.guest_context.fs_base = - 
((u64)regs->edx << 32) | regs->eax; + v->arch.guest_context.fs_base = res; break; case MSR_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) ) + if ( wrmsr_safe(MSR_GS_BASE, eax, edx) ) goto fail; - v->arch.guest_context.gs_base_kernel = - ((u64)regs->edx << 32) | regs->eax; + v->arch.guest_context.gs_base_kernel = res; break; case MSR_SHADOW_GS_BASE: if ( is_pv_32on64_vcpu(v) ) goto fail; - if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) ) + if ( wrmsr_safe(MSR_SHADOW_GS_BASE, eax, edx) ) goto fail; - v->arch.guest_context.gs_base_user = - ((u64)regs->edx << 32) | regs->eax; + v->arch.guest_context.gs_base_user = res; break; #endif default: - if ( wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx) ) + if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) ) break; if ( (rdmsr_safe(regs->ecx, l, h) != 0) || - (regs->eax != l) || (regs->edx != h) ) + (eax != l) || (edx != h) ) gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from " - "%08x:%08x to %08lx:%08lx.\n", - _p(regs->ecx), h, l, (long)regs->edx, (long)regs->eax); + "%08x:%08x to %08x:%08x.\n", + _p(regs->ecx), h, l, edx, eax); break; } break; diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/x86_32/asm-offsets.c --- a/xen/arch/x86/x86_32/asm-offsets.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/x86_32/asm-offsets.c Thu Aug 16 10:47:33 2007 -0600 @@ -85,7 +85,7 @@ void __dummy__(void) BLANK(); OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched); - OFFSET(VCPU_vmx_cr2, struct vcpu, arch.hvm_vmx.cpu_cr2); + OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]); BLANK(); OFFSET(VMCB_rax, struct vmcb_struct, rax); diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/x86_32/traps.c --- a/xen/arch/x86/x86_32/traps.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/x86_32/traps.c Thu Aug 16 10:47:33 2007 -0600 @@ -172,6 +172,7 @@ unsigned long do_iret(void) unsigned long do_iret(void) { struct cpu_user_regs *regs = guest_cpu_user_regs(); + struct vcpu *v = current; u32 eflags; /* Check worst-case stack frame for overlap with Xen protected area. */ @@ -215,10 +216,10 @@ unsigned long do_iret(void) } /* No longer in NMI context. */ - current->nmi_masked = 0; + v->nmi_masked = 0; /* Restore upcall mask from supplied EFLAGS.IF. 
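/*
 * Semantics sketch: a PV guest's virtual IF is its event-channel upcall
 * mask, so restoring EFLAGS.IF on iret is just a mask update:
 */
vcpu_info(v, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF);
/* IF = 1 -> mask = 0 (events deliverable); IF = 0 -> mask = 1 */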
*/ - current->vcpu_info->evtchn_upcall_mask = !(eflags & X86_EFLAGS_IF); + vcpu_info(v, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF); /* * The hypercall exit path will overwrite EAX with this return @@ -228,7 +229,7 @@ unsigned long do_iret(void) exit_and_crash: gdprintk(XENLOG_ERR, "Fatal error\n"); - domain_crash(current->domain); + domain_crash(v->domain); return 0; } diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/x86_64/asm-offsets.c --- a/xen/arch/x86/x86_64/asm-offsets.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/x86_64/asm-offsets.c Thu Aug 16 10:47:33 2007 -0600 @@ -88,7 +88,7 @@ void __dummy__(void) BLANK(); OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched); - OFFSET(VCPU_vmx_cr2, struct vcpu, arch.hvm_vmx.cpu_cr2); + OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]); BLANK(); OFFSET(DOMAIN_is_32bit_pv, struct domain, arch.is_32bit_pv); diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/x86_64/compat/traps.c --- a/xen/arch/x86/x86_64/compat/traps.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/x86_64/compat/traps.c Thu Aug 16 10:47:33 2007 -0600 @@ -37,6 +37,7 @@ unsigned int compat_iret(void) unsigned int compat_iret(void) { struct cpu_user_regs *regs = guest_cpu_user_regs(); + struct vcpu *v = current; u32 eflags; /* Trim stack pointer to 32 bits. */ @@ -70,7 +71,7 @@ unsigned int compat_iret(void) * mode frames). */ const struct trap_info *ti; - u32 x, ksp = current->arch.guest_context.kernel_sp - 40; + u32 x, ksp = v->arch.guest_context.kernel_sp - 40; unsigned int i; int rc = 0; @@ -95,9 +96,9 @@ unsigned int compat_iret(void) if ( rc ) goto exit_and_crash; regs->_esp = ksp; - regs->ss = current->arch.guest_context.kernel_ss; - - ti = &current->arch.guest_context.trap_ctxt[13]; + regs->ss = v->arch.guest_context.kernel_ss; + + ti = &v->arch.guest_context.trap_ctxt[13]; if ( TI_GET_IF(ti) ) eflags &= ~X86_EFLAGS_IF; regs->_eflags = eflags & ~(X86_EFLAGS_VM|X86_EFLAGS_RF| @@ -121,10 +122,10 @@ unsigned int compat_iret(void) regs->_esp += 16; /* No longer in NMI context. */ - current->nmi_masked = 0; + v->nmi_masked = 0; /* Restore upcall mask from supplied EFLAGS.IF. */ - vcpu_info(current, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF); + vcpu_info(v, evtchn_upcall_mask) = !(eflags & X86_EFLAGS_IF); /* * The hypercall exit path will overwrite EAX with this return @@ -134,11 +135,12 @@ unsigned int compat_iret(void) exit_and_crash: gdprintk(XENLOG_ERR, "Fatal error\n"); - domain_crash(current->domain); + domain_crash(v->domain); return 0; } -static long compat_register_guest_callback(struct compat_callback_register *reg) +static long compat_register_guest_callback( + struct compat_callback_register *reg) { long ret = 0; struct vcpu *v = current; @@ -175,7 +177,8 @@ static long compat_register_guest_callba return ret; } -static long compat_unregister_guest_callback(struct compat_callback_unregister *unreg) +static long compat_unregister_guest_callback( + struct compat_callback_unregister *unreg) { long ret; diff -r b5dbf184df6c -r 778985f246a0 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/arch/x86/x86_64/traps.c Thu Aug 16 10:47:33 2007 -0600 @@ -235,10 +235,10 @@ unsigned long do_iret(void) } /* No longer in NMI context. */ - current->nmi_masked = 0; + v->nmi_masked = 0; /* Restore upcall mask from supplied EFLAGS.IF. 
*/ - vcpu_info(current, evtchn_upcall_mask) = !(iret_saved.rflags & EF_IE); + vcpu_info(v, evtchn_upcall_mask) = !(iret_saved.rflags & EF_IE); /* Saved %rax gets written back to regs->rax in entry.S. */ return iret_saved.rax; diff -r b5dbf184df6c -r 778985f246a0 xen/common/domctl.c --- a/xen/common/domctl.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/common/domctl.c Thu Aug 16 10:47:33 2007 -0600 @@ -463,19 +463,13 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc case XEN_DOMCTL_getdomaininfo: { struct domain *d; - domid_t dom; - - dom = op->domain; - if ( dom == DOMID_SELF ) - dom = current->domain->domain_id; + domid_t dom = op->domain; rcu_read_lock(&domlist_read_lock); for_each_domain ( d ) - { if ( d->domain_id >= dom ) break; - } if ( d == NULL ) { diff -r b5dbf184df6c -r 778985f246a0 xen/common/page_alloc.c --- a/xen/common/page_alloc.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/common/page_alloc.c Thu Aug 16 10:47:33 2007 -0600 @@ -54,21 +54,14 @@ boolean_param("bootscrub", opt_bootscrub /* * Bit width of the DMA heap. */ -static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE; -static unsigned long max_dma_mfn = (1UL<<(CONFIG_DMA_BITSIZE-PAGE_SHIFT))-1; +static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE; static void __init parse_dma_bits(char *s) { unsigned int v = simple_strtol(s, NULL, 0); if ( v >= (BITS_PER_LONG + PAGE_SHIFT) ) - { dma_bitsize = BITS_PER_LONG + PAGE_SHIFT; - max_dma_mfn = ~0UL; - } else if ( v > PAGE_SHIFT + 1 ) - { dma_bitsize = v; - max_dma_mfn = (1UL << (dma_bitsize - PAGE_SHIFT)) - 1; - } else printk("Invalid dma_bits value of %u ignored.\n", v); } diff -r b5dbf184df6c -r 778985f246a0 xen/common/xencomm.c --- a/xen/common/xencomm.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/common/xencomm.c Thu Aug 16 10:47:33 2007 -0600 @@ -26,35 +26,36 @@ #include <public/xen.h> #include <public/xencomm.h> - #undef DEBUG #ifdef DEBUG -static int xencomm_debug = 1; /* extremely verbose */ +#define xc_dprintk(f, a...) printk("[xencomm]" f , ## a) #else -#define xencomm_debug 0 +#define xc_dprintk(f, a...) ((void)0) #endif +static void* +xencomm_maddr_to_vaddr(unsigned long maddr) +{ + return maddr ? maddr_to_virt(maddr) : NULL; +} + static unsigned long -xencomm_inline_from_guest(void *to, const void *from, unsigned int n, - unsigned int skip) -{ - unsigned long src_paddr = xencomm_inline_addr(from); - - src_paddr += skip; - - while (n > 0) { - unsigned int chunksz; +xencomm_inline_from_guest( + void *to, const void *from, unsigned int n, unsigned int skip) +{ + unsigned long src_paddr = xencomm_inline_addr(from) + skip; + + while ( n > 0 ) + { + unsigned int chunksz, bytes; unsigned long src_maddr; - unsigned int bytes; chunksz = PAGE_SIZE - (src_paddr % PAGE_SIZE); - - bytes = min(chunksz, n); + bytes = min(chunksz, n); src_maddr = paddr_to_maddr(src_paddr); - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", src_maddr, bytes, (unsigned long)to); - memcpy(to, (void *)src_maddr, bytes); + xc_dprintk("%lx[%d] -> %lx\n", src_maddr, bytes, (unsigned long)to); + memcpy(to, maddr_to_virt(src_maddr), bytes); src_paddr += bytes; to += bytes; n -= bytes; @@ -77,36 +78,40 @@ xencomm_inline_from_guest(void *to, cons * On success, this will be zero. 
*/ unsigned long -xencomm_copy_from_guest(void *to, const void *from, unsigned int n, - unsigned int skip) +xencomm_copy_from_guest( + void *to, const void *from, unsigned int n, unsigned int skip) { struct xencomm_desc *desc; unsigned int from_pos = 0; unsigned int to_pos = 0; unsigned int i = 0; - if (xencomm_is_inline(from)) + if ( xencomm_is_inline(from) ) return xencomm_inline_from_guest(to, from, n, skip); - /* first we need to access the descriptor */ - desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)from); - if (desc == NULL) - return n; - - if (desc->magic != XENCOMM_MAGIC) { + /* First we need to access the descriptor. */ + desc = (struct xencomm_desc *) + xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)from)); + if ( desc == NULL ) + return n; + + if ( desc->magic != XENCOMM_MAGIC ) + { printk("%s: error: %p magic was 0x%x\n", __func__, desc, desc->magic); return n; } - /* iterate through the descriptor, copying up to a page at a time */ - while ((to_pos < n) && (i < desc->nr_addrs)) { + /* Iterate through the descriptor, copying up to a page at a time. */ + while ( (to_pos < n) && (i < desc->nr_addrs) ) + { unsigned long src_paddr = desc->address[i]; unsigned int pgoffset; unsigned int chunksz; unsigned int chunk_skip; - if (src_paddr == XENCOMM_INVALID) { + if ( src_paddr == XENCOMM_INVALID ) + { i++; continue; } @@ -119,18 +124,18 @@ xencomm_copy_from_guest(void *to, const chunksz -= chunk_skip; skip -= chunk_skip; - if (skip == 0 && chunksz > 0) { + if ( (skip == 0) && (chunksz > 0) ) + { unsigned long src_maddr; unsigned long dest = (unsigned long)to + to_pos; unsigned int bytes = min(chunksz, n - to_pos); src_maddr = paddr_to_maddr(src_paddr + chunk_skip); - if (src_maddr == 0) + if ( src_maddr == 0 ) return n - to_pos; - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", src_maddr, bytes, dest); - memcpy((void *)dest, (void *)src_maddr, bytes); + xc_dprintk("%lx[%d] -> %lx\n", src_maddr, bytes, dest); + memcpy((void *)dest, maddr_to_virt(src_maddr), bytes); from_pos += bytes; to_pos += bytes; } @@ -142,32 +147,28 @@ xencomm_copy_from_guest(void *to, const } static unsigned long -xencomm_inline_to_guest(void *to, const void *from, unsigned int n, - unsigned int skip) -{ - unsigned long dest_paddr = xencomm_inline_addr(to); - - dest_paddr += skip; - - while (n > 0) { - unsigned int chunksz; +xencomm_inline_to_guest( + void *to, const void *from, unsigned int n, unsigned int skip) +{ + unsigned long dest_paddr = xencomm_inline_addr(to) + skip; + + while ( n > 0 ) + { + unsigned int chunksz, bytes; unsigned long dest_maddr; - unsigned int bytes; chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE); - - bytes = min(chunksz, n); + bytes = min(chunksz, n); dest_maddr = paddr_to_maddr(dest_paddr); - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", (unsigned long)from, bytes, dest_maddr); - memcpy((void *)dest_maddr, (void *)from, bytes); + xc_dprintk("%lx[%d] -> %lx\n", (unsigned long)from, bytes, dest_maddr); + memcpy(maddr_to_virt(dest_maddr), (void *)from, bytes); dest_paddr += bytes; from += bytes; n -= bytes; } - /* Always successful. */ + /* Always successful. */ return 0; } @@ -184,35 +185,37 @@ xencomm_inline_to_guest(void *to, const * On success, this will be zero. 
*/ unsigned long -xencomm_copy_to_guest(void *to, const void *from, unsigned int n, - unsigned int skip) +xencomm_copy_to_guest( + void *to, const void *from, unsigned int n, unsigned int skip) { struct xencomm_desc *desc; unsigned int from_pos = 0; unsigned int to_pos = 0; unsigned int i = 0; - if (xencomm_is_inline(to)) + if ( xencomm_is_inline(to) ) return xencomm_inline_to_guest(to, from, n, skip); - /* first we need to access the descriptor */ - desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)to); - if (desc == NULL) - return n; - - if (desc->magic != XENCOMM_MAGIC) { + /* First we need to access the descriptor. */ + desc = (struct xencomm_desc *) + xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)to)); + if ( desc == NULL ) + return n; + + if ( desc->magic != XENCOMM_MAGIC ) + { printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); return n; } - /* iterate through the descriptor, copying up to a page at a time */ - while ((from_pos < n) && (i < desc->nr_addrs)) { + /* Iterate through the descriptor, copying up to a page at a time. */ + while ( (from_pos < n) && (i < desc->nr_addrs) ) + { unsigned long dest_paddr = desc->address[i]; - unsigned int pgoffset; - unsigned int chunksz; - unsigned int chunk_skip; - - if (dest_paddr == XENCOMM_INVALID) { + unsigned int pgoffset, chunksz, chunk_skip; + + if ( dest_paddr == XENCOMM_INVALID ) + { i++; continue; } @@ -225,18 +228,18 @@ xencomm_copy_to_guest(void *to, const vo chunksz -= chunk_skip; skip -= chunk_skip; - if (skip == 0 && chunksz > 0) { + if ( (skip == 0) && (chunksz > 0) ) + { unsigned long dest_maddr; unsigned long source = (unsigned long)from + from_pos; unsigned int bytes = min(chunksz, n - from_pos); dest_maddr = paddr_to_maddr(dest_paddr + chunk_skip); - if (dest_maddr == 0) - return -1; - - if (xencomm_debug) - printk("%lx[%d] -> %lx\n", source, bytes, dest_maddr); - memcpy((void *)dest_maddr, (void *)source, bytes); + if ( dest_maddr == 0 ) + return n - from_pos; + + xc_dprintk("%lx[%d] -> %lx\n", source, bytes, dest_maddr); + memcpy(maddr_to_virt(dest_maddr), (void *)source, bytes); from_pos += bytes; to_pos += bytes; } @@ -260,38 +263,46 @@ int xencomm_add_offset(void **handle, un struct xencomm_desc *desc; int i = 0; - if (xencomm_is_inline(*handle)) + if ( xencomm_is_inline(*handle) ) return xencomm_inline_add_offset(handle, bytes); - /* first we need to access the descriptor */ - desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)*handle); - if (desc == NULL) + /* First we need to access the descriptor. */ + desc = (struct xencomm_desc *) + xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)*handle)); + if ( desc == NULL ) return -1; - if (desc->magic != XENCOMM_MAGIC) { + if ( desc->magic != XENCOMM_MAGIC ) + { printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); return -1; } - /* iterate through the descriptor incrementing addresses */ - while ((bytes > 0) && (i < desc->nr_addrs)) { + /* Iterate through the descriptor incrementing addresses. 
*/ + while ( (bytes > 0) && (i < desc->nr_addrs) ) + { unsigned long dest_paddr = desc->address[i]; - unsigned int pgoffset; - unsigned int chunksz; - unsigned int chunk_skip; + unsigned int pgoffset, chunksz, chunk_skip; + + if ( dest_paddr == XENCOMM_INVALID ) + { + i++; + continue; + } pgoffset = dest_paddr % PAGE_SIZE; chunksz = PAGE_SIZE - pgoffset; chunk_skip = min(chunksz, bytes); - if (chunk_skip == chunksz) { - /* exhausted this page */ - desc->address[i] = XENCOMM_INVALID; - } else { + if ( chunk_skip == chunksz ) + desc->address[i] = XENCOMM_INVALID; /* exhausted this page */ + else desc->address[i] += chunk_skip; - } bytes -= chunk_skip; - } + + i++; + } + return 0; } @@ -300,17 +311,17 @@ int xencomm_handle_is_null(void *handle) struct xencomm_desc *desc; int i; - if (xencomm_is_inline(handle)) + if ( xencomm_is_inline(handle) ) return xencomm_inline_addr(handle) == 0; - desc = (struct xencomm_desc *)paddr_to_maddr((unsigned long)handle); - if (desc == NULL) + desc = (struct xencomm_desc *) + xencomm_maddr_to_vaddr(paddr_to_maddr((unsigned long)handle)); + if ( desc == NULL ) return 1; - for (i = 0; i < desc->nr_addrs; i++) - if (desc->address[i] != XENCOMM_INVALID) + for ( i = 0; i < desc->nr_addrs; i++ ) + if ( desc->address[i] != XENCOMM_INVALID ) return 0; return 1; } - diff -r b5dbf184df6c -r 778985f246a0 xen/drivers/char/console.c --- a/xen/drivers/char/console.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/drivers/char/console.c Thu Aug 16 10:47:33 2007 -0600 @@ -331,13 +331,11 @@ static long guest_console_write(XEN_GUES kbuf[kcount] = '\0'; sercon_puts(kbuf); - - for ( kptr = kbuf; *kptr != '\0'; kptr++ ) - { - vga_putchar(*kptr); - if ( opt_console_to_ring ) + vga_puts(kbuf); + + if ( opt_console_to_ring ) + for ( kptr = kbuf; *kptr != '\0'; kptr++ ) putchar_console_ring(*kptr); - } if ( opt_console_to_ring ) send_guest_global_virq(dom0, VIRQ_CON_RING); @@ -404,12 +402,10 @@ static void __putstr(const char *str) int c; sercon_puts(str); + vga_puts(str); while ( (c = *str++) != '\0' ) - { - vga_putchar(c); putchar_console_ring(c); - } send_guest_global_virq(dom0, VIRQ_CON_RING); } diff -r b5dbf184df6c -r 778985f246a0 xen/drivers/video/Makefile --- a/xen/drivers/video/Makefile Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/drivers/video/Makefile Thu Aug 16 10:47:33 2007 -0600 @@ -1,4 +1,8 @@ obj-y += font_8x14.o -obj-y += font_8x14.o -obj-y += font_8x16.o -obj-y += font_8x8.o -obj-y += vga.o +obj-y := vga.o +obj-$(CONFIG_X86_64) += font_8x14.o +obj-$(CONFIG_X86_64) += font_8x16.o +obj-$(CONFIG_X86_64) += font_8x8.o +obj-$(CONFIG_X86_64) += vesa.o + +# extra dependencies +vesa.o: font.h diff -r b5dbf184df6c -r 778985f246a0 xen/drivers/video/vesa.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/video/vesa.c Thu Aug 16 10:47:33 2007 -0600 @@ -0,0 +1,307 @@ +/****************************************************************************** + * vesa.c + * + * VESA linear frame buffer handling. 
+ */ + +#include <xen/config.h> +#include <xen/compile.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/mm.h> +#include <xen/errno.h> +#include <xen/console.h> +#include <xen/vga.h> +#include "font.h" + +#define vlfb_info vga_console_info.u.vesa_lfb +#define text_columns (vlfb_info.width / font->width) +#define text_rows (vlfb_info.height / font->height) + +static void vesa_redraw_puts(const char *s); +static void vesa_scroll_puts(const char *s); + +static unsigned char *lfb, *lbuf, *text_buf; +static const struct font_desc *font; +static bool_t vga_compat; +static unsigned int pixel_on; +static unsigned int xpos, ypos; + +static unsigned int vram_total; +integer_param("vesa-ram", vram_total); + +static unsigned int vram_remap; +integer_param("vesa-map", vram_remap); + +static int font_height; +static void __init parse_font_height(const char *s) +{ + if ( simple_strtoul(s, &s, 10) == 8 && (*s++ == 'x') ) + font_height = simple_strtoul(s, &s, 10); + if ( *s != '\0' ) + font_height = 0; +} +custom_param("font", parse_font_height); + +void __init vesa_early_init(void) +{ + unsigned int vram_vmode; + + /* XXX vga_compat = !(boot_video_info.capabilities & 2); */ + + if ( (vlfb_info.bits_per_pixel < 8) || (vlfb_info.bits_per_pixel > 32) ) + return; + + if ( font_height == 0 ) /* choose a sensible default */ + font = ((vlfb_info.height <= 600) ? &font_vga_8x8 : + (vlfb_info.height <= 768) ? &font_vga_8x14 : &font_vga_8x16); + else if ( font_height <= 8 ) + font = &font_vga_8x8; + else if ( font_height <= 14 ) + font = &font_vga_8x14; + else + font = &font_vga_8x16; + + /* vram_vmode -- that is the amount of memory needed for the + * used video mode, i.e. the minimum amount of + * memory we need. */ + vram_vmode = vlfb_info.height * vlfb_info.bytes_per_line; + + /* vram_total -- all video memory we have. Used for mtrr + * entries. */ + vram_total = vram_total ? (vram_total << 20) : (vlfb_info.lfb_size << 16); + vram_total = max_t(unsigned int, vram_total, vram_vmode); + + /* vram_remap -- the amount of video memory we are going to + * use for vesafb. With modern cards it is no + * option to simply use vram_total as that + * wastes plenty of kernel address space. */ + vram_remap = (vram_remap ? + (vram_remap << 20) : + ((vram_vmode + (1 << L2_PAGETABLE_SHIFT) - 1) & + ~((1 << L2_PAGETABLE_SHIFT) - 1))); + vram_remap = max_t(unsigned int, vram_remap, vram_vmode); + vram_remap = min_t(unsigned int, vram_remap, vram_total); +} + +void __init vesa_init(void) +{ + if ( !font ) + goto fail; + + lbuf = xmalloc_bytes(vlfb_info.bytes_per_line); + if ( !lbuf ) + goto fail; + + text_buf = xmalloc_bytes(text_columns * text_rows); + if ( !text_buf ) + goto fail; + + if ( map_pages_to_xen(IOREMAP_VIRT_START, + vlfb_info.lfb_base >> PAGE_SHIFT, + vram_remap >> PAGE_SHIFT, + PAGE_HYPERVISOR_NOCACHE) ) + goto fail; + + lfb = memset((void *)IOREMAP_VIRT_START, 0, vram_remap); + memset(text_buf, 0, text_columns * text_rows); + + vga_puts = vesa_redraw_puts; + + printk(XENLOG_INFO "vesafb: framebuffer at 0x%x, mapped to 0x%p, " + "using %uk, total %uk\n", + vlfb_info.lfb_base, lfb, + vram_remap >> 10, vram_total >> 10); + printk(XENLOG_INFO "vesafb: mode is %dx%dx%u, linelength=%d, font %ux%u\n", + vlfb_info.width, vlfb_info.height, + vlfb_info.bits_per_pixel, vlfb_info.bytes_per_line, + font->width, font->height); + printk(XENLOG_INFO "vesafb: %scolor: size=%d:%d:%d:%d, " + "shift=%d:%d:%d:%d\n", + vlfb_info.bits_per_pixel > 8 ? "True" : + vga_compat ? 
"Pseudo" : "Static Pseudo", + vlfb_info.rsvd_size, vlfb_info.red_size, + vlfb_info.green_size, vlfb_info.blue_size, + vlfb_info.rsvd_pos, vlfb_info.red_pos, + vlfb_info.green_pos, vlfb_info.blue_pos); + + if ( vlfb_info.bits_per_pixel > 8 ) + { + /* Light grey in truecolor. */ + unsigned int grey = 0xaaaaaaaa; + pixel_on = + ((grey >> (32 - vlfb_info. red_size)) << vlfb_info. red_pos) | + ((grey >> (32 - vlfb_info.green_size)) << vlfb_info.green_pos) | + ((grey >> (32 - vlfb_info. blue_size)) << vlfb_info. blue_pos); + } + else + { + /* White(ish) in default pseudocolor palette. */ + pixel_on = 7; + } + + return; + + fail: + xfree(lbuf); + xfree(text_buf); +} + +void __init vesa_endboot(void) +{ + xpos = 0; + vga_puts = vesa_scroll_puts; +} + +#if defined(CONFIG_X86) + +#include <asm/mtrr.h> + +static unsigned int vesa_mtrr; +integer_param("vesa-mtrr", vesa_mtrr); + +void __init vesa_mtrr_init(void) +{ + static const int mtrr_types[] = { + 0, MTRR_TYPE_UNCACHABLE, MTRR_TYPE_WRBACK, + MTRR_TYPE_WRCOMB, MTRR_TYPE_WRTHROUGH }; + unsigned int size_total; + int rc, type; + + if ( !lfb || (vesa_mtrr == 0) || (vesa_mtrr >= ARRAY_SIZE(mtrr_types)) ) + return; + + type = mtrr_types[vesa_mtrr]; + if ( !type ) + return; + + /* Find the largest power-of-two */ + size_total = vram_total; + while ( size_total & (size_total - 1) ) + size_total &= size_total - 1; + + /* Try and find a power of two to add */ + do { + rc = mtrr_add(vlfb_info.lfb_base, size_total, type, 1); + size_total >>= 1; + } while ( (size_total >= PAGE_SIZE) && (rc == -EINVAL) ); +} + +static void lfb_flush(void) +{ + if ( vesa_mtrr == 3 ) + __asm__ __volatile__ ("sfence" : : : "memory"); +} + +#else /* !defined(CONFIG_X86) */ + +#define lfb_flush() ((void)0) + +#endif + +/* Render one line of text to given linear framebuffer line. */ +static void vesa_show_line( + const unsigned char *text_line, + unsigned char *video_line, + unsigned int nr_chars) +{ + unsigned int i, j, b, bpp, pixel; + + bpp = (vlfb_info.bits_per_pixel + 7) >> 3; + + for ( i = 0; i < font->height; i++ ) + { + unsigned char *ptr = lbuf; + + for ( j = 0; j < nr_chars; j++ ) + { + const unsigned char *bits = font->data; + bits += ((text_line[j] * font->height + i) * + ((font->width + 7) >> 3)); + for ( b = font->width; b--; ) + { + pixel = test_bit(b, bits) ? pixel_on : 0; + memcpy(ptr, &pixel, bpp); + ptr += bpp; + } + } + + memset(ptr, 0, (vlfb_info.width - nr_chars * font->width) * bpp); + memcpy(video_line, lbuf, vlfb_info.width * bpp); + video_line += vlfb_info.bytes_per_line; + } +} + +/* Fast mode which redraws all modified parts of a 2D text buffer. */ +static void vesa_redraw_puts(const char *s) +{ + unsigned int i, min_redraw_y = ypos; + char c; + + /* Paste characters into text buffer. */ + while ( (c = *s++) != '\0' ) + { + if ( (c == '\n') || (xpos >= text_columns) ) + { + if ( ++ypos >= text_rows ) + { + min_redraw_y = 0; + ypos = text_rows - 1; + memmove(text_buf, text_buf + text_columns, + ypos * text_columns); + memset(text_buf + ypos * text_columns, 0, xpos); + } + xpos = 0; + } + + if ( c != '\n' ) + text_buf[xpos++ + ypos * text_columns] = c; + } + + /* Render modified section of text buffer to VESA linear framebuffer. */ + for ( i = min_redraw_y; i <= ypos; i++ ) + vesa_show_line(text_buf + i * text_columns, + lfb + i * font->height * vlfb_info.bytes_per_line, + text_columns); + + lfb_flush(); +} + +/* Slower line-based scroll mode which interacts better with dom0. 
*/ +static void vesa_scroll_puts(const char *s) +{ + unsigned int i; + char c; + + while ( (c = *s++) != '\0' ) + { + if ( (c == '\n') || (xpos >= text_columns) ) + { + unsigned int bytes = (vlfb_info.width * + ((vlfb_info.bits_per_pixel + 7) >> 3)); + unsigned char *src = lfb + font->height * vlfb_info.bytes_per_line; + unsigned char *dst = lfb; + + /* New line: scroll all previous rows up one line. */ + for ( i = font->height; i < vlfb_info.height; i++ ) + { + memcpy(dst, src, bytes); + src += vlfb_info.bytes_per_line; + dst += vlfb_info.bytes_per_line; + } + + /* Render new line. */ + vesa_show_line( + text_buf, + lfb + (text_rows-1) * font->height * vlfb_info.bytes_per_line, + xpos); + + xpos = 0; + } + + if ( c != '\n' ) + text_buf[xpos++] = c; + } + + lfb_flush(); +} diff -r b5dbf184df6c -r 778985f246a0 xen/drivers/video/vga.c --- a/xen/drivers/video/vga.c Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/drivers/video/vga.c Thu Aug 16 10:47:33 2007 -0600 @@ -10,22 +10,20 @@ #include <xen/lib.h> #include <xen/mm.h> #include <xen/errno.h> -#include <xen/event.h> -#include <xen/spinlock.h> #include <xen/console.h> #include <xen/vga.h> #include <asm/io.h> -#include "font.h" /* Filled in by arch boot code. */ struct xen_vga_console_info vga_console_info; -static int vgacon_enabled = 0; -static int vgacon_keep = 0; -/*static const struct font_desc *font;*/ +static int vgacon_keep; +static unsigned int xpos, ypos; +static unsigned char *video; -static int xpos, ypos; -static unsigned char *video; +static void vga_text_puts(const char *s); +static void vga_noop_puts(const char *s) {} +void (*vga_puts)(const char *) = vga_noop_puts; /* * 'vga=<mode-specifier>[,keep]' where <mode-specifier> is one of: @@ -55,10 +53,16 @@ string_param("vga", opt_vga); string_param("vga", opt_vga); /* VGA text-mode definitions. */ -#define COLUMNS vga_console_info.u.text_mode_3.columns -#define LINES vga_console_info.u.text_mode_3.rows +static unsigned int columns, lines; #define ATTRIBUTE 7 -#define VIDEO_SIZE (COLUMNS * LINES * 2) + +#ifdef CONFIG_X86_64 +void vesa_early_init(void); +void vesa_endboot(void); +#else +#define vesa_early_init() ((void)0) +#define vesa_endboot() ((void)0) +#endif void __init vga_init(void) { @@ -76,77 +80,61 @@ void __init vga_init(void) switch ( vga_console_info.video_type ) { case XEN_VGATYPE_TEXT_MODE_3: - if ( memory_is_conventional_ram(0xB8000) ) + if ( memory_is_conventional_ram(0xB8000) || + ((video = ioremap(0xB8000, 0x8000)) == NULL) ) return; - video = ioremap(0xB8000, 0x8000); - if ( video == NULL ) - return; - /* Disable cursor. */ - outw(0x200a, 0x3d4); - memset(video, 0, VIDEO_SIZE); + outw(0x200a, 0x3d4); /* disable cursor */ + columns = vga_console_info.u.text_mode_3.columns; + lines = vga_console_info.u.text_mode_3.rows; + memset(video, 0, columns * lines * 2); + vga_puts = vga_text_puts; break; case XEN_VGATYPE_VESA_LFB: -#if 0 - /* XXX Implement me! */ - video = ioremap(vga_console_info.u.vesa_lfb.lfb_base, - vga_console_info.u.vesa_lfb.lfb_size); - if ( video == NULL ) - return; - memset(video, 0, vga_console_info.u.vesa_lfb.lfb_size); + vesa_early_init(); break; -#else - return; -#endif default: memset(&vga_console_info, 0, sizeof(vga_console_info)); - return; + break; } - - vgacon_enabled = 1; } void __init vga_endboot(void) { - if ( !vgacon_enabled ) + if ( vga_puts == vga_noop_puts ) return; printk("Xen is %s VGA console.\n", vgacon_keep ? 
"keeping" : "relinquishing"); - vgacon_enabled = vgacon_keep; + vesa_endboot(); + + if ( !vgacon_keep ) + vga_puts = vga_noop_puts; } +static void vga_text_puts(const char *s) +{ + char c; -static void put_newline(void) -{ - xpos = 0; - ypos++; + while ( (c = *s++) != '\0' ) + { + if ( (c == '\n') || (xpos >= columns) ) + { + if ( ++ypos >= lines ) + { + ypos = lines - 1; + memmove(video, video + 2 * columns, ypos * 2 * columns); + memset(video + ypos * 2 * columns, 0, 2 * xpos); + } + xpos = 0; + } - if ( ypos >= LINES ) - { - ypos = LINES-1; - memmove((char*)video, - (char*)video + 2*COLUMNS, (LINES-1)*2*COLUMNS); - memset((char*)video + (LINES-1)*2*COLUMNS, 0, 2*COLUMNS); - } -} - -void vga_putchar(int c) -{ - if ( !vgacon_enabled ) - return; - - if ( c == '\n' ) - { - put_newline(); - } - else - { - if ( xpos >= COLUMNS ) - put_newline(); - video[(xpos + ypos * COLUMNS) * 2] = c & 0xFF; - video[(xpos + ypos * COLUMNS) * 2 + 1] = ATTRIBUTE; - ++xpos; + if ( c != '\n' ) + { + video[(xpos + ypos * columns) * 2] = c; + video[(xpos + ypos * columns) * 2 + 1] = ATTRIBUTE; + xpos++; + } } } diff -r b5dbf184df6c -r 778985f246a0 xen/include/asm-x86/hvm/hvm.h --- a/xen/include/asm-x86/hvm/hvm.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/asm-x86/hvm/hvm.h Thu Aug 16 10:47:33 2007 -0600 @@ -95,36 +95,27 @@ struct hvm_function_table { /* * Examine specifics of the guest state: - * 1) determine whether paging is enabled, - * 2) determine whether long mode is enabled, - * 3) determine whether PAE paging is enabled, - * 4) determine whether NX is enabled, - * 5) determine whether interrupts are enabled or not, - * 6) determine the mode the guest is running in, - * 7) return the current guest control-register value - * 8) return the current guest segment descriptor base - * 9) return the current guest segment descriptor - */ - int (*paging_enabled)(struct vcpu *v); - int (*long_mode_enabled)(struct vcpu *v); - int (*pae_enabled)(struct vcpu *v); - int (*nx_enabled)(struct vcpu *v); + * 1) determine whether interrupts are enabled or not + * 2) determine the mode the guest is running in + * 3) return the current guest segment descriptor base + * 4) return the current guest segment descriptor + */ int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack); int (*guest_x86_mode)(struct vcpu *v); - unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num); unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg); void (*get_segment_register)(struct vcpu *v, enum x86_segment seg, struct segment_register *reg); /* - * Re-set the value of CR3 that Xen runs on when handling VM exits + * Re-set the value of CR3 that Xen runs on when handling VM exits. */ void (*update_host_cr3)(struct vcpu *v); /* - * Called to inform HVM layer that a guest cr3 has changed - */ - void (*update_guest_cr3)(struct vcpu *v); + * Called to inform HVM layer that a guest CRn or EFER has changed. 
+ */ + void (*update_guest_cr)(struct vcpu *v, unsigned int cr); + void (*update_guest_efer)(struct vcpu *v); /* * Called to ensure than all guest-specific mappings in a tagged TLB @@ -189,38 +180,24 @@ void hvm_set_guest_time(struct vcpu *v, void hvm_set_guest_time(struct vcpu *v, u64 gtime); u64 hvm_get_guest_time(struct vcpu *v); -static inline int -hvm_paging_enabled(struct vcpu *v) -{ - return hvm_funcs.paging_enabled(v); -} +#define hvm_paging_enabled(v) \ + (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG)) +#define hvm_pae_enabled(v) \ + (hvm_paging_enabled(v) && ((v)->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PAE)) +#define hvm_nx_enabled(v) \ + (!!((v)->arch.hvm_vcpu.guest_efer & EFER_NX)) #ifdef __x86_64__ -static inline int -hvm_long_mode_enabled(struct vcpu *v) -{ - return hvm_funcs.long_mode_enabled(v); -} +#define hvm_long_mode_enabled(v) \ + ((v)->arch.hvm_vcpu.guest_efer & EFER_LMA) #else #define hvm_long_mode_enabled(v) (v,0) #endif static inline int -hvm_pae_enabled(struct vcpu *v) -{ - return hvm_funcs.pae_enabled(v); -} - -static inline int hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type) { return hvm_funcs.interrupts_enabled(v, type); -} - -static inline int -hvm_nx_enabled(struct vcpu *v) -{ - return hvm_funcs.nx_enabled(v); } static inline int @@ -244,7 +221,15 @@ hvm_update_vtpr(struct vcpu *v, unsigned hvm_funcs.update_vtpr(v, value); } -void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3); +static inline void hvm_update_guest_cr(struct vcpu *v, unsigned int cr) +{ + hvm_funcs.update_guest_cr(v, cr); +} + +static inline void hvm_update_guest_efer(struct vcpu *v) +{ + hvm_funcs.update_guest_efer(v); +} static inline void hvm_flush_guest_tlbs(void) @@ -257,12 +242,6 @@ void hvm_hypercall_page_initialise(struc void *hypercall_page); static inline unsigned long -hvm_get_guest_ctrl_reg(struct vcpu *v, unsigned int num) -{ - return hvm_funcs.get_guest_ctrl_reg(v, num); -} - -static inline unsigned long hvm_get_segment_base(struct vcpu *v, enum x86_segment seg) { return hvm_funcs.get_segment_base(v, seg); @@ -277,7 +256,6 @@ hvm_get_segment_register(struct vcpu *v, void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); -void hvm_stts(struct vcpu *v); void hvm_migrate_timers(struct vcpu *v); void hvm_do_resume(struct vcpu *v); diff -r b5dbf184df6c -r 778985f246a0 xen/include/asm-x86/hvm/support.h --- a/xen/include/asm-x86/hvm/support.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/asm-x86/hvm/support.h Thu Aug 16 10:47:33 2007 -0600 @@ -234,4 +234,8 @@ void hvm_hlt(unsigned long rflags); void hvm_hlt(unsigned long rflags); void hvm_triple_fault(void); +int hvm_set_cr0(unsigned long value); +int hvm_set_cr3(unsigned long value); +int hvm_set_cr4(unsigned long value); + #endif /* __ASM_X86_HVM_SUPPORT_H__ */ diff -r b5dbf184df6c -r 778985f246a0 xen/include/asm-x86/hvm/svm/asid.h --- a/xen/include/asm-x86/hvm/svm/asid.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/asm-x86/hvm/svm/asid.h Thu Aug 16 10:47:33 2007 -0600 @@ -32,20 +32,6 @@ void svm_asid_inv_asid(struct vcpu *v); void svm_asid_inv_asid(struct vcpu *v); void svm_asid_inc_generation(void); -/* - * ASID related, guest triggered events. 
- */ - -static inline void svm_asid_g_update_paging(struct vcpu *v) -{ - svm_asid_inv_asid(v); -} - -static inline void svm_asid_g_mov_to_cr3(struct vcpu *v) -{ - svm_asid_inv_asid(v); -} - static inline void svm_asid_g_invlpg(struct vcpu *v, unsigned long g_vaddr) { #if 0 diff -r b5dbf184df6c -r 778985f246a0 xen/include/asm-x86/hvm/svm/vmcb.h --- a/xen/include/asm-x86/hvm/svm/vmcb.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h Thu Aug 16 10:47:33 2007 -0600 @@ -440,11 +440,6 @@ struct arch_svm_struct { u32 *msrpm; int launch_core; bool_t vmcb_in_sync; /* VMCB sync'ed with VMSAVE? */ - unsigned long cpu_shadow_cr0; /* Guest value for CR0 */ - unsigned long cpu_shadow_cr4; /* Guest value for CR4 */ - unsigned long cpu_shadow_efer; /* Guest value for EFER */ - unsigned long cpu_cr2; - unsigned long cpu_cr3; }; struct vmcb_struct *alloc_vmcb(void); diff -r b5dbf184df6c -r 778985f246a0 xen/include/asm-x86/hvm/vcpu.h --- a/xen/include/asm-x86/hvm/vcpu.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/asm-x86/hvm/vcpu.h Thu Aug 16 10:47:33 2007 -0600 @@ -29,7 +29,18 @@ #define HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI 1 struct hvm_vcpu { - unsigned long hw_cr3; /* value we give to HW to use */ + /* Guest control-register and EFER values, just as the guest sees them. */ + unsigned long guest_cr[5]; + unsigned long guest_efer; + + /* + * Processor-visible control-register values, while guest executes. + * CR0, CR4: Used as a cache of VMCS contents by VMX only. + * CR1, CR2: Never used (guest_cr[2] is always processor-visible CR2). + * CR3: Always used and kept up to date by paging subsystem. + */ + unsigned long hw_cr[5]; + struct hvm_io_op io_op; struct vlapic vlapic; s64 cache_tsc_offset; diff -r b5dbf184df6c -r 778985f246a0 xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Aug 16 10:47:33 2007 -0600 @@ -67,17 +67,11 @@ struct arch_vmx_struct { /* Cache of cpu execution control. */ u32 exec_control; - unsigned long cpu_cr0; /* copy of guest CR0 */ - unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */ - unsigned long cpu_shadow_cr4; /* copy of guest read shadow CR4 */ - unsigned long cpu_cr2; /* save CR2 */ - unsigned long cpu_cr3; #ifdef __x86_64__ struct vmx_msr_state msr_state; unsigned long shadow_gs; unsigned long cstar; #endif - unsigned long efer; /* Following fields are all specific to vmxassist. */ unsigned long vmxassist_enabled:1; diff -r b5dbf184df6c -r 778985f246a0 xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Thu Aug 16 10:47:33 2007 -0600 @@ -279,8 +279,8 @@ static inline void __vmx_inject_exceptio __vmwrite(VM_ENTRY_INTR_INFO, intr_fields); - if (trap == TRAP_page_fault) - HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vmx.cpu_cr2, error_code); + if ( trap == TRAP_page_fault ) + HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vcpu.guest_cr[2], error_code); else HVMTRACE_2D(INJ_EXC, v, trap, error_code); } diff -r b5dbf184df6c -r 778985f246a0 xen/include/public/arch-x86/xen-x86_32.h --- a/xen/include/public/arch-x86/xen-x86_32.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/public/arch-x86/xen-x86_32.h Thu Aug 16 10:47:33 2007 -0600 @@ -64,18 +64,34 @@ #define FLAT_USER_DS FLAT_RING3_DS #define FLAT_USER_SS FLAT_RING3_SS -/* - * Virtual addresses beyond this are not modifiable by guest OSes. 
The - * machine->physical mapping table starts at this address, read-only. - */ +#define __HYPERVISOR_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_END_PAE 0xF6800000 +#define HYPERVISOR_VIRT_START_PAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) +#define MACH2PHYS_VIRT_START_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) +#define MACH2PHYS_VIRT_END_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) + +#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 +#define HYPERVISOR_VIRT_START_NONPAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_START_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_END_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) + #ifdef CONFIG_X86_PAE -#define __HYPERVISOR_VIRT_START 0xF5800000 -#define __MACH2PHYS_VIRT_START 0xF5800000 -#define __MACH2PHYS_VIRT_END 0xF6800000 +#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE +#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE +#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE #else -#define __HYPERVISOR_VIRT_START 0xFC000000 -#define __MACH2PHYS_VIRT_START 0xFC000000 -#define __MACH2PHYS_VIRT_END 0xFC400000 +#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_NONPAE +#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_NONPAE +#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_NONPAE #endif #ifndef HYPERVISOR_VIRT_START diff -r b5dbf184df6c -r 778985f246a0 xen/include/xen/vga.h --- a/xen/include/xen/vga.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/xen/vga.h Thu Aug 16 10:47:33 2007 -0600 @@ -15,11 +15,11 @@ extern struct xen_vga_console_info vga_c extern struct xen_vga_console_info vga_console_info; void vga_init(void); void vga_endboot(void); -void vga_putchar(int c); +extern void (*vga_puts)(const char *); #else -#define vga_init() ((void)0) -#define vga_endboot() ((void)0) -#define vga_putchar(c) ((void)0) +#define vga_init() ((void)0) +#define vga_endboot() ((void)0) +#define vga_puts(s) ((void)0) #endif #endif /* _XEN_VGA_H */ diff -r b5dbf184df6c -r 778985f246a0 xen/include/xen/xencomm.h --- a/xen/include/xen/xencomm.h Thu Aug 16 10:03:26 2007 -0600 +++ b/xen/include/xen/xencomm.h Thu Aug 16 10:47:33 2007 -0600 @@ -23,13 +23,12 @@ #include <public/xen.h> -extern unsigned long xencomm_copy_to_guest(void *to, const void *from, - unsigned int len, unsigned int skip); -extern unsigned long xencomm_copy_from_guest(void *to, const void *from, - unsigned int len, unsigned int skip); -extern int xencomm_add_offset(void **handle, unsigned int bytes); -extern int xencomm_handle_is_null(void *ptr); - +unsigned long xencomm_copy_to_guest( + void *to, const void *from, unsigned int len, unsigned int skip); +unsigned long xencomm_copy_from_guest( + void *to, const void *from, unsigned int len, unsigned int skip); +int xencomm_add_offset(void **handle, unsigned int bytes); +int xencomm_handle_is_null(void *ptr); static inline int xencomm_is_inline(const void *handle) { @@ -39,7 +38,7 @@ static inline int xencomm_is_inline(cons static inline unsigned long xencomm_inline_addr(const void *handle) { - return (unsigned long)handle & ~XENCOMM_INLINE_FLAG; + return (unsigned long)handle & ~XENCOMM_INLINE_FLAG; } /* Is the guest handle a NULL reference? 
*/ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog