[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch # User Isaku Yamahata <yamahata@xxxxxxxxxxxxx> # Date 1210942778 -32400 # Node ID 6d0cc186bf41e10d7cbf4e3c466acce82f6d9454 # Parent c96507e0c83d292f36bd48b1d4a59771e92c0907 # Parent d0817f08599afe3f2d7ffa2e3fd88e6a65f5e85d merge with xen-unstable.hg --- tools/ioemu/block.c | 32 +++ tools/ioemu/hw/serial.c | 8 tools/ioemu/hw/xenfb.c | 1 tools/ioemu/xenstore.c | 4 tools/libxc/Makefile | 1 tools/libxc/xc_hvm_build.c | 50 +++++ tools/libxc/xc_pm.c | 101 ++++++++++++ tools/libxc/xenctrl.h | 19 ++ tools/python/xen/xend/XendDomainInfo.py | 3 xen/arch/ia64/xen/mm.c | 41 ++-- xen/arch/x86/acpi/Makefile | 1 xen/arch/x86/acpi/boot.c | 14 - xen/arch/x86/acpi/cpufreq/cpufreq.c | 16 + xen/arch/x86/acpi/cpufreq/utility.c | 77 +++++++++ xen/arch/x86/acpi/pmstat.c | 110 +++++++++++++ xen/arch/x86/mm.c | 6 xen/arch/x86/mm/hap/p2m-ept.c | 208 ++++++++++++++++++------ xen/arch/x86/mm/p2m.c | 251 +++++++++++++++++++++++------- xen/arch/x86/platform_hypercall.c | 3 xen/common/grant_table.c | 2 xen/common/memory.c | 12 - xen/common/sysctl.c | 16 + xen/include/acpi/cpufreq/processor_perf.h | 23 ++ xen/include/asm-ia64/grant_table.h | 2 xen/include/asm-ia64/shadow.h | 6 xen/include/asm-x86/p2m.h | 15 + xen/include/public/sysctl.h | 37 ++++ xen/include/xen/paging.h | 4 28 files changed, 890 insertions(+), 173 deletions(-) diff -r c96507e0c83d -r 6d0cc186bf41 tools/ioemu/block.c --- a/tools/ioemu/block.c Thu May 15 16:23:56 2008 +0900 +++ b/tools/ioemu/block.c Fri May 16 21:59:38 2008 +0900 @@ -240,8 +240,28 @@ static int is_windows_drive(const char * } #endif +static int bdrv_invalid_protocol_open(BlockDriverState *bs, + const char *filename, int flags) { + return -ENOENT; +} + +static BlockDriver bdrv_invalid_protocol = { + "invalid_protocol", + .bdrv_open = bdrv_invalid_protocol_open, +}; + static BlockDriver *find_protocol(const char *filename) { + /* Return values: + * &bdrv_xxx + * filename specifies protocol xxx + * caller should use that + * NULL filename does 
not specify any protocol + * caller may apply their own default + * &bdrv_invalid_protocol filename specifies an unknown protocol + * caller should return -ENOENT; or may just try to open with + * that bdrv, which always fails that way. + */ BlockDriver *drv1; char protocol[128]; int len; @@ -254,7 +274,7 @@ static BlockDriver *find_protocol(const #endif p = strchr(filename, ':'); if (!p) - return NULL; /* do not ever guess raw, it is a security problem! */ + return NULL; len = p - filename; if (len > sizeof(protocol) - 1) len = sizeof(protocol) - 1; @@ -265,7 +285,7 @@ static BlockDriver *find_protocol(const !strcmp(drv1->protocol_name, protocol)) return drv1; } - return NULL; + return &bdrv_invalid_protocol; } /* XXX: force raw format if block or character device ? It would @@ -295,8 +315,8 @@ static BlockDriver *find_image_format(co #endif drv = find_protocol(filename); - /* no need to test disk image formats for vvfat */ - if (drv == &bdrv_vvfat) + /* no need to test disk image format if the filename told us */ + if (drv != NULL) return drv; ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY); @@ -390,7 +410,7 @@ int bdrv_open2(BlockDriverState *bs, con if (flags & BDRV_O_FILE) { drv = find_protocol(filename); if (!drv) - return -ENOENT; + drv = &bdrv_raw; } else { if (!drv) { drv = find_image_format(filename); @@ -438,7 +458,7 @@ int bdrv_open2(BlockDriverState *bs, con } path_combine(backing_filename, sizeof(backing_filename), filename, bs->backing_file); - if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0) + if (bdrv_open2(bs->backing_hd, backing_filename, 0, &bdrv_raw) < 0) goto fail; } diff -r c96507e0c83d -r 6d0cc186bf41 tools/ioemu/hw/serial.c --- a/tools/ioemu/hw/serial.c Thu May 15 16:23:56 2008 +0900 +++ b/tools/ioemu/hw/serial.c Fri May 16 21:59:38 2008 +0900 @@ -705,12 +705,13 @@ static void serial_save(QEMUFile *f, voi qemu_put_8s(f,&s->lsr); qemu_put_8s(f,&s->msr); qemu_put_8s(f,&s->scr); - qemu_get_8s(f,&s->fcr); + qemu_put_8s(f,&s->fcr); } 
static int serial_load(QEMUFile *f, void *opaque, int version_id) { SerialState *s = opaque; + uint8_t fcr = 0; if(version_id > 2) return -EINVAL; @@ -729,6 +730,11 @@ static int serial_load(QEMUFile *f, void qemu_get_8s(f,&s->scr); qemu_get_8s(f,&s->fcr); + if (version_id >= 2) + qemu_get_8s(f,&fcr); + + /* Initialize fcr via setter to perform essential side-effects */ + serial_ioport_write(s, 0x02, fcr); return 0; } diff -r c96507e0c83d -r 6d0cc186bf41 tools/ioemu/hw/xenfb.c --- a/tools/ioemu/hw/xenfb.c Thu May 15 16:23:56 2008 +0900 +++ b/tools/ioemu/hw/xenfb.c Fri May 16 21:59:38 2008 +0900 @@ -502,6 +502,7 @@ static int xenfb_configure_fb(struct xen fprintf(stderr, "FB: frontend fb size %zu limited to %zu\n", fb_len, fb_len_lim); + fb_len = fb_len_lim; } if (depth != 8 && depth != 16 && depth != 24 && depth != 32) { fprintf(stderr, diff -r c96507e0c83d -r 6d0cc186bf41 tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Thu May 15 16:23:56 2008 +0900 +++ b/tools/ioemu/xenstore.c Fri May 16 21:59:38 2008 +0900 @@ -260,6 +260,8 @@ void xenstore_parse_domain_config(int hv /* autoguess qcow vs qcow2 */ } else if (!strcmp(drv,"file") || !strcmp(drv,"phy")) { format = &bdrv_raw; + } else if (!strcmp(drv,"phy")) { + format = &bdrv_raw; } else { format = bdrv_find_format(drv); if (!format) { @@ -269,7 +271,7 @@ void xenstore_parse_domain_config(int hv } } if (bdrv_open2(bs, params, 0 /* snapshot */, format) < 0) - fprintf(stderr, "qemu: could not open vbd '%s' or hard disk image '%s' (drv '%s')\n", buf, params, drv ? drv : "?"); + fprintf(stderr, "qemu: could not open vbd '%s' or hard disk image '%s' (drv '%s' format '%s')\n", buf, params, drv ? drv : "?", format ? 
format->format_name : "0"); } } diff -r c96507e0c83d -r 6d0cc186bf41 tools/libxc/Makefile --- a/tools/libxc/Makefile Thu May 15 16:23:56 2008 +0900 +++ b/tools/libxc/Makefile Fri May 16 21:59:38 2008 +0900 @@ -20,6 +20,7 @@ CTRL_SRCS-y += xc_sedf.c CTRL_SRCS-y += xc_sedf.c CTRL_SRCS-y += xc_csched.c CTRL_SRCS-y += xc_tbuf.c +CTRL_SRCS-y += xc_pm.c ifneq ($(stubdom),y) CTRL_SRCS-y += xc_resume.c endif diff -r c96507e0c83d -r 6d0cc186bf41 tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Thu May 15 16:23:56 2008 +0900 +++ b/tools/libxc/xc_hvm_build.c Fri May 16 21:59:38 2008 +0900 @@ -18,6 +18,9 @@ #include "xc_e820.h" #include <xen/libelf.h> + +#define SUPERPAGE_PFN_SHIFT 9 +#define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT) #define SCRATCH_PFN 0xFFFFF @@ -211,7 +214,7 @@ static int setup_guest(int xc_handle, /* * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000. - * We allocate pages in batches of no more than 2048 to ensure that + * We allocate pages in batches of no more than 8MB to ensure that * we can be preempted and hence dom0 remains responsive. */ rc = xc_domain_memory_populate_physmap( @@ -219,13 +222,50 @@ static int setup_guest(int xc_handle, cur_pages = 0xc0; while ( (rc == 0) && (nr_pages > cur_pages) ) { + /* Clip count to maximum 8MB extent. */ unsigned long count = nr_pages - cur_pages; if ( count > 2048 ) count = 2048; - rc = xc_domain_memory_populate_physmap( - xc_handle, dom, count, 0, 0, &page_array[cur_pages]); - cur_pages += count; - } + + /* Clip partial superpage extents to superpage boundaries. */ + if ( ((cur_pages & (SUPERPAGE_NR_PFNS-1)) != 0) && + (count > (-cur_pages & (SUPERPAGE_NR_PFNS-1))) ) + count = -cur_pages & (SUPERPAGE_NR_PFNS-1); /* clip s.p. tail */ + else if ( ((count & (SUPERPAGE_NR_PFNS-1)) != 0) && + (count > SUPERPAGE_NR_PFNS) ) + count &= ~(SUPERPAGE_NR_PFNS - 1); /* clip non-s.p. tail */ + + /* Attempt to allocate superpage extents. 
*/ + if ( ((count | cur_pages) & (SUPERPAGE_NR_PFNS - 1)) == 0 ) + { + long done; + xen_pfn_t sp_extents[2048 >> SUPERPAGE_PFN_SHIFT]; + struct xen_memory_reservation sp_req = { + .nr_extents = count >> SUPERPAGE_PFN_SHIFT, + .extent_order = SUPERPAGE_PFN_SHIFT, + .domid = dom + }; + set_xen_guest_handle(sp_req.extent_start, sp_extents); + for ( i = 0; i < sp_req.nr_extents; i++ ) + sp_extents[i] = page_array[cur_pages+(i<<SUPERPAGE_PFN_SHIFT)]; + done = xc_memory_op(xc_handle, XENMEM_populate_physmap, &sp_req); + if ( done > 0 ) + { + done <<= SUPERPAGE_PFN_SHIFT; + cur_pages += done; + count -= done; + } + } + + /* Fall back to 4kB extents. */ + if ( count != 0 ) + { + rc = xc_domain_memory_populate_physmap( + xc_handle, dom, count, 0, 0, &page_array[cur_pages]); + cur_pages += count; + } + } + if ( rc != 0 ) { PERROR("Could not allocate memory for HVM guest.\n"); diff -r c96507e0c83d -r 6d0cc186bf41 tools/libxc/xc_pm.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/xc_pm.c Fri May 16 21:59:38 2008 +0900 @@ -0,0 +1,101 @@ +/****************************************************************************** + * xc_pm.c - Libxc API for Xen Power Management (Px/Cx/Tx, etc.) statistic + * + * Copyright (c) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + +#include "xc_private.h" + +int xc_pm_get_max_px(int xc_handle, int cpuid, int *max_px) +{ + DECLARE_SYSCTL; + int ret; + + sysctl.cmd = XEN_SYSCTL_get_pmstat; + sysctl.u.get_pmstat.type = PMSTAT_get_max_px; + sysctl.u.get_pmstat.cpuid = cpuid; + ret = xc_sysctl(xc_handle, &sysctl); + if ( ret ) + return ret; + + *max_px = sysctl.u.get_pmstat.u.getpx.total; + return ret; +} + +int xc_pm_get_pxstat(int xc_handle, int cpuid, struct xc_px_stat *pxpt) +{ + DECLARE_SYSCTL; + int max_px, ret; + + if ( !pxpt || !(pxpt->trans_pt) || !(pxpt->pt) ) + return -EINVAL; + + if ( (ret = xc_pm_get_max_px(xc_handle, cpuid, &max_px)) != 0) + return ret; + + if ( (ret = lock_pages(pxpt->trans_pt, + max_px * max_px * sizeof(uint64_t))) != 0 ) + return ret; + + if ( (ret = lock_pages(pxpt->pt, + max_px * sizeof(struct xc_px_val))) != 0 ) + { + unlock_pages(pxpt->trans_pt, max_px * max_px * sizeof(uint64_t)); + return ret; + } + + sysctl.cmd = XEN_SYSCTL_get_pmstat; + sysctl.u.get_pmstat.type = PMSTAT_get_pxstat; + sysctl.u.get_pmstat.cpuid = cpuid; + set_xen_guest_handle(sysctl.u.get_pmstat.u.getpx.trans_pt, pxpt->trans_pt); + set_xen_guest_handle(sysctl.u.get_pmstat.u.getpx.pt, + (pm_px_val_t *)pxpt->pt); + + ret = xc_sysctl(xc_handle, &sysctl); + if ( ret ) + { + unlock_pages(pxpt->trans_pt, max_px * max_px * sizeof(uint64_t)); + unlock_pages(pxpt->pt, max_px * sizeof(struct xc_px_val)); + return ret; + } + + pxpt->total = sysctl.u.get_pmstat.u.getpx.total; + pxpt->usable = 
sysctl.u.get_pmstat.u.getpx.usable; + pxpt->last = sysctl.u.get_pmstat.u.getpx.last; + pxpt->cur = sysctl.u.get_pmstat.u.getpx.cur; + + unlock_pages(pxpt->trans_pt, max_px * max_px * sizeof(uint64_t)); + unlock_pages(pxpt->pt, max_px * sizeof(struct xc_px_val)); + + return ret; +} + +int xc_pm_reset_pxstat(int xc_handle, int cpuid) +{ + DECLARE_SYSCTL; + + sysctl.cmd = XEN_SYSCTL_get_pmstat; + sysctl.u.get_pmstat.type = PMSTAT_reset_pxstat; + sysctl.u.get_pmstat.cpuid = cpuid; + + return xc_sysctl(xc_handle, &sysctl); +} diff -r c96507e0c83d -r 6d0cc186bf41 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Thu May 15 16:23:56 2008 +0900 +++ b/tools/libxc/xenctrl.h Fri May 16 21:59:38 2008 +0900 @@ -1034,4 +1034,23 @@ void xc_cpuid_to_str(const unsigned int char **strs); #endif +struct xc_px_val { + uint64_t freq; /* Px core frequency */ + uint64_t residency; /* Px residency time */ + uint64_t count; /* Px transition count */ +}; + +struct xc_px_stat { + uint8_t total; /* total Px states */ + uint8_t usable; /* usable Px states */ + uint8_t last; /* last Px state */ + uint8_t cur; /* current Px state */ + uint64_t *trans_pt; /* Px transition table */ + struct xc_px_val *pt; +}; + +int xc_pm_get_max_px(int xc_handle, int cpuid, int *max_px); +int xc_pm_get_pxstat(int xc_handle, int cpuid, struct xc_px_stat *pxpt); +int xc_pm_reset_pxstat(int xc_handle, int cpuid); + #endif /* XENCTRL_H */ diff -r c96507e0c83d -r 6d0cc186bf41 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu May 15 16:23:56 2008 +0900 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri May 16 21:59:38 2008 +0900 @@ -3013,7 +3013,8 @@ class XendDomainInfo: # shortcut if the domain isn't started because # the devcontrollers will have no better information # than XendConfig. 
- if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED,): + if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED, + XEN_API_VM_POWER_STATE_SUSPENDED): if dev_config: return copy.deepcopy(dev_config) return None diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/arch/ia64/xen/mm.c Fri May 16 21:59:38 2008 +0900 @@ -2424,16 +2424,20 @@ steal_page(struct domain *d, struct page int guest_physmap_add_page(struct domain *d, unsigned long gpfn, - unsigned long mfn) -{ - BUG_ON(!mfn_valid(mfn)); - BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1)); - set_gpfn_from_mfn(mfn, gpfn); - smp_mb(); - assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, - ASSIGN_writable | ASSIGN_pgc_allocated); - - //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT)); + unsigned long mfn, unsigned int page_order) +{ + unsigned long i; + + for (i = 0; i < (1UL << page_order); i++) { + BUG_ON(!mfn_valid(mfn)); + BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1)); + set_gpfn_from_mfn(mfn, gpfn); + smp_mb(); + assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, + ASSIGN_writable | ASSIGN_pgc_allocated); + mfn++; + gpfn++; + } perfc_incr(guest_physmap_add_page); return 0; @@ -2441,10 +2445,15 @@ guest_physmap_add_page(struct domain *d, void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, - unsigned long mfn) -{ + unsigned long mfn, unsigned int page_order) +{ + unsigned long i; + BUG_ON(mfn == 0);//XXX - zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn); + + for (i = 0; i < (1UL << page_order); i++) + zap_domain_page_one(d, (gpfn+i) << PAGE_SHIFT, 0, mfn+i); + perfc_incr(guest_physmap_remove_page); } @@ -2847,7 +2856,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE( if (prev_mfn && mfn_valid(prev_mfn)) { if (is_xen_heap_mfn(prev_mfn)) /* Xen heap frames are simply unhooked from this phys slot. 
*/ - guest_physmap_remove_page(d, xatp.gpfn, prev_mfn); + guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0); else /* Normal domain memory is freed, to avoid leaking memory. */ guest_remove_page(d, xatp.gpfn); @@ -2856,10 +2865,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE( /* Unmap from old location, if any. */ gpfn = get_gpfn_from_mfn(mfn); if (gpfn != INVALID_M2P_ENTRY) - guest_physmap_remove_page(d, gpfn, mfn); + guest_physmap_remove_page(d, gpfn, mfn, 0); /* Map at new location. */ - guest_physmap_add_page(d, xatp.gpfn, mfn); + guest_physmap_add_page(d, xatp.gpfn, mfn, 0); out: domain_unlock(d); diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/x86/acpi/Makefile --- a/xen/arch/x86/acpi/Makefile Thu May 15 16:23:56 2008 +0900 +++ b/xen/arch/x86/acpi/Makefile Fri May 16 21:59:38 2008 +0900 @@ -2,3 +2,4 @@ subdir-y += cpufreq obj-y += boot.o obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o +obj-y += pmstat.o diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/x86/acpi/boot.c --- a/xen/arch/x86/acpi/boot.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/arch/x86/acpi/boot.c Fri May 16 21:59:38 2008 +0900 @@ -441,17 +441,9 @@ acpi_fadt_parse_sleep_info(struct acpi_t "FACS is shorter than ACPI spec allow: 0x%x", facs->length); - if ((rsdp->revision < 2) || (facs->length < 32)) { - acpi_sinfo.wakeup_vector = facs_pa + - offsetof(struct acpi_table_facs, - firmware_waking_vector); - acpi_sinfo.vector_width = 32; - } else { - acpi_sinfo.wakeup_vector = facs_pa + - offsetof(struct acpi_table_facs, - xfirmware_waking_vector); - acpi_sinfo.vector_width = 64; - } + acpi_sinfo.wakeup_vector = facs_pa + + offsetof(struct acpi_table_facs, firmware_waking_vector); + acpi_sinfo.vector_width = 32; printk(KERN_INFO PREFIX " wakeup_vec[%"PRIx64"], vec_size[%x]\n", diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/x86/acpi/cpufreq/cpufreq.c --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri May 16 21:59:38 2008 +0900 @@ -369,6 
+369,8 @@ static int acpi_cpufreq_target(struct cp if (!check_freqs(cmd.mask, freqs.new, data)) return -EAGAIN; + px_statistic_update(cmd.mask, perf->state, next_perf_state); + perf->state = next_perf_state; policy->cur = freqs.new; @@ -581,9 +583,13 @@ int acpi_cpufreq_init(void) for_each_online_cpu(i) { xen_px_policy[i].cpu = i; + ret = px_statistic_init(i); + if (ret) + goto out; + ret = acpi_cpufreq_cpu_init(&xen_px_policy[i]); if (ret) - goto cpufreq_init_out; + goto out; } /* setup ondemand cpufreq */ @@ -593,10 +599,10 @@ int acpi_cpufreq_init(void) i = first_cpu(pt[dom]); ret = cpufreq_governor_dbs(&xen_px_policy[i], CPUFREQ_GOV_START); if (ret) - goto cpufreq_init_out; - } - -cpufreq_init_out: + goto out; + } + +out: xfree(pt); return ret; diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/x86/acpi/cpufreq/utility.c --- a/xen/arch/x86/acpi/cpufreq/utility.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/arch/x86/acpi/cpufreq/utility.c Fri May 16 21:59:38 2008 +0900 @@ -34,6 +34,83 @@ struct cpufreq_driver *cpufreq_driver; struct cpufreq_driver *cpufreq_driver; /********************************************************************* + * Px STATISTIC INFO * + *********************************************************************/ + +void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to) +{ + uint32_t i; + uint64_t now; + + now = NOW(); + + for_each_cpu_mask(i, cpumask) { + struct pm_px *pxpt = &px_statistic_data[i]; + uint32_t statnum = processor_pminfo[i].perf.state_count; + + pxpt->u.last = from; + pxpt->u.cur = to; + pxpt->u.pt[to].count++; + pxpt->u.pt[from].residency += now - pxpt->prev_state_wall; + + (*(pxpt->u.trans_pt + from*statnum + to))++; + + pxpt->prev_state_wall = now; + } +} + +int px_statistic_init(int cpuid) +{ + uint32_t i, count; + struct pm_px *pxpt = &px_statistic_data[cpuid]; + struct processor_pminfo *pmpt = &processor_pminfo[cpuid]; + + count = pmpt->perf.state_count; + + pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count); + 
if (!pxpt->u.trans_pt) + return -ENOMEM; + + pxpt->u.pt = xmalloc_array(struct pm_px_val, count); + if (!pxpt->u.pt) { + xfree(pxpt->u.trans_pt); + return -ENOMEM; + } + + memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t))); + memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val))); + + pxpt->u.total = pmpt->perf.state_count; + pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc; + + for (i=0; i < pmpt->perf.state_count; i++) + pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency; + + pxpt->prev_state_wall = NOW(); + + return 0; +} + +void px_statistic_reset(int cpuid) +{ + uint32_t i, j, count; + struct pm_px *pxpt = &px_statistic_data[cpuid]; + + count = processor_pminfo[cpuid].perf.state_count; + + for (i=0; i < count; i++) { + pxpt->u.pt[i].residency = 0; + pxpt->u.pt[i].count = 0; + + for (j=0; j < count; j++) + *(pxpt->u.trans_pt + i*count + j) = 0; + } + + pxpt->prev_state_wall = NOW(); +} + + +/********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/x86/acpi/pmstat.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/acpi/pmstat.c Fri May 16 21:59:38 2008 +0900 @@ -0,0 +1,110 @@ +/***************************************************************************** +# pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.) +# +# Copyright (c) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx> +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., 59 +# Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# The full GNU General Public License is included in this distribution in the +# file called LICENSE. +# +*****************************************************************************/ + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/errno.h> +#include <xen/sched.h> +#include <xen/event.h> +#include <xen/irq.h> +#include <xen/iocap.h> +#include <xen/compat.h> +#include <xen/guest_access.h> +#include <asm/current.h> +#include <public/xen.h> +#include <xen/cpumask.h> +#include <asm/processor.h> +#include <xen/percpu.h> + +#include <public/sysctl.h> +#include <acpi/cpufreq/cpufreq.h> + +struct pm_px px_statistic_data[NR_CPUS]; + +int do_get_pm_info(struct xen_sysctl_get_pmstat *op) +{ + int ret = 0; + struct pm_px *pxpt = &px_statistic_data[op->cpuid]; + struct processor_pminfo *pmpt = &processor_pminfo[op->cpuid]; + + /* to protect the case when Px was controlled by dom0-kernel */ + /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */ + if ( !pmpt->perf.init ) + return -EINVAL; + + if ( !cpu_online(op->cpuid) ) + return -EINVAL; + + switch( op->type ) + { + case PMSTAT_get_max_px: + { + op->u.getpx.total = pmpt->perf.state_count; + break; + } + + case PMSTAT_get_pxstat: + { + uint64_t now, ct; + + now = NOW(); + pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc; + pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall; + pxpt->prev_state_wall = now; + + ct = pmpt->perf.state_count; + if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) ) + { + ret = -EFAULT; + break; + } + + if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) ) + { + ret = -EFAULT; + break; + } + + op->u.getpx.total = pxpt->u.total; + op->u.getpx.usable = pxpt->u.usable; 
+ op->u.getpx.last = pxpt->u.last; + op->u.getpx.cur = pxpt->u.cur; + + break; + } + + case PMSTAT_reset_pxstat: + { + px_statistic_reset(op->cpuid); + break; + } + + default: + printk("not defined sub-hypercall @ do_get_pm_info\n"); + ret = -ENOSYS; + break; + } + + return ret; +} diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/arch/x86/mm.c Fri May 16 21:59:38 2008 +0900 @@ -3297,7 +3297,7 @@ long arch_memory_op(int op, XEN_GUEST_HA { if ( is_xen_heap_mfn(prev_mfn) ) /* Xen heap frames are simply unhooked from this phys slot. */ - guest_physmap_remove_page(d, xatp.gpfn, prev_mfn); + guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0); else /* Normal domain memory is freed, to avoid leaking memory. */ guest_remove_page(d, xatp.gpfn); @@ -3306,10 +3306,10 @@ long arch_memory_op(int op, XEN_GUEST_HA /* Unmap from old location, if any. */ gpfn = get_gpfn_from_mfn(mfn); if ( gpfn != INVALID_M2P_ENTRY ) - guest_physmap_remove_page(d, gpfn, mfn); + guest_physmap_remove_page(d, gpfn, mfn, 0); /* Map at new location. 
*/ - guest_physmap_add_page(d, xatp.gpfn, mfn); + guest_physmap_add_page(d, xatp.gpfn, mfn, 0); domain_unlock(d); diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/x86/mm/hap/p2m-ept.c --- a/xen/arch/x86/mm/hap/p2m-ept.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Fri May 16 21:59:38 2008 +0900 @@ -20,6 +20,7 @@ #include <xen/domain_page.h> #include <xen/sched.h> #include <asm/current.h> +#include <asm/paging.h> #include <asm/types.h> #include <asm/domain.h> #include <asm/p2m.h> @@ -46,6 +47,9 @@ static void ept_p2m_type_to_flags(ept_en } } +#define GUEST_TABLE_NORMAL_PAGE 1 +#define GUEST_TABLE_SUPER_PAGE 2 + static int ept_next_level(struct domain *d, bool_t read_only, ept_entry_t **table, unsigned long *gfn_remainder, u32 shift) @@ -54,7 +58,6 @@ static int ept_next_level(struct domain u32 index; index = *gfn_remainder >> shift; - *gfn_remainder &= (1UL << shift) - 1; ept_entry = (*table) + index; @@ -83,31 +86,53 @@ static int ept_next_level(struct domain ept_entry->r = ept_entry->w = ept_entry->x = 1; } - next = map_domain_page(ept_entry->mfn); - unmap_domain_page(*table); - *table = next; - - return 1; + if ( !ept_entry->sp_avail ) + { + *gfn_remainder &= (1UL << shift) - 1; + next = map_domain_page(ept_entry->mfn); + unmap_domain_page(*table); + *table = next; + return GUEST_TABLE_NORMAL_PAGE; + } + else + return GUEST_TABLE_SUPER_PAGE; } static int -ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt) -{ - ept_entry_t *table = - map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); - unsigned long gfn_remainder = gfn; +ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + unsigned int order, p2m_type_t p2mt) +{ + ept_entry_t *table = NULL; + unsigned long gfn_remainder = gfn, offset = 0; ept_entry_t *ept_entry = NULL; u32 index; - int i, rv = 0; + int i, rv = 0, ret = 0; + int walk_level = order / EPT_TABLE_ORDER; /* Should check if gfn obeys GAW here */ - for ( i = EPT_DEFAULT_GAW; i > 0; i-- 
) - if ( !ept_next_level(d, 0, &table, &gfn_remainder, - i * EPT_TABLE_ORDER) ) + if ( order != 0 ) + if ( (gfn & ((1UL << order) - 1)) ) + return 1; + + table = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); + + ASSERT(table != NULL); + + for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- ) + { + ret = ept_next_level(d, 0, &table, &gfn_remainder, + i * EPT_TABLE_ORDER); + if ( !ret ) goto out; - - index = gfn_remainder; + else if ( ret == GUEST_TABLE_SUPER_PAGE ) + break; + } + + index = gfn_remainder >> ( i ? (i * EPT_TABLE_ORDER): order); + walk_level = ( i ? ( i * EPT_TABLE_ORDER) : order) / EPT_TABLE_ORDER; + offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1))); + ept_entry = table + index; if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) ) @@ -117,9 +142,20 @@ ept_set_entry(struct domain *d, unsigned d->arch.p2m->max_mapped_pfn = gfn; ept_entry->emt = EPT_DEFAULT_MT; - ept_entry->sp_avail = 0; + ept_entry->sp_avail = walk_level ? 1 : 0; + + if ( ret == GUEST_TABLE_SUPER_PAGE ) + { + ept_entry->mfn = mfn_x(mfn) - offset; + if ( ept_entry->avail1 == p2m_ram_logdirty && + p2mt == p2m_ram_rw ) + for ( i = 0; i < 512; i++ ) + paging_mark_dirty(d, mfn_x(mfn)-offset+i); + } + else + ept_entry->mfn = mfn_x(mfn); + ept_entry->avail1 = p2mt; - ept_entry->mfn = mfn_x(mfn); ept_entry->rsvd = 0; ept_entry->avail2 = 0; /* last step */ @@ -132,14 +168,42 @@ ept_set_entry(struct domain *d, unsigned /* Success */ rv = 1; - out: +out: unmap_domain_page(table); ept_sync_domain(d); + /* Now the p2m table is not shared with vt-d page table */ + + if ( iommu_enabled && is_hvm_domain(d) ) + { + if ( p2mt == p2m_ram_rw ) + { + if ( ret == GUEST_TABLE_SUPER_PAGE ) + { + for ( i = 0; i < 512; i++ ) + iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i); + } + else if ( ret ) + iommu_map_page(d, gfn, mfn_x(mfn)); + } + else + { + if ( ret == GUEST_TABLE_SUPER_PAGE ) + { + for ( i = 0; i < 512; i++ ) + iommu_unmap_page(d, gfn-offset+i); + } + else if ( ret ) 
+ iommu_unmap_page(d, gfn); + } + } + +#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE /* If p2m table is shared with vtd page-table. */ if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) ) iommu_flush(d, gfn, (u64*)ept_entry); +#endif return rv; } @@ -152,7 +216,7 @@ static mfn_t ept_get_entry(struct domain unsigned long gfn_remainder = gfn; ept_entry_t *ept_entry; u32 index; - int i; + int i, ret=0; mfn_t mfn = _mfn(INVALID_MFN); *t = p2m_mmio_dm; @@ -164,17 +228,31 @@ static mfn_t ept_get_entry(struct domain /* Should check if gfn obeys GAW here. */ for ( i = EPT_DEFAULT_GAW; i > 0; i-- ) - if ( !ept_next_level(d, 1, &table, &gfn_remainder, - i * EPT_TABLE_ORDER) ) + { + ret = ept_next_level(d, 1, &table, &gfn_remainder, + i * EPT_TABLE_ORDER); + if ( !ret ) goto out; - - index = gfn_remainder; + else if ( ret == GUEST_TABLE_SUPER_PAGE ) + break; + } + + index = gfn_remainder >> ( i * EPT_TABLE_ORDER); ept_entry = table + index; if ( ept_entry->avail1 != p2m_invalid ) { *t = ept_entry->avail1; mfn = _mfn(ept_entry->mfn); + if ( i ) + { + /* we may meet super pages, and to split into 4k pages + * to emulate p2m table + */ + unsigned long split_mfn = + mfn_x(mfn) + (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1 ))); + mfn = _mfn(split_mfn); + } } out: @@ -205,33 +283,63 @@ static void ept_change_entry_type_global l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); for (i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ ) { - if ( !l4e[i4].epte || l4e[i4].sp_avail ) + if ( !l4e[i4].epte ) continue; - l3e = map_domain_page(l4e[i4].mfn); - for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ ) - { - if ( !l3e[i3].epte || l3e[i3].sp_avail ) + if ( !l4e[i4].sp_avail ) + { + l3e = map_domain_page(l4e[i4].mfn); + for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ ) + { + if ( !l3e[i3].epte ) + continue; + if ( !l3e[i3].sp_avail ) + { + l2e = map_domain_page(l3e[i3].mfn); + for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ ) + { + if ( !l2e[i2].epte ) + continue; + if 
( !l2e[i2].sp_avail ) + { + l1e = map_domain_page(l2e[i2].mfn); + for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ ) + { + if ( !l1e[i1].epte ) + continue; + if ( l1e[i1].avail1 != ot ) + continue; + l1e[i1].avail1 = nt; + ept_p2m_type_to_flags(l1e+i1, nt); + } + unmap_domain_page(l1e); + } + else + { + if ( l2e[i2].avail1 != ot ) + continue; + l2e[i2].avail1 = nt; + ept_p2m_type_to_flags(l2e+i2, nt); + } + } + unmap_domain_page(l2e); + } + else + { + if ( l3e[i3].avail1 != ot ) + continue; + l3e[i3].avail1 = nt; + ept_p2m_type_to_flags(l3e+i3, nt); + } + } + unmap_domain_page(l3e); + } + else + { + if ( l4e[i4].avail1 != ot ) continue; - l2e = map_domain_page(l3e[i3].mfn); - for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ ) - { - if ( !l2e[i2].epte || l2e[i2].sp_avail ) - continue; - l1e = map_domain_page(l2e[i2].mfn); - for ( i1 = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ ) - { - if ( !l1e[i1].epte ) - continue; - if ( l1e[i1].avail1 != ot ) - continue; - l1e[i1].avail1 = nt; - ept_p2m_type_to_flags(l1e+i1, nt); - } - unmap_domain_page(l1e); - } - unmap_domain_page(l2e); - } - unmap_domain_page(l3e); + l4e[i4].avail1 = nt; + ept_p2m_type_to_flags(l4e+i4, nt); + } } unmap_domain_page(l4e); diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/arch/x86/mm/p2m.c Fri May 16 21:59:38 2008 +0900 @@ -151,9 +151,11 @@ p2m_next_level(struct domain *d, mfn_t * unsigned long *gfn_remainder, unsigned long gfn, u32 shift, u32 max, unsigned long type) { + l1_pgentry_t *l1_entry; l1_pgentry_t *p2m_entry; l1_pgentry_t new_entry; void *next; + int i; ASSERT(d->arch.p2m->alloc_page); if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, @@ -194,6 +196,44 @@ p2m_next_level(struct domain *d, mfn_t * break; } } + + ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT); + + /* split single large page into 4KB page in P2M table */ + if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) + { + 
unsigned long flags, pfn; + struct page_info *pg = d->arch.p2m->alloc_page(d); + if ( pg == NULL ) + return 0; + list_add_tail(&pg->list, &d->arch.p2m->pages); + pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated; + pg->count_info = 1; + + /* New splintered mappings inherit the flags of the old superpage, + * with a little reorganisation for the _PAGE_PSE_PAT bit. */ + flags = l1e_get_flags(*p2m_entry); + pfn = l1e_get_pfn(*p2m_entry); + if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */ + pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */ + else + flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */ + + l1_entry = map_domain_page(mfn_x(page_to_mfn(pg))); + for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) + { + new_entry = l1e_from_pfn(pfn + i, flags); + paging_write_p2m_entry(d, gfn, + l1_entry+i, *table_mfn, new_entry, 1); + } + unmap_domain_page(l1_entry); + + new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), + __PAGE_HYPERVISOR|_PAGE_USER); + paging_write_p2m_entry(d, gfn, + p2m_entry, *table_mfn, new_entry, 2); + } + *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); next = map_domain_page(mfn_x(*table_mfn)); unmap_domain_page(*table); @@ -204,7 +244,8 @@ p2m_next_level(struct domain *d, mfn_t * // Returns 0 on error (out of memory) static int -p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt) +p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + unsigned int page_order, p2m_type_t p2mt) { // XXX -- this might be able to be faster iff current->domain == d mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); @@ -212,6 +253,7 @@ p2m_set_entry(struct domain *d, unsigned unsigned long gfn_remainder = gfn; l1_pgentry_t *p2m_entry; l1_pgentry_t entry_content; + l2_pgentry_t l2e_content; int rv=0; #if CONFIG_PAGING_LEVELS >= 4 @@ -235,26 +277,53 @@ p2m_set_entry(struct domain *d, unsigned PGT_l2_page_table) ) goto out; - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, - L2_PAGETABLE_SHIFT - PAGE_SHIFT, - 
L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) - goto out; - - p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, - 0, L1_PAGETABLE_ENTRIES); - ASSERT(p2m_entry); + if ( page_order == 0 ) + { + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + L2_PAGETABLE_SHIFT - PAGE_SHIFT, + L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) + goto out; + + p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, + 0, L1_PAGETABLE_ENTRIES); + ASSERT(p2m_entry); + + if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) ) + entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt)); + else + entry_content = l1e_empty(); + + /* level 1 entry */ + paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1); + } + else + { + p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, + L2_PAGETABLE_SHIFT - PAGE_SHIFT, + L2_PAGETABLE_ENTRIES); + ASSERT(p2m_entry); + + if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) && + !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) + { + P2M_ERROR("configure P2M table 4KB L2 entry with large page\n"); + domain_crash(d); + goto out; + } + + if ( mfn_valid(mfn) ) + l2e_content = l2e_from_pfn(mfn_x(mfn), + p2m_type_to_flags(p2mt) | _PAGE_PSE); + else + l2e_content = l2e_empty(); + + entry_content.l1 = l2e_content.l2; + paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2); + } /* Track the highest gfn for which we have ever had a valid mapping */ if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) ) d->arch.p2m->max_mapped_pfn = gfn; - - if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) ) - entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt)); - else - entry_content = l1e_empty(); - - /* level 1 entry */ - paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1); if ( iommu_enabled && is_hvm_domain(d) ) { @@ -335,6 +404,16 @@ p2m_gfn_to_mfn(struct domain *d, unsigne unmap_domain_page(l2e); return _mfn(INVALID_MFN); } + else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) ) + { + mfn = 
_mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr)); + *t = p2m_flags_to_type(l2e_get_flags(*l2e)); + unmap_domain_page(l2e); + + ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t)); + return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN); + } + mfn = _mfn(l2e_get_pfn(*l2e)); unmap_domain_page(l2e); @@ -358,6 +437,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi { mfn_t mfn = _mfn(INVALID_MFN); p2m_type_t p2mt = p2m_mmio_dm; + paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT; /* XXX This is for compatibility with the old model, where anything not * XXX marked as RAM was considered to be emulated MMIO space. * XXX Once we start explicitly registering MMIO regions in the p2m @@ -366,25 +446,44 @@ static mfn_t p2m_gfn_to_mfn_current(unsi if ( gfn <= current->domain->arch.p2m->max_mapped_pfn ) { l1_pgentry_t l1e = l1e_empty(); + l2_pgentry_t l2e = l2e_empty(); int ret; ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); - /* Need to __copy_from_user because the p2m is sparse and this - * part might not exist */ - ret = __copy_from_user(&l1e, - &phys_to_machine_mapping[gfn], - sizeof(l1e)); - - if ( ret == 0 ) { - p2mt = p2m_flags_to_type(l1e_get_flags(l1e)); - ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt)); + ret = __copy_from_user(&l2e, + &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)], + sizeof(l2e)); + + if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) && + (l2e_get_flags(l2e) & _PAGE_PSE) ) + { + p2mt = p2m_flags_to_type(l2e_get_flags(l2e)); + ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt)); if ( p2m_is_valid(p2mt) ) - mfn = _mfn(l1e_get_pfn(l1e)); - else - /* XXX see above */ + mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr)); + else p2mt = p2m_mmio_dm; + } + else + { + + /* Need to __copy_from_user because the p2m is sparse and this + * part might not exist */ + ret = __copy_from_user(&l1e, + &phys_to_machine_mapping[gfn], + sizeof(l1e)); + + if ( ret == 0 ) { + p2mt = 
p2m_flags_to_type(l1e_get_flags(l1e)); + ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt)); + if ( p2m_is_valid(p2mt) ) + mfn = _mfn(l1e_get_pfn(l1e)); + else + /* XXX see above */ + p2mt = p2m_mmio_dm; + } } } @@ -430,9 +529,10 @@ void p2m_change_entry_type_global(struct } static inline -int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt) -{ - return d->arch.p2m->set_entry(d, gfn, mfn, p2mt); +int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, + unsigned int page_order, p2m_type_t p2mt) +{ + return d->arch.p2m->set_entry(d, gfn, mfn, page_order, p2mt); } // Allocate a new p2m table for a domain. @@ -493,7 +593,8 @@ int p2m_alloc_table(struct domain *d, P2M_PRINTK("populating p2m table\n"); /* Initialise physmap tables for slot zero. Other code assumes this. */ - if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) ) + if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0, + p2m_invalid) ) goto error; /* Copy all existing mappings from the page list and m2p */ @@ -512,7 +613,7 @@ int p2m_alloc_table(struct domain *d, (gfn != 0x55555555L) #endif && gfn != INVALID_M2P_ENTRY - && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) ) + && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) ) goto error; } @@ -688,6 +789,28 @@ static void audit_p2m(struct domain *d) gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); continue; } + + /* check for super page */ + if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE ) + { + mfn = l2e_get_pfn(l2e[i2]); + ASSERT(mfn_valid(_mfn(mfn))); + for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++) + { + m2pfn = get_gpfn_from_mfn(mfn+i1); + if ( m2pfn != (gfn + i) ) + { + pmbad++; + P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" + " -> gfn %#lx\n", gfn+i, mfn+i, + m2pfn); + BUG(); + } + } + gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); + continue; + } + l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2])))); for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) @@ -737,32 +860,38 @@ static void audit_p2m(struct domain *d) 
static void -p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) -{ +p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn, + unsigned int page_order) +{ + unsigned long i; + if ( !paging_mode_translate(d) ) return; + P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); - set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid); - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); + set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid); + for ( i = 0; i < (1UL << page_order); i++ ) + set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); } void guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn) + unsigned long mfn, unsigned int page_order) { p2m_lock(d->arch.p2m); audit_p2m(d); - p2m_remove_page(d, gfn, mfn); + p2m_remove_page(d, gfn, mfn, page_order); audit_p2m(d); p2m_unlock(d->arch.p2m); } int guest_physmap_add_entry(struct domain *d, unsigned long gfn, - unsigned long mfn, p2m_type_t t) -{ - unsigned long ogfn; + unsigned long mfn, unsigned int page_order, + p2m_type_t t) +{ + unsigned long i, ogfn; p2m_type_t ot; mfn_t omfn; int rc = 0; @@ -795,7 +924,8 @@ guest_physmap_add_entry(struct domain *d if ( p2m_is_ram(ot) ) { ASSERT(mfn_valid(omfn)); - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); + for ( i = 0; i < (1UL << page_order); i++ ) + set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY); } ogfn = mfn_to_gfn(d, _mfn(mfn)); @@ -818,21 +948,23 @@ guest_physmap_add_entry(struct domain *d P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n", ogfn , mfn_x(omfn)); if ( mfn_x(omfn) == mfn ) - p2m_remove_page(d, ogfn, mfn); + p2m_remove_page(d, ogfn, mfn, page_order); } } if ( mfn_valid(_mfn(mfn)) ) { - if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) ) + if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) ) rc = -EINVAL; - set_gpfn_from_mfn(mfn, gfn); + for ( i = 0; i < (1UL << page_order); i++ ) + set_gpfn_from_mfn(mfn+i, gfn+i); } else { gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n", gfn, 
mfn); - if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) ) + if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, + p2m_invalid) ) rc = -EINVAL; } @@ -851,7 +983,7 @@ void p2m_change_type_global(struct domai l1_pgentry_t l1e_content; l1_pgentry_t *l1e; l2_pgentry_t *l2e; - mfn_t l1mfn; + mfn_t l1mfn, l2mfn; int i1, i2; l3_pgentry_t *l3e; int i3; @@ -891,11 +1023,26 @@ void p2m_change_type_global(struct domai { continue; } + l2mfn = _mfn(l3e_get_pfn(l3e[i3])); l2e = map_domain_page(l3e_get_pfn(l3e[i3])); for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) { if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) { + continue; + } + + if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) ) + { + flags = l2e_get_flags(l2e[i2]); + if ( p2m_flags_to_type(flags) != ot ) + continue; + mfn = l2e_get_pfn(l2e[i2]); + gfn = get_gpfn_from_mfn(mfn); + flags = p2m_flags_to_type(nt); + l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE); + paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2], + l2mfn, l1e_content, 2); continue; } @@ -944,7 +1091,7 @@ p2m_type_t p2m_change_type(struct domain mfn = gfn_to_mfn(d, gfn, &pt); if ( pt == ot ) - set_p2m_entry(d, gfn, mfn, nt); + set_p2m_entry(d, gfn, mfn, 0, nt); p2m_unlock(d->arch.p2m); @@ -968,7 +1115,7 @@ set_mmio_p2m_entry(struct domain *d, uns set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); } - rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct); + rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct); if ( 0 == rc ) gdprintk(XENLOG_ERR, "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n", @@ -992,7 +1139,7 @@ clear_mmio_p2m_entry(struct domain *d, u "clear_mmio_p2m_entry: gfn_to_mfn failed! 
gfn=%08lx\n", gfn); return 0; } - rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0); + rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0); return rc; } diff -r c96507e0c83d -r 6d0cc186bf41 xen/arch/x86/platform_hypercall.c --- a/xen/arch/x86/platform_hypercall.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/arch/x86/platform_hypercall.c Fri May 16 21:59:38 2008 +0900 @@ -403,7 +403,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe if ( xenpxpt->flags == ( XEN_PX_PCT | XEN_PX_PSS | XEN_PX_PSD | XEN_PX_PPC ) ) + { + pxpt->init =1; cpu_count++; + } if ( cpu_count == num_online_cpus() ) ret = acpi_cpufreq_init(); break; diff -r c96507e0c83d -r 6d0cc186bf41 xen/common/grant_table.c --- a/xen/common/grant_table.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/common/grant_table.c Fri May 16 21:59:38 2008 +0900 @@ -1159,7 +1159,7 @@ gnttab_transfer( spin_lock(&e->grant_table->lock); sha = &shared_entry(e->grant_table, gop.ref); - guest_physmap_add_page(e, sha->frame, mfn); + guest_physmap_add_page(e, sha->frame, mfn, 0); sha->frame = mfn; wmb(); sha->flags |= GTF_transfer_completed; diff -r c96507e0c83d -r 6d0cc186bf41 xen/common/memory.c --- a/xen/common/memory.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/common/memory.c Fri May 16 21:59:38 2008 +0900 @@ -127,9 +127,7 @@ static void populate_physmap(struct memo if ( unlikely(paging_mode_translate(d)) ) { - for ( j = 0; j < (1 << a->extent_order); j++ ) - if ( guest_physmap_add_page(d, gpfn + j, mfn + j) ) - goto out; + guest_physmap_add_page(d, gpfn, mfn, a->extent_order); } else { @@ -172,7 +170,7 @@ int guest_remove_page(struct domain *d, if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) put_page(page); - guest_physmap_remove_page(d, gmfn, mfn); + guest_physmap_remove_page(d, gmfn, mfn, 0); put_page(page); @@ -419,7 +417,7 @@ static long memory_exchange(XEN_GUEST_HA if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) ) BUG(); mfn = page_to_mfn(page); - guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn); + 
guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn, 0); put_page(page); } @@ -440,9 +438,7 @@ static long memory_exchange(XEN_GUEST_HA mfn = page_to_mfn(page); if ( unlikely(paging_mode_translate(d)) ) { - /* Ignore failure here. There's nothing we can do. */ - for ( k = 0; k < (1UL << exch.out.extent_order); k++ ) - (void)guest_physmap_add_page(d, gpfn + k, mfn + k); + guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order); } else { diff -r c96507e0c83d -r 6d0cc186bf41 xen/common/sysctl.c --- a/xen/common/sysctl.c Thu May 15 16:23:56 2008 +0900 +++ b/xen/common/sysctl.c Fri May 16 21:59:38 2008 +0900 @@ -25,6 +25,8 @@ #include <xen/nodemask.h> #include <xsm/xsm.h> +extern int do_get_pm_info(struct xen_sysctl_get_pmstat *op); + extern long arch_do_sysctl( struct xen_sysctl *op, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl); @@ -193,6 +195,20 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc op->u.availheap.avail_bytes <<= PAGE_SHIFT; ret = copy_to_guest(u_sysctl, op, 1) ? -EFAULT : 0; + } + break; + + case XEN_SYSCTL_get_pmstat: + { + ret = do_get_pm_info(&op->u.get_pmstat); + if ( ret ) + break; + + if ( copy_to_guest(u_sysctl, op, 1) ) + { + ret = -EFAULT; + break; + } } break; diff -r c96507e0c83d -r 6d0cc186bf41 xen/include/acpi/cpufreq/processor_perf.h --- a/xen/include/acpi/cpufreq/processor_perf.h Thu May 15 16:23:56 2008 +0900 +++ b/xen/include/acpi/cpufreq/processor_perf.h Fri May 16 21:59:38 2008 +0900 @@ -2,9 +2,13 @@ #define __XEN_PROCESSOR_PM_H__ #include <public/platform.h> +#include <public/sysctl.h> int get_cpu_id(u8); int acpi_cpufreq_init(void); +void px_statistic_update(cpumask_t, uint8_t, uint8_t); +int px_statistic_init(int); +void px_statistic_reset(int); struct processor_performance { uint32_t state; @@ -16,15 +20,32 @@ struct processor_performance { struct xen_psd_package domain_info; cpumask_t shared_cpu_map; uint32_t shared_type; + + uint32_t init; }; struct processor_pminfo { uint32_t acpi_id; uint32_t id; - uint32_t flag; struct 
processor_performance perf; }; extern struct processor_pminfo processor_pminfo[NR_CPUS]; +struct px_stat { + uint8_t total; /* total Px states */ + uint8_t usable; /* usable Px states */ + uint8_t last; /* last Px state */ + uint8_t cur; /* current Px state */ + uint64_t *trans_pt; /* Px transition table */ + pm_px_val_t *pt; +}; + +struct pm_px { + struct px_stat u; + uint64_t prev_state_wall; +}; + +extern struct pm_px px_statistic_data[NR_CPUS]; + #endif /* __XEN_PROCESSOR_PM_H__ */ diff -r c96507e0c83d -r 6d0cc186bf41 xen/include/asm-ia64/grant_table.h --- a/xen/include/asm-ia64/grant_table.h Thu May 15 16:23:56 2008 +0900 +++ b/xen/include/asm-ia64/grant_table.h Fri May 16 21:59:38 2008 +0900 @@ -13,7 +13,7 @@ int replace_grant_host_mapping(unsigned int replace_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned long new_gpaddr, unsigned int flags); // for grant transfer -int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn); +int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn, unsigned int page_order); /* XXX * somewhere appropriate diff -r c96507e0c83d -r 6d0cc186bf41 xen/include/asm-ia64/shadow.h --- a/xen/include/asm-ia64/shadow.h Thu May 15 16:23:56 2008 +0900 +++ b/xen/include/asm-ia64/shadow.h Fri May 16 21:59:38 2008 +0900 @@ -40,8 +40,10 @@ * Utilities to change relationship of gpfn->mfn for designated domain, * which is required by gnttab transfer, balloon, device model and etc. 
*/ -int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn); -void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn); +int guest_physmap_add_page(struct domain *d, unsigned long gpfn, + unsigned long mfn, unsigned int page_order); +void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, + unsigned long mfn, unsigned int page_order); static inline int shadow_mode_enabled(struct domain *d) diff -r c96507e0c83d -r 6d0cc186bf41 xen/include/asm-x86/p2m.h --- a/xen/include/asm-x86/p2m.h Thu May 15 16:23:56 2008 +0900 +++ b/xen/include/asm-x86/p2m.h Fri May 16 21:59:38 2008 +0900 @@ -102,7 +102,8 @@ struct p2m_domain { void (*free_page )(struct domain *d, struct page_info *pg); int (*set_entry )(struct domain *d, unsigned long gfn, - mfn_t mfn, p2m_type_t p2mt); + mfn_t mfn, unsigned int page_order, + p2m_type_t p2mt); mfn_t (*get_entry )(struct domain *d, unsigned long gfn, p2m_type_t *p2mt); mfn_t (*get_entry_current)(unsigned long gfn, @@ -203,21 +204,23 @@ void p2m_final_teardown(struct domain *d /* Add a page to a domain's p2m table */ int guest_physmap_add_entry(struct domain *d, unsigned long gfn, - unsigned long mfn, p2m_type_t t); + unsigned long mfn, unsigned int page_order, + p2m_type_t t); /* Untyped version for RAM only, for compatibility * * Return 0 for success */ static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn, - unsigned long mfn) -{ - return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw); + unsigned long mfn, + unsigned int page_order) +{ + return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw); } /* Remove a page from a domain's p2m table */ void guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn); + unsigned long mfn, unsigned int page_order); /* Change types across all p2m entries in a domain */ void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt); diff -r c96507e0c83d -r 
6d0cc186bf41 xen/include/public/sysctl.h --- a/xen/include/public/sysctl.h Thu May 15 16:23:56 2008 +0900 +++ b/xen/include/public/sysctl.h Fri May 16 21:59:38 2008 +0900 @@ -212,7 +212,41 @@ struct xen_sysctl_availheap { }; typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); - + +#define XEN_SYSCTL_get_pmstat 10 +struct pm_px_val { + uint64_aligned_t freq; /* Px core frequency */ + uint64_aligned_t residency; /* Px residency time */ + uint64_aligned_t count; /* Px transition count */ +}; +typedef struct pm_px_val pm_px_val_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_val_t); + +struct pm_px_stat { + uint8_t total; /* total Px states */ + uint8_t usable; /* usable Px states */ + uint8_t last; /* last Px state */ + uint8_t cur; /* current Px state */ + XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */ + XEN_GUEST_HANDLE_64(pm_px_val_t) pt; +}; +typedef struct pm_px_stat pm_px_stat_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t); + +struct xen_sysctl_get_pmstat { +#define PMSTAT_get_max_px 0x11 +#define PMSTAT_get_pxstat 0x12 +#define PMSTAT_reset_pxstat 0x13 + uint32_t type; + uint32_t cpuid; + union { + struct pm_px_stat getpx; + /* other struct for cx, tx, etc */ + } u; +}; +typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); + struct xen_sysctl { uint32_t cmd; uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ @@ -226,6 +260,7 @@ struct xen_sysctl { struct xen_sysctl_debug_keys debug_keys; struct xen_sysctl_getcpuinfo getcpuinfo; struct xen_sysctl_availheap availheap; + struct xen_sysctl_get_pmstat get_pmstat; uint8_t pad[128]; } u; }; diff -r c96507e0c83d -r 6d0cc186bf41 xen/include/xen/paging.h --- a/xen/include/xen/paging.h Thu May 15 16:23:56 2008 +0900 +++ b/xen/include/xen/paging.h Fri May 16 21:59:38 2008 +0900 @@ -18,8 +18,8 @@ #else #define paging_mode_translate(d) (0) -#define guest_physmap_add_page(d, p, m) (0) -#define 
guest_physmap_remove_page(d, p, m) ((void)0) +#define guest_physmap_add_page(d, p, m, o) (0) +#define guest_physmap_remove_page(d, p, m, o) ((void)0) #endif _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |