 
	
| [Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v3 2/3] xen: remove tmem from hypervisor
 This patch removes all tmem-related code and CONFIG_TMEM from the hypervisor. It also removes the tmem hypercalls from the default XSM policy. The remaining code is written as if tmem is disabled and the number of tmem freeable pages is 0. We will need to keep public/tmem.h around forever to avoid breaking guests. Remove the hypervisor-only part and put the guest-visible part under a Xen version check. Take the chance to remove trailing whitespace. Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx> Acked-by: Jan Beulich <jbeulich@xxxxxxxx> Acked-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx> Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> --- v3: 1. Fold in changes agreed in v2. 2. Add acks. 3. Rebase and fix up conflicts v2: 1. remove some more residuals 2. fix errors discovered by Gitlab CI 3. keep public/tmem.h --- MAINTAINERS | 8 - tools/flask/policy/modules/dom0.te | 4 +- tools/flask/policy/modules/guest_features.te | 3 - xen/arch/arm/configs/tiny64.conf | 1 - xen/arch/x86/configs/pvshim_defconfig | 1 - xen/arch/x86/guest/hypercall_page.S | 2 +- xen/arch/x86/hvm/hypercall.c | 3 - xen/arch/x86/hypercall.c | 1 - xen/arch/x86/pv/hypercall.c | 3 - xen/arch/x86/setup.c | 8 - xen/common/Kconfig | 15 - xen/common/Makefile | 4 - xen/common/compat/tmem_xen.c | 23 - xen/common/domain.c | 3 - xen/common/memory.c | 12 +- xen/common/page_alloc.c | 54 +- xen/common/sysctl.c | 5 - xen/common/tmem.c | 2095 ------------------ xen/common/tmem_control.c | 560 ----- xen/common/tmem_xen.c | 277 --- xen/include/Makefile | 1 - xen/include/public/sysctl.h | 108 +- xen/include/public/tmem.h | 14 +- xen/include/xen/hypercall.h | 7 - xen/include/xen/mm.h | 3 - xen/include/xen/sched.h | 3 - xen/include/xen/tmem.h | 45 - xen/include/xen/tmem_control.h | 39 - xen/include/xen/tmem_xen.h | 343 --- xen/include/xlat.lst | 2 - xen/include/xsm/dummy.h | 6 - xen/include/xsm/xsm.h | 6 - xen/xsm/dummy.c | 1 - xen/xsm/flask/hooks.c | 9 - xen/xsm/flask/policy/access_vectors | 4 - 35 files changed, 20 insertions(+), 3653 deletions(-) delete 
mode 100644 xen/common/compat/tmem_xen.c delete mode 100644 xen/common/tmem.c delete mode 100644 xen/common/tmem_control.c delete mode 100644 xen/common/tmem_xen.c delete mode 100644 xen/include/xen/tmem.h delete mode 100644 xen/include/xen/tmem_control.h delete mode 100644 xen/include/xen/tmem_xen.h diff --git a/MAINTAINERS b/MAINTAINERS index a208bbe304..1f422d96a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -402,14 +402,6 @@ F: */configure F: */*.ac F: tools/ -TRANSCENDENT MEMORY (TMEM) -M: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> -W: https://oss.oracle.com/projects/tmem -S: Supported -F: xen/common/tmem* -F: xen/include/xen/tmem* -F: docs/misc/tmem* - UNMODIFIED LINUX PV DRIVERS M: Jan Beulich <jbeulich@xxxxxxxx> S: Obsolete diff --git a/tools/flask/policy/modules/dom0.te b/tools/flask/policy/modules/dom0.te index a347d664f8..9970f9dc08 100644 --- a/tools/flask/policy/modules/dom0.te +++ b/tools/flask/policy/modules/dom0.te @@ -10,8 +10,8 @@ allow dom0_t xen_t:xen { settime tbufcontrol readconsole clearconsole perfcontrol mtrr_add mtrr_del mtrr_read microcode physinfo quirk writeconsole readapic writeapic privprofile nonprivprofile kexec firmware sleep frequency - getidle debug getcpuinfo heap pm_op mca_op lockprof cpupool_op tmem_op - tmem_control getscheduler setscheduler + getidle debug getcpuinfo heap pm_op mca_op lockprof cpupool_op + getscheduler setscheduler }; allow dom0_t xen_t:xen2 { resource_op psr_cmt_op psr_alloc pmu_ctrl get_symbol diff --git a/tools/flask/policy/modules/guest_features.te b/tools/flask/policy/modules/guest_features.te index fe4835db5b..2797a22761 100644 --- a/tools/flask/policy/modules/guest_features.te +++ b/tools/flask/policy/modules/guest_features.te @@ -1,6 +1,3 @@ -# Allow all domains to use (unprivileged parts of) the tmem hypercall -allow domain_type xen_t:xen tmem_op; - # Allow all domains to use PMU (but not to change its settings --- that's what # pmu_ctrl is for) allow domain_type xen_t:xen2 pmu_use; diff --git 
a/xen/arch/arm/configs/tiny64.conf b/xen/arch/arm/configs/tiny64.conf index aecc55c95f..cc6d93f2f8 100644 --- a/xen/arch/arm/configs/tiny64.conf +++ b/xen/arch/arm/configs/tiny64.conf @@ -11,7 +11,6 @@ CONFIG_ARM=y # # Common Features # -# CONFIG_TMEM is not set CONFIG_SCHED_CREDIT=y # CONFIG_SCHED_CREDIT2 is not set # CONFIG_SCHED_RTDS is not set diff --git a/xen/arch/x86/configs/pvshim_defconfig b/xen/arch/x86/configs/pvshim_defconfig index a12e3d0465..9710aa6238 100644 --- a/xen/arch/x86/configs/pvshim_defconfig +++ b/xen/arch/x86/configs/pvshim_defconfig @@ -11,7 +11,6 @@ CONFIG_NR_CPUS=32 # CONFIG_HVM_FEP is not set # CONFIG_TBOOT is not set # CONFIG_KEXEC is not set -# CONFIG_TMEM is not set # CONFIG_XENOPROF is not set # CONFIG_XSM is not set # CONFIG_SCHED_CREDIT2 is not set diff --git a/xen/arch/x86/guest/hypercall_page.S b/xen/arch/x86/guest/hypercall_page.S index 26afabf909..347fee4ff9 100644 --- a/xen/arch/x86/guest/hypercall_page.S +++ b/xen/arch/x86/guest/hypercall_page.S @@ -58,8 +58,8 @@ DECLARE_HYPERCALL(hvm_op) DECLARE_HYPERCALL(sysctl) DECLARE_HYPERCALL(domctl) DECLARE_HYPERCALL(kexec_op) -DECLARE_HYPERCALL(tmem_op) DECLARE_HYPERCALL(argo_op) +DECLARE_HYPERCALL(xc_reserved_op) DECLARE_HYPERCALL(xenpmu_op) DECLARE_HYPERCALL(arch_0) diff --git a/xen/arch/x86/hvm/hypercall.c b/xen/arch/x86/hvm/hypercall.c index 00455ff115..33dd2d99d2 100644 --- a/xen/arch/x86/hvm/hypercall.c +++ b/xen/arch/x86/hvm/hypercall.c @@ -135,9 +135,6 @@ static const hypercall_table_t hvm_hypercall_table[] = { HYPERCALL(hvm_op), HYPERCALL(sysctl), HYPERCALL(domctl), -#ifdef CONFIG_TMEM - HYPERCALL(tmem_op), -#endif #ifdef CONFIG_ARGO COMPAT_CALL(argo_op), #endif diff --git a/xen/arch/x86/hypercall.c b/xen/arch/x86/hypercall.c index cf44b82793..d483dbaa6b 100644 --- a/xen/arch/x86/hypercall.c +++ b/xen/arch/x86/hypercall.c @@ -65,7 +65,6 @@ const hypercall_args_t hypercall_args_table[NR_hypercalls] = ARGS(sysctl, 1), ARGS(domctl, 1), ARGS(kexec_op, 2), - ARGS(tmem_op, 1), 
#ifdef CONFIG_ARGO ARGS(argo_op, 5), #endif diff --git a/xen/arch/x86/pv/hypercall.c b/xen/arch/x86/pv/hypercall.c index e9da8419ca..5fdb8f988f 100644 --- a/xen/arch/x86/pv/hypercall.c +++ b/xen/arch/x86/pv/hypercall.c @@ -76,9 +76,6 @@ const hypercall_table_t pv_hypercall_table[] = { #ifdef CONFIG_KEXEC COMPAT_CALL(kexec_op), #endif -#ifdef CONFIG_TMEM - HYPERCALL(tmem_op), -#endif #ifdef CONFIG_ARGO COMPAT_CALL(argo_op), #endif diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 3440794275..aea9181a1f 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -25,7 +25,6 @@ #include <xen/dmi.h> #include <xen/pfn.h> #include <xen/nodemask.h> -#include <xen/tmem_xen.h> #include <xen/virtual_region.h> #include <xen/watchdog.h> #include <public/version.h> @@ -1485,13 +1484,6 @@ void __init noreturn __start_xen(unsigned long mbi_p) s = pfn_to_paddr(limit + 1); init_domheap_pages(s, e); } - - if ( tmem_enabled() ) - { - printk(XENLOG_WARNING - "TMEM physical RAM limit exceeded, disabling TMEM\n"); - tmem_disable(); - } } else end_boot_allocator(); diff --git a/xen/common/Kconfig b/xen/common/Kconfig index c838506241..7a12346f19 100644 --- a/xen/common/Kconfig +++ b/xen/common/Kconfig @@ -88,21 +88,6 @@ config KEXEC If unsure, say Y. -config TMEM - bool "Transcendent Memory Support (deprecated)" if EXPERT = "y" - ---help--- - Transcendent memory allows PV-aware guests to collaborate on memory - usage. Guests can 'swap' their memory to the hypervisor or have an - collective pool of memory shared across guests. The end result is - less memory usage by guests allowing higher guest density. - - You also have to enable it on the Xen commandline by using tmem=1. - - WARNING: This option (and its underlying code) is going to go away - in a future Xen version. - - If unsure, say N. 
- config XENOPROF def_bool y prompt "Xen Oprofile Support" if EXPERT = "y" diff --git a/xen/common/Makefile b/xen/common/Makefile index bca48e6e22..51df0ba844 100644 --- a/xen/common/Makefile +++ b/xen/common/Makefile @@ -73,10 +73,6 @@ obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma $(lzo-y) unl obj-$(CONFIG_COMPAT) += $(addprefix compat/,domain.o kernel.o memory.o multicall.o xlat.o) -tmem-y := tmem.o tmem_xen.o tmem_control.o -tmem-$(CONFIG_COMPAT) += compat/tmem_xen.o -obj-$(CONFIG_TMEM) += $(tmem-y) - extra-y := symbols-dummy.o subdir-$(CONFIG_COVERAGE) += coverage diff --git a/xen/common/compat/tmem_xen.c b/xen/common/compat/tmem_xen.c deleted file mode 100644 index 5111fd8df6..0000000000 --- a/xen/common/compat/tmem_xen.c +++ /dev/null @@ -1,23 +0,0 @@ -/****************************************************************************** - * tmem_xen.c - * - */ - -#include <xen/lib.h> -#include <xen/sched.h> -#include <xen/domain.h> -#include <xen/guest_access.h> -#include <xen/hypercall.h> -#include <compat/tmem.h> - -CHECK_tmem_oid; - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/common/domain.c b/xen/common/domain.c index 88bbe984bc..90c66079f9 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -41,7 +41,6 @@ #include <public/vcpu.h> #include <xsm/xsm.h> #include <xen/trace.h> -#include <xen/tmem.h> #include <asm/setup.h> #ifdef CONFIG_X86 @@ -725,10 +724,8 @@ int domain_kill(struct domain *d) argo_destroy(d); evtchn_destroy(d); gnttab_release_mappings(d); - tmem_destroy(d->tmem_client); vnuma_destroy(d->vnuma); domain_set_outstanding_pages(d, 0); - d->tmem_client = NULL; /* fallthrough */ case DOMDYING_dying: rc = domain_relinquish_resources(d); diff --git a/xen/common/memory.c b/xen/common/memory.c index 86567e6117..20609e153d 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -7,6 +7,7 @@ * Copyright (c) 
2003-2005, K A Fraser */ +#include <xen/domain_page.h> #include <xen/types.h> #include <xen/lib.h> #include <xen/mm.h> @@ -18,8 +19,6 @@ #include <xen/guest_access.h> #include <xen/hypercall.h> #include <xen/errno.h> -#include <xen/tmem.h> -#include <xen/tmem_xen.h> #include <xen/numa.h> #include <xen/mem_access.h> #include <xen/trace.h> @@ -250,11 +249,10 @@ static void populate_physmap(struct memop_args *a) if ( unlikely(!page) ) { - if ( !tmem_enabled() || a->extent_order ) - gdprintk(XENLOG_INFO, - "Could not allocate order=%u extent: id=%d memflags=%#x (%u of %u)\n", - a->extent_order, d->domain_id, a->memflags, - i, a->nr_extents); + gdprintk(XENLOG_INFO, + "Could not allocate order=%u extent: id=%d memflags=%#x (%u of %u)\n", + a->extent_order, d->domain_id, a->memflags, + i, a->nr_extents); goto out; } diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index be44158033..702e3e0b42 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -135,8 +135,6 @@ #include <xen/numa.h> #include <xen/nodemask.h> #include <xen/event.h> -#include <xen/tmem.h> -#include <xen/tmem_xen.h> #include <public/sysctl.h> #include <public/sched.h> #include <asm/page.h> @@ -451,10 +449,6 @@ static unsigned long node_need_scrub[MAX_NUMNODES]; static unsigned long *avail[MAX_NUMNODES]; static long total_avail_pages; -/* TMEM: Reserve a fraction of memory for mid-size (0<order<9) allocations.*/ -static long midsize_alloc_zone_pages; -#define MIDSIZE_ALLOC_FRAC 128 - static DEFINE_SPINLOCK(heap_lock); static long outstanding_claims; /* total outstanding claims by all domains */ @@ -530,16 +524,6 @@ int domain_set_outstanding_pages(struct domain *d, unsigned long pages) /* how much memory is available? */ avail_pages = total_avail_pages; - /* Note: The usage of claim means that allocation from a guest *might* - * have to come from freeable memory. Using free memory is always better, if - * it is available, than using freeable memory. 
- * - * But that is OK as once the claim has been made, it still can take minutes - * before the claim is fully satisfied. Tmem can make use of the unclaimed - * pages during this time (to store ephemeral/freeable pages only, - * not persistent pages). - */ - avail_pages += tmem_freeable_pages(); avail_pages -= outstanding_claims; /* @@ -711,8 +695,7 @@ static void __init setup_low_mem_virq(void) static void check_low_mem_virq(void) { - unsigned long avail_pages = total_avail_pages + - tmem_freeable_pages() - outstanding_claims; + unsigned long avail_pages = total_avail_pages - outstanding_claims; if ( unlikely(avail_pages <= low_mem_virq_th) ) { @@ -939,8 +922,7 @@ static struct page_info *alloc_heap_pages( * Claimed memory is considered unavailable unless the request * is made by a domain with sufficient unclaimed pages. */ - if ( (outstanding_claims + request > - total_avail_pages + tmem_freeable_pages()) && + if ( (outstanding_claims + request > total_avail_pages) && ((memflags & MEMF_no_refcount) || !d || d->outstanding_pages < request) ) { @@ -948,22 +930,6 @@ static struct page_info *alloc_heap_pages( return NULL; } - /* - * TMEM: When available memory is scarce due to tmem absorbing it, allow - * only mid-size allocations to avoid worst of fragmentation issues. - * Others try tmem pools then fail. This is a workaround until all - * post-dom0-creation-multi-page allocations can be eliminated. - */ - if ( ((order == 0) || (order >= 9)) && - (total_avail_pages <= midsize_alloc_zone_pages) && - tmem_freeable_pages() ) - { - /* Try to free memory from tmem. */ - pg = tmem_relinquish_pages(order, memflags); - spin_unlock(&heap_lock); - return pg; - } - pg = get_free_buddy(zone_lo, zone_hi, order, memflags, d); /* Try getting a dirty buddy if we couldn't get a clean one. 
*/ if ( !pg && !(memflags & MEMF_no_scrub) ) @@ -1443,10 +1409,6 @@ static void free_heap_pages( else pg->u.free.first_dirty = INVALID_DIRTY_IDX; - if ( tmem_enabled() ) - midsize_alloc_zone_pages = max( - midsize_alloc_zone_pages, total_avail_pages / MIDSIZE_ALLOC_FRAC); - /* Merge chunks as far as possible. */ while ( order < MAX_ORDER ) { @@ -1833,11 +1795,6 @@ static unsigned long avail_heap_pages( return free_pages; } -unsigned long total_free_pages(void) -{ - return total_avail_pages - midsize_alloc_zone_pages; -} - void __init end_boot_allocator(void) { unsigned int i; @@ -2264,10 +2221,9 @@ int assign_pages( { if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) ) { - if ( !tmem_enabled() || order != 0 || d->tot_pages != d->max_pages ) - gprintk(XENLOG_INFO, "Over-allocation for domain %u: " - "%u > %u\n", d->domain_id, - d->tot_pages + (1 << order), d->max_pages); + gprintk(XENLOG_INFO, "Over-allocation for domain %u: " + "%u > %u\n", d->domain_id, + d->tot_pages + (1 << order), d->max_pages); rc = -E2BIG; goto out; } diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c index c0aa6bde4e..765effde8d 100644 --- a/xen/common/sysctl.c +++ b/xen/common/sysctl.c @@ -13,7 +13,6 @@ #include <xen/domain.h> #include <xen/event.h> #include <xen/domain_page.h> -#include <xen/tmem.h> #include <xen/trace.h> #include <xen/console.h> #include <xen/iocap.h> @@ -456,10 +455,6 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl) } #endif - case XEN_SYSCTL_tmem_op: - ret = tmem_control(&op->u.tmem_op); - break; - case XEN_SYSCTL_livepatch_op: ret = livepatch_op(&op->u.livepatch); if ( ret != -ENOSYS && ret != -EOPNOTSUPP ) diff --git a/xen/common/tmem.c b/xen/common/tmem.c deleted file mode 100644 index c077f87e77..0000000000 --- a/xen/common/tmem.c +++ /dev/null @@ -1,2095 +0,0 @@ -/****************************************************************************** - * tmem.c - * - * Transcendent memory - * - * Copyright (c) 2009, Dan Magenheimer, Oracle 
Corp. - */ - -/* TODO list: 090129 (updated 100318) - - any better reclamation policy? - - use different tlsf pools for each client (maybe each pool) - - test shared access more completely (ocfs2) - - add feedback-driven compression (not for persistent pools though!) - - add data-structure total bytes overhead stats - */ - -#ifdef __XEN__ -#include <xen/tmem_xen.h> /* host-specific (eg Xen) code goes here. */ -#endif - -#include <public/sysctl.h> -#include <xen/tmem.h> -#include <xen/rbtree.h> -#include <xen/radix-tree.h> -#include <xen/list.h> -#include <xen/init.h> - -#define TMEM_SPEC_VERSION 1 - -struct tmem_statistics tmem_stats = { - .global_obj_count = ATOMIC_INIT(0), - .global_pgp_count = ATOMIC_INIT(0), - .global_pcd_count = ATOMIC_INIT(0), - .global_page_count = ATOMIC_INIT(0), - .global_rtree_node_count = ATOMIC_INIT(0), -}; - -/************ CORE DATA STRUCTURES ************************************/ - -struct tmem_object_root { - struct xen_tmem_oid oid; - struct rb_node rb_tree_node; /* Protected by pool->pool_rwlock. */ - unsigned long objnode_count; /* Atomicity depends on obj_spinlock. */ - long pgp_count; /* Atomicity depends on obj_spinlock. */ - struct radix_tree_root tree_root; /* Tree of pages within object. */ - struct tmem_pool *pool; - domid_t last_client; - spinlock_t obj_spinlock; -}; - -struct tmem_object_node { - struct tmem_object_root *obj; - struct radix_tree_node rtn; -}; - -struct tmem_page_descriptor { - union { - struct list_head global_eph_pages; - struct list_head client_inv_pages; - }; - union { - struct { - union { - struct list_head client_eph_pages; - struct list_head pool_pers_pages; - }; - struct tmem_object_root *obj; - } us; - struct xen_tmem_oid inv_oid; /* Used for invalid list only. */ - }; - pagesize_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid, - else compressed data (cdata). */ - uint32_t index; - bool eviction_attempted; /* CHANGE TO lifetimes? (settable). 
*/ - union { - struct page_info *pfp; /* Page frame pointer. */ - char *cdata; /* Compressed data. */ - struct tmem_page_content_descriptor *pcd; /* Page dedup. */ - }; - union { - uint64_t timestamp; - uint32_t pool_id; /* Used for invalid list only. */ - }; -}; - -#define PCD_TZE_MAX_SIZE (PAGE_SIZE - (PAGE_SIZE/64)) - -struct tmem_page_content_descriptor { - union { - struct page_info *pfp; /* Page frame pointer. */ - char *cdata; /* If compression_enabled. */ - }; - pagesize_t size; /* If compression_enabled -> 0<size<PAGE_SIZE (*cdata) - * else if tze, 0<=size<PAGE_SIZE, rounded up to mult of 8 - * else PAGE_SIZE -> *pfp. */ -}; - -static int tmem_initialized = 0; - -struct xmem_pool *tmem_mempool = 0; -unsigned int tmem_mempool_maxalloc = 0; - -DEFINE_SPINLOCK(tmem_page_list_lock); -PAGE_LIST_HEAD(tmem_page_list); -unsigned long tmem_page_list_pages = 0; - -DEFINE_RWLOCK(tmem_rwlock); -static DEFINE_SPINLOCK(eph_lists_spinlock); /* Protects global AND clients. */ -static DEFINE_SPINLOCK(pers_lists_spinlock); - -#define ASSERT_SPINLOCK(_l) ASSERT(spin_is_locked(_l)) -#define ASSERT_WRITELOCK(_l) ASSERT(rw_is_write_locked(_l)) - - atomic_t client_weight_total; - -struct tmem_global tmem_global = { - .ephemeral_page_list = LIST_HEAD_INIT(tmem_global.ephemeral_page_list), - .client_list = LIST_HEAD_INIT(tmem_global.client_list), - .client_weight_total = ATOMIC_INIT(0), -}; - -/* - * There two types of memory allocation interfaces in tmem. - * One is based on xmem_pool and the other is used for allocate a whole page. - * Both of them are based on the lowlevel function __tmem_alloc_page/_thispool(). - * The call trace of alloc path is like below. 
- * Persistant pool: - * 1.tmem_malloc() - * > xmem_pool_alloc() - * > tmem_persistent_pool_page_get() - * > __tmem_alloc_page_thispool() - * 2.tmem_alloc_page() - * > __tmem_alloc_page_thispool() - * - * Ephemeral pool: - * 1.tmem_malloc() - * > xmem_pool_alloc() - * > tmem_mempool_page_get() - * > __tmem_alloc_page() - * 2.tmem_alloc_page() - * > __tmem_alloc_page() - * - * The free path is done in the same manner. - */ -static void *tmem_malloc(size_t size, struct tmem_pool *pool) -{ - void *v = NULL; - - if ( (pool != NULL) && is_persistent(pool) ) { - if ( pool->client->persistent_pool ) - v = xmem_pool_alloc(size, pool->client->persistent_pool); - } - else - { - ASSERT( size < tmem_mempool_maxalloc ); - ASSERT( tmem_mempool != NULL ); - v = xmem_pool_alloc(size, tmem_mempool); - } - if ( v == NULL ) - tmem_stats.alloc_failed++; - return v; -} - -static void tmem_free(void *p, struct tmem_pool *pool) -{ - if ( pool == NULL || !is_persistent(pool) ) - { - ASSERT( tmem_mempool != NULL ); - xmem_pool_free(p, tmem_mempool); - } - else - { - ASSERT( pool->client->persistent_pool != NULL ); - xmem_pool_free(p, pool->client->persistent_pool); - } -} - -static struct page_info *tmem_alloc_page(struct tmem_pool *pool) -{ - struct page_info *pfp = NULL; - - if ( pool != NULL && is_persistent(pool) ) - pfp = __tmem_alloc_page_thispool(pool->client->domain); - else - pfp = __tmem_alloc_page(); - if ( pfp == NULL ) - tmem_stats.alloc_page_failed++; - else - atomic_inc_and_max(global_page_count); - return pfp; -} - -static void tmem_free_page(struct tmem_pool *pool, struct page_info *pfp) -{ - ASSERT(pfp); - if ( pool == NULL || !is_persistent(pool) ) - __tmem_free_page(pfp); - else - __tmem_free_page_thispool(pfp); - atomic_dec_and_assert(global_page_count); -} - -static void *tmem_mempool_page_get(unsigned long size) -{ - struct page_info *pi; - - ASSERT(size == PAGE_SIZE); - if ( (pi = __tmem_alloc_page()) == NULL ) - return NULL; - return page_to_virt(pi); -} - -static 
void tmem_mempool_page_put(void *page_va) -{ - ASSERT(IS_PAGE_ALIGNED(page_va)); - __tmem_free_page(virt_to_page(page_va)); -} - -static int __init tmem_mempool_init(void) -{ - tmem_mempool = xmem_pool_create("tmem", tmem_mempool_page_get, - tmem_mempool_page_put, PAGE_SIZE, 0, PAGE_SIZE); - if ( tmem_mempool ) - tmem_mempool_maxalloc = xmem_pool_maxalloc(tmem_mempool); - return tmem_mempool != NULL; -} - -/* Persistent pools are per-domain. */ -static void *tmem_persistent_pool_page_get(unsigned long size) -{ - struct page_info *pi; - struct domain *d = current->domain; - - ASSERT(size == PAGE_SIZE); - if ( (pi = __tmem_alloc_page_thispool(d)) == NULL ) - return NULL; - ASSERT(IS_VALID_PAGE(pi)); - return page_to_virt(pi); -} - -static void tmem_persistent_pool_page_put(void *page_va) -{ - struct page_info *pi; - - ASSERT(IS_PAGE_ALIGNED(page_va)); - pi = mfn_to_page(_mfn(virt_to_mfn(page_va))); - ASSERT(IS_VALID_PAGE(pi)); - __tmem_free_page_thispool(pi); -} - -/* - * Page content descriptor manipulation routines. - */ -#define NOT_SHAREABLE ((uint16_t)-1UL) - -/************ PAGE DESCRIPTOR MANIPULATION ROUTINES *******************/ - -/* Allocate a struct tmem_page_descriptor and associate it with an object. 
*/ -static struct tmem_page_descriptor *pgp_alloc(struct tmem_object_root *obj) -{ - struct tmem_page_descriptor *pgp; - struct tmem_pool *pool; - - ASSERT(obj != NULL); - ASSERT(obj->pool != NULL); - pool = obj->pool; - if ( (pgp = tmem_malloc(sizeof(struct tmem_page_descriptor), pool)) == NULL ) - return NULL; - pgp->us.obj = obj; - INIT_LIST_HEAD(&pgp->global_eph_pages); - INIT_LIST_HEAD(&pgp->us.client_eph_pages); - pgp->pfp = NULL; - pgp->size = -1; - pgp->index = -1; - pgp->timestamp = get_cycles(); - atomic_inc_and_max(global_pgp_count); - atomic_inc(&pool->pgp_count); - if ( _atomic_read(pool->pgp_count) > pool->pgp_count_max ) - pool->pgp_count_max = _atomic_read(pool->pgp_count); - return pgp; -} - -static struct tmem_page_descriptor *pgp_lookup_in_obj(struct tmem_object_root *obj, uint32_t index) -{ - ASSERT(obj != NULL); - ASSERT_SPINLOCK(&obj->obj_spinlock); - ASSERT(obj->pool != NULL); - return radix_tree_lookup(&obj->tree_root, index); -} - -static void pgp_free_data(struct tmem_page_descriptor *pgp, struct tmem_pool *pool) -{ - pagesize_t pgp_size = pgp->size; - - if ( pgp->pfp == NULL ) - return; - if ( pgp_size ) - tmem_free(pgp->cdata, pool); - else - tmem_free_page(pgp->us.obj->pool,pgp->pfp); - if ( pool != NULL && pgp_size ) - { - pool->client->compressed_pages--; - pool->client->compressed_sum_size -= pgp_size; - } - pgp->pfp = NULL; - pgp->size = -1; -} - -static void __pgp_free(struct tmem_page_descriptor *pgp, struct tmem_pool *pool) -{ - pgp->us.obj = NULL; - pgp->index = -1; - tmem_free(pgp, pool); -} - -static void pgp_free(struct tmem_page_descriptor *pgp) -{ - struct tmem_pool *pool = NULL; - - ASSERT(pgp->us.obj != NULL); - ASSERT(pgp->us.obj->pool != NULL); - ASSERT(pgp->us.obj->pool->client != NULL); - - pool = pgp->us.obj->pool; - if ( !is_persistent(pool) ) - { - ASSERT(list_empty(&pgp->global_eph_pages)); - ASSERT(list_empty(&pgp->us.client_eph_pages)); - } - pgp_free_data(pgp, pool); - atomic_dec_and_assert(global_pgp_count); - 
atomic_dec(&pool->pgp_count); - ASSERT(_atomic_read(pool->pgp_count) >= 0); - pgp->size = -1; - if ( is_persistent(pool) && pool->client->info.flags.u.migrating ) - { - pgp->inv_oid = pgp->us.obj->oid; - pgp->pool_id = pool->pool_id; - return; - } - __pgp_free(pgp, pool); -} - -/* Remove pgp from global/pool/client lists and free it. */ -static void pgp_delist_free(struct tmem_page_descriptor *pgp) -{ - struct client *client; - uint64_t life; - - ASSERT(pgp != NULL); - ASSERT(pgp->us.obj != NULL); - ASSERT(pgp->us.obj->pool != NULL); - client = pgp->us.obj->pool->client; - ASSERT(client != NULL); - - /* Delist pgp. */ - if ( !is_persistent(pgp->us.obj->pool) ) - { - spin_lock(&eph_lists_spinlock); - if ( !list_empty(&pgp->us.client_eph_pages) ) - client->eph_count--; - ASSERT(client->eph_count >= 0); - list_del_init(&pgp->us.client_eph_pages); - if ( !list_empty(&pgp->global_eph_pages) ) - tmem_global.eph_count--; - ASSERT(tmem_global.eph_count >= 0); - list_del_init(&pgp->global_eph_pages); - spin_unlock(&eph_lists_spinlock); - } - else - { - if ( client->info.flags.u.migrating ) - { - spin_lock(&pers_lists_spinlock); - list_add_tail(&pgp->client_inv_pages, - &client->persistent_invalidated_list); - if ( pgp != pgp->us.obj->pool->cur_pgp ) - list_del_init(&pgp->us.pool_pers_pages); - spin_unlock(&pers_lists_spinlock); - } - else - { - spin_lock(&pers_lists_spinlock); - list_del_init(&pgp->us.pool_pers_pages); - spin_unlock(&pers_lists_spinlock); - } - } - life = get_cycles() - pgp->timestamp; - pgp->us.obj->pool->sum_life_cycles += life; - - /* Free pgp. */ - pgp_free(pgp); -} - -/* Called only indirectly by radix_tree_destroy. 
*/ -static void pgp_destroy(void *v) -{ - struct tmem_page_descriptor *pgp = (struct tmem_page_descriptor *)v; - - pgp->us.obj->pgp_count--; - pgp_delist_free(pgp); -} - -static int pgp_add_to_obj(struct tmem_object_root *obj, uint32_t index, struct tmem_page_descriptor *pgp) -{ - int ret; - - ASSERT_SPINLOCK(&obj->obj_spinlock); - ret = radix_tree_insert(&obj->tree_root, index, pgp); - if ( !ret ) - obj->pgp_count++; - return ret; -} - -static struct tmem_page_descriptor *pgp_delete_from_obj(struct tmem_object_root *obj, uint32_t index) -{ - struct tmem_page_descriptor *pgp; - - ASSERT(obj != NULL); - ASSERT_SPINLOCK(&obj->obj_spinlock); - ASSERT(obj->pool != NULL); - pgp = radix_tree_delete(&obj->tree_root, index); - if ( pgp != NULL ) - obj->pgp_count--; - ASSERT(obj->pgp_count >= 0); - - return pgp; -} - -/************ RADIX TREE NODE MANIPULATION ROUTINES *******************/ - -/* Called only indirectly from radix_tree_insert. */ -static struct radix_tree_node *rtn_alloc(void *arg) -{ - struct tmem_object_node *objnode; - struct tmem_object_root *obj = (struct tmem_object_root *)arg; - - ASSERT(obj->pool != NULL); - objnode = tmem_malloc(sizeof(struct tmem_object_node),obj->pool); - if (objnode == NULL) - return NULL; - objnode->obj = obj; - memset(&objnode->rtn, 0, sizeof(struct radix_tree_node)); - if (++obj->pool->objnode_count > obj->pool->objnode_count_max) - obj->pool->objnode_count_max = obj->pool->objnode_count; - atomic_inc_and_max(global_rtree_node_count); - obj->objnode_count++; - return &objnode->rtn; -} - -/* Called only indirectly from radix_tree_delete/destroy. 
*/ -static void rtn_free(struct radix_tree_node *rtn, void *arg) -{ - struct tmem_pool *pool; - struct tmem_object_node *objnode; - - ASSERT(rtn != NULL); - objnode = container_of(rtn,struct tmem_object_node,rtn); - ASSERT(objnode->obj != NULL); - ASSERT_SPINLOCK(&objnode->obj->obj_spinlock); - pool = objnode->obj->pool; - ASSERT(pool != NULL); - pool->objnode_count--; - objnode->obj->objnode_count--; - objnode->obj = NULL; - tmem_free(objnode, pool); - atomic_dec_and_assert(global_rtree_node_count); -} - -/************ POOL OBJECT COLLECTION MANIPULATION ROUTINES *******************/ - -static int oid_compare(struct xen_tmem_oid *left, - struct xen_tmem_oid *right) -{ - if ( left->oid[2] == right->oid[2] ) - { - if ( left->oid[1] == right->oid[1] ) - { - if ( left->oid[0] == right->oid[0] ) - return 0; - else if ( left->oid[0] < right->oid[0] ) - return -1; - else - return 1; - } - else if ( left->oid[1] < right->oid[1] ) - return -1; - else - return 1; - } - else if ( left->oid[2] < right->oid[2] ) - return -1; - else - return 1; -} - -static void oid_set_invalid(struct xen_tmem_oid *oidp) -{ - oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL; -} - -static unsigned oid_hash(struct xen_tmem_oid *oidp) -{ - return (tmem_hash(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2], - BITS_PER_LONG) & OBJ_HASH_BUCKETS_MASK); -} - -/* Searches for object==oid in pool, returns locked object if found. */ -static struct tmem_object_root * obj_find(struct tmem_pool *pool, - struct xen_tmem_oid *oidp) -{ - struct rb_node *node; - struct tmem_object_root *obj; - -restart_find: - read_lock(&pool->pool_rwlock); - node = pool->obj_rb_root[oid_hash(oidp)].rb_node; - while ( node ) - { - obj = container_of(node, struct tmem_object_root, rb_tree_node); - switch ( oid_compare(&obj->oid, oidp) ) - { - case 0: /* Equal. 
*/ - if ( !spin_trylock(&obj->obj_spinlock) ) - { - read_unlock(&pool->pool_rwlock); - goto restart_find; - } - read_unlock(&pool->pool_rwlock); - return obj; - case -1: - node = node->rb_left; - break; - case 1: - node = node->rb_right; - } - } - read_unlock(&pool->pool_rwlock); - return NULL; -} - -/* Free an object that has no more pgps in it. */ -static void obj_free(struct tmem_object_root *obj) -{ - struct tmem_pool *pool; - struct xen_tmem_oid old_oid; - - ASSERT_SPINLOCK(&obj->obj_spinlock); - ASSERT(obj != NULL); - ASSERT(obj->pgp_count == 0); - pool = obj->pool; - ASSERT(pool != NULL); - ASSERT(pool->client != NULL); - ASSERT_WRITELOCK(&pool->pool_rwlock); - if ( obj->tree_root.rnode != NULL ) /* May be a "stump" with no leaves. */ - radix_tree_destroy(&obj->tree_root, pgp_destroy); - ASSERT((long)obj->objnode_count == 0); - ASSERT(obj->tree_root.rnode == NULL); - pool->obj_count--; - ASSERT(pool->obj_count >= 0); - obj->pool = NULL; - old_oid = obj->oid; - oid_set_invalid(&obj->oid); - obj->last_client = TMEM_CLI_ID_NULL; - atomic_dec_and_assert(global_obj_count); - rb_erase(&obj->rb_tree_node, &pool->obj_rb_root[oid_hash(&old_oid)]); - spin_unlock(&obj->obj_spinlock); - tmem_free(obj, pool); -} - -static int obj_rb_insert(struct rb_root *root, struct tmem_object_root *obj) -{ - struct rb_node **new, *parent = NULL; - struct tmem_object_root *this; - - ASSERT(obj->pool); - ASSERT_WRITELOCK(&obj->pool->pool_rwlock); - - new = &(root->rb_node); - while ( *new ) - { - this = container_of(*new, struct tmem_object_root, rb_tree_node); - parent = *new; - switch ( oid_compare(&this->oid, &obj->oid) ) - { - case 0: - return 0; - case -1: - new = &((*new)->rb_left); - break; - case 1: - new = &((*new)->rb_right); - break; - } - } - rb_link_node(&obj->rb_tree_node, parent, new); - rb_insert_color(&obj->rb_tree_node, root); - return 1; -} - -/* - * Allocate, initialize, and insert an tmem_object_root - * (should be called only if find failed). 
- */ -static struct tmem_object_root * obj_alloc(struct tmem_pool *pool, - struct xen_tmem_oid *oidp) -{ - struct tmem_object_root *obj; - - ASSERT(pool != NULL); - if ( (obj = tmem_malloc(sizeof(struct tmem_object_root), pool)) == NULL ) - return NULL; - pool->obj_count++; - if (pool->obj_count > pool->obj_count_max) - pool->obj_count_max = pool->obj_count; - atomic_inc_and_max(global_obj_count); - radix_tree_init(&obj->tree_root); - radix_tree_set_alloc_callbacks(&obj->tree_root, rtn_alloc, rtn_free, obj); - spin_lock_init(&obj->obj_spinlock); - obj->pool = pool; - obj->oid = *oidp; - obj->objnode_count = 0; - obj->pgp_count = 0; - obj->last_client = TMEM_CLI_ID_NULL; - return obj; -} - -/* Free an object after destroying any pgps in it. */ -static void obj_destroy(struct tmem_object_root *obj) -{ - ASSERT_WRITELOCK(&obj->pool->pool_rwlock); - radix_tree_destroy(&obj->tree_root, pgp_destroy); - obj_free(obj); -} - -/* Destroys all objs in a pool, or only if obj->last_client matches cli_id. 
*/ -static void pool_destroy_objs(struct tmem_pool *pool, domid_t cli_id) -{ - struct rb_node *node; - struct tmem_object_root *obj; - int i; - - write_lock(&pool->pool_rwlock); - pool->is_dying = 1; - for (i = 0; i < OBJ_HASH_BUCKETS; i++) - { - node = rb_first(&pool->obj_rb_root[i]); - while ( node != NULL ) - { - obj = container_of(node, struct tmem_object_root, rb_tree_node); - spin_lock(&obj->obj_spinlock); - node = rb_next(node); - if ( obj->last_client == cli_id ) - obj_destroy(obj); - else - spin_unlock(&obj->obj_spinlock); - } - } - write_unlock(&pool->pool_rwlock); -} - - -/************ POOL MANIPULATION ROUTINES ******************************/ - -static struct tmem_pool * pool_alloc(void) -{ - struct tmem_pool *pool; - int i; - - if ( (pool = xzalloc(struct tmem_pool)) == NULL ) - return NULL; - for (i = 0; i < OBJ_HASH_BUCKETS; i++) - pool->obj_rb_root[i] = RB_ROOT; - INIT_LIST_HEAD(&pool->persistent_page_list); - rwlock_init(&pool->pool_rwlock); - return pool; -} - -static void pool_free(struct tmem_pool *pool) -{ - pool->client = NULL; - xfree(pool); -} - -/* - * Register new_client as a user of this shared pool and return 0 on succ. - */ -static int shared_pool_join(struct tmem_pool *pool, struct client *new_client) -{ - struct share_list *sl; - ASSERT(is_shared(pool)); - - if ( (sl = tmem_malloc(sizeof(struct share_list), NULL)) == NULL ) - return -1; - sl->client = new_client; - list_add_tail(&sl->share_list, &pool->share_list); - if ( new_client->cli_id != pool->client->cli_id ) - tmem_client_info("adding new %s %d to shared pool owned by %s %d\n", - tmem_client_str, new_client->cli_id, tmem_client_str, - pool->client->cli_id); - else if ( pool->shared_count ) - tmem_client_info("inter-guest sharing of shared pool %s by client %d\n", - tmem_client_str, pool->client->cli_id); - ++pool->shared_count; - return 0; -} - -/* Reassign "ownership" of the pool to another client that shares this pool. 
*/ -static void shared_pool_reassign(struct tmem_pool *pool) -{ - struct share_list *sl; - int poolid; - struct client *old_client = pool->client, *new_client; - - ASSERT(is_shared(pool)); - if ( list_empty(&pool->share_list) ) - { - ASSERT(pool->shared_count == 0); - return; - } - old_client->pools[pool->pool_id] = NULL; - sl = list_entry(pool->share_list.next, struct share_list, share_list); - /* - * The sl->client can be old_client if there are multiple shared pools - * within an guest. - */ - pool->client = new_client = sl->client; - for (poolid = 0; poolid < MAX_POOLS_PER_DOMAIN; poolid++) - if (new_client->pools[poolid] == pool) - break; - ASSERT(poolid != MAX_POOLS_PER_DOMAIN); - new_client->eph_count += _atomic_read(pool->pgp_count); - old_client->eph_count -= _atomic_read(pool->pgp_count); - list_splice_init(&old_client->ephemeral_page_list, - &new_client->ephemeral_page_list); - tmem_client_info("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n", - tmem_cli_id_str, old_client->cli_id, tmem_cli_id_str, new_client->cli_id, poolid); - pool->pool_id = poolid; -} - -/* - * Destroy all objects with last_client same as passed cli_id, - * remove pool's cli_id from list of sharers of this pool. 
- */ -static int shared_pool_quit(struct tmem_pool *pool, domid_t cli_id) -{ - struct share_list *sl; - int s_poolid; - - ASSERT(is_shared(pool)); - ASSERT(pool->client != NULL); - - ASSERT_WRITELOCK(&tmem_rwlock); - pool_destroy_objs(pool, cli_id); - list_for_each_entry(sl,&pool->share_list, share_list) - { - if (sl->client->cli_id != cli_id) - continue; - list_del(&sl->share_list); - tmem_free(sl, pool); - --pool->shared_count; - if (pool->client->cli_id == cli_id) - shared_pool_reassign(pool); - if (pool->shared_count) - return pool->shared_count; - for (s_poolid = 0; s_poolid < MAX_GLOBAL_SHARED_POOLS; s_poolid++) - if ( (tmem_global.shared_pools[s_poolid]) == pool ) - { - tmem_global.shared_pools[s_poolid] = NULL; - break; - } - return 0; - } - tmem_client_warn("tmem: no match unsharing pool, %s=%d\n", - tmem_cli_id_str,pool->client->cli_id); - return -1; -} - -/* Flush all data (owned by cli_id) from a pool and, optionally, free it. */ -static void pool_flush(struct tmem_pool *pool, domid_t cli_id) -{ - ASSERT(pool != NULL); - if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) ) - { - tmem_client_warn("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n", - tmem_cli_id_str, cli_id, pool->pool_id, tmem_cli_id_str,pool->client->cli_id); - return; - } - tmem_client_info("Destroying %s-%s tmem pool %s=%d pool_id=%d\n", - is_persistent(pool) ? "persistent" : "ephemeral" , - is_shared(pool) ? 
"shared" : "private", - tmem_cli_id_str, pool->client->cli_id, pool->pool_id); - if ( pool->client->info.flags.u.migrating ) - { - tmem_client_warn("can't destroy pool while %s is live-migrating\n", - tmem_client_str); - return; - } - pool_destroy_objs(pool, TMEM_CLI_ID_NULL); - pool->client->pools[pool->pool_id] = NULL; - pool_free(pool); -} - -/************ CLIENT MANIPULATION OPERATIONS **************************/ - -struct client *client_create(domid_t cli_id) -{ - struct client *client = xzalloc(struct client); - int i, shift; - char name[5]; - struct domain *d; - - tmem_client_info("tmem: initializing tmem capability for %s=%d...", - tmem_cli_id_str, cli_id); - if ( client == NULL ) - { - tmem_client_err("failed... out of memory\n"); - goto fail; - } - - for (i = 0, shift = 12; i < 4; shift -=4, i++) - name[i] = (((unsigned short)cli_id >> shift) & 0xf) + '0'; - name[4] = '\0'; - client->persistent_pool = xmem_pool_create(name, tmem_persistent_pool_page_get, - tmem_persistent_pool_page_put, PAGE_SIZE, 0, PAGE_SIZE); - if ( client->persistent_pool == NULL ) - { - tmem_client_err("failed... can't alloc persistent pool\n"); - goto fail; - } - - d = rcu_lock_domain_by_id(cli_id); - if ( d == NULL ) { - tmem_client_err("failed... 
can't set client\n"); - xmem_pool_destroy(client->persistent_pool); - goto fail; - } - if ( !d->is_dying ) { - d->tmem_client = client; - client->domain = d; - } - rcu_unlock_domain(d); - - client->cli_id = cli_id; - client->info.version = TMEM_SPEC_VERSION; - client->info.maxpools = MAX_POOLS_PER_DOMAIN; - client->info.flags.u.compress = tmem_compression_enabled(); - for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) - client->shared_auth_uuid[i][0] = - client->shared_auth_uuid[i][1] = -1L; - list_add_tail(&client->client_list, &tmem_global.client_list); - INIT_LIST_HEAD(&client->ephemeral_page_list); - INIT_LIST_HEAD(&client->persistent_invalidated_list); - tmem_client_info("ok\n"); - return client; - - fail: - xfree(client); - return NULL; -} - -static void client_free(struct client *client) -{ - list_del(&client->client_list); - xmem_pool_destroy(client->persistent_pool); - xfree(client); -} - -/* Flush all data from a client and, optionally, free it. */ -static void client_flush(struct client *client) -{ - int i; - struct tmem_pool *pool; - - for (i = 0; i < MAX_POOLS_PER_DOMAIN; i++) - { - if ( (pool = client->pools[i]) == NULL ) - continue; - pool_flush(pool, client->cli_id); - client->pools[i] = NULL; - client->info.nr_pools--; - } - client_free(client); -} - -static bool client_over_quota(const struct client *client) -{ - int total = _atomic_read(tmem_global.client_weight_total); - - ASSERT(client != NULL); - if ( (total == 0) || (client->info.weight == 0) || - (client->eph_count == 0) ) - return false; - - return (((tmem_global.eph_count * 100L) / client->eph_count) > - ((total * 100L) / client->info.weight)); -} - -/************ MEMORY REVOCATION ROUTINES *******************************/ - -static bool tmem_try_to_evict_pgp(struct tmem_page_descriptor *pgp, - bool *hold_pool_rwlock) -{ - struct tmem_object_root *obj = pgp->us.obj; - struct tmem_pool *pool = obj->pool; - - if ( pool->is_dying ) - return false; - if ( spin_trylock(&obj->obj_spinlock) ) - { - 
if ( obj->pgp_count > 1 ) - return true; - if ( write_trylock(&pool->pool_rwlock) ) - { - *hold_pool_rwlock = 1; - return true; - } - spin_unlock(&obj->obj_spinlock); - } - return false; -} - -int tmem_evict(void) -{ - struct client *client = current->domain->tmem_client; - struct tmem_page_descriptor *pgp = NULL, *pgp_del; - struct tmem_object_root *obj; - struct tmem_pool *pool; - int ret = 0; - bool hold_pool_rwlock = false; - - tmem_stats.evict_attempts++; - spin_lock(&eph_lists_spinlock); - if ( (client != NULL) && client_over_quota(client) && - !list_empty(&client->ephemeral_page_list) ) - { - list_for_each_entry(pgp, &client->ephemeral_page_list, us.client_eph_pages) - if ( tmem_try_to_evict_pgp(pgp, &hold_pool_rwlock) ) - goto found; - } - else if ( !list_empty(&tmem_global.ephemeral_page_list) ) - { - list_for_each_entry(pgp, &tmem_global.ephemeral_page_list, global_eph_pages) - if ( tmem_try_to_evict_pgp(pgp, &hold_pool_rwlock) ) - { - client = pgp->us.obj->pool->client; - goto found; - } - } - /* Global_ephemeral_page_list is empty, so we bail out. */ - spin_unlock(&eph_lists_spinlock); - goto out; - -found: - /* Delist. */ - list_del_init(&pgp->us.client_eph_pages); - client->eph_count--; - list_del_init(&pgp->global_eph_pages); - tmem_global.eph_count--; - ASSERT(tmem_global.eph_count >= 0); - ASSERT(client->eph_count >= 0); - spin_unlock(&eph_lists_spinlock); - - ASSERT(pgp != NULL); - obj = pgp->us.obj; - ASSERT(obj != NULL); - ASSERT(obj->pool != NULL); - pool = obj->pool; - - ASSERT_SPINLOCK(&obj->obj_spinlock); - pgp_del = pgp_delete_from_obj(obj, pgp->index); - ASSERT(pgp_del == pgp); - - /* pgp already delist, so call pgp_free directly. 
*/ - pgp_free(pgp); - if ( obj->pgp_count == 0 ) - { - ASSERT_WRITELOCK(&pool->pool_rwlock); - obj_free(obj); - } - else - spin_unlock(&obj->obj_spinlock); - if ( hold_pool_rwlock ) - write_unlock(&pool->pool_rwlock); - tmem_stats.evicted_pgs++; - ret = 1; -out: - return ret; -} - - -/* - * Under certain conditions (e.g. if each client is putting pages for exactly - * one object), once locks are held, freeing up memory may - * result in livelocks and very long "put" times, so we try to ensure there - * is a minimum amount of memory (1MB) available BEFORE any data structure - * locks are held. - */ -static inline bool tmem_ensure_avail_pages(void) -{ - int failed_evict = 10; - unsigned long free_mem; - - do { - free_mem = (tmem_page_list_pages + total_free_pages()) - >> (20 - PAGE_SHIFT); - if ( free_mem ) - return true; - if ( !tmem_evict() ) - failed_evict--; - } while ( failed_evict > 0 ); - - return false; -} - -/************ TMEM CORE OPERATIONS ************************************/ - -static int do_tmem_put_compress(struct tmem_page_descriptor *pgp, xen_pfn_t cmfn, - tmem_cli_va_param_t clibuf) -{ - void *dst, *p; - size_t size; - int ret = 0; - - ASSERT(pgp != NULL); - ASSERT(pgp->us.obj != NULL); - ASSERT_SPINLOCK(&pgp->us.obj->obj_spinlock); - ASSERT(pgp->us.obj->pool != NULL); - ASSERT(pgp->us.obj->pool->client != NULL); - - if ( pgp->pfp != NULL ) - pgp_free_data(pgp, pgp->us.obj->pool); - ret = tmem_compress_from_client(cmfn, &dst, &size, clibuf); - if ( ret <= 0 ) - goto out; - else if ( (size == 0) || (size >= tmem_mempool_maxalloc) ) { - ret = 0; - goto out; - } else if ( (p = tmem_malloc(size,pgp->us.obj->pool)) == NULL ) { - ret = -ENOMEM; - goto out; - } else { - memcpy(p,dst,size); - pgp->cdata = p; - } - pgp->size = size; - pgp->us.obj->pool->client->compressed_pages++; - pgp->us.obj->pool->client->compressed_sum_size += size; - ret = 1; - -out: - return ret; -} - -static int do_tmem_dup_put(struct tmem_page_descriptor *pgp, xen_pfn_t cmfn, - 
tmem_cli_va_param_t clibuf) -{ - struct tmem_pool *pool; - struct tmem_object_root *obj; - struct client *client; - struct tmem_page_descriptor *pgpfound = NULL; - int ret; - - ASSERT(pgp != NULL); - ASSERT(pgp->pfp != NULL); - ASSERT(pgp->size != -1); - obj = pgp->us.obj; - ASSERT_SPINLOCK(&obj->obj_spinlock); - ASSERT(obj != NULL); - pool = obj->pool; - ASSERT(pool != NULL); - client = pool->client; - if ( client->info.flags.u.migrating ) - goto failed_dup; /* No dups allowed when migrating. */ - /* Can we successfully manipulate pgp to change out the data? */ - if ( client->info.flags.u.compress && pgp->size != 0 ) - { - ret = do_tmem_put_compress(pgp, cmfn, clibuf); - if ( ret == 1 ) - goto done; - else if ( ret == 0 ) - goto copy_uncompressed; - else if ( ret == -ENOMEM ) - goto failed_dup; - else if ( ret == -EFAULT ) - goto bad_copy; - } - -copy_uncompressed: - if ( pgp->pfp ) - pgp_free_data(pgp, pool); - if ( ( pgp->pfp = tmem_alloc_page(pool) ) == NULL ) - goto failed_dup; - pgp->size = 0; - ret = tmem_copy_from_client(pgp->pfp, cmfn, tmem_cli_buf_null); - if ( ret < 0 ) - goto bad_copy; - -done: - /* Successfully replaced data, clean up and return success. */ - if ( is_shared(pool) ) - obj->last_client = client->cli_id; - spin_unlock(&obj->obj_spinlock); - pool->dup_puts_replaced++; - pool->good_puts++; - if ( is_persistent(pool) ) - client->succ_pers_puts++; - return 1; - -bad_copy: - tmem_stats.failed_copies++; - goto cleanup; - -failed_dup: - /* - * Couldn't change out the data, flush the old data and return - * -ENOSPC instead of -ENOMEM to differentiate failed _dup_ put. 
- */ - ret = -ENOSPC; -cleanup: - pgpfound = pgp_delete_from_obj(obj, pgp->index); - ASSERT(pgpfound == pgp); - pgp_delist_free(pgpfound); - if ( obj->pgp_count == 0 ) - { - write_lock(&pool->pool_rwlock); - obj_free(obj); - write_unlock(&pool->pool_rwlock); - } else { - spin_unlock(&obj->obj_spinlock); - } - pool->dup_puts_flushed++; - return ret; -} - -static int do_tmem_put(struct tmem_pool *pool, - struct xen_tmem_oid *oidp, uint32_t index, - xen_pfn_t cmfn, tmem_cli_va_param_t clibuf) -{ - struct tmem_object_root *obj = NULL; - struct tmem_page_descriptor *pgp = NULL; - struct client *client; - int ret, newobj = 0; - - ASSERT(pool != NULL); - client = pool->client; - ASSERT(client != NULL); - ret = client->info.flags.u.frozen ? -EFROZEN : -ENOMEM; - pool->puts++; - -refind: - /* Does page already exist (dup)? if so, handle specially. */ - if ( (obj = obj_find(pool, oidp)) != NULL ) - { - if ((pgp = pgp_lookup_in_obj(obj, index)) != NULL) - { - return do_tmem_dup_put(pgp, cmfn, clibuf); - } - else - { - /* No puts allowed into a frozen pool (except dup puts). */ - if ( client->info.flags.u.frozen ) - goto unlock_obj; - } - } - else - { - /* No puts allowed into a frozen pool (except dup puts). */ - if ( client->info.flags.u.frozen ) - return ret; - if ( (obj = obj_alloc(pool, oidp)) == NULL ) - return -ENOMEM; - - write_lock(&pool->pool_rwlock); - /* - * Parallel callers may already allocated obj and inserted to obj_rb_root - * before us. - */ - if ( !obj_rb_insert(&pool->obj_rb_root[oid_hash(oidp)], obj) ) - { - tmem_free(obj, pool); - write_unlock(&pool->pool_rwlock); - goto refind; - } - - spin_lock(&obj->obj_spinlock); - newobj = 1; - write_unlock(&pool->pool_rwlock); - } - - /* When arrive here, we have a spinlocked obj for use. 
*/ - ASSERT_SPINLOCK(&obj->obj_spinlock); - if ( (pgp = pgp_alloc(obj)) == NULL ) - goto unlock_obj; - - ret = pgp_add_to_obj(obj, index, pgp); - if ( ret == -ENOMEM ) - /* Warning: may result in partially built radix tree ("stump"). */ - goto free_pgp; - - pgp->index = index; - pgp->size = 0; - - if ( client->info.flags.u.compress ) - { - ASSERT(pgp->pfp == NULL); - ret = do_tmem_put_compress(pgp, cmfn, clibuf); - if ( ret == 1 ) - goto insert_page; - if ( ret == -ENOMEM ) - { - client->compress_nomem++; - goto del_pgp_from_obj; - } - if ( ret == 0 ) - { - client->compress_poor++; - goto copy_uncompressed; - } - if ( ret == -EFAULT ) - goto bad_copy; - } - -copy_uncompressed: - if ( ( pgp->pfp = tmem_alloc_page(pool) ) == NULL ) - { - ret = -ENOMEM; - goto del_pgp_from_obj; - } - ret = tmem_copy_from_client(pgp->pfp, cmfn, clibuf); - if ( ret < 0 ) - goto bad_copy; - -insert_page: - if ( !is_persistent(pool) ) - { - spin_lock(&eph_lists_spinlock); - list_add_tail(&pgp->global_eph_pages, &tmem_global.ephemeral_page_list); - if (++tmem_global.eph_count > tmem_stats.global_eph_count_max) - tmem_stats.global_eph_count_max = tmem_global.eph_count; - list_add_tail(&pgp->us.client_eph_pages, - &client->ephemeral_page_list); - if (++client->eph_count > client->eph_count_max) - client->eph_count_max = client->eph_count; - spin_unlock(&eph_lists_spinlock); - } - else - { /* is_persistent. */ - spin_lock(&pers_lists_spinlock); - list_add_tail(&pgp->us.pool_pers_pages, - &pool->persistent_page_list); - spin_unlock(&pers_lists_spinlock); - } - - if ( is_shared(pool) ) - obj->last_client = client->cli_id; - - /* Free the obj spinlock. 
*/ - spin_unlock(&obj->obj_spinlock); - pool->good_puts++; - - if ( is_persistent(pool) ) - client->succ_pers_puts++; - else - tmem_stats.tot_good_eph_puts++; - return 1; - -bad_copy: - tmem_stats.failed_copies++; - -del_pgp_from_obj: - ASSERT((obj != NULL) && (pgp != NULL) && (pgp->index != -1)); - pgp_delete_from_obj(obj, pgp->index); - -free_pgp: - pgp_free(pgp); -unlock_obj: - if ( newobj ) - { - write_lock(&pool->pool_rwlock); - obj_free(obj); - write_unlock(&pool->pool_rwlock); - } - else - { - spin_unlock(&obj->obj_spinlock); - } - pool->no_mem_puts++; - return ret; -} - -static int do_tmem_get(struct tmem_pool *pool, - struct xen_tmem_oid *oidp, uint32_t index, - xen_pfn_t cmfn, tmem_cli_va_param_t clibuf) -{ - struct tmem_object_root *obj; - struct tmem_page_descriptor *pgp; - struct client *client = pool->client; - int rc; - - if ( !_atomic_read(pool->pgp_count) ) - return -EEMPTY; - - pool->gets++; - obj = obj_find(pool,oidp); - if ( obj == NULL ) - return 0; - - ASSERT_SPINLOCK(&obj->obj_spinlock); - if (is_shared(pool) || is_persistent(pool) ) - pgp = pgp_lookup_in_obj(obj, index); - else - pgp = pgp_delete_from_obj(obj, index); - if ( pgp == NULL ) - { - spin_unlock(&obj->obj_spinlock); - return 0; - } - ASSERT(pgp->size != -1); - if ( pgp->size != 0 ) - { - rc = tmem_decompress_to_client(cmfn, pgp->cdata, pgp->size, clibuf); - } - else - rc = tmem_copy_to_client(cmfn, pgp->pfp, clibuf); - if ( rc <= 0 ) - goto bad_copy; - - if ( !is_persistent(pool) ) - { - if ( !is_shared(pool) ) - { - pgp_delist_free(pgp); - if ( obj->pgp_count == 0 ) - { - write_lock(&pool->pool_rwlock); - obj_free(obj); - obj = NULL; - write_unlock(&pool->pool_rwlock); - } - } else { - spin_lock(&eph_lists_spinlock); - list_del(&pgp->global_eph_pages); - list_add_tail(&pgp->global_eph_pages,&tmem_global.ephemeral_page_list); - list_del(&pgp->us.client_eph_pages); - list_add_tail(&pgp->us.client_eph_pages,&client->ephemeral_page_list); - spin_unlock(&eph_lists_spinlock); - 
obj->last_client = current->domain->domain_id; - } - } - if ( obj != NULL ) - { - spin_unlock(&obj->obj_spinlock); - } - pool->found_gets++; - if ( is_persistent(pool) ) - client->succ_pers_gets++; - else - client->succ_eph_gets++; - return 1; - -bad_copy: - spin_unlock(&obj->obj_spinlock); - tmem_stats.failed_copies++; - return rc; -} - -static int do_tmem_flush_page(struct tmem_pool *pool, - struct xen_tmem_oid *oidp, uint32_t index) -{ - struct tmem_object_root *obj; - struct tmem_page_descriptor *pgp; - - pool->flushs++; - obj = obj_find(pool,oidp); - if ( obj == NULL ) - goto out; - pgp = pgp_delete_from_obj(obj, index); - if ( pgp == NULL ) - { - spin_unlock(&obj->obj_spinlock); - goto out; - } - pgp_delist_free(pgp); - if ( obj->pgp_count == 0 ) - { - write_lock(&pool->pool_rwlock); - obj_free(obj); - write_unlock(&pool->pool_rwlock); - } else { - spin_unlock(&obj->obj_spinlock); - } - pool->flushs_found++; - -out: - if ( pool->client->info.flags.u.frozen ) - return -EFROZEN; - else - return 1; -} - -static int do_tmem_flush_object(struct tmem_pool *pool, - struct xen_tmem_oid *oidp) -{ - struct tmem_object_root *obj; - - pool->flush_objs++; - obj = obj_find(pool,oidp); - if ( obj == NULL ) - goto out; - write_lock(&pool->pool_rwlock); - obj_destroy(obj); - pool->flush_objs_found++; - write_unlock(&pool->pool_rwlock); - -out: - if ( pool->client->info.flags.u.frozen ) - return -EFROZEN; - else - return 1; -} - -static int do_tmem_destroy_pool(uint32_t pool_id) -{ - struct client *client = current->domain->tmem_client; - struct tmem_pool *pool; - - if ( pool_id >= MAX_POOLS_PER_DOMAIN ) - return 0; - if ( (pool = client->pools[pool_id]) == NULL ) - return 0; - client->pools[pool_id] = NULL; - pool_flush(pool, client->cli_id); - client->info.nr_pools--; - return 1; -} - -int do_tmem_new_pool(domid_t this_cli_id, - uint32_t d_poolid, uint32_t flags, - uint64_t uuid_lo, uint64_t uuid_hi) -{ - struct client *client; - domid_t cli_id; - int persistent = flags & 
TMEM_POOL_PERSIST; - int shared = flags & TMEM_POOL_SHARED; - int pagebits = (flags >> TMEM_POOL_PAGESIZE_SHIFT) - & TMEM_POOL_PAGESIZE_MASK; - int specversion = (flags >> TMEM_POOL_VERSION_SHIFT) - & TMEM_POOL_VERSION_MASK; - struct tmem_pool *pool, *shpool; - int i, first_unused_s_poolid; - - if ( this_cli_id == TMEM_CLI_ID_NULL ) - cli_id = current->domain->domain_id; - else - cli_id = this_cli_id; - tmem_client_info("tmem: allocating %s-%s tmem pool for %s=%d...", - persistent ? "persistent" : "ephemeral" , - shared ? "shared" : "private", tmem_cli_id_str, cli_id); - if ( specversion != TMEM_SPEC_VERSION ) - { - tmem_client_err("failed... unsupported spec version\n"); - return -EPERM; - } - if ( shared && persistent ) - { - tmem_client_err("failed... unable to create a shared-persistant pool\n"); - return -EPERM; - } - if ( pagebits != (PAGE_SHIFT - 12) ) - { - tmem_client_err("failed... unsupported pagesize %d\n", - 1 << (pagebits + 12)); - return -EPERM; - } - if ( flags & TMEM_POOL_PRECOMPRESSED ) - { - tmem_client_err("failed... precompression flag set but unsupported\n"); - return -EPERM; - } - if ( flags & TMEM_POOL_RESERVED_BITS ) - { - tmem_client_err("failed... reserved bits must be zero\n"); - return -EPERM; - } - if ( this_cli_id != TMEM_CLI_ID_NULL ) - { - if ( (client = tmem_client_from_cli_id(this_cli_id)) == NULL - || d_poolid >= MAX_POOLS_PER_DOMAIN - || client->pools[d_poolid] != NULL ) - return -EPERM; - } - else - { - client = current->domain->tmem_client; - ASSERT(client != NULL); - for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ ) - if ( client->pools[d_poolid] == NULL ) - break; - if ( d_poolid >= MAX_POOLS_PER_DOMAIN ) - { - tmem_client_err("failed... no more pool slots available for this %s\n", - tmem_client_str); - return -EPERM; - } - } - - if ( (pool = pool_alloc()) == NULL ) - { - tmem_client_err("failed... 
out of memory\n"); - return -ENOMEM; - } - client->pools[d_poolid] = pool; - pool->client = client; - pool->pool_id = d_poolid; - pool->shared = shared; - pool->persistent = persistent; - pool->uuid[0] = uuid_lo; - pool->uuid[1] = uuid_hi; - - /* - * Already created a pool when arrived here, but need some special process - * for shared pool. - */ - if ( shared ) - { - if ( uuid_lo == -1L && uuid_hi == -1L ) - { - tmem_client_info("Invalid uuid, create non shared pool instead!\n"); - pool->shared = 0; - goto out; - } - if ( !tmem_global.shared_auth ) - { - for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) - if ( (client->shared_auth_uuid[i][0] == uuid_lo) && - (client->shared_auth_uuid[i][1] == uuid_hi) ) - break; - if ( i == MAX_GLOBAL_SHARED_POOLS ) - { - tmem_client_info("Shared auth failed, create non shared pool instead!\n"); - pool->shared = 0; - goto out; - } - } - - /* - * Authorize okay, match a global shared pool or use the newly allocated - * one. - */ - first_unused_s_poolid = MAX_GLOBAL_SHARED_POOLS; - for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++ ) - { - if ( (shpool = tmem_global.shared_pools[i]) != NULL ) - { - if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi ) - { - /* Succ to match a global shared pool. */ - tmem_client_info("(matches shared pool uuid=%"PRIx64".%"PRIx64") pool_id=%d\n", - uuid_hi, uuid_lo, d_poolid); - client->pools[d_poolid] = shpool; - if ( !shared_pool_join(shpool, client) ) - { - pool_free(pool); - goto out; - } - else - goto fail; - } - } - else - { - if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS ) - first_unused_s_poolid = i; - } - } - - /* Failed to find a global shared pool slot. */ - if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS ) - { - tmem_client_warn("tmem: failed... no global shared pool slots available\n"); - goto fail; - } - /* Add pool to global shared pool. 
*/ - else - { - INIT_LIST_HEAD(&pool->share_list); - pool->shared_count = 0; - if ( shared_pool_join(pool, client) ) - goto fail; - tmem_global.shared_pools[first_unused_s_poolid] = pool; - } - } - -out: - tmem_client_info("pool_id=%d\n", d_poolid); - client->info.nr_pools++; - return d_poolid; - -fail: - pool_free(pool); - return -EPERM; -} - -/************ TMEM CONTROL OPERATIONS ************************************/ - -int tmemc_shared_pool_auth(domid_t cli_id, uint64_t uuid_lo, - uint64_t uuid_hi, bool auth) -{ - struct client *client; - int i, free = -1; - - if ( cli_id == TMEM_CLI_ID_NULL ) - { - tmem_global.shared_auth = auth; - return 1; - } - client = tmem_client_from_cli_id(cli_id); - if ( client == NULL ) - return -EINVAL; - - for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) - { - if ( auth == 0 ) - { - if ( (client->shared_auth_uuid[i][0] == uuid_lo) && - (client->shared_auth_uuid[i][1] == uuid_hi) ) - { - client->shared_auth_uuid[i][0] = -1L; - client->shared_auth_uuid[i][1] = -1L; - return 1; - } - } - else - { - if ( (client->shared_auth_uuid[i][0] == -1L) && - (client->shared_auth_uuid[i][1] == -1L) ) - { - free = i; - break; - } - } - } - if ( auth == 0 ) - return 0; - else if ( free == -1) - return -ENOMEM; - else - { - client->shared_auth_uuid[free][0] = uuid_lo; - client->shared_auth_uuid[free][1] = uuid_hi; - return 1; - } -} - -static int tmemc_save_subop(int cli_id, uint32_t pool_id, - uint32_t subop, tmem_cli_va_param_t buf, uint32_t arg) -{ - struct client *client = tmem_client_from_cli_id(cli_id); - uint32_t p; - struct tmem_page_descriptor *pgp, *pgp2; - int rc = -ENOENT; - - switch(subop) - { - case XEN_SYSCTL_TMEM_OP_SAVE_BEGIN: - if ( client == NULL ) - break; - for (p = 0; p < MAX_POOLS_PER_DOMAIN; p++) - if ( client->pools[p] != NULL ) - break; - - if ( p == MAX_POOLS_PER_DOMAIN ) - break; - - client->was_frozen = client->info.flags.u.frozen; - client->info.flags.u.frozen = 1; - if ( arg != 0 ) - client->info.flags.u.migrating = 1; - 
rc = 0; - break; - case XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN: - if ( client == NULL ) - rc = client_create(cli_id) ? 0 : -ENOMEM; - else - rc = -EEXIST; - break; - case XEN_SYSCTL_TMEM_OP_SAVE_END: - if ( client == NULL ) - break; - client->info.flags.u.migrating = 0; - if ( !list_empty(&client->persistent_invalidated_list) ) - list_for_each_entry_safe(pgp,pgp2, - &client->persistent_invalidated_list, client_inv_pages) - __pgp_free(pgp, client->pools[pgp->pool_id]); - client->info.flags.u.frozen = client->was_frozen; - rc = 0; - break; - } - return rc; -} - -static int tmemc_save_get_next_page(int cli_id, uint32_t pool_id, - tmem_cli_va_param_t buf, uint32_t bufsize) -{ - struct client *client = tmem_client_from_cli_id(cli_id); - struct tmem_pool *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) - ? NULL : client->pools[pool_id]; - struct tmem_page_descriptor *pgp; - struct xen_tmem_oid *oid; - int ret = 0; - struct tmem_handle h; - - if ( pool == NULL || !is_persistent(pool) ) - return -1; - - if ( bufsize < PAGE_SIZE + sizeof(struct tmem_handle) ) - return -ENOMEM; - - spin_lock(&pers_lists_spinlock); - if ( list_empty(&pool->persistent_page_list) ) - { - ret = -1; - goto out; - } - /* Note: pool->cur_pgp is the pgp last returned by get_next_page. */ - if ( pool->cur_pgp == NULL ) - { - /* Process the first one. */ - pool->cur_pgp = pgp = list_entry((&pool->persistent_page_list)->next, - struct tmem_page_descriptor,us.pool_pers_pages); - } else if ( list_is_last(&pool->cur_pgp->us.pool_pers_pages, - &pool->persistent_page_list) ) - { - /* Already processed the last one in the list. 
*/ - ret = -1; - goto out; - } - pgp = list_entry((&pool->cur_pgp->us.pool_pers_pages)->next, - struct tmem_page_descriptor,us.pool_pers_pages); - pool->cur_pgp = pgp; - oid = &pgp->us.obj->oid; - h.pool_id = pool_id; - BUILD_BUG_ON(sizeof(h.oid) != sizeof(*oid)); - memcpy(&(h.oid), oid, sizeof(h.oid)); - h.index = pgp->index; - if ( copy_to_guest(guest_handle_cast(buf, void), &h, 1) ) - { - ret = -EFAULT; - goto out; - } - guest_handle_add_offset(buf, sizeof(h)); - ret = do_tmem_get(pool, oid, pgp->index, 0, buf); - -out: - spin_unlock(&pers_lists_spinlock); - return ret; -} - -static int tmemc_save_get_next_inv(int cli_id, tmem_cli_va_param_t buf, - uint32_t bufsize) -{ - struct client *client = tmem_client_from_cli_id(cli_id); - struct tmem_page_descriptor *pgp; - struct tmem_handle h; - int ret = 0; - - if ( client == NULL ) - return 0; - if ( bufsize < sizeof(struct tmem_handle) ) - return 0; - spin_lock(&pers_lists_spinlock); - if ( list_empty(&client->persistent_invalidated_list) ) - goto out; - if ( client->cur_pgp == NULL ) - { - pgp = list_entry((&client->persistent_invalidated_list)->next, - struct tmem_page_descriptor,client_inv_pages); - client->cur_pgp = pgp; - } else if ( list_is_last(&client->cur_pgp->client_inv_pages, - &client->persistent_invalidated_list) ) - { - client->cur_pgp = NULL; - ret = 0; - goto out; - } else { - pgp = list_entry((&client->cur_pgp->client_inv_pages)->next, - struct tmem_page_descriptor,client_inv_pages); - client->cur_pgp = pgp; - } - h.pool_id = pgp->pool_id; - BUILD_BUG_ON(sizeof(h.oid) != sizeof(pgp->inv_oid)); - memcpy(&(h.oid), &(pgp->inv_oid), sizeof(h.oid)); - h.index = pgp->index; - ret = 1; - if ( copy_to_guest(guest_handle_cast(buf, void), &h, 1) ) - ret = -EFAULT; -out: - spin_unlock(&pers_lists_spinlock); - return ret; -} - -static int tmemc_restore_put_page(int cli_id, uint32_t pool_id, - struct xen_tmem_oid *oidp, - uint32_t index, tmem_cli_va_param_t buf, - uint32_t bufsize) -{ - struct client *client = 
tmem_client_from_cli_id(cli_id); - struct tmem_pool *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) - ? NULL : client->pools[pool_id]; - - if ( pool == NULL ) - return -1; - if (bufsize != PAGE_SIZE) { - tmem_client_err("tmem: %s: invalid parameter bufsize(%d) != (%ld)\n", - __func__, bufsize, PAGE_SIZE); - return -EINVAL; - } - return do_tmem_put(pool, oidp, index, 0, buf); -} - -static int tmemc_restore_flush_page(int cli_id, uint32_t pool_id, - struct xen_tmem_oid *oidp, - uint32_t index) -{ - struct client *client = tmem_client_from_cli_id(cli_id); - struct tmem_pool *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) - ? NULL : client->pools[pool_id]; - - if ( pool == NULL ) - return -1; - return do_tmem_flush_page(pool,oidp,index); -} - -int do_tmem_control(struct xen_sysctl_tmem_op *op) -{ - int ret; - uint32_t pool_id = op->pool_id; - uint32_t cmd = op->cmd; - struct xen_tmem_oid *oidp = &op->oid; - - ASSERT(rw_is_write_locked(&tmem_rwlock)); - - switch (cmd) - { - case XEN_SYSCTL_TMEM_OP_SAVE_BEGIN: - case XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN: - case XEN_SYSCTL_TMEM_OP_SAVE_END: - ret = tmemc_save_subop(op->cli_id, pool_id, cmd, - guest_handle_cast(op->u.buf, char), op->arg); - break; - case XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE: - ret = tmemc_save_get_next_page(op->cli_id, pool_id, - guest_handle_cast(op->u.buf, char), op->len); - break; - case XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_INV: - ret = tmemc_save_get_next_inv(op->cli_id, - guest_handle_cast(op->u.buf, char), op->len); - break; - case XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE: - ret = tmemc_restore_put_page(op->cli_id, pool_id, oidp, op->arg, - guest_handle_cast(op->u.buf, char), op->len); - break; - case XEN_SYSCTL_TMEM_OP_RESTORE_FLUSH_PAGE: - ret = tmemc_restore_flush_page(op->cli_id, pool_id, oidp, op->arg); - break; - default: - ret = -1; - } - - return ret; -} - -/************ EXPORTed FUNCTIONS **************************************/ - -long do_tmem_op(tmem_cli_op_t uops) -{ - struct 
tmem_op op; - struct client *client = current->domain->tmem_client; - struct tmem_pool *pool = NULL; - struct xen_tmem_oid *oidp; - int rc = 0; - - if ( !tmem_initialized ) - return -ENODEV; - - if ( xsm_tmem_op(XSM_HOOK) ) - return -EPERM; - - tmem_stats.total_tmem_ops++; - - if ( client != NULL && client->domain->is_dying ) - { - tmem_stats.errored_tmem_ops++; - return -ENODEV; - } - - if ( unlikely(tmem_get_tmemop_from_client(&op, uops) != 0) ) - { - tmem_client_err("tmem: can't get tmem struct from %s\n", tmem_client_str); - tmem_stats.errored_tmem_ops++; - return -EFAULT; - } - - /* Acquire write lock for all commands at first. */ - write_lock(&tmem_rwlock); - - switch ( op.cmd ) - { - case TMEM_CONTROL: - case TMEM_RESTORE_NEW: - case TMEM_AUTH: - rc = -EOPNOTSUPP; - break; - - default: - /* - * For other commands, create per-client tmem structure dynamically on - * first use by client. - */ - if ( client == NULL ) - { - if ( (client = client_create(current->domain->domain_id)) == NULL ) - { - tmem_client_err("tmem: can't create tmem structure for %s\n", - tmem_client_str); - rc = -ENOMEM; - goto out; - } - } - - if ( op.cmd == TMEM_NEW_POOL || op.cmd == TMEM_DESTROY_POOL ) - { - if ( op.cmd == TMEM_NEW_POOL ) - rc = do_tmem_new_pool(TMEM_CLI_ID_NULL, 0, op.u.creat.flags, - op.u.creat.uuid[0], op.u.creat.uuid[1]); - else - rc = do_tmem_destroy_pool(op.pool_id); - } - else - { - if ( ((uint32_t)op.pool_id >= MAX_POOLS_PER_DOMAIN) || - ((pool = client->pools[op.pool_id]) == NULL) ) - { - tmem_client_err("tmem: operation requested on uncreated pool\n"); - rc = -ENODEV; - goto out; - } - /* Commands that only need read lock. */ - write_unlock(&tmem_rwlock); - read_lock(&tmem_rwlock); - - oidp = &op.u.gen.oid; - switch ( op.cmd ) - { - case TMEM_NEW_POOL: - case TMEM_DESTROY_POOL: - BUG(); /* Done earlier. 
*/ - break; - case TMEM_PUT_PAGE: - if (tmem_ensure_avail_pages()) - rc = do_tmem_put(pool, oidp, op.u.gen.index, op.u.gen.cmfn, - tmem_cli_buf_null); - else - rc = -ENOMEM; - break; - case TMEM_GET_PAGE: - rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn, - tmem_cli_buf_null); - break; - case TMEM_FLUSH_PAGE: - rc = do_tmem_flush_page(pool, oidp, op.u.gen.index); - break; - case TMEM_FLUSH_OBJECT: - rc = do_tmem_flush_object(pool, oidp); - break; - default: - tmem_client_warn("tmem: op %d not implemented\n", op.cmd); - rc = -ENOSYS; - break; - } - read_unlock(&tmem_rwlock); - if ( rc < 0 ) - tmem_stats.errored_tmem_ops++; - return rc; - } - break; - - } -out: - write_unlock(&tmem_rwlock); - if ( rc < 0 ) - tmem_stats.errored_tmem_ops++; - return rc; -} - -/* This should be called when the host is destroying a client (domain). */ -void tmem_destroy(void *v) -{ - struct client *client = (struct client *)v; - - if ( client == NULL ) - return; - - if ( !client->domain->is_dying ) - { - printk("tmem: tmem_destroy can only destroy dying client\n"); - return; - } - - write_lock(&tmem_rwlock); - - printk("tmem: flushing tmem pools for %s=%d\n", - tmem_cli_id_str, client->cli_id); - client_flush(client); - - write_unlock(&tmem_rwlock); -} - -#define MAX_EVICTS 10 /* Should be variable or set via XEN_SYSCTL_TMEM_OP_ ?? 
*/ -void *tmem_relinquish_pages(unsigned int order, unsigned int memflags) -{ - struct page_info *pfp; - unsigned long evicts_per_relinq = 0; - int max_evictions = 10; - - if (!tmem_enabled() || !tmem_freeable_pages()) - return NULL; - - tmem_stats.relinq_attempts++; - if ( order > 0 ) - { -#ifndef NDEBUG - printk("tmem_relinquish_page: failing order=%d\n", order); -#endif - return NULL; - } - - while ( (pfp = tmem_page_list_get()) == NULL ) - { - if ( (max_evictions-- <= 0) || !tmem_evict()) - break; - evicts_per_relinq++; - } - if ( evicts_per_relinq > tmem_stats.max_evicts_per_relinq ) - tmem_stats.max_evicts_per_relinq = evicts_per_relinq; - if ( pfp != NULL ) - { - if ( !(memflags & MEMF_tmem) ) - scrub_one_page(pfp); - tmem_stats.relinq_pgs++; - } - - return pfp; -} - -unsigned long tmem_freeable_pages(void) -{ - if ( !tmem_enabled() ) - return 0; - - return tmem_page_list_pages + _atomic_read(freeable_page_count); -} - -/* Called at hypervisor startup. */ -static int __init init_tmem(void) -{ - if ( !tmem_enabled() ) - return 0; - - if ( !tmem_mempool_init() ) - return 0; - - if ( tmem_init() ) - { - printk("tmem: initialized comp=%d\n", tmem_compression_enabled()); - tmem_initialized = 1; - } - else - printk("tmem: initialization FAILED\n"); - - return 0; -} -__initcall(init_tmem); - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/common/tmem_control.c b/xen/common/tmem_control.c deleted file mode 100644 index 30bf6fb362..0000000000 --- a/xen/common/tmem_control.c +++ /dev/null @@ -1,560 +0,0 @@ -/* - * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved. 
- * - */ - -#include <xen/init.h> -#include <xen/list.h> -#include <xen/radix-tree.h> -#include <xen/rbtree.h> -#include <xen/rwlock.h> -#include <xen/tmem_control.h> -#include <xen/tmem.h> -#include <xen/tmem_xen.h> -#include <public/sysctl.h> - -/************ TMEM CONTROL OPERATIONS ************************************/ - -/* Freeze/thaw all pools belonging to client cli_id (all domains if -1). */ -static int tmemc_freeze_pools(domid_t cli_id, int arg) -{ - struct client *client; - bool freeze = arg == XEN_SYSCTL_TMEM_OP_FREEZE; - bool destroy = arg == XEN_SYSCTL_TMEM_OP_DESTROY; - char *s; - - s = destroy ? "destroyed" : ( freeze ? "frozen" : "thawed" ); - if ( cli_id == TMEM_CLI_ID_NULL ) - { - list_for_each_entry(client,&tmem_global.client_list,client_list) - client->info.flags.u.frozen = freeze; - tmem_client_info("tmem: all pools %s for all %ss\n", s, tmem_client_str); - } - else - { - if ( (client = tmem_client_from_cli_id(cli_id)) == NULL) - return -1; - client->info.flags.u.frozen = freeze; - tmem_client_info("tmem: all pools %s for %s=%d\n", - s, tmem_cli_id_str, cli_id); - } - return 0; -} - -static unsigned long tmem_flush_npages(unsigned long n) -{ - unsigned long avail_pages = 0; - - while ( (avail_pages = tmem_page_list_pages) < n ) - { - if ( !tmem_evict() ) - break; - } - if ( avail_pages ) - { - spin_lock(&tmem_page_list_lock); - while ( !page_list_empty(&tmem_page_list) ) - { - struct page_info *pg = page_list_remove_head(&tmem_page_list); - scrub_one_page(pg); - tmem_page_list_pages--; - free_domheap_page(pg); - } - ASSERT(tmem_page_list_pages == 0); - INIT_PAGE_LIST_HEAD(&tmem_page_list); - spin_unlock(&tmem_page_list_lock); - } - return avail_pages; -} - -static int tmemc_flush_mem(domid_t cli_id, uint32_t kb) -{ - uint32_t npages, flushed_pages, flushed_kb; - - if ( cli_id != TMEM_CLI_ID_NULL ) - { - tmem_client_warn("tmem: %s-specific flush not supported yet, use --all\n", - tmem_client_str); - return -1; - } - /* Convert kb to pages, 
rounding up if necessary. */ - npages = (kb + ((1 << (PAGE_SHIFT-10))-1)) >> (PAGE_SHIFT-10); - flushed_pages = tmem_flush_npages(npages); - flushed_kb = flushed_pages << (PAGE_SHIFT-10); - return flushed_kb; -} - -/* - * These tmemc_list* routines output lots of stats in a format that is - * intended to be program-parseable, not human-readable. Further, by - * tying each group of stats to a line format indicator (e.g. G= for - * global stats) and each individual stat to a two-letter specifier - * (e.g. Ec:nnnnn in the G= line says there are nnnnn pages in the - * global ephemeral pool), it should allow the stats reported to be - * forward and backwards compatible as tmem evolves. - */ -#define BSIZE 1024 - -static int tmemc_list_client(struct client *c, tmem_cli_va_param_t buf, - int off, uint32_t len, bool use_long) -{ - char info[BSIZE]; - int i, n = 0, sum = 0; - struct tmem_pool *p; - bool s; - - n = scnprintf(info,BSIZE,"C=CI:%d,ww:%d,co:%d,fr:%d," - "Tc:%"PRIu64",Ge:%ld,Pp:%ld,Gp:%ld%c", - c->cli_id, c->info.weight, c->info.flags.u.compress, c->info.flags.u.frozen, - c->total_cycles, c->succ_eph_gets, c->succ_pers_puts, c->succ_pers_gets, - use_long ? ',' : '\n'); - if (use_long) - n += scnprintf(info+n,BSIZE-n, - "Ec:%ld,Em:%ld,cp:%ld,cb:%"PRId64",cn:%ld,cm:%ld\n", - c->eph_count, c->eph_count_max, - c->compressed_pages, c->compressed_sum_size, - c->compress_poor, c->compress_nomem); - if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) ) - sum += n; - for ( i = 0; i < MAX_POOLS_PER_DOMAIN; i++ ) - { - if ( (p = c->pools[i]) == NULL ) - continue; - s = is_shared(p); - n = scnprintf(info,BSIZE,"P=CI:%d,PI:%d," - "PT:%c%c,U0:%"PRIx64",U1:%"PRIx64"%c", - c->cli_id, p->pool_id, - is_persistent(p) ? 'P' : 'E', s ? 'S' : 'P', - (uint64_t)(s ? p->uuid[0] : 0), - (uint64_t)(s ? p->uuid[1] : 0LL), - use_long ? 
',' : '\n'); - if (use_long) - n += scnprintf(info+n,BSIZE-n, - "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu," - "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu," - "fs:%lu,ft:%lu,os:%lu,ot:%lu\n", - _atomic_read(p->pgp_count), p->pgp_count_max, - p->obj_count, p->obj_count_max, - p->objnode_count, p->objnode_count_max, - p->good_puts, p->puts,p->dup_puts_flushed, p->dup_puts_replaced, - p->no_mem_puts, - p->found_gets, p->gets, - p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs); - if ( sum + n >= len ) - return sum; - if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) ) - sum += n; - } - return sum; -} - -static int tmemc_list_shared(tmem_cli_va_param_t buf, int off, uint32_t len, - bool use_long) -{ - char info[BSIZE]; - int i, n = 0, sum = 0; - struct tmem_pool *p; - struct share_list *sl; - - for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++ ) - { - if ( (p = tmem_global.shared_pools[i]) == NULL ) - continue; - n = scnprintf(info+n,BSIZE-n,"S=SI:%d,PT:%c%c,U0:%"PRIx64",U1:%"PRIx64, - i, is_persistent(p) ? 'P' : 'E', - is_shared(p) ? 'S' : 'P', - p->uuid[0], p->uuid[1]); - list_for_each_entry(sl,&p->share_list, share_list) - n += scnprintf(info+n,BSIZE-n,",SC:%d",sl->client->cli_id); - n += scnprintf(info+n,BSIZE-n,"%c", use_long ? 
',' : '\n'); - if (use_long) - n += scnprintf(info+n,BSIZE-n, - "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu," - "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu," - "fs:%lu,ft:%lu,os:%lu,ot:%lu\n", - _atomic_read(p->pgp_count), p->pgp_count_max, - p->obj_count, p->obj_count_max, - p->objnode_count, p->objnode_count_max, - p->good_puts, p->puts,p->dup_puts_flushed, p->dup_puts_replaced, - p->no_mem_puts, - p->found_gets, p->gets, - p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs); - if ( sum + n >= len ) - return sum; - if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) ) - sum += n; - } - return sum; -} - -static int tmemc_list_global_perf(tmem_cli_va_param_t buf, int off, - uint32_t len, bool use_long) -{ - char info[BSIZE]; - int n = 0, sum = 0; - - n = scnprintf(info+n,BSIZE-n,"T="); - n--; /* Overwrite trailing comma. */ - n += scnprintf(info+n,BSIZE-n,"\n"); - if ( sum + n >= len ) - return sum; - if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) ) - sum += n; - return sum; -} - -static int tmemc_list_global(tmem_cli_va_param_t buf, int off, uint32_t len, - bool use_long) -{ - char info[BSIZE]; - int n = 0, sum = off; - - n += scnprintf(info,BSIZE,"G=" - "Tt:%lu,Te:%lu,Cf:%lu,Af:%lu,Pf:%lu,Ta:%lu," - "Lm:%lu,Et:%lu,Ea:%lu,Rt:%lu,Ra:%lu,Rx:%lu,Fp:%lu%c", - tmem_stats.total_tmem_ops, tmem_stats.errored_tmem_ops, tmem_stats.failed_copies, - tmem_stats.alloc_failed, tmem_stats.alloc_page_failed, tmem_page_list_pages, - tmem_stats.low_on_memory, tmem_stats.evicted_pgs, - tmem_stats.evict_attempts, tmem_stats.relinq_pgs, tmem_stats.relinq_attempts, - tmem_stats.max_evicts_per_relinq, - tmem_stats.total_flush_pool, use_long ? 
',' : '\n'); - if (use_long) - n += scnprintf(info+n,BSIZE-n, - "Ec:%ld,Em:%ld,Oc:%d,Om:%d,Nc:%d,Nm:%d,Pc:%d,Pm:%d," - "Fc:%d,Fm:%d,Sc:%d,Sm:%d,Ep:%lu,Gd:%lu,Zt:%lu,Gz:%lu\n", - tmem_global.eph_count, tmem_stats.global_eph_count_max, - _atomic_read(tmem_stats.global_obj_count), tmem_stats.global_obj_count_max, - _atomic_read(tmem_stats.global_rtree_node_count), tmem_stats.global_rtree_node_count_max, - _atomic_read(tmem_stats.global_pgp_count), tmem_stats.global_pgp_count_max, - _atomic_read(tmem_stats.global_page_count), tmem_stats.global_page_count_max, - _atomic_read(tmem_stats.global_pcd_count), tmem_stats.global_pcd_count_max, - tmem_stats.tot_good_eph_puts,tmem_stats.deduped_puts,tmem_stats.pcd_tot_tze_size, - tmem_stats.pcd_tot_csize); - if ( sum + n >= len ) - return sum; - if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) ) - sum += n; - return sum; -} - -static int tmemc_list(domid_t cli_id, tmem_cli_va_param_t buf, uint32_t len, - bool use_long) -{ - struct client *client; - int off = 0; - - if ( cli_id == TMEM_CLI_ID_NULL ) { - off = tmemc_list_global(buf,0,len,use_long); - off += tmemc_list_shared(buf,off,len-off,use_long); - list_for_each_entry(client,&tmem_global.client_list,client_list) - off += tmemc_list_client(client, buf, off, len-off, use_long); - off += tmemc_list_global_perf(buf,off,len-off,use_long); - } - else if ( (client = tmem_client_from_cli_id(cli_id)) == NULL) - return -1; - else - off = tmemc_list_client(client, buf, 0, len, use_long); - - return 0; -} - -static int __tmemc_set_client_info(struct client *client, - XEN_GUEST_HANDLE(xen_tmem_client_t) buf) -{ - domid_t cli_id; - uint32_t old_weight; - xen_tmem_client_t info = { }; - - ASSERT(client); - - if ( copy_from_guest(&info, buf, 1) ) - return -EFAULT; - - if ( info.version != TMEM_SPEC_VERSION ) - return -EOPNOTSUPP; - - if ( info.maxpools > MAX_POOLS_PER_DOMAIN ) - return -ERANGE; - - /* Ignore info.nr_pools. 
*/ - cli_id = client->cli_id; - - if ( info.weight != client->info.weight ) - { - old_weight = client->info.weight; - client->info.weight = info.weight; - tmem_client_info("tmem: weight set to %d for %s=%d\n", - info.weight, tmem_cli_id_str, cli_id); - atomic_sub(old_weight,&tmem_global.client_weight_total); - atomic_add(client->info.weight,&tmem_global.client_weight_total); - } - - - if ( info.flags.u.compress != client->info.flags.u.compress ) - { - client->info.flags.u.compress = info.flags.u.compress; - tmem_client_info("tmem: compression %s for %s=%d\n", - info.flags.u.compress ? "enabled" : "disabled", - tmem_cli_id_str,cli_id); - } - return 0; -} - -static int tmemc_set_client_info(domid_t cli_id, - XEN_GUEST_HANDLE(xen_tmem_client_t) info) -{ - struct client *client; - int ret = -ENOENT; - - if ( cli_id == TMEM_CLI_ID_NULL ) - { - list_for_each_entry(client,&tmem_global.client_list,client_list) - { - ret = __tmemc_set_client_info(client, info); - if (ret) - break; - } - } - else - { - client = tmem_client_from_cli_id(cli_id); - if ( client ) - ret = __tmemc_set_client_info(client, info); - } - return ret; -} - -static int tmemc_get_client_info(int cli_id, - XEN_GUEST_HANDLE(xen_tmem_client_t) info) -{ - struct client *client = tmem_client_from_cli_id(cli_id); - - if ( client ) - { - if ( copy_to_guest(info, &client->info, 1) ) - return -EFAULT; - } - else - { - static const xen_tmem_client_t generic = { - .version = TMEM_SPEC_VERSION, - .maxpools = MAX_POOLS_PER_DOMAIN - }; - - if ( copy_to_guest(info, &generic, 1) ) - return -EFAULT; - } - - return 0; -} - -static int tmemc_get_pool(int cli_id, - XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools, - uint32_t len) -{ - struct client *client = tmem_client_from_cli_id(cli_id); - unsigned int i, idx; - int rc = 0; - unsigned int nr = len / sizeof(xen_tmem_pool_info_t); - - if ( len % sizeof(xen_tmem_pool_info_t) ) - return -EINVAL; - - if ( nr > MAX_POOLS_PER_DOMAIN ) - return -E2BIG; - - if ( 
!guest_handle_okay(pools, nr) ) - return -EINVAL; - - if ( !client ) - return -EINVAL; - - for ( idx = 0, i = 0; i < MAX_POOLS_PER_DOMAIN; i++ ) - { - struct tmem_pool *pool = client->pools[i]; - xen_tmem_pool_info_t out; - - if ( pool == NULL ) - continue; - - out.flags.raw = (pool->persistent ? TMEM_POOL_PERSIST : 0) | - (pool->shared ? TMEM_POOL_SHARED : 0) | - (POOL_PAGESHIFT << TMEM_POOL_PAGESIZE_SHIFT) | - (TMEM_SPEC_VERSION << TMEM_POOL_VERSION_SHIFT); - out.n_pages = _atomic_read(pool->pgp_count); - out.uuid[0] = pool->uuid[0]; - out.uuid[1] = pool->uuid[1]; - out.id = i; - - /* N.B. 'idx' != 'i'. */ - if ( __copy_to_guest_offset(pools, idx, &out, 1) ) - { - rc = -EFAULT; - break; - } - idx++; - /* Don't try to put more than what was requested. */ - if ( idx >= nr ) - break; - } - - /* And how many we have processed. */ - return rc ? : idx; -} - -static int tmemc_set_pools(int cli_id, - XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools, - uint32_t len) -{ - unsigned int i; - int rc = 0; - unsigned int nr = len / sizeof(xen_tmem_pool_info_t); - struct client *client = tmem_client_from_cli_id(cli_id); - - if ( len % sizeof(xen_tmem_pool_info_t) ) - return -EINVAL; - - if ( nr > MAX_POOLS_PER_DOMAIN ) - return -E2BIG; - - if ( !guest_handle_okay(pools, nr) ) - return -EINVAL; - - if ( !client ) - { - client = client_create(cli_id); - if ( !client ) - return -ENOMEM; - } - for ( i = 0; i < nr; i++ ) - { - xen_tmem_pool_info_t pool; - - if ( __copy_from_guest_offset(&pool, pools, i, 1 ) ) - return -EFAULT; - - if ( pool.n_pages ) - return -EINVAL; - - rc = do_tmem_new_pool(cli_id, pool.id, pool.flags.raw, - pool.uuid[0], pool.uuid[1]); - if ( rc < 0 ) - break; - - pool.id = rc; - if ( __copy_to_guest_offset(pools, i, &pool, 1) ) - return -EFAULT; - } - - /* And how many we have processed. */ - return rc ? 
: i; -} - -static int tmemc_auth_pools(int cli_id, - XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools, - uint32_t len) -{ - unsigned int i; - int rc = 0; - unsigned int nr = len / sizeof(xen_tmem_pool_info_t); - struct client *client = tmem_client_from_cli_id(cli_id); - - if ( len % sizeof(xen_tmem_pool_info_t) ) - return -EINVAL; - - if ( nr > MAX_POOLS_PER_DOMAIN ) - return -E2BIG; - - if ( !guest_handle_okay(pools, nr) ) - return -EINVAL; - - if ( !client ) - { - client = client_create(cli_id); - if ( !client ) - return -ENOMEM; - } - - for ( i = 0; i < nr; i++ ) - { - xen_tmem_pool_info_t pool; - - if ( __copy_from_guest_offset(&pool, pools, i, 1 ) ) - return -EFAULT; - - if ( pool.n_pages ) - return -EINVAL; - - rc = tmemc_shared_pool_auth(cli_id, pool.uuid[0], pool.uuid[1], - pool.flags.u.auth); - - if ( rc < 0 ) - break; - - } - - /* And how many we have processed. */ - return rc ? : i; -} - -int tmem_control(struct xen_sysctl_tmem_op *op) -{ - int ret; - uint32_t cmd = op->cmd; - - if ( op->pad != 0 ) - return -EINVAL; - - write_lock(&tmem_rwlock); - - switch (cmd) - { - case XEN_SYSCTL_TMEM_OP_THAW: - case XEN_SYSCTL_TMEM_OP_FREEZE: - case XEN_SYSCTL_TMEM_OP_DESTROY: - ret = tmemc_freeze_pools(op->cli_id, cmd); - break; - case XEN_SYSCTL_TMEM_OP_FLUSH: - ret = tmemc_flush_mem(op->cli_id, op->arg); - break; - case XEN_SYSCTL_TMEM_OP_LIST: - ret = tmemc_list(op->cli_id, - guest_handle_cast(op->u.buf, char), op->len, op->arg); - break; - case XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO: - ret = tmemc_set_client_info(op->cli_id, op->u.client); - break; - case XEN_SYSCTL_TMEM_OP_QUERY_FREEABLE_MB: - ret = tmem_freeable_pages() >> (20 - PAGE_SHIFT); - break; - case XEN_SYSCTL_TMEM_OP_GET_CLIENT_INFO: - ret = tmemc_get_client_info(op->cli_id, op->u.client); - break; - case XEN_SYSCTL_TMEM_OP_GET_POOLS: - ret = tmemc_get_pool(op->cli_id, op->u.pool, op->len); - break; - case XEN_SYSCTL_TMEM_OP_SET_POOLS: /* TMEM_RESTORE_NEW */ - ret = tmemc_set_pools(op->cli_id, op->u.pool, 
op->len); - break; - case XEN_SYSCTL_TMEM_OP_SET_AUTH: /* TMEM_AUTH */ - ret = tmemc_auth_pools(op->cli_id, op->u.pool, op->len); - break; - default: - ret = do_tmem_control(op); - break; - } - - write_unlock(&tmem_rwlock); - - return ret; -} - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/common/tmem_xen.c b/xen/common/tmem_xen.c deleted file mode 100644 index bf7b14f79a..0000000000 --- a/xen/common/tmem_xen.c +++ /dev/null @@ -1,277 +0,0 @@ -/****************************************************************************** - * tmem-xen.c - * - * Xen-specific Transcendent memory - * - * Copyright (c) 2009, Dan Magenheimer, Oracle Corp. - */ - -#include <xen/tmem.h> -#include <xen/tmem_xen.h> -#include <xen/lzo.h> /* compression code */ -#include <xen/paging.h> -#include <xen/domain_page.h> -#include <xen/cpu.h> -#include <xen/init.h> - -bool __read_mostly opt_tmem; -boolean_param("tmem", opt_tmem); - -bool __read_mostly opt_tmem_compress; -boolean_param("tmem_compress", opt_tmem_compress); - -atomic_t freeable_page_count = ATOMIC_INIT(0); - -/* these are a concurrency bottleneck, could be percpu and dynamically - * allocated iff opt_tmem_compress */ -#define LZO_WORKMEM_BYTES LZO1X_1_MEM_COMPRESS -#define LZO_DSTMEM_PAGES 2 -static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, workmem); -static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, dstmem); -static DEFINE_PER_CPU_READ_MOSTLY(void *, scratch_page); - -#if defined(CONFIG_ARM) -static inline void *cli_get_page(xen_pfn_t cmfn, mfn_t *pcli_mfn, - struct page_info **pcli_pfp, bool cli_write) -{ - ASSERT_UNREACHABLE(); - return NULL; -} - -static inline void cli_put_page(void *cli_va, struct page_info *cli_pfp, - mfn_t cli_mfn, bool mark_dirty) -{ - ASSERT_UNREACHABLE(); -} -#else -#include <asm/p2m.h> - -static inline void *cli_get_page(xen_pfn_t cmfn, mfn_t *pcli_mfn, - struct page_info **pcli_pfp, bool 
cli_write) -{ - p2m_type_t t; - struct page_info *page; - - page = get_page_from_gfn(current->domain, cmfn, &t, P2M_ALLOC); - if ( !page || t != p2m_ram_rw ) - { - if ( page ) - put_page(page); - return NULL; - } - - if ( cli_write && !get_page_type(page, PGT_writable_page) ) - { - put_page(page); - return NULL; - } - - *pcli_mfn = page_to_mfn(page); - *pcli_pfp = page; - - return map_domain_page(*pcli_mfn); -} - -static inline void cli_put_page(void *cli_va, struct page_info *cli_pfp, - mfn_t cli_mfn, bool mark_dirty) -{ - if ( mark_dirty ) - { - put_page_and_type(cli_pfp); - paging_mark_dirty(current->domain, cli_mfn); - } - else - put_page(cli_pfp); - unmap_domain_page(cli_va); -} -#endif - -int tmem_copy_from_client(struct page_info *pfp, - xen_pfn_t cmfn, tmem_cli_va_param_t clibuf) -{ - mfn_t tmem_mfn, cli_mfn = INVALID_MFN; - char *tmem_va, *cli_va = NULL; - struct page_info *cli_pfp = NULL; - int rc = 1; - - ASSERT(pfp != NULL); - tmem_mfn = page_to_mfn(pfp); - tmem_va = map_domain_page(tmem_mfn); - if ( guest_handle_is_null(clibuf) ) - { - cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0); - if ( cli_va == NULL ) - { - unmap_domain_page(tmem_va); - return -EFAULT; - } - } - smp_mb(); - if ( cli_va ) - { - memcpy(tmem_va, cli_va, PAGE_SIZE); - cli_put_page(cli_va, cli_pfp, cli_mfn, 0); - } - else - rc = -EINVAL; - unmap_domain_page(tmem_va); - return rc; -} - -int tmem_compress_from_client(xen_pfn_t cmfn, - void **out_va, size_t *out_len, tmem_cli_va_param_t clibuf) -{ - int ret = 0; - unsigned char *dmem = this_cpu(dstmem); - unsigned char *wmem = this_cpu(workmem); - char *scratch = this_cpu(scratch_page); - struct page_info *cli_pfp = NULL; - mfn_t cli_mfn = INVALID_MFN; - void *cli_va = NULL; - - if ( dmem == NULL || wmem == NULL ) - return 0; /* no buffer, so can't compress */ - if ( guest_handle_is_null(clibuf) ) - { - cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0); - if ( cli_va == NULL ) - return -EFAULT; - } - else if ( !scratch ) - return 0; 
- else if ( copy_from_guest(scratch, clibuf, PAGE_SIZE) ) - return -EFAULT; - smp_mb(); - ret = lzo1x_1_compress(cli_va ?: scratch, PAGE_SIZE, dmem, out_len, wmem); - ASSERT(ret == LZO_E_OK); - *out_va = dmem; - if ( cli_va ) - cli_put_page(cli_va, cli_pfp, cli_mfn, 0); - return 1; -} - -int tmem_copy_to_client(xen_pfn_t cmfn, struct page_info *pfp, - tmem_cli_va_param_t clibuf) -{ - mfn_t tmem_mfn, cli_mfn = INVALID_MFN; - char *tmem_va, *cli_va = NULL; - struct page_info *cli_pfp = NULL; - int rc = 1; - - ASSERT(pfp != NULL); - if ( guest_handle_is_null(clibuf) ) - { - cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1); - if ( cli_va == NULL ) - return -EFAULT; - } - tmem_mfn = page_to_mfn(pfp); - tmem_va = map_domain_page(tmem_mfn); - - if ( cli_va ) - { - memcpy(cli_va, tmem_va, PAGE_SIZE); - cli_put_page(cli_va, cli_pfp, cli_mfn, 1); - } - else - rc = -EINVAL; - unmap_domain_page(tmem_va); - smp_mb(); - return rc; -} - -int tmem_decompress_to_client(xen_pfn_t cmfn, void *tmem_va, - size_t size, tmem_cli_va_param_t clibuf) -{ - mfn_t cli_mfn = INVALID_MFN; - struct page_info *cli_pfp = NULL; - void *cli_va = NULL; - char *scratch = this_cpu(scratch_page); - size_t out_len = PAGE_SIZE; - int ret; - - if ( guest_handle_is_null(clibuf) ) - { - cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1); - if ( cli_va == NULL ) - return -EFAULT; - } - else if ( !scratch ) - return 0; - ret = lzo1x_decompress_safe(tmem_va, size, cli_va ?: scratch, &out_len); - ASSERT(ret == LZO_E_OK); - ASSERT(out_len == PAGE_SIZE); - if ( cli_va ) - cli_put_page(cli_va, cli_pfp, cli_mfn, 1); - else if ( copy_to_guest(clibuf, scratch, PAGE_SIZE) ) - return -EFAULT; - smp_mb(); - return 1; -} - -/****************** XEN-SPECIFIC HOST INITIALIZATION ********************/ -static int dstmem_order, workmem_order; - -static int cpu_callback( - struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - - switch ( action ) - { - case CPU_UP_PREPARE: 
{ - if ( per_cpu(dstmem, cpu) == NULL ) - per_cpu(dstmem, cpu) = alloc_xenheap_pages(dstmem_order, 0); - if ( per_cpu(workmem, cpu) == NULL ) - per_cpu(workmem, cpu) = alloc_xenheap_pages(workmem_order, 0); - if ( per_cpu(scratch_page, cpu) == NULL ) - per_cpu(scratch_page, cpu) = alloc_xenheap_page(); - break; - } - case CPU_DEAD: - case CPU_UP_CANCELED: { - if ( per_cpu(dstmem, cpu) != NULL ) - { - free_xenheap_pages(per_cpu(dstmem, cpu), dstmem_order); - per_cpu(dstmem, cpu) = NULL; - } - if ( per_cpu(workmem, cpu) != NULL ) - { - free_xenheap_pages(per_cpu(workmem, cpu), workmem_order); - per_cpu(workmem, cpu) = NULL; - } - if ( per_cpu(scratch_page, cpu) != NULL ) - { - free_xenheap_page(per_cpu(scratch_page, cpu)); - per_cpu(scratch_page, cpu) = NULL; - } - break; - } - default: - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block cpu_nfb = { - .notifier_call = cpu_callback -}; - -int __init tmem_init(void) -{ - unsigned int cpu; - - dstmem_order = get_order_from_pages(LZO_DSTMEM_PAGES); - workmem_order = get_order_from_bytes(LZO1X_1_MEM_COMPRESS); - - for_each_online_cpu ( cpu ) - { - void *hcpu = (void *)(long)cpu; - cpu_callback(&cpu_nfb, CPU_UP_PREPARE, hcpu); - } - - register_cpu_notifier(&cpu_nfb); - - return 1; -} diff --git a/xen/include/Makefile b/xen/include/Makefile index 3d14532dbd..c3e0283d34 100644 --- a/xen/include/Makefile +++ b/xen/include/Makefile @@ -17,7 +17,6 @@ headers-y := \ compat/physdev.h \ compat/platform.h \ compat/sched.h \ - compat/tmem.h \ compat/trace.h \ compat/vcpu.h \ compat/version.h \ diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h index c49b4dcc99..02cbd3e5a5 100644 --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -34,7 +34,6 @@ #include "xen.h" #include "domctl.h" #include "physdev.h" -#include "tmem.h" #define XEN_SYSCTL_INTERFACE_VERSION 0x00000012 @@ -732,110 +731,6 @@ struct xen_sysctl_psr_alloc { } u; }; -#define XEN_SYSCTL_TMEM_OP_ALL_CLIENTS 0xFFFFU - 
-#define XEN_SYSCTL_TMEM_OP_THAW 0 -#define XEN_SYSCTL_TMEM_OP_FREEZE 1 -#define XEN_SYSCTL_TMEM_OP_FLUSH 2 -#define XEN_SYSCTL_TMEM_OP_DESTROY 3 -#define XEN_SYSCTL_TMEM_OP_LIST 4 -#define XEN_SYSCTL_TMEM_OP_GET_CLIENT_INFO 5 -#define XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO 6 -#define XEN_SYSCTL_TMEM_OP_GET_POOLS 7 -#define XEN_SYSCTL_TMEM_OP_QUERY_FREEABLE_MB 8 -#define XEN_SYSCTL_TMEM_OP_SET_POOLS 9 -#define XEN_SYSCTL_TMEM_OP_SAVE_BEGIN 10 -#define XEN_SYSCTL_TMEM_OP_SET_AUTH 11 -#define XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE 19 -#define XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_INV 20 -#define XEN_SYSCTL_TMEM_OP_SAVE_END 21 -#define XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN 30 -#define XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE 32 -#define XEN_SYSCTL_TMEM_OP_RESTORE_FLUSH_PAGE 33 - -/* - * XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_[PAGE|INV] override the 'buf' in - * xen_sysctl_tmem_op with this structure - sometimes with an extra - * page tackled on. - */ -struct tmem_handle { - uint32_t pool_id; - uint32_t index; - xen_tmem_oid_t oid; -}; - -/* - * XEN_SYSCTL_TMEM_OP_[GET,SAVE]_CLIENT uses the 'client' in - * xen_tmem_op with this structure, which is mostly used during migration. - */ -struct xen_tmem_client { - uint32_t version; /* If mismatched we will get XEN_EOPNOTSUPP. */ - uint32_t maxpools; /* If greater than what hypervisor supports, will get - XEN_ERANGE. */ - uint32_t nr_pools; /* Current amount of pools. Ignored on SET*/ - union { /* See TMEM_CLIENT_[COMPRESS,FROZEN] */ - uint32_t raw; - struct { - uint8_t frozen:1, - compress:1, - migrating:1; - } u; - } flags; - uint32_t weight; -}; -typedef struct xen_tmem_client xen_tmem_client_t; -DEFINE_XEN_GUEST_HANDLE(xen_tmem_client_t); - -/* - * XEN_SYSCTL_TMEM_OP_[GET|SET]_POOLS or XEN_SYSCTL_TMEM_OP_SET_AUTH - * uses the 'pool' array in * xen_sysctl_tmem_op with this structure. - * The XEN_SYSCTL_TMEM_OP_GET_POOLS hypercall will - * return the number of entries in 'pool' or a negative value - * if an error was encountered. 
- * The XEN_SYSCTL_TMEM_OP_SET_[AUTH|POOLS] will return the number of - * entries in 'pool' processed or a negative value if an error - * was encountered. - */ -struct xen_tmem_pool_info { - union { - uint32_t raw; - struct { - uint32_t persist:1, /* See TMEM_POOL_PERSIST. */ - shared:1, /* See TMEM_POOL_SHARED. */ - auth:1, /* See TMEM_POOL_AUTH. */ - rsv1:1, - pagebits:8, /* TMEM_POOL_PAGESIZE_[SHIFT,MASK]. */ - rsv2:12, - version:8; /* TMEM_POOL_VERSION_[SHIFT,MASK]. */ - } u; - } flags; - uint32_t id; /* Less than tmem_client.maxpools. */ - uint64_t n_pages; /* Zero on XEN_SYSCTL_TMEM_OP_SET_[AUTH|POOLS]. */ - uint64_aligned_t uuid[2]; -}; -typedef struct xen_tmem_pool_info xen_tmem_pool_info_t; -DEFINE_XEN_GUEST_HANDLE(xen_tmem_pool_info_t); - -struct xen_sysctl_tmem_op { - uint32_t cmd; /* IN: XEN_SYSCTL_TMEM_OP_* . */ - int32_t pool_id; /* IN: 0 by default unless _SAVE_*, RESTORE_* .*/ - uint32_t cli_id; /* IN: client id, 0 for XEN_SYSCTL_TMEM_QUERY_FREEABLE_MB - for all others can be the domain id or - XEN_SYSCTL_TMEM_OP_ALL_CLIENTS for all. */ - uint32_t len; /* IN: length of 'buf'. If not applicable to use 0. */ - uint32_t arg; /* IN: If not applicable to command use 0. */ - uint32_t pad; /* Padding so structure is the same under 32 and 64. */ - xen_tmem_oid_t oid; /* IN: If not applicable to command use 0s. */ - union { - XEN_GUEST_HANDLE_64(char) buf; /* IN/OUT: Buffer to save/restore */ - XEN_GUEST_HANDLE_64(xen_tmem_client_t) client; /* IN/OUT for */ - /* XEN_SYSCTL_TMEM_OP_[GET,SAVE]_CLIENT. */ - XEN_GUEST_HANDLE_64(xen_tmem_pool_info_t) pool; /* OUT for */ - /* XEN_SYSCTL_TMEM_OP_GET_POOLS. Must have 'len' */ - /* of them. 
*/ - } u; -}; - /* * XEN_SYSCTL_get_cpu_levelling_caps (x86 specific) * @@ -1124,7 +1019,7 @@ struct xen_sysctl { #define XEN_SYSCTL_psr_cmt_op 21 #define XEN_SYSCTL_pcitopoinfo 22 #define XEN_SYSCTL_psr_alloc 23 -#define XEN_SYSCTL_tmem_op 24 +/* #define XEN_SYSCTL_tmem_op 24 */ #define XEN_SYSCTL_get_cpu_levelling_caps 25 #define XEN_SYSCTL_get_cpu_featureset 26 #define XEN_SYSCTL_livepatch_op 27 @@ -1154,7 +1049,6 @@ struct xen_sysctl { struct xen_sysctl_coverage_op coverage_op; struct xen_sysctl_psr_cmt_op psr_cmt_op; struct xen_sysctl_psr_alloc psr_alloc; - struct xen_sysctl_tmem_op tmem_op; struct xen_sysctl_cpu_levelling_caps cpu_levelling_caps; struct xen_sysctl_cpu_featureset cpu_featureset; struct xen_sysctl_livepatch_op livepatch; diff --git a/xen/include/public/tmem.h b/xen/include/public/tmem.h index aa0aafaa9d..c02be9f704 100644 --- a/xen/include/public/tmem.h +++ b/xen/include/public/tmem.h @@ -1,8 +1,8 @@ /****************************************************************************** * tmem.h - * + * * Guest OS interface to Xen Transcendent Memory. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the @@ -29,15 +29,11 @@ #include "xen.h" +#if __XEN_INTERFACE_VERSION__ < 0x00041200 + /* version of ABI */ #define TMEM_SPEC_VERSION 1 -/* Commands to HYPERVISOR_tmem_op() */ -#ifdef __XEN__ -#define TMEM_CONTROL 0 /* Now called XEN_SYSCTL_tmem_op */ -#else -#undef TMEM_CONTROL -#endif #define TMEM_NEW_POOL 1 #define TMEM_DESTROY_POOL 2 #define TMEM_PUT_PAGE 4 @@ -111,6 +107,8 @@ typedef struct tmem_op tmem_op_t; DEFINE_XEN_GUEST_HANDLE(tmem_op_t); #endif +#endif /* __XEN_INTERFACE_VERSION__ < 0x00041200 */ + #endif /* __XEN_PUBLIC_TMEM_H__ */ /* diff --git a/xen/include/xen/hypercall.h b/xen/include/xen/hypercall.h index 6154c48cb8..fc00a67448 100644 --- a/xen/include/xen/hypercall.h +++ b/xen/include/xen/hypercall.h @@ -12,7 +12,6 @@ #include <public/sysctl.h> #include <public/platform.h> #include <public/event_channel.h> -#include <public/tmem.h> #include <public/version.h> #include <public/pmu.h> #include <public/hvm/dm_op.h> @@ -130,12 +129,6 @@ extern long do_xsm_op( XEN_GUEST_HANDLE_PARAM(xsm_op_t) u_xsm_op); -#ifdef CONFIG_TMEM -extern long -do_tmem_op( - XEN_GUEST_HANDLE_PARAM(tmem_op_t) uops); -#endif - #ifdef CONFIG_ARGO extern long do_argo_op( unsigned int cmd, diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h index e971147234..7fc3039593 100644 --- a/xen/include/xen/mm.h +++ b/xen/include/xen/mm.h @@ -209,7 +209,6 @@ unsigned long avail_node_heap_pages(unsigned int); unsigned int online_page(unsigned long mfn, uint32_t *status); int offline_page(unsigned long mfn, int broken, uint32_t *status); int query_page_offline(unsigned long mfn, uint32_t *status); -unsigned long total_free_pages(void); void heap_init_late(void); @@ -249,8 +248,6 @@ struct npfec { #define MEMF_no_refcount (1U<<_MEMF_no_refcount) #define 
_MEMF_populate_on_demand 1 #define MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand) -#define _MEMF_tmem 2 -#define MEMF_tmem (1U<<_MEMF_tmem) #define _MEMF_no_dma 3 #define MEMF_no_dma (1U<<_MEMF_no_dma) #define _MEMF_exact_node 4 diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 748bb0f2f9..2201faca6b 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -454,9 +454,6 @@ struct domain */ spinlock_t hypercall_deadlock_mutex; - /* transcendent memory, auto-allocated on first tmem op by each domain */ - struct client *tmem_client; - struct lock_profile_qhead profile_head; /* Various vm_events */ diff --git a/xen/include/xen/tmem.h b/xen/include/xen/tmem.h deleted file mode 100644 index 414a14d808..0000000000 --- a/xen/include/xen/tmem.h +++ /dev/null @@ -1,45 +0,0 @@ -/****************************************************************************** - * tmem.h - * - * Transcendent memory - * - * Copyright (c) 2008, Dan Magenheimer, Oracle Corp. - */ - -#ifndef __XEN_TMEM_H__ -#define __XEN_TMEM_H__ - -struct xen_sysctl_tmem_op; - -#ifdef CONFIG_TMEM -extern int tmem_control(struct xen_sysctl_tmem_op *op); -extern void tmem_destroy(void *); -extern void *tmem_relinquish_pages(unsigned int, unsigned int); -extern unsigned long tmem_freeable_pages(void); -#else -static inline int -tmem_control(struct xen_sysctl_tmem_op *op) -{ - return -ENOSYS; -} - -static inline void -tmem_destroy(void *p) -{ - return; -} - -static inline void * -tmem_relinquish_pages(unsigned int x, unsigned int y) -{ - return NULL; -} - -static inline unsigned long -tmem_freeable_pages(void) -{ - return 0; -} -#endif /* CONFIG_TMEM */ - -#endif /* __XEN_TMEM_H__ */ diff --git a/xen/include/xen/tmem_control.h b/xen/include/xen/tmem_control.h deleted file mode 100644 index ad04cf707b..0000000000 --- a/xen/include/xen/tmem_control.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved. 
- * - */ - -#ifndef __XEN_TMEM_CONTROL_H__ -#define __XEN_TMEM_CONTROL_H__ - -#ifdef CONFIG_TMEM -#include <public/sysctl.h> -/* Variables and functions that tmem_control.c needs from tmem.c */ - -extern struct tmem_statistics tmem_stats; -extern struct tmem_global tmem_global; - -extern rwlock_t tmem_rwlock; - -int tmem_evict(void); -int do_tmem_control(struct xen_sysctl_tmem_op *op); - -struct client *client_create(domid_t cli_id); -int do_tmem_new_pool(domid_t this_cli_id, uint32_t d_poolid, uint32_t flags, - uint64_t uuid_lo, uint64_t uuid_hi); - -int tmemc_shared_pool_auth(domid_t cli_id, uint64_t uuid_lo, - uint64_t uuid_hi, bool auth); -#endif /* CONFIG_TMEM */ - -#endif /* __XEN_TMEM_CONTROL_H__ */ - -/* - * Local variables: - * mode: C - * c-file-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff --git a/xen/include/xen/tmem_xen.h b/xen/include/xen/tmem_xen.h deleted file mode 100644 index 8516a0b131..0000000000 --- a/xen/include/xen/tmem_xen.h +++ /dev/null @@ -1,343 +0,0 @@ -/****************************************************************************** - * tmem_xen.h - * - * Xen-specific Transcendent memory - * - * Copyright (c) 2009, Dan Magenheimer, Oracle Corp. 
- */ - -#ifndef __XEN_TMEM_XEN_H__ -#define __XEN_TMEM_XEN_H__ - -#include <xen/mm.h> /* heap alloc/free */ -#include <xen/pfn.h> -#include <xen/xmalloc.h> /* xmalloc/xfree */ -#include <xen/sched.h> /* struct domain */ -#include <xen/guest_access.h> /* copy_from_guest */ -#include <xen/hash.h> /* hash_long */ -#include <xen/domain_page.h> /* __map_domain_page */ -#include <xen/rbtree.h> /* struct rb_root */ -#include <xsm/xsm.h> /* xsm_tmem_control */ -#include <public/tmem.h> -#ifdef CONFIG_COMPAT -#include <compat/tmem.h> -#endif -typedef uint32_t pagesize_t; /* like size_t, must handle largest PAGE_SIZE */ - -#define IS_PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE) -#define IS_VALID_PAGE(_pi) mfn_valid(page_to_mfn(_pi)) - -extern struct page_list_head tmem_page_list; -extern spinlock_t tmem_page_list_lock; -extern unsigned long tmem_page_list_pages; -extern atomic_t freeable_page_count; - -extern int tmem_init(void); -#define tmem_hash hash_long - -extern bool opt_tmem_compress; -static inline bool tmem_compression_enabled(void) -{ - return opt_tmem_compress; -} - -#ifdef CONFIG_TMEM -extern bool opt_tmem; -static inline bool tmem_enabled(void) -{ - return opt_tmem; -} - -static inline void tmem_disable(void) -{ - opt_tmem = false; -} -#else -static inline bool tmem_enabled(void) -{ - return false; -} - -static inline void tmem_disable(void) -{ -} -#endif /* CONFIG_TMEM */ - -/* - * Memory free page list management - */ - -static inline struct page_info *tmem_page_list_get(void) -{ - struct page_info *pi; - - spin_lock(&tmem_page_list_lock); - if ( (pi = page_list_remove_head(&tmem_page_list)) != NULL ) - tmem_page_list_pages--; - spin_unlock(&tmem_page_list_lock); - ASSERT((pi == NULL) || IS_VALID_PAGE(pi)); - return pi; -} - -static inline void tmem_page_list_put(struct page_info *pi) -{ - ASSERT(IS_VALID_PAGE(pi)); - spin_lock(&tmem_page_list_lock); - page_list_add(pi, &tmem_page_list); - tmem_page_list_pages++; - 
spin_unlock(&tmem_page_list_lock); -} - -/* - * Memory allocation for persistent data - */ -static inline struct page_info *__tmem_alloc_page_thispool(struct domain *d) -{ - struct page_info *pi; - - /* note that this tot_pages check is not protected by d->page_alloc_lock, - * so may race and periodically fail in donate_page or alloc_domheap_pages - * That's OK... neither is a problem, though chatty if log_lvl is set */ - if ( d->tot_pages >= d->max_pages ) - return NULL; - - if ( tmem_page_list_pages ) - { - if ( (pi = tmem_page_list_get()) != NULL ) - { - if ( donate_page(d,pi,0) == 0 ) - goto out; - else - tmem_page_list_put(pi); - } - } - - pi = alloc_domheap_pages(d,0,MEMF_tmem); - -out: - ASSERT((pi == NULL) || IS_VALID_PAGE(pi)); - return pi; -} - -static inline void __tmem_free_page_thispool(struct page_info *pi) -{ - struct domain *d = page_get_owner(pi); - - ASSERT(IS_VALID_PAGE(pi)); - if ( (d == NULL) || steal_page(d,pi,0) == 0 ) - tmem_page_list_put(pi); - else - { - scrub_one_page(pi); - ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0); - free_domheap_pages(pi,0); - } -} - -/* - * Memory allocation for ephemeral (non-persistent) data - */ -static inline struct page_info *__tmem_alloc_page(void) -{ - struct page_info *pi = tmem_page_list_get(); - - if ( pi == NULL) - pi = alloc_domheap_pages(0,0,MEMF_tmem); - - if ( pi ) - atomic_inc(&freeable_page_count); - ASSERT((pi == NULL) || IS_VALID_PAGE(pi)); - return pi; -} - -static inline void __tmem_free_page(struct page_info *pi) -{ - ASSERT(IS_VALID_PAGE(pi)); - tmem_page_list_put(pi); - atomic_dec(&freeable_page_count); -} - -/* "Client" (==domain) abstraction */ -static inline struct client *tmem_client_from_cli_id(domid_t cli_id) -{ - struct client *c; - struct domain *d = rcu_lock_domain_by_id(cli_id); - if (d == NULL) - return NULL; - c = d->tmem_client; - rcu_unlock_domain(d); - return c; -} - -/* these typedefs are in the public/tmem.h interface -typedef XEN_GUEST_HANDLE(void) cli_mfn_t; 
-typedef XEN_GUEST_HANDLE(char) cli_va_t; -*/ -typedef XEN_GUEST_HANDLE_PARAM(tmem_op_t) tmem_cli_op_t; -typedef XEN_GUEST_HANDLE_PARAM(char) tmem_cli_va_param_t; - -static inline int tmem_get_tmemop_from_client(tmem_op_t *op, tmem_cli_op_t uops) -{ -#ifdef CONFIG_COMPAT - if ( is_hvm_vcpu(current) ? hvm_guest_x86_mode(current) != 8 - : is_pv_32bit_vcpu(current) ) - { - int rc; - enum XLAT_tmem_op_u u; - tmem_op_compat_t cop; - - rc = copy_from_guest(&cop, guest_handle_cast(uops, void), 1); - if ( rc ) - return rc; - switch ( cop.cmd ) - { - case TMEM_NEW_POOL: u = XLAT_tmem_op_u_creat; break; - default: u = XLAT_tmem_op_u_gen ; break; - } - XLAT_tmem_op(op, &cop); - return 0; - } -#endif - return copy_from_guest(op, uops, 1); -} - -#define tmem_cli_buf_null guest_handle_from_ptr(NULL, char) -#define TMEM_CLI_ID_NULL ((domid_t)((domid_t)-1L)) -#define tmem_cli_id_str "domid" -#define tmem_client_str "domain" - -int tmem_decompress_to_client(xen_pfn_t, void *, size_t, - tmem_cli_va_param_t); -int tmem_compress_from_client(xen_pfn_t, void **, size_t *, - tmem_cli_va_param_t); - -int tmem_copy_from_client(struct page_info *, xen_pfn_t, tmem_cli_va_param_t); -int tmem_copy_to_client(xen_pfn_t, struct page_info *, tmem_cli_va_param_t); - -#define tmem_client_err(fmt, args...) printk(XENLOG_G_ERR fmt, ##args) -#define tmem_client_warn(fmt, args...) printk(XENLOG_G_WARNING fmt, ##args) -#define tmem_client_info(fmt, args...) printk(XENLOG_G_INFO fmt, ##args) - -/* Global statistics (none need to be locked). 
*/ -struct tmem_statistics { - unsigned long total_tmem_ops; - unsigned long errored_tmem_ops; - unsigned long total_flush_pool; - unsigned long alloc_failed; - unsigned long alloc_page_failed; - unsigned long evicted_pgs; - unsigned long evict_attempts; - unsigned long relinq_pgs; - unsigned long relinq_attempts; - unsigned long max_evicts_per_relinq; - unsigned long low_on_memory; - unsigned long deduped_puts; - unsigned long tot_good_eph_puts; - int global_obj_count_max; - int global_pgp_count_max; - int global_pcd_count_max; - int global_page_count_max; - int global_rtree_node_count_max; - long global_eph_count_max; - unsigned long failed_copies; - unsigned long pcd_tot_tze_size; - unsigned long pcd_tot_csize; - /* Global counters (should use long_atomic_t access). */ - atomic_t global_obj_count; - atomic_t global_pgp_count; - atomic_t global_pcd_count; - atomic_t global_page_count; - atomic_t global_rtree_node_count; -}; - -#define atomic_inc_and_max(_c) do { \ - atomic_inc(&tmem_stats._c); \ - if ( _atomic_read(tmem_stats._c) > tmem_stats._c##_max ) \ - tmem_stats._c##_max = _atomic_read(tmem_stats._c); \ -} while (0) - -#define atomic_dec_and_assert(_c) do { \ - atomic_dec(&tmem_stats._c); \ - ASSERT(_atomic_read(tmem_stats._c) >= 0); \ -} while (0) - -#define MAX_GLOBAL_SHARED_POOLS 16 -struct tmem_global { - struct list_head ephemeral_page_list; /* All pages in ephemeral pools. */ - struct list_head client_list; - struct tmem_pool *shared_pools[MAX_GLOBAL_SHARED_POOLS]; - bool shared_auth; - long eph_count; /* Atomicity depends on eph_lists_spinlock. 
*/ - atomic_t client_weight_total; -}; - -#define MAX_POOLS_PER_DOMAIN 16 - -struct tmem_pool; -struct tmem_page_descriptor; -struct tmem_page_content_descriptor; -struct client { - struct list_head client_list; - struct tmem_pool *pools[MAX_POOLS_PER_DOMAIN]; - struct domain *domain; - struct xmem_pool *persistent_pool; - struct list_head ephemeral_page_list; - long eph_count, eph_count_max; - domid_t cli_id; - xen_tmem_client_t info; - /* For save/restore/migration. */ - bool was_frozen; - struct list_head persistent_invalidated_list; - struct tmem_page_descriptor *cur_pgp; - /* Statistics collection. */ - unsigned long compress_poor, compress_nomem; - unsigned long compressed_pages; - uint64_t compressed_sum_size; - uint64_t total_cycles; - unsigned long succ_pers_puts, succ_eph_gets, succ_pers_gets; - /* Shared pool authentication. */ - uint64_t shared_auth_uuid[MAX_GLOBAL_SHARED_POOLS][2]; -}; - -#define POOL_PAGESHIFT (PAGE_SHIFT - 12) -#define OBJ_HASH_BUCKETS 256 /* Must be power of two. */ -#define OBJ_HASH_BUCKETS_MASK (OBJ_HASH_BUCKETS-1) - -#define is_persistent(_p) (_p->persistent) -#define is_shared(_p) (_p->shared) - -struct tmem_pool { - bool shared; - bool persistent; - bool is_dying; - struct client *client; - uint64_t uuid[2]; /* 0 for private, non-zero for shared. */ - uint32_t pool_id; - rwlock_t pool_rwlock; - struct rb_root obj_rb_root[OBJ_HASH_BUCKETS]; /* Protected by pool_rwlock. */ - struct list_head share_list; /* Valid if shared. */ - int shared_count; /* Valid if shared. */ - /* For save/restore/migration. */ - struct list_head persistent_page_list; - struct tmem_page_descriptor *cur_pgp; - /* Statistics collection. */ - atomic_t pgp_count; - int pgp_count_max; - long obj_count; /* Atomicity depends on pool_rwlock held for write. 
*/ - long obj_count_max; - unsigned long objnode_count, objnode_count_max; - uint64_t sum_life_cycles; - uint64_t sum_evicted_cycles; - unsigned long puts, good_puts, no_mem_puts; - unsigned long dup_puts_flushed, dup_puts_replaced; - unsigned long gets, found_gets; - unsigned long flushs, flushs_found; - unsigned long flush_objs, flush_objs_found; -}; - -struct share_list { - struct list_head share_list; - struct client *client; -}; - -#endif /* __XEN_TMEM_XEN_H__ */ diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst index e330f72580..95f5e5592b 100644 --- a/xen/include/xlat.lst +++ b/xen/include/xlat.lst @@ -135,8 +135,6 @@ ? sched_pin_override sched.h ? sched_remote_shutdown sched.h ? sched_shutdown sched.h -? tmem_oid tmem.h -! tmem_op tmem.h ? t_buf trace.h ? vcpu_get_physid vcpu.h ? vcpu_register_vcpu_info vcpu.h diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h index e628b1c6af..01d2814fed 100644 --- a/xen/include/xsm/dummy.h +++ b/xen/include/xsm/dummy.h @@ -433,12 +433,6 @@ static XSM_INLINE int xsm_page_offline(XSM_DEFAULT_ARG uint32_t cmd) return xsm_default_action(action, current->domain, NULL); } -static XSM_INLINE int xsm_tmem_op(XSM_DEFAULT_VOID) -{ - XSM_ASSERT_ACTION(XSM_HOOK); - return xsm_default_action(action, current->domain, NULL); -} - static XSM_INLINE long xsm_do_xsm_op(XEN_GUEST_HANDLE_PARAM(xsm_op_t) op) { return -ENOSYS; diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h index 8a78d8abd3..9a90355056 100644 --- a/xen/include/xsm/xsm.h +++ b/xen/include/xsm/xsm.h @@ -127,7 +127,6 @@ struct xsm_operations { int (*resource_setup_misc) (void); int (*page_offline)(uint32_t cmd); - int (*tmem_op)(void); long (*do_xsm_op) (XEN_GUEST_HANDLE_PARAM(xsm_op_t) op); #ifdef CONFIG_COMPAT @@ -537,11 +536,6 @@ static inline int xsm_page_offline(xsm_default_t def, uint32_t cmd) return xsm_ops->page_offline(cmd); } -static inline int xsm_tmem_op(xsm_default_t def) -{ - return xsm_ops->tmem_op(); -} - static inline long 
xsm_do_xsm_op (XEN_GUEST_HANDLE_PARAM(xsm_op_t) op) { return xsm_ops->do_xsm_op(op); diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c index 1fe0e746fa..c9a566f2b5 100644 --- a/xen/xsm/dummy.c +++ b/xen/xsm/dummy.c @@ -103,7 +103,6 @@ void __init xsm_fixup_ops (struct xsm_operations *ops) set_to_dummy_if_null(ops, resource_setup_misc); set_to_dummy_if_null(ops, page_offline); - set_to_dummy_if_null(ops, tmem_op); set_to_dummy_if_null(ops, hvm_param); set_to_dummy_if_null(ops, hvm_control); set_to_dummy_if_null(ops, hvm_param_nested); diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c index 3d00c747f6..a7d690ac3c 100644 --- a/xen/xsm/flask/hooks.c +++ b/xen/xsm/flask/hooks.c @@ -810,9 +810,6 @@ static int flask_sysctl(int cmd) return avc_current_has_perm(SECINITSID_XEN, SECCLASS_XEN2, XEN2__PSR_ALLOC, NULL); - case XEN_SYSCTL_tmem_op: - return domain_has_xen(current->domain, XEN__TMEM_CONTROL); - case XEN_SYSCTL_get_cpu_levelling_caps: return avc_current_has_perm(SECINITSID_XEN, SECCLASS_XEN2, XEN2__GET_CPU_LEVELLING_CAPS, NULL); @@ -1178,11 +1175,6 @@ static inline int flask_page_offline(uint32_t cmd) } } -static inline int flask_tmem_op(void) -{ - return domain_has_xen(current->domain, XEN__TMEM_OP); -} - static int flask_add_to_physmap(struct domain *d1, struct domain *d2) { return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP); @@ -1818,7 +1810,6 @@ static struct xsm_operations flask_ops = { .resource_setup_misc = flask_resource_setup_misc, .page_offline = flask_page_offline, - .tmem_op = flask_tmem_op, .hvm_param = flask_hvm_param, .hvm_control = flask_hvm_param, .hvm_param_nested = flask_hvm_param_nested, diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors index e00448b776..194d743a71 100644 --- a/xen/xsm/flask/policy/access_vectors +++ b/xen/xsm/flask/policy/access_vectors @@ -67,10 +67,6 @@ class xen lockprof # XEN_SYSCTL_cpupool_op cpupool_op -# tmem hypercall (any access) - tmem_op -# XEN_SYSCTL_tmem_op command 
of tmem (part of sysctl) - tmem_control # XEN_SYSCTL_scheduler_op with XEN_DOMCTL_SCHEDOP_getinfo, XEN_SYSCTL_sched_id, XEN_DOMCTL_SCHEDOP_getvcpuinfo getscheduler # XEN_SYSCTL_scheduler_op with XEN_DOMCTL_SCHEDOP_putinfo, XEN_DOMCTL_SCHEDOP_putvcpuinfo -- 2.20.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/xen-devel 
 
 
 | 
|  | Lists.xenproject.org is hosted with RackSpace, monitoring our |