Re: [Xen-devel] [PATCHv5 04/14] xen: remove scratch frames for ballooned pages and m2p override
On Tue, 27 Jan 2015, David Vrabel wrote:
> The scratch frame mappings for ballooned pages and the m2p override
> are broken. Remove them in preparation for replacing them with
> simpler mechanisms that work.
>
> The scratch pages did not ensure that the page was not in use. In
> particular, the foreign page could still be in use by hardware. If
> the guest reused the frame the hardware could read or write that
> frame.
>
> The m2p override did not handle the same frame being granted by two
> different grant references. A correct m2p override lookup in this
> case is impossible.
>
> With the m2p override removed, the grant map/unmap for the kernel
> mappings (for x86 PV) can be easily batched in
> set_foreign_p2m_mapping() and clear_foreign_p2m_mapping().
>
> Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
Reviewed-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
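
A note for anyone following along: the double-grant failure is easy to
see from the override code removed below, because the hash is keyed on
the MFN alone. A minimal sketch of the failure mode (illustrative only,
not from the patch; page_a and page_b are hypothetical local pages
backing two grant mappings of the same foreign mfn):

    m2p_add_override(mfn, page_a, &kmap_ops[0]);
    m2p_add_override(mfn, page_b, &kmap_ops[1]);

    /*
     * m2p_find_override() returns the first page in the hash bucket
     * whose page_private() matches the mfn, so mfn_to_pfn(mfn) may
     * resolve to either page_a or page_b -- there is no way to pick
     * the right one.
     */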
> arch/x86/include/asm/xen/page.h |  18 +--
> arch/x86/xen/p2m.c              | 254 ++-------------------------------------
> drivers/xen/balloon.c           |  86 +------------
> 3 files changed, 14 insertions(+), 344 deletions(-)
>
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index e9f52fe..358dcd3 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -57,7 +57,6 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
> extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
> struct gnttab_unmap_grant_ref *kunmap_ops,
> struct page **pages, unsigned int count);
> -extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
>
> /*
> * Helper functions to write or read unsigned long values to/from
> @@ -154,21 +153,12 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
> return mfn;
>
> pfn = mfn_to_pfn_no_overrides(mfn);
> - if (__pfn_to_mfn(pfn) != mfn) {
> - /*
> - * If this appears to be a foreign mfn (because the pfn
> - * doesn't map back to the mfn), then check the local override
> - * table to see if there's a better pfn to use.
> - *
> - * m2p_find_override_pfn returns ~0 if it doesn't find anything.
> - */
> - pfn = m2p_find_override_pfn(mfn, ~0);
> - }
> + if (__pfn_to_mfn(pfn) != mfn)
> + pfn = ~0;
>
> /*
> - * pfn is ~0 if there are no entries in the m2p for mfn or if the
> - * entry doesn't map back to the mfn and m2p_override doesn't have a
> - * valid entry for it.
> + * pfn is ~0 if there are no entries in the m2p for mfn or the
> + * entry doesn't map back to the mfn.
> */
> if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn))
> pfn = mfn;
> diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> index df40b28..c9bc53f 100644
> --- a/arch/x86/xen/p2m.c
> +++ b/arch/x86/xen/p2m.c
> @@ -84,8 +84,6 @@
>
> #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE)
>
> -static void __init m2p_override_init(void);
> -
> unsigned long *xen_p2m_addr __read_mostly;
> EXPORT_SYMBOL_GPL(xen_p2m_addr);
> unsigned long xen_p2m_size __read_mostly;
> @@ -402,8 +400,6 @@ void __init xen_vmalloc_p2m_tree(void)
> xen_p2m_size = xen_max_p2m_pfn;
>
> xen_inv_extra_mem();
> -
> - m2p_override_init();
> }
>
> unsigned long get_phys_to_machine(unsigned long pfn)
> @@ -652,100 +648,21 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
> return true;
> }
>
> -#define M2P_OVERRIDE_HASH_SHIFT 10
> -#define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT)
> -
> -static struct list_head *m2p_overrides;
> -static DEFINE_SPINLOCK(m2p_override_lock);
> -
> -static void __init m2p_override_init(void)
> -{
> - unsigned i;
> -
> - m2p_overrides = alloc_bootmem_align(
> - sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH,
> - sizeof(unsigned long));
> -
> - for (i = 0; i < M2P_OVERRIDE_HASH; i++)
> - INIT_LIST_HEAD(&m2p_overrides[i]);
> -}
> -
> -static unsigned long mfn_hash(unsigned long mfn)
> -{
> - return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT);
> -}
> -
> -/* Add an MFN override for a particular page */
> -static int m2p_add_override(unsigned long mfn, struct page *page,
> - struct gnttab_map_grant_ref *kmap_op)
> -{
> - unsigned long flags;
> - unsigned long pfn;
> - unsigned long uninitialized_var(address);
> - unsigned level;
> - pte_t *ptep = NULL;
> -
> - pfn = page_to_pfn(page);
> - if (!PageHighMem(page)) {
> - address = (unsigned long)__va(pfn << PAGE_SHIFT);
> - ptep = lookup_address(address, &level);
> - if (WARN(ptep == NULL || level != PG_LEVEL_4K,
> - "m2p_add_override: pfn %lx not mapped", pfn))
> - return -EINVAL;
> - }
> -
> - if (kmap_op != NULL) {
> - if (!PageHighMem(page)) {
> - struct multicall_space mcs =
> - xen_mc_entry(sizeof(*kmap_op));
> -
> - MULTI_grant_table_op(mcs.mc,
> - GNTTABOP_map_grant_ref, kmap_op, 1);
> -
> - xen_mc_issue(PARAVIRT_LAZY_MMU);
> - }
> - }
> - spin_lock_irqsave(&m2p_override_lock, flags);
> - list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
> - spin_unlock_irqrestore(&m2p_override_lock, flags);
> -
> - /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
> - * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
> - * pfn so that the following mfn_to_pfn(mfn) calls will return the
> - * pfn from the m2p_override (the backend pfn) instead.
> - * We need to do this because the pages shared by the frontend
> - * (xen-blkfront) can be already locked (lock_page, called by
> - * do_read_cache_page); when the userspace backend tries to use them
> - * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
> - * do_blockdev_direct_IO is going to try to lock the same pages
> - * again resulting in a deadlock.
> - * As a side effect get_user_pages_fast might not be safe on the
> - * frontend pages while they are being shared with the backend,
> - * because mfn_to_pfn (that ends up being called by GUPF) will
> - * return the backend pfn rather than the frontend pfn. */
> - pfn = mfn_to_pfn_no_overrides(mfn);
> - if (__pfn_to_mfn(pfn) == mfn)
> - set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
> -
> - return 0;
> -}
> -
> int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
> struct gnttab_map_grant_ref *kmap_ops,
> struct page **pages, unsigned int count)
> {
> int i, ret = 0;
> - bool lazy = false;
> pte_t *pte;
>
> if (xen_feature(XENFEAT_auto_translated_physmap))
> return 0;
>
> - if (kmap_ops &&
> - !in_interrupt() &&
> - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
> - arch_enter_lazy_mmu_mode();
> - lazy = true;
> + if (kmap_ops) {
> + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
> + kmap_ops, count);
> + if (ret)
> + goto out;
> }
>
> for (i = 0; i < count; i++) {
> @@ -773,160 +690,22 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
> ret = -ENOMEM;
> goto out;
> }
> -
> - if (kmap_ops) {
> - ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
> - if (ret)
> - goto out;
> - }
> }
>
> out:
> - if (lazy)
> - arch_leave_lazy_mmu_mode();
> -
> return ret;
> }
> EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
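
For context on the batching: at this point in the series,
gnttab_map_refs() issues one GNTTABOP_map_grant_ref hypercall for the
map_ops and then calls set_foreign_p2m_mapping(), so the kernel
mappings on x86 PV are now a single second hypercall instead of one
multicall per page. A minimal sketch of a backend-style caller
(map_granted_frames is a hypothetical name):

    #include <xen/grant_table.h>

    static int map_granted_frames(struct gnttab_map_grant_ref *map_ops,
                                  struct gnttab_map_grant_ref *kmap_ops,
                                  struct page **pages, unsigned int count)
    {
            /*
             * One hypercall for map_ops inside gnttab_map_refs(), then
             * one batched GNTTABOP_map_grant_ref for all the kmap_ops
             * in set_foreign_p2m_mapping() above.
             */
            return gnttab_map_refs(map_ops, kmap_ops, pages, count);
    }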
>
> -static struct page *m2p_find_override(unsigned long mfn)
> -{
> - unsigned long flags;
> - struct list_head *bucket;
> - struct page *p, *ret;
> -
> - if (unlikely(!m2p_overrides))
> - return NULL;
> -
> - ret = NULL;
> - bucket = &m2p_overrides[mfn_hash(mfn)];
> -
> - spin_lock_irqsave(&m2p_override_lock, flags);
> -
> - list_for_each_entry(p, bucket, lru) {
> - if (page_private(p) == mfn) {
> - ret = p;
> - break;
> - }
> - }
> -
> - spin_unlock_irqrestore(&m2p_override_lock, flags);
> -
> - return ret;
> -}
> -
> -static int m2p_remove_override(struct page *page,
> - struct gnttab_unmap_grant_ref *kunmap_op,
> - unsigned long mfn)
> -{
> - unsigned long flags;
> - unsigned long pfn;
> - unsigned long uninitialized_var(address);
> - unsigned level;
> - pte_t *ptep = NULL;
> -
> - pfn = page_to_pfn(page);
> -
> - if (!PageHighMem(page)) {
> - address = (unsigned long)__va(pfn << PAGE_SHIFT);
> - ptep = lookup_address(address, &level);
> -
> - if (WARN(ptep == NULL || level != PG_LEVEL_4K,
> - "m2p_remove_override: pfn %lx not mapped", pfn))
> - return -EINVAL;
> - }
> -
> - spin_lock_irqsave(&m2p_override_lock, flags);
> - list_del(&page->lru);
> - spin_unlock_irqrestore(&m2p_override_lock, flags);
> -
> - if (kunmap_op != NULL) {
> - if (!PageHighMem(page)) {
> - struct multicall_space mcs;
> - struct gnttab_unmap_and_replace *unmap_op;
> - struct page *scratch_page = get_balloon_scratch_page();
> - unsigned long scratch_page_address = (unsigned long)
> - __va(page_to_pfn(scratch_page) << PAGE_SHIFT);
> -
> - /*
> - * It might be that we queued all the m2p grant table
> - * hypercalls in a multicall, then m2p_remove_override
> - * get called before the multicall has actually been
> - * issued. In this case handle is going to -1 because
> - * it hasn't been modified yet.
> - */
> - if (kunmap_op->handle == -1)
> - xen_mc_flush();
> - /*
> - * Now if kmap_op->handle is negative it means that the
> - * hypercall actually returned an error.
> - */
> - if (kunmap_op->handle == GNTST_general_error) {
> - pr_warn("m2p_remove_override: pfn %lx mfn %lx,
> failed to modify kernel mappings",
> - pfn, mfn);
> - put_balloon_scratch_page();
> - return -1;
> - }
> -
> - xen_mc_batch();
> -
> - mcs = __xen_mc_entry(
> - sizeof(struct gnttab_unmap_and_replace));
> - unmap_op = mcs.args;
> - unmap_op->host_addr = kunmap_op->host_addr;
> - unmap_op->new_addr = scratch_page_address;
> - unmap_op->handle = kunmap_op->handle;
> -
> - MULTI_grant_table_op(mcs.mc,
> - GNTTABOP_unmap_and_replace, unmap_op, 1);
> -
> - mcs = __xen_mc_entry(0);
> - MULTI_update_va_mapping(mcs.mc, scratch_page_address,
> - pfn_pte(page_to_pfn(scratch_page),
> - PAGE_KERNEL_RO), 0);
> -
> - xen_mc_issue(PARAVIRT_LAZY_MMU);
> -
> - put_balloon_scratch_page();
> - }
> - }
> -
> - /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
> - * somewhere in this domain, even before being added to the
> - * m2p_override (see comment above in m2p_add_override).
> - * If there are no other entries in the m2p_override corresponding
> - * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
> - * the original pfn (the one shared by the frontend): the backend
> - * cannot do any IO on this page anymore because it has been
> - * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
> - * the original pfn causes mfn_to_pfn(mfn) to return the frontend
> - * pfn again. */
> - mfn &= ~FOREIGN_FRAME_BIT;
> - pfn = mfn_to_pfn_no_overrides(mfn);
> - if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) &&
> - m2p_find_override(mfn) == NULL)
> - set_phys_to_machine(pfn, mfn);
> -
> - return 0;
> -}
> -
> int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
> struct gnttab_unmap_grant_ref *kunmap_ops,
> struct page **pages, unsigned int count)
> {
> int i, ret = 0;
> - bool lazy = false;
>
> if (xen_feature(XENFEAT_auto_translated_physmap))
> return 0;
>
> - if (kunmap_ops &&
> - !in_interrupt() &&
> - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) {
> - arch_enter_lazy_mmu_mode();
> - lazy = true;
> - }
> -
> for (i = 0; i < count; i++) {
> unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
> unsigned long pfn = page_to_pfn(pages[i]);
> @@ -940,32 +719,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
> WARN_ON(!PagePrivate(pages[i]));
> ClearPagePrivate(pages[i]);
> set_phys_to_machine(pfn, pages[i]->index);
> -
> - if (kunmap_ops)
> - ret = m2p_remove_override(pages[i], &kunmap_ops[i],
> - mfn);
> - if (ret)
> - goto out;
> }
> -
> + if (kunmap_ops)
> + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
> + kunmap_ops, count);
> out:
> - if (lazy)
> - arch_leave_lazy_mmu_mode();
> return ret;
> }
> EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
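
The unmap side is symmetric: gnttab_unmap_refs() (as of the earlier
patch in this series that pre-populates the kernel unmap ops) unmaps
the IO mappings and then calls clear_foreign_p2m_mapping(), which
restores each p2m entry and issues one batched GNTTABOP_unmap_grant_ref
for the kernel mappings. A matching sketch (unmap_granted_frames is a
hypothetical name):

    #include <xen/grant_table.h>

    static int unmap_granted_frames(struct gnttab_unmap_grant_ref *unmap_ops,
                                    struct gnttab_unmap_grant_ref *kunmap_ops,
                                    struct page **pages, unsigned int count)
    {
            /* Two batched hypercalls total, regardless of count. */
            return gnttab_unmap_refs(unmap_ops, kunmap_ops, pages, count);
    }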
>
> -unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
> -{
> - struct page *p = m2p_find_override(mfn);
> - unsigned long ret = pfn;
> -
> - if (p)
> - ret = page_to_pfn(p);
> -
> - return ret;
> -}
> -EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
> -
> #ifdef CONFIG_XEN_DEBUG_FS
> #include <linux/debugfs.h>
> #include "debugfs.h"
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index 3860d02..0b52d92 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -92,7 +92,6 @@ EXPORT_SYMBOL_GPL(balloon_stats);
>
> /* We increase/decrease in batches which fit in a page */
> static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
> -static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
>
>
> /* List of ballooned pages, threaded through the mem_map array. */
> @@ -423,22 +422,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
> page = pfn_to_page(pfn);
>
> #ifdef CONFIG_XEN_HAVE_PVMMU
> - /*
> - * Ballooned out frames are effectively replaced with
> - * a scratch frame. Ensure direct mappings and the
> - * p2m are consistent.
> - */
> if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> if (!PageHighMem(page)) {
> - struct page *scratch_page = get_balloon_scratch_page();
> -
> ret = HYPERVISOR_update_va_mapping(
> (unsigned long)__va(pfn << PAGE_SHIFT),
> - pfn_pte(page_to_pfn(scratch_page),
> - PAGE_KERNEL_RO), 0);
> + __pte_ma(0), 0);
> BUG_ON(ret);
> -
> - put_balloon_scratch_page();
> }
> __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
> }
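
One note on the balloon change: __pte_ma(0) is just an all-zero PTE
(no _PAGE_PRESENT), so the direct mapping of a ballooned-out lowmem
page now faults on any stray access instead of aliasing a shared
scratch frame. For reference, the helper in asm/xen/page.h is simply:

    /* Build a pte from a raw machine-format value; __pte_ma(0) has no
     * flags set, in particular not _PAGE_PRESENT. */
    #define __pte_ma(x)     ((pte_t) { .pte = (x) })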
> @@ -500,18 +489,6 @@ static void balloon_process(struct work_struct *work)
> mutex_unlock(&balloon_mutex);
> }
>
> -struct page *get_balloon_scratch_page(void)
> -{
> - struct page *ret = get_cpu_var(balloon_scratch_page);
> - BUG_ON(ret == NULL);
> - return ret;
> -}
> -
> -void put_balloon_scratch_page(void)
> -{
> - put_cpu_var(balloon_scratch_page);
> -}
> -
> /* Resets the Xen limit, sets new target, and kicks off processing. */
> void balloon_set_new_target(unsigned long target)
> {
> @@ -605,61 +582,13 @@ static void __init balloon_add_region(unsigned long start_pfn,
> }
> }
>
> -static int alloc_balloon_scratch_page(int cpu)
> -{
> - if (per_cpu(balloon_scratch_page, cpu) != NULL)
> - return 0;
> -
> - per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
> - if (per_cpu(balloon_scratch_page, cpu) == NULL) {
> - pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n",
> cpu);
> - return -ENOMEM;
> - }
> -
> - return 0;
> -}
> -
> -
> -static int balloon_cpu_notify(struct notifier_block *self,
> - unsigned long action, void *hcpu)
> -{
> - int cpu = (long)hcpu;
> - switch (action) {
> - case CPU_UP_PREPARE:
> - if (alloc_balloon_scratch_page(cpu))
> - return NOTIFY_BAD;
> - break;
> - default:
> - break;
> - }
> - return NOTIFY_OK;
> -}
> -
> -static struct notifier_block balloon_cpu_notifier = {
> - .notifier_call = balloon_cpu_notify,
> -};
> -
> static int __init balloon_init(void)
> {
> - int i, cpu;
> + int i;
>
> if (!xen_domain())
> return -ENODEV;
>
> - if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> - register_cpu_notifier(&balloon_cpu_notifier);
> -
> - get_online_cpus();
> - for_each_online_cpu(cpu) {
> - if (alloc_balloon_scratch_page(cpu)) {
> - put_online_cpus();
> - unregister_cpu_notifier(&balloon_cpu_notifier);
> - return -ENOMEM;
> - }
> - }
> - put_online_cpus();
> - }
> -
> pr_info("Initialising balloon driver\n");
>
> balloon_stats.current_pages = xen_pv_domain()
> @@ -696,15 +625,4 @@ static int __init balloon_init(void)
>
> subsys_initcall(balloon_init);
>
> -static int __init balloon_clear(void)
> -{
> - int cpu;
> -
> - for_each_possible_cpu(cpu)
> - per_cpu(balloon_scratch_page, cpu) = NULL;
> -
> - return 0;
> -}
> -early_initcall(balloon_clear);
> -
> MODULE_LICENSE("GPL");
> --
> 1.7.10.4
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel