Re: [Xen-devel] Load increase after memory upgrade (part2)
On Fri, May 11, 2012 at 03:41:38PM -0400, Konrad Rzeszutek Wilk wrote:
> On Fri, May 11, 2012 at 11:39:08AM +0200, Carsten Schiers wrote:
> > Hi Konrad,
> >
> >
> > don't want to be pushy, as I have no real issue. I simply use the Xenified
> > kernel or take the double load.
> >
> > But I think this mystery is still open. My last status was that the latest
> > patch you produced resulted in a BUG,
>
> Yes, that is right. Thank you for reminding me.
> >
> > so we still have not checked whether our theory is correct.
>
> No we haven't. And I should have no trouble reproducing this. I can just write
> write
> a tiny module that allocates vmalloc_32().
Done. Found some bugs, and here is a new version. Can you please
try it out? It has #define DEBUG 1 set, so it should print a lot of
output when the DVB module loads. If it crashes, please send me the full log.
Thanks.
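
For anyone who wants to reproduce this independently, here is a minimal sketch of
the kind of test module mentioned above (the module name and the 4 MB size are
illustrative only, not part of the patch). It just allocates with vmalloc_32(),
which requests __GFP_DMA/__GFP_DMA32 memory and so exercises the path the patch
below hooks in __vmalloc_area_node():

#include <linux/init.h>
#include <linux/module.h>
#include <linux/vmalloc.h>

static void *buf;

static int __init vm32_test_init(void)
{
	/* vmalloc_32() asks for memory addressable with 32 bits, similar
	 * to what the DVB driver's DMA buffer allocation does. */
	buf = vmalloc_32(4 * 1024 * 1024);
	if (!buf)
		return -ENOMEM;
	pr_info("vm32_test: allocated 4 MB at %p\n", buf);
	return 0;
}

static void __exit vm32_test_exit(void)
{
	vfree(buf);
}

module_init(vm32_test_init);
module_exit(vm32_test_exit);
MODULE_LICENSE("GPL");
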
From 5afb4ab1fb3d2b059fe1a6db93ab65cb76f43b8a Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Date: Thu, 31 May 2012 14:21:04 -0400
Subject: [PATCH] xen/vmalloc_32: Use xen_exchange_.. when GFP flags are DMA. [v3]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
---
arch/x86/xen/mmu.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++-
include/xen/xen-ops.h | 2 +
mm/vmalloc.c | 18 +++++-
3 files changed, 202 insertions(+), 5 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3a73785..960d206 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -47,6 +47,7 @@
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/seq_file.h>
+#include <linux/slab.h>
#include <trace/events/xen.h>
@@ -2051,6 +2052,7 @@ void __init xen_init_mmu_ops(void)
/* Protected by xen_reservation_lock. */
#define MAX_CONTIG_ORDER 9 /* 2MB */
static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
+static unsigned long limited_frames[1<<MAX_CONTIG_ORDER];
#define VOID_PTE (mfn_pte(0, __pgprot(0)))
static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
@@ -2075,6 +2077,42 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
}
xen_mc_issue(0);
}
+static int xen_zap_page_range(struct page *pages, unsigned int order,
+ unsigned long *in_frames,
+ unsigned long *out_frames,
+ void *limit_bitmap)
+{
+ int i, n = 0;
+ struct multicall_space mcs;
+ struct page *page;
+
+ xen_mc_batch();
+ for (i = 0; i < (1UL<<order); i++) {
+ if (!test_bit(i, limit_bitmap))
+ continue;
+
+ page = &pages[i];
+ mcs = __xen_mc_entry(0);
+#define DEBUG 1
+ if (in_frames) {
+#ifdef DEBUG
+ printk(KERN_INFO "%s:%d 0x%lx(pfn) 0x%lx (mfn)
0x%lx(vaddr)\n",
+ __func__, i, page_to_pfn(page),
+ pfn_to_mfn(page_to_pfn(page)),
page_address(page));
+#endif
+ in_frames[i] = pfn_to_mfn(page_to_pfn(page));
+ }
+ MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page), VOID_PTE, 0);
+ set_phys_to_machine(page_to_pfn(page), INVALID_P2M_ENTRY);
+
+ if (out_frames)
+ out_frames[i] = page_to_pfn(page);
+ ++n;
+
+ }
+ xen_mc_issue(0);
+ return n;
+}
/*
* Update the pfn-to-mfn mappings for a virtual address range, either to
@@ -2118,6 +2156,53 @@ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
xen_mc_issue(0);
}
+static void xen_remap_exchanged_pages(struct page *pages, int order,
+ unsigned long *mfns,
+ unsigned long first_mfn, /* in_frame if we failed */
+ void *limit_map)
+{
+ unsigned i, limit;
+ unsigned long mfn;
+ struct page *page;
+
+ xen_mc_batch();
+
+ limit = 1ULL << order;
+ for (i = 0; i < limit; i++) {
+ struct multicall_space mcs;
+ unsigned flags;
+
+ if (!test_bit(i, limit_map))
+ continue;
+
+ page = &pages[i];
+ mcs = __xen_mc_entry(0);
+ if (mfns)
+ mfn = mfns[i];
+ else
+ mfn = first_mfn + i;
+
+ if (i < (limit - 1))
+ flags = 0;
+ else {
+ if (order == 0)
+ flags = UVMF_INVLPG | UVMF_ALL;
+ else
+ flags = UVMF_TLB_FLUSH | UVMF_ALL;
+ }
+#ifdef DEBUG
+ printk(KERN_INFO "%s (%d) pfn:0x%lx, pfn: 0x%lx vaddr: 0x%lx\n",
+ __func__, i, page_to_pfn(page), mfn, page_address(page));
+#endif
+ MULTI_update_va_mapping(mcs.mc, (unsigned long)page_address(page),
+ mfn_pte(mfn, PAGE_KERNEL), flags);
+
+ set_phys_to_machine(page_to_pfn(page), mfn);
+ }
+
+ xen_mc_issue(0);
+}
+
/*
* Perform the hypercall to exchange a region of our pfns to point to
@@ -2136,7 +2221,9 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
{
long rc;
int success;
-
+#ifdef DEBUG
+ int i;
+#endif
struct xen_memory_exchange exchange = {
.in = {
.nr_extents = extents_in,
@@ -2157,7 +2244,11 @@ static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
success = (exchange.nr_exchanged == extents_in);
-
+#ifdef DEBUG
+ for (i = 0; i < exchange.nr_exchanged; i++) {
+ printk(KERN_INFO "%s 0x%lx (mfn) <-> 0x%lx (mfn)\n",
__func__,pfns_in[i], mfns_out[i]);
+ }
+#endif
BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
BUG_ON(success && (rc != 0));
@@ -2231,8 +2322,8 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
xen_zap_pfn_range(vstart, order, NULL, out_frames);
/* 3. Do the exchange for non-contiguous MFNs. */
- success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
- 0, out_frames, 0);
+ success = xen_exchange_memory(1, order, &in_frame,
+ 1UL << order, 0, out_frames, 0);
/* 4. Map new pages in place of old pages. */
if (success)
@@ -2244,6 +2335,94 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+ unsigned int address_bits)
+{
+ unsigned long *in_frames = discontig_frames, *out_frames = limited_frames;
+ unsigned long flags;
+ struct page *page;
+ int success;
+ int i, n = 0;
+ unsigned long _limit_map;
+ unsigned long *limit_map;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
+ if (unlikely(order > MAX_CONTIG_ORDER))
+ return -ENOMEM;
+
+ if (BITS_PER_LONG >> order) {
+ limit_map = kzalloc(BITS_TO_LONGS(1U << order) *
+ sizeof(*limit_map), GFP_KERNEL);
+ if (unlikely(!limit_map))
+ return -ENOMEM;
+ } else
+ limit_map = &_limit_map;
+
+ /* 0. Construct our per page bitmap lookup. */
+
+ if (address_bits && (address_bits < PAGE_SHIFT))
+ return -EINVAL;
+
+ if (order)
+ bitmap_zero(limit_map, 1U << order);
+ else
+ __set_bit(0, limit_map);
+
+ /* 1. Clear the pages */
+ for (i = 0; i < (1ULL << order); i++) {
+ void *vaddr;
+ page = &pages[i];
+
+ vaddr = page_address(page);
+#ifdef DEBUG
+ printk(KERN_INFO "%s: page: %p vaddr: %p 0x%lx(mfn)
0x%lx(pfn)\n", __func__, page, vaddr, virt_to_mfn(vaddr),
mfn_to_pfn(virt_to_mfn(vaddr)));
+#endif
+ if (address_bits) {
+ if (!(virt_to_mfn(vaddr) >> (address_bits - PAGE_SHIFT)))
+ continue;
+ __set_bit(i, limit_map);
+ }
+ if (!PageHighMem(page))
+ memset(vaddr, 0, PAGE_SIZE);
+ else {
+ memset(kmap(page), 0, PAGE_SIZE);
+ kunmap(page);
+ ++n;
+ }
+ }
+ /* Check to see if we actually have to do any work. */
+ if (bitmap_empty(limit_map, 1U << order)) {
+ if (limit_map != &_limit_map)
+ kfree(limit_map);
+ return 0;
+ }
+ if (n)
+ kmap_flush_unused();
+
+ spin_lock_irqsave(&xen_reservation_lock, flags);
+
+ /* 2. Zap current PTEs. */
+ n = xen_zap_page_range(pages, order, in_frames, NULL /* out_frames */, limit_map);
+
+ /* 3. Do the exchange for non-contiguous MFNs. */
+ success = xen_exchange_memory(n, 0 /* this is always called per page */, in_frames,
+ n, 0, out_frames, address_bits);
+
+ /* 4. Map new pages in place of old pages. */
+ if (success)
+ xen_remap_exchanged_pages(pages, order, out_frames, 0, limit_map);
+ else
+ xen_remap_exchanged_pages(pages, order, NULL, *in_frames, limit_map);
+
+ spin_unlock_irqrestore(&xen_reservation_lock, flags);
+ if (limit_map != &_limit_map)
+ kfree(limit_map);
+
+ return success ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xen_limit_pages_to_max_mfn);
#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_exit_mmap(struct mm_struct *mm)
{
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e4..2f8709f 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -29,4 +29,6 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long mfn, int nr,
pgprot_t prot, unsigned domid);
+int xen_limit_pages_to_max_mfn(struct page *pages, unsigned int order,
+ unsigned int address_bits);
#endif /* INCLUDE_XEN_OPS_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2aad499..194af07 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,6 +31,8 @@
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
/*** Page table manipulation functions ***/
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -1576,7 +1578,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page **pages;
unsigned int nr_pages, array_size, i;
gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
-
+ gfp_t dma_mask = gfp_mask & (__GFP_DMA | __GFP_DMA32);
+ if (xen_pv_domain()) {
+ if (dma_mask == (__GFP_DMA | __GFP_DMA32))
+ gfp_mask &= ~(__GFP_DMA | __GFP_DMA32);
+ }
nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
@@ -1612,6 +1618,16 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
goto fail;
}
area->pages[i] = page;
+ if (xen_pv_domain()) {
+ if (dma_mask) {
+ if (xen_limit_pages_to_max_mfn(page, 0, 32)) {
+ area->nr_pages = i + 1;
+ goto fail;
+ }
+ if (gfp_mask & __GFP_ZERO)
+ clear_highpage(page);
+ }
+ }
}
if (map_vm_area(area, prot, &pages))
--
1.7.7.6
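
As a side note, for anyone wanting to exercise the new export directly rather than
through vmalloc_32(): the call is made per page (order 0), mirroring the vmalloc.c
hunk above. A hedged, illustrative sketch only (the helper name is hypothetical and
not part of the patch):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <xen/xen-ops.h>

/* Illustrative helper: allocate one page and make sure its backing MFN is
 * below 4 GB, the same way __vmalloc_area_node() now calls
 * xen_limit_pages_to_max_mfn(page, 0, 32) for __GFP_DMA/__GFP_DMA32 requests. */
static struct page *alloc_page_below_4g(gfp_t gfp)
{
	struct page *page = alloc_page(gfp);

	if (page && xen_limit_pages_to_max_mfn(page, 0, 32)) {
		__free_page(page);	/* exchange with the hypervisor failed */
		return NULL;
	}
	return page;
}
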
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel