|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH 3/8]: PVH: memory manager and paging related changes
---
arch/x86/xen/mmu.c | 179 ++++++++++++++++++++++++++++++++++++---
arch/x86/xen/mmu.h | 2 +
include/xen/interface/memory.h | 27 ++++++-
include/xen/interface/physdev.h | 10 ++
include/xen/xen-ops.h | 7 ++
5 files changed, 211 insertions(+), 14 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b65a761..44a6477 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -330,6 +330,38 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
__xen_set_pte(ptep, pteval);
}
+/*
+ * PVH only (PV guest in an HVM container): issue PHYSDEVOP_pvh_map_iomem
+ * for nr_mfns frames, with pfn as the first guest frame and mfn as the
+ * first machine frame. add_mapping is passed through unchanged
+ * (1 == add mapping, 0 == unmap). A failing hypercall hits BUG_ON().
+ */
+void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+			      int nr_mfns, int add_mapping)
+{
+	struct physdev_map_iomem req = {
+		.first_gfn	= pfn,
+		.first_mfn	= mfn,
+		.nr_mfns	= nr_mfns,
+		.add_mapping	= add_mapping,
+	};
+	int err = HYPERVISOR_physdev_op(PHYSDEVOP_pvh_map_iomem, &req);
+
+	BUG_ON(err);
+}
+
+/* set_pte/set_pte_at hooks for a PV guest in an hvm container (PVH) running
+ * as dom0; xen_init_mmu_ops() installs them only for that case.
+ * We need this because during boot the early_ioremap path eventually calls
+ * set_pte to map io space. Also, ACPI pages are not mapped into the
+ * EPT during dom0 creation. The pages are mapped initially here from
+ * kernel_physical_mapping_init() then later the memtype is changed.
+ * As written, both hooks only forward to native_set_pte(). */
+static void xen_dom0pvh_set_pte(pte_t *ptep, pte_t pteval)
+{
+ native_set_pte(ptep, pteval);
+}
+
+static void xen_dom0pvh_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pteval)
+{
+ native_set_pte(ptep, pteval); /* mm and addr are not used */
+}
+
static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
@@ -1197,6 +1229,10 @@ static void xen_post_allocator_init(void);
static void __init xen_pagetable_setup_done(pgd_t *base)
{
xen_setup_shared_info();
+
+ if (xen_pvh_domain())
+ return;
+
xen_post_allocator_init();
}
@@ -1652,6 +1688,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
+ /* for PVH, page tables are native. */
+ if (xen_pvh_domain())
+ return;
+
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
BUG();
}
@@ -1745,6 +1785,7 @@ static void convert_pfn_mfn(void *v)
* but that's enough to get __va working. We need to fill in the rest
* of the physical mapping once some sort of allocator has been set
* up.
+ * NOTE: for PVH, the page tables are native with HAP required.
*/
pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
unsigned long max_pfn)
@@ -1761,10 +1802,12 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
/* Zap identity mapping */
init_level4_pgt[0] = __pgd(0);
- /* Pre-constructed entries are in pfn, so convert to mfn */
- convert_pfn_mfn(init_level4_pgt);
- convert_pfn_mfn(level3_ident_pgt);
- convert_pfn_mfn(level3_kernel_pgt);
+ if (!xen_pvh_domain()) {
+ /* Pre-constructed entries are in pfn, so convert to mfn */
+ convert_pfn_mfn(init_level4_pgt);
+ convert_pfn_mfn(level3_ident_pgt);
+ convert_pfn_mfn(level3_kernel_pgt);
+ }
l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
@@ -1787,12 +1830,14 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
- /* Pin down new L4 */
- pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
- PFN_DOWN(__pa_symbol(init_level4_pgt)));
+ if (!xen_pvh_domain()) {
+ /* Pin down new L4 */
+ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+ PFN_DOWN(__pa_symbol(init_level4_pgt)));
- /* Unpin Xen-provided one */
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+ /* Unpin Xen-provided one */
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+ }
/* Switch over */
pgd = init_level4_pgt;
@@ -1802,9 +1847,13 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
* structure to attach it to, so make sure we just set kernel
* pgd.
*/
- xen_mc_batch();
- __xen_write_cr3(true, __pa(pgd));
- xen_mc_issue(PARAVIRT_LAZY_CPU);
+ if (xen_pvh_domain()) {
+ native_write_cr3(__pa(pgd));
+ } else {
+ xen_mc_batch();
+ __xen_write_cr3(true, __pa(pgd));
+ xen_mc_issue(PARAVIRT_LAZY_CPU);
+ }
memblock_reserve(__pa(xen_start_info->pt_base),
xen_start_info->nr_pt_frames * PAGE_SIZE);
@@ -2067,9 +2116,21 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst =
{
void __init xen_init_mmu_ops(void)
{
+ x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
+
+ if (xen_pvh_domain()) {
+ pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+
+ /* set_pte* for PCI devices to map iomem. */
+ if (xen_initial_domain()) {
+ pv_mmu_ops.set_pte = xen_dom0pvh_set_pte;
+ pv_mmu_ops.set_pte_at = xen_dom0pvh_set_pte_at;
+ }
+ return;
+ }
+
x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
- x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
@@ -2305,6 +2366,93 @@ void __init xen_hvm_init_mmu_ops(void)
}
#endif
+/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This is for user space
+ * creating a new guest on a PVH dom0, which needs to map domU pages. Called
+ * from an exported function, so no need to export this.
+ *
+ * Returns 0 on success, or the non-zero error code returned by the
+ * XENMEM_add_to_physmap hypercall on failure.
+ */
+static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
+			      unsigned int domid)
+{
+	int rc;
+	struct xen_add_to_physmap pmb = {
+		/* Name the target explicitly: a zero-initialized .domid
+		 * selects domain 0, which is the caller only on dom0. */
+		.domid		= DOMID_SELF,
+		.foreign_domid	= domid,
+		.space		= XENMAPSPACE_gmfn_foreign,
+		.idx		= fgmfn,
+		.gpfn		= lpfn,
+	};
+
+	rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &pmb);
+	if (rc)
+		pr_warn("Failed to map pfn to mfn rc:%d pfn:%lx mfn:%lx\n",
+			rc, lpfn, fgmfn);
+
+	/* Hand the hypercall's own error code back instead of a bare 1. */
+	return rc;
+}
+
+/* Remove count consecutive pfns, starting at spfn, from this domain's Xen
+ * p2m table, one XENMEM_remove_from_physmap hypercall per pfn.
+ *
+ * Stops at the first failing hypercall, logs it and returns 1; returns 0
+ * when every pfn was removed.
+ */
+int pvh_rem_xen_p2m(unsigned long spfn, int count)
+{
+	int done;
+
+	for (done = 0; done < count; done++) {
+		struct xen_remove_from_physmap xrp = {
+			.domid	= DOMID_SELF,
+			.gpfn	= spfn + done,
+		};
+		int err = HYPERVISOR_memory_op(XENMEM_remove_from_physmap,
+					       &xrp);
+
+		if (!err)
+			continue;
+
+		pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
+			spfn + done, err, done);
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pvh_rem_xen_p2m);
+
+struct pvh_remap_data { /* passed to pvh_map_pte_fn() as its data argument */
+ unsigned long fgmfn; /* foreign domain's gmfn */
+ pgprot_t prot; /* protection used to build the new pte */
+ domid_t domid; /* foreign domain id passed to the p2m update */
+ struct vm_area_struct *vma; /* vma being mapped; its vm_private_data is read */
+};
+
+/* apply_to_page_range() callback: take the next page from the page array
+ * hanging off the vma, point that page's pfn at the foreign gmfn in the Xen
+ * p2m, and then install a special pte for it.
+ *
+ * ptep:  pte slot to fill in
+ * token: unused
+ * addr:  unused
+ * data:  struct pvh_remap_data describing the mapping
+ *
+ * Returns 0 on success, -EFAULT if the p2m update fails. The pte is written
+ * only after the p2m update has succeeded, so a failure does not leave a
+ * pte behind that points at a pfn with no foreign frame behind it.
+ */
+static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+			  void *data)
+{
+	struct pvh_remap_data *pvhp = data;
+	struct xen_pvh_sav_pfn_info *savp = pvhp->vma->vm_private_data;
+	unsigned long pfn;
+
+	/* NOTE(review): presumably sp_next_todo must stay below sp_num_pgs;
+	 * nothing here enforces it - confirm the caller guarantees it. */
+	pfn = page_to_pfn(savp->sp_paga[savp->sp_next_todo++]);
+
+	if (pvh_add_to_xen_p2m(pfn, pvhp->fgmfn, pvhp->domid))
+		return -EFAULT;
+
+	native_set_pte(ptep, pte_mkspecial(pfn_pte(pfn, pvhp->prot)));
+	return 0;
+}
+
+/* Map one foreign gmfn into vma at addr on a PVH domain: pvh_map_pte_fn()
+ * updates both the local page table and the Xen p2m.
+ *
+ * The only caller at the moment passes one gmfn at a time.
+ * PVH TBD/FIXME: expand this in future to honor batch requests.
+ *
+ * Returns -EINVAL unless nr == 1, otherwise the result of
+ * apply_to_page_range().
+ */
+static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long mfn, int nr,
+				pgprot_t prot, unsigned domid)
+{
+	int err;
+	struct pvh_remap_data pvhdata = {
+		.fgmfn	= mfn,
+		.prot	= prot,
+		.domid	= domid,
+		.vma	= vma,
+	};
+
+	/* Zero and negative counts must be rejected as well as batches:
+	 * shifting a negative int is undefined, and an empty range is not
+	 * something apply_to_page_range() should be asked to walk. */
+	if (nr != 1)
+		return -EINVAL;
+
+	/* Widen before shifting so the size is computed as unsigned long. */
+	err = apply_to_page_range(vma->vm_mm, addr,
+				  (unsigned long)nr << PAGE_SHIFT,
+				  pvh_map_pte_fn, &pvhdata);
+	flush_tlb_all();
+	return err;
+}
+
#define REMAP_BATCH_SIZE 16
struct remap_data {
@@ -2342,6 +2490,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct
*vma,
BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
(VM_PFNMAP | VM_RESERVED | VM_IO)));
+ if (xen_pvh_domain()) {
+ /* We need to update the local page tables and the xen HAP */
+ return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid);
+ }
+
rmd.mfn = mfn;
rmd.prot = prot;
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 73809bb..6d0bb56 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void);
extern void xen_init_mmu_ops(void);
extern void xen_hvm_init_mmu_ops(void);
+extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+ int nr_mfns, int add_mapping);
#endif /* _XEN_MMU_H */
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index eac3ce1..1b213b1 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -163,10 +163,19 @@ struct xen_add_to_physmap {
/* Which domain to change the mapping for. */
domid_t domid;
+ /* Number of pages to go through for gmfn_range */
+ uint16_t size;
+
/* Source mapping space. */
#define XENMAPSPACE_shared_info 0 /* shared info page */
#define XENMAPSPACE_grant_table 1 /* grant table page */
- unsigned int space;
+#define XENMAPSPACE_gmfn 2 /* GMFN */
+#define XENMAPSPACE_gmfn_range 3 /* GMFN range */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
+ uint16_t space;
+ domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
+
+#define XENMAPIDX_grant_table_status 0x80000000
/* Index into source mapping space. */
unsigned long idx;
@@ -234,4 +243,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
* during a driver critical region.
*/
extern spinlock_t xen_reservation_lock;
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of struct xen_remove_from_physmap.
+ */
+#define XENMEM_remove_from_physmap 15
+struct xen_remove_from_physmap {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+
+ /* GPFN of the current mapping of the page. */
+ unsigned long gpfn;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
+
#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index 9ce788d..80f792e 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -258,6 +258,16 @@ struct physdev_pci_device {
uint8_t devfn;
};
+#define PHYSDEVOP_pvh_map_iomem 29 /* arg == addr of struct physdev_map_iomem */
+struct physdev_map_iomem {
+ /* IN */
+ uint64_t first_gfn; /* first guest frame of the range */
+ uint64_t first_mfn; /* first machine frame of the range */
+ uint32_t nr_mfns; /* number of frames in the range */
+ uint32_t add_mapping; /* 1 == add mapping; 0 == unmap */
+
+};
+
/*
* Notify that some PIRQ-bound event channels have been unmasked.
* ** This command is obsolete since interface version 0x00030202 and is **
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e4..fa595e1 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -29,4 +29,11 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long mfn, int nr,
pgprot_t prot, unsigned domid);
+struct xen_pvh_sav_pfn_info { /* read from vma->vm_private_data by pvh_map_pte_fn() */
+ struct page **sp_paga; /* save pfn (info) page array */
+ int sp_num_pgs; /* presumably the number of entries in sp_paga - TODO confirm */
+ int sp_next_todo; /* index of the next sp_paga entry to use */
+};
+extern int pvh_rem_xen_p2m(unsigned long spfn, int count);
+
#endif /* INCLUDE_XEN_OPS_H */
--
1.7.2.3
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |