[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] crash on boot with 4.6.1 on fedora 24



On 11/05/16 08:00, Juergen Gross wrote:
> On 11/05/16 08:35, Jan Beulich wrote:
>>>>> On 11.05.16 at 07:49, <JGross@xxxxxxxx> wrote:
>>> On 10/05/16 18:35, Boris Ostrovsky wrote:
>>>> On 05/10/2016 11:43 AM, Juergen Gross wrote:
>>>>> On 10/05/16 17:35, Jan Beulich wrote:
>>>>>>>>> On 10.05.16 at 17:19, <JGross@xxxxxxxx> wrote:
>>>>>>> On 10/05/16 15:57, Jan Beulich wrote:
>>>>>>>>>>> On 10.05.16 at 15:39, <boris.ostrovsky@xxxxxxxxxx> wrote:
>>>>>>>>> I didn't finish unwrapping the stack yesterday. Here it is:
>>>>>>>>>
>>>>>>>>> setup_arch -> dmi_scan_machine -> dmi_walk_early -> early_ioremap
>>>>>>>> Ah, that makes sense. Yet why would early_ioremap() involve an
>>>>>>>> M2P lookup? As said, MMIO addresses shouldn't be subject to such
>>>>>>>> lookups.
>>>>>>> early_ioremap()->
>>>>>>>   __early_ioremap()->
>>>>>>>     __early_set_fixmap()->
>>>>>>>       set_pte()->
>>>>>>>         xen_set_pte_init()->
>>>>>>>           mask_rw_pte()->
>>>>>>>             pte_pfn()->
>>>>>>>               pte_val()->
>>>>>>>                 xen_pte_val()->
>>>>>>>                   pte_mfn_to_pfn()
>>>>>> Well, I understand (also from Boris' first reply) that's how it is,
>>>>>> but not why it is so. I.e. the call flow above doesn't answer my
>>>>>> question.
>>>>> On x86 early_ioremap() and early_memremap() share a common sub-function
>>>>> __early_ioremap(). This together with pvops requires a common set_pte()
>>>>> implementation leading to the mfn validation in the end.
>>>>
>>>> Do we make any assumptions about where DMI data lives?
>>>
>>> I don't think so.
>>>
>>> So the basic problem is the page fault due to the sparse m2p map before
>>> the #PF handler is registered.
>>>
>>> What do you think about registering a minimal #PF handler in
>>> xen_arch_setup() being capable to handle this problem? This should be
>>> doable without major problems. I can do a patch.
>>
>> To me that would feel like working around the issue instead of
>> admitting that the removal of _PAGE_IOMAP was a mistake.
> 
> Hmm, I don't think so.
> 
> Having a Xen specific pte flag seems to be much more intrusive than
> having an early boot page fault handler consisting of just one line
> being capable to mimic the default handler in just one aspect (see
> attached patch - only compile tested).
> 
> Adding David as he removed _PAGE_IOMAP in kernel 3.18.

Why don't we get the RW bits correct when making the pteval when we
already have the pfn, instead trying to fix it up afterwards.

Something like this:

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 478a2de..d187368 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -430,6 +430,22 @@ __visible pte_t xen_make_pte(pteval_t pte)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);

+__visible __init pte_t xen_make_pte_init(pteval_t pte)
+{
+       unsigned long pfn = pte_mfn(pte);
+
+#ifdef CONFIG_X86_64
+       pte = mask_rw_pte(pte);
+#endif
+       pte = pte_pfn_to_mfn(pte);
+
+       if (pte_mfn(pte) == INVALID_P2M_ENTRY)
+               pte = __pte_ma(0);
+
+       return native_make_pte(pte);
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
+
 __visible pgd_t xen_make_pgd(pgdval_t pgd)
 {
        pgd = pte_pfn_to_mfn(pgd);
@@ -1562,7 +1578,7 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t
pte)
        return pte;
 }
 #else /* CONFIG_X86_64 */
-static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
+static pte_t __init mask_rw_pte(pte_t pte)
 {
        unsigned long pfn;

@@ -1577,7 +1593,7 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t
pte)
         * page tables for mapping the p2m list, too, and page tables MUST be
         * mapped read-only.
         */
-       pfn = pte_pfn(pte);
+       pfn = pte_mfn(pte);
        if (pfn >= xen_start_info->first_p2m_pfn &&
            pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
                pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW);
@@ -1602,11 +1618,10 @@ static pte_t __init mask_rw_pte(pte_t *ptep,
pte_t pte)
  */
 static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
 {
+#ifdef CONFIG_X86_32
        if (pte_mfn(pte) != INVALID_P2M_ENTRY)
                pte = mask_rw_pte(ptep, pte);
-       else
-               pte = __pte_ma(0);
-
+#endif
        native_set_pte(ptep, pte);
 }

@@ -2407,6 +2422,7 @@ static void __init xen_post_allocator_init(void)
        pv_mmu_ops.alloc_pud = xen_alloc_pud;
        pv_mmu_ops.release_pud = xen_release_pud;
 #endif
+       pv_mmu_ops.make_pte = xen_make_pte;

 #ifdef CONFIG_X86_64
        pv_mmu_ops.write_cr3 = &xen_write_cr3;
@@ -2455,7 +2471,7 @@ static const struct pv_mmu_ops xen_mmu_ops
__initconst = {
        .pte_val = PV_CALLEE_SAVE(xen_pte_val),
        .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),

-       .make_pte = PV_CALLEE_SAVE(xen_make_pte),
+       .make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
        .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),

 #ifdef CONFIG_X86_PAE


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.