Re: [PATCH] x86/pat: fix W^X violation false-positives when running as Xen PV guest
* Juergen Gross <jgross@xxxxxxxx> wrote:

> When running as Xen PV guest in some cases W^X violation WARN()s have
> been observed. Those WARN()s are produced by verify_rwx(), which looks
> into the PTE to verify that writable kernel pages have the NX bit set
> in order to avoid code modifications of the kernel by rogue code.
>
> As the NX bits of all levels of translation entries are or-ed and the
> RW bits of all levels are and-ed, looking just into the PTE isn't enough
> for the decision that a writable page is executable, too. When running
> as a Xen PV guest, kernel initialization will set the NX bit in PMD
> entries of the initial page tables covering the .data segment.
>
> When finding the PTE to have set the RW bit but no NX bit, higher level
> entries must be looked at. Only when all levels have the RW bit set and
> no NX bit set, the W^X violation should be flagged.
>
> Additionally show_fault_oops() has a similar problem: it will issue the
> "kernel tried to execute NX-protected page" message only if it finds
> the NX bit set in the leaf translation entry, while any NX bit in
> non-leaf entries are being ignored for issuing the message.
>
> Modify lookup_address_in_pgd() to return the effective NX and RW bit
> values of the non-leaf translation entries and evaluate those as well
> in verify_rwx() and show_fault_oops().

Ok, this fix makes sense, as that's how the hardware works and we
interpret the pagetables poorly.

> Fixes: 652c5bf380ad ("x86/mm: Refuse W^X violations")
> Reported-by: Jason Andryuk <jandryuk@xxxxxxxxx>
> Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
> ---
>  arch/x86/include/asm/pgtable_types.h |  2 +-
>  arch/x86/kernel/sev.c                |  3 +-
>  arch/x86/mm/fault.c                  |  7 ++--
>  arch/x86/mm/pat/set_memory.c         | 56 +++++++++++++++++++++-------
>  arch/x86/virt/svm/sev.c              |  3 +-
>  5 files changed, 52 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
> index 0b748ee16b3d..91ab538d3872 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -565,7 +565,7 @@ static inline void update_page_count(int level, unsigned long pages) { }
>   */
>  extern pte_t *lookup_address(unsigned long address, unsigned int *level);
>  extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
> -				    unsigned int *level);
> +				    unsigned int *level, bool *nx, bool *rw);
>  extern pmd_t *lookup_pmd_address(unsigned long address);
>  extern phys_addr_t slow_virt_to_phys(void *__address);
>  extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,

Please introduce a new lookup_address_in_pgd_attr() function or so,
which is used by code intentionally.
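E.g. keep the existing prototype as a thin wrapper around the new
function, so only callers that actually want the attributes need to
change - an untested sketch, the exact name and placement are just a
suggestion:

	/*
	 * New primary lookup that additionally reports the effective
	 * NX (or-ed) and RW (and-ed) bits of the non-leaf entries:
	 */
	pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
					  unsigned int *level, bool *nx,
					  bool *rw);

	/* Unchanged interface for code that doesn't use the attributes: */
	pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
				     unsigned int *level)
	{
		bool nx, rw;

		return lookup_address_in_pgd_attr(pgd, address, level, &nx, &rw);
	}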
This avoids changing the arch/x86/kernel/sev.c and
arch/x86/virt/svm/sev.c uses, that retrieve these attributes but don't
do anything with them:

> diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
> index 38ad066179d8..adba581e999d 100644
> --- a/arch/x86/kernel/sev.c
> +++ b/arch/x86/kernel/sev.c
> @@ -516,12 +516,13 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
>  	unsigned long va = (unsigned long)vaddr;
>  	unsigned int level;
>  	phys_addr_t pa;
> +	bool nx, rw;
>  	pgd_t *pgd;
>  	pte_t *pte;
>
>  	pgd = __va(read_cr3_pa());
>  	pgd = &pgd[pgd_index(va)];
> -	pte = lookup_address_in_pgd(pgd, va, &level);
> +	pte = lookup_address_in_pgd(pgd, va, &level, &nx, &rw);
>  	if (!pte) {
>  		ctxt->fi.vector = X86_TRAP_PF;
>  		ctxt->fi.cr2 = vaddr;
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 622d12ec7f08..eb8e897a5653 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -514,18 +514,19 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
>
>  	if (error_code & X86_PF_INSTR) {
>  		unsigned int level;
> +		bool nx, rw;
>  		pgd_t *pgd;
>  		pte_t *pte;
>
>  		pgd = __va(read_cr3_pa());
>  		pgd += pgd_index(address);
>
> -		pte = lookup_address_in_pgd(pgd, address, &level);
> +		pte = lookup_address_in_pgd(pgd, address, &level, &nx, &rw);
>
> -		if (pte && pte_present(*pte) && !pte_exec(*pte))
> +		if (pte && pte_present(*pte) && (!pte_exec(*pte) || nx))
>  			pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n",
>  				from_kuid(&init_user_ns, current_uid()));
> -		if (pte && pte_present(*pte) && pte_exec(*pte) &&
> +		if (pte && pte_present(*pte) && pte_exec(*pte) && !nx &&
>  		    (pgd_flags(*pgd) & _PAGE_USER) &&
>  		    (__read_cr4() & X86_CR4_SMEP))
>  			pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n",

This should be a separate patch - as it might change observed behavior.

> diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
> index 80c9037ffadf..baa4dc4748e9 100644
> --- a/arch/x86/mm/pat/set_memory.c
> +++ b/arch/x86/mm/pat/set_memory.c
> @@ -619,7 +619,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
>   * Validate strict W^X semantics.
>   */
>  static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
> -				  unsigned long pfn, unsigned long npg)
> +				  unsigned long pfn, unsigned long npg,
> +				  bool nx, bool rw)
>  {
>  	unsigned long end;
>
> @@ -641,6 +642,10 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
>  	if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
>  		return new;
>
> +	/* Non-leaf translation entries can disable writing or execution. */
> +	if (!rw || nx)
> +		return new;
> +
>  	end = start + npg * PAGE_SIZE - 1;
>  	WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
>  		  (unsigned long long)pgprot_val(old),
> @@ -660,17 +665,22 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
>   * Return a pointer to the entry and the level of the mapping.
>   */
>  pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
> -			     unsigned int *level)
> +			     unsigned int *level, bool *nx, bool *rw)
>  {
>  	p4d_t *p4d;
>  	pud_t *pud;
>  	pmd_t *pmd;
>
>  	*level = PG_LEVEL_NONE;
> +	*nx = false;
> +	*rw = true;
>
>  	if (pgd_none(*pgd))
>  		return NULL;
>
> +	*nx |= pgd_flags(*pgd) & _PAGE_NX;
> +	*rw &= pgd_flags(*pgd) & _PAGE_RW;
> +
>  	p4d = p4d_offset(pgd, address);
>  	if (p4d_none(*p4d))
>  		return NULL;
> @@ -679,6 +689,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
>  	if (p4d_leaf(*p4d) || !p4d_present(*p4d))
>  		return (pte_t *)p4d;
>
> +	*nx |= p4d_flags(*p4d) & _PAGE_NX;
> +	*rw &= p4d_flags(*p4d) & _PAGE_RW;
> +
>  	pud = pud_offset(p4d, address);
>  	if (pud_none(*pud))
>  		return NULL;
> @@ -687,6 +700,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
>  	if (pud_leaf(*pud) || !pud_present(*pud))
>  		return (pte_t *)pud;
>
> +	*nx |= pud_flags(*pud) & _PAGE_NX;
> +	*rw &= pud_flags(*pud) & _PAGE_RW;
> +
>  	pmd = pmd_offset(pud, address);
>  	if (pmd_none(*pmd))
>  		return NULL;
> @@ -695,6 +711,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
>  	if (pmd_leaf(*pmd) || !pmd_present(*pmd))
>  		return (pte_t *)pmd;
>
> +	*nx |= pmd_flags(*pmd) & _PAGE_NX;
> +	*rw &= pmd_flags(*pmd) & _PAGE_RW;
> +
>  	*level = PG_LEVEL_4K;

This should be a separate preparatory patch that also introduces the
new method - without changing any behavior.

>  	return pte_offset_kernel(pmd, address);
> @@ -710,18 +729,24 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
>   */
>  pte_t *lookup_address(unsigned long address, unsigned int *level)
>  {
> -	return lookup_address_in_pgd(pgd_offset_k(address), address, level);
> +	bool nx, rw;
> +
> +	return lookup_address_in_pgd(pgd_offset_k(address), address, level,
> +				     &nx, &rw);
>  }
>  EXPORT_SYMBOL_GPL(lookup_address);
>
>  static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
> -				  unsigned int *level)
> +				  unsigned int *level, bool *nx, bool *rw)
>  {
> -	if (cpa->pgd)
> -		return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
> -					     address, level);
> +	pgd_t *pgd;
> +
> +	if (!cpa->pgd)
> +		pgd = pgd_offset_k(address);
> +	else
> +		pgd = cpa->pgd + pgd_index(address);
>
> -	return lookup_address(address, level);
> +	return lookup_address_in_pgd(pgd, address, level, nx, rw);

I think it would be better to split out this change as well into a
separate patch. It changes the flow from lookup_address_in_pgd() +
lookup_address() to only use lookup_address_in_pgd(), which is an
identity transformation that should be better done separately.

>  }
>
>  /*
> @@ -849,12 +874,13 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
>  	pgprot_t old_prot, new_prot, req_prot, chk_prot;
>  	pte_t new_pte, *tmp;
>  	enum pg_level level;
> +	bool nx, rw;
>
>  	/*
>  	 * Check for races, another CPU might have split this page
>  	 * up already:
>  	 */
> -	tmp = _lookup_address_cpa(cpa, address, &level);
> +	tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
>  	if (tmp != kpte)
>  		return 1;
>
> @@ -965,7 +991,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
>  	new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
>  				      psize, CPA_DETECT);
>
> -	new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
> +	new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages,
> +			      nx, rw);
>
>  	/*
>  	 * If there is a conflict, split the large page.
> @@ -1046,6 +1073,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
>  	pte_t *pbase = (pte_t *)page_address(base);
>  	unsigned int i, level;
>  	pgprot_t ref_prot;
> +	bool nx, rw;
>  	pte_t *tmp;
>
>  	spin_lock(&pgd_lock);
> @@ -1053,7 +1081,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
>  	 * Check for races, another CPU might have split this page
>  	 * up for us already:
>  	 */
> -	tmp = _lookup_address_cpa(cpa, address, &level);
> +	tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
>  	if (tmp != kpte) {
>  		spin_unlock(&pgd_lock);
>  		return 1;
> @@ -1594,10 +1622,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
>  	int do_split, err;
>  	unsigned int level;
>  	pte_t *kpte, old_pte;
> +	bool nx, rw;
>
>  	address = __cpa_addr(cpa, cpa->curpage);
>  repeat:
> -	kpte = _lookup_address_cpa(cpa, address, &level);
> +	kpte = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
>  	if (!kpte)
>  		return __cpa_process_fault(cpa, address, primary);
>
> @@ -1619,7 +1648,8 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
>  	new_prot = static_protections(new_prot, address, pfn, 1, 0,
>  				      CPA_PROTECT);
>
> -	new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
> +	new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1,
> +			      nx, rw);
>
>  	new_prot = pgprot_clear_protnone_bits(new_prot);

And then this should be the final patch, which fixes RWX verification
within the CPA code.

Thanks,

	Ingo