[Xen-changelog] Final changes for linux 2.6.13 rebasing and some directory reorgs
# HG changeset patch # User djm@xxxxxxxxxxxxxxx # Node ID 3ca4ca7a9cc234d33c3981852fc37c73fcd72218 # Parent d34925e4144bcdadb020ee2deef766a994bf7b04 Final changes for linux 2.6.13 rebasing and some directory reorgs diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/Makefile --- a/xen/arch/ia64/Makefile Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/Makefile Thu Sep 1 18:46:28 2005 @@ -1,9 +1,6 @@ include $(BASEDIR)/Rules.mk -VPATH = linux linux-xen linux/lib -#VPATH = linux-xen linux/lib - -# libs-y += arch/ia64/lib/lib.a +VPATH = xen vmx linux linux-xen OBJS = xensetup.o setup.o time.o irq.o ia64_ksyms.o process.o smp.o \ xenmisc.o acpi.o hypercall.o \ @@ -15,8 +12,6 @@ irq_ia64.o irq_lsapic.o vhpt.o xenasm.o hyperprivop.o dom_fw.o \ grant_table.o sn_console.o -#OBJS += idiv64.o idiv32.o \ - # TMP holder to contain *.0 moved out of CONFIG_VTI OBJS += vmx_init.o @@ -27,7 +22,7 @@ pal_emul.o vmx_irq_ia64.o endif -# files from xen/arch/ia64/linux/lib (linux/arch/ia64/lib) +# lib files from xen/arch/ia64/linux/ (linux/arch/ia64/lib) OBJS += bitop.o clear_page.o flush.o copy_page_mck.o \ memset.o strlen.o memcpy_mck.o \ __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ @@ -86,9 +81,9 @@ touch $@ # I'm sure a Makefile wizard would know a better way to do this -xen.lds.s: xen.lds.S +xen.lds.s: xen/xen.lds.S $(CC) -E $(CPPFLAGS) -P -DXEN -D__ASSEMBLY__ \ - -o xen.lds.s xen.lds.S + -o xen.lds.s xen/xen.lds.S # variants of divide/modulo # see files in xen/arch/ia64/linux/lib (linux/arch/ia64/lib) @@ -111,7 +106,7 @@ clean: - rm -f *.o *~ core xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s + rm -f *.o *~ core xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp asm-offsets.s map.out rm -f asm-xsi-offsets.s $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h rm -f linux/lib/*.o diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/README.origin --- a/xen/arch/ia64/linux/README.origin Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/README.origin Thu Sep 1 18:46:28 2005 @@ -13,12 +13,13 @@ machvec.c -> linux/arch/ia64/kernel/machvec.c patch.c -> linux/arch/ia64/kernel/patch.c pcdp.h -> drivers/firmware/pcdp.h -lib/bitop.c -> linux/arch/ia64/lib/bitop.c -lib/clear_page.S -> linux/arch/ia64/lib/clear_page.S -lib/copy_page_mck.S -> linux/arch/ia64/lib/copy_page_mck.S -lib/flush.S -> linux/arch/ia64/lib/flush.S -lib/idiv32.S -> linux/arch/ia64/lib/idiv32.S -lib/idiv64.S -> linux/arch/ia64/lib/idiv64.S -lib/memcpy_mck.S -> linux/arch/ia64/lib/memcpy_mck.S -lib/memset.S -> linux/arch/ia64/lib/memset.S -lib/strlen.S -> linux/arch/ia64/lib/strlen.S + +bitop.c -> linux/arch/ia64/lib/bitop.c +clear_page.S -> linux/arch/ia64/lib/clear_page.S +copy_page_mck.S -> linux/arch/ia64/lib/copy_page_mck.S +flush.S -> linux/arch/ia64/lib/flush.S +idiv32.S -> linux/arch/ia64/lib/idiv32.S +idiv64.S -> linux/arch/ia64/lib/idiv64.S +memcpy_mck.S -> linux/arch/ia64/lib/memcpy_mck.S +memset.S -> linux/arch/ia64/lib/memset.S +strlen.S -> linux/arch/ia64/lib/strlen.S diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/config.h --- a/xen/include/asm-ia64/config.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/config.h Thu Sep 1 18:46:28 2005 @@ -203,6 +203,7 @@ #endif // CONFIG_VTI #define __attribute_used__ __attribute__ ((unused)) +#define __nocast // see include/asm-x86/atomic.h (different from standard linux) #define _atomic_set(v,i) (((v).counter) = (i)) @@ -262,9 +263,6 @@ // these declarations got moved at some point, find a better place for them extern int ht_per_core; -// needed for 
include/xen/smp.h -#define __smp_processor_id() 0 - // xen/include/asm/config.h /****************************************************************************** * config.h @@ -297,6 +295,10 @@ #endif /* __ASSEMBLY__ */ #endif /* __XEN_IA64_CONFIG_H__ */ +// needed for include/xen/smp.h +#define __smp_processor_id() 0 + + // FOLLOWING ADDED FOR XEN POST-NGIO and/or LINUX 2.6.7 // following derived from linux/include/linux/compiler-gcc3.h diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/bug.h --- a/xen/include/asm-ia64/linux/asm-generic/bug.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/bug.h Thu Sep 1 18:46:28 2005 @@ -4,17 +4,11 @@ #include <linux/compiler.h> #include <linux/config.h> +#ifdef CONFIG_BUG #ifndef HAVE_ARCH_BUG #define BUG() do { \ printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ panic("BUG!"); \ -} while (0) -#endif - -#ifndef HAVE_ARCH_PAGE_BUG -#define PAGE_BUG(page) do { \ - printk("page BUG for page at %p\n", page); \ - BUG(); \ } while (0) #endif @@ -31,4 +25,18 @@ } while (0) #endif +#else /* !CONFIG_BUG */ +#ifndef HAVE_ARCH_BUG +#define BUG() #endif + +#ifndef HAVE_ARCH_BUG_ON +#define BUG_ON(condition) do { if (condition) ; } while(0) +#endif + +#ifndef HAVE_ARCH_WARN_ON +#define WARN_ON(condition) do { if (condition) ; } while(0) +#endif +#endif + +#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/errno.h --- a/xen/include/asm-ia64/linux/asm-generic/errno.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/errno.h Thu Sep 1 18:46:28 2005 @@ -102,4 +102,8 @@ #define EKEYREVOKED 128 /* Key has been revoked */ #define EKEYREJECTED 129 /* Key was rejected by service */ +/* for robust mutexes */ +#define EOWNERDEAD 130 /* Owner died */ +#define ENOTRECOVERABLE 131 /* State not recoverable */ + #endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/iomap.h --- a/xen/include/asm-ia64/linux/asm-generic/iomap.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/iomap.h Thu Sep 1 18:46:28 2005 @@ -2,6 +2,7 @@ #define __GENERIC_IO_H #include <linux/linkage.h> +#include <asm/byteorder.h> /* * These are the "generic" interfaces for doing new-style @@ -26,11 +27,15 @@ */ extern unsigned int fastcall ioread8(void __iomem *); extern unsigned int fastcall ioread16(void __iomem *); +extern unsigned int fastcall ioread16be(void __iomem *); extern unsigned int fastcall ioread32(void __iomem *); +extern unsigned int fastcall ioread32be(void __iomem *); extern void fastcall iowrite8(u8, void __iomem *); extern void fastcall iowrite16(u16, void __iomem *); +extern void fastcall iowrite16be(u16, void __iomem *); extern void fastcall iowrite32(u32, void __iomem *); +extern void fastcall iowrite32be(u32, void __iomem *); /* * "string" versions of the above. 
Note that they diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/pci.h --- a/xen/include/asm-ia64/linux/asm-generic/pci.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/pci.h Thu Sep 1 18:46:28 2005 @@ -22,6 +22,14 @@ region->end = res->end; } +static inline void +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + res->start = region->start; + res->end = region->end; +} + #define pcibios_scan_all_fns(a, b) 0 #ifndef HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h --- a/xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h Thu Sep 1 18:46:28 2005 @@ -2,6 +2,8 @@ #define _PGTABLE_NOPUD_H #ifndef __ASSEMBLY__ + +#define __PAGETABLE_PUD_FOLDED /* * Having the pud type consist of a pgd gets the size right, and allows @@ -52,5 +54,8 @@ #define pud_free(x) do { } while (0) #define __pud_free_tlb(tlb, x) do { } while (0) +#undef pud_addr_end +#define pud_addr_end(addr, end) (end) + #endif /* __ASSEMBLY__ */ #endif /* _PGTABLE_NOPUD_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/pgtable.h --- a/xen/include/asm-ia64/linux/asm-generic/pgtable.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/pgtable.h Thu Sep 1 18:46:28 2005 @@ -16,7 +16,7 @@ #ifndef __HAVE_ARCH_SET_PTE_ATOMIC #define ptep_establish(__vma, __address, __ptep, __entry) \ do { \ - set_pte(__ptep, __entry); \ + set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \ flush_tlb_page(__vma, __address); \ } while (0) #else /* __HAVE_ARCH_SET_PTE_ATOMIC */ @@ -37,26 +37,30 @@ */ #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ do { \ - set_pte(__ptep, __entry); \ + set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \ flush_tlb_page(__vma, __address); \ } while (0) #endif #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int ptep_test_and_clear_young(pte_t *ptep) -{ - pte_t pte = *ptep; - if (!pte_young(pte)) - return 0; - set_pte(ptep, pte_mkold(pte)); - return 1; -} +#define ptep_test_and_clear_young(__vma, __address, __ptep) \ +({ \ + pte_t __pte = *(__ptep); \ + int r = 1; \ + if (!pte_young(__pte)) \ + r = 0; \ + else \ + set_pte_at((__vma)->vm_mm, (__address), \ + (__ptep), pte_mkold(__pte)); \ + r; \ +}) #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH #define ptep_clear_flush_young(__vma, __address, __ptep) \ ({ \ - int __young = ptep_test_and_clear_young(__ptep); \ + int __young; \ + __young = ptep_test_and_clear_young(__vma, __address, __ptep); \ if (__young) \ flush_tlb_page(__vma, __address); \ __young; \ @@ -64,20 +68,24 @@ #endif #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY -static inline int ptep_test_and_clear_dirty(pte_t *ptep) -{ - pte_t pte = *ptep; - if (!pte_dirty(pte)) - return 0; - set_pte(ptep, pte_mkclean(pte)); - return 1; -} +#define ptep_test_and_clear_dirty(__vma, __address, __ptep) \ +({ \ + pte_t __pte = *__ptep; \ + int r = 1; \ + if (!pte_dirty(__pte)) \ + r = 0; \ + else \ + set_pte_at((__vma)->vm_mm, (__address), (__ptep), \ + pte_mkclean(__pte)); \ + r; \ +}) #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH #define ptep_clear_flush_dirty(__vma, __address, __ptep) \ ({ \ - int __dirty = ptep_test_and_clear_dirty(__ptep); \ + int __dirty; \ + __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep); \ if (__dirty) \ flush_tlb_page(__vma, 
__address); \ __dirty; \ @@ -85,36 +93,29 @@ #endif #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(pte_t *ptep) -{ - pte_t pte = *ptep; - pte_clear(ptep); - return pte; -} +#define ptep_get_and_clear(__mm, __address, __ptep) \ +({ \ + pte_t __pte = *(__ptep); \ + pte_clear((__mm), (__address), (__ptep)); \ + __pte; \ +}) #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH #define ptep_clear_flush(__vma, __address, __ptep) \ ({ \ - pte_t __pte = ptep_get_and_clear(__ptep); \ + pte_t __pte; \ + __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \ flush_tlb_page(__vma, __address); \ __pte; \ }) #endif #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(pte_t *ptep) +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t old_pte = *ptep; - set_pte(ptep, pte_wrprotect(old_pte)); -} -#endif - -#ifndef __HAVE_ARCH_PTEP_MKDIRTY -static inline void ptep_mkdirty(pte_t *ptep) -{ - pte_t old_pte = *ptep; - set_pte(ptep, pte_mkdirty(old_pte)); + set_pte_at(mm, address, ptep, pte_wrprotect(old_pte)); } #endif @@ -124,6 +125,9 @@ #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY #define page_test_and_clear_dirty(page) (0) +#define pte_maybe_dirty(pte) pte_dirty(pte) +#else +#define pte_maybe_dirty(pte) (1) #endif #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG @@ -134,4 +138,77 @@ #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) #endif +#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE +#define lazy_mmu_prot_update(pte) do { } while (0) +#endif + +/* + * When walking page tables, get the address of the next boundary, + * or the end address of the range if that comes earlier. Although no + * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout. + */ + +#define pgd_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) + +#ifndef pud_addr_end +#define pud_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) +#endif + +#ifndef pmd_addr_end +#define pmd_addr_end(addr, end) \ +({ unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \ + (__boundary - 1 < (end) - 1)? __boundary: (end); \ +}) +#endif + +#ifndef __ASSEMBLY__ +/* + * When walking page tables, we usually want to skip any p?d_none entries; + * and any p?d_bad entries - reporting the error before resetting to none. + * Do the tests inline, but report and clear the bad entry in mm/memory.c. 
+ */ +void pgd_clear_bad(pgd_t *); +void pud_clear_bad(pud_t *); +void pmd_clear_bad(pmd_t *); + +static inline int pgd_none_or_clear_bad(pgd_t *pgd) +{ + if (pgd_none(*pgd)) + return 1; + if (unlikely(pgd_bad(*pgd))) { + pgd_clear_bad(pgd); + return 1; + } + return 0; +} + +static inline int pud_none_or_clear_bad(pud_t *pud) +{ + if (pud_none(*pud)) + return 1; + if (unlikely(pud_bad(*pud))) { + pud_clear_bad(pud); + return 1; + } + return 0; +} + +static inline int pmd_none_or_clear_bad(pmd_t *pmd) +{ + if (pmd_none(*pmd)) + return 1; + if (unlikely(pmd_bad(*pmd))) { + pmd_clear_bad(pmd); + return 1; + } + return 0; +} +#endif /* !__ASSEMBLY__ */ + #endif /* _ASM_GENERIC_PGTABLE_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/sections.h --- a/xen/include/asm-ia64/linux/asm-generic/sections.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/sections.h Thu Sep 1 18:46:28 2005 @@ -8,6 +8,9 @@ extern char __bss_start[], __bss_stop[]; extern char __init_begin[], __init_end[]; extern char _sinittext[], _einittext[]; +extern char _sextratext[] __attribute__((weak)); +extern char _eextratext[] __attribute__((weak)); extern char _end[]; +extern char __per_cpu_start[], __per_cpu_end[]; #endif /* _ASM_GENERIC_SECTIONS_H_ */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/topology.h --- a/xen/include/asm-ia64/linux/asm-generic/topology.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/topology.h Thu Sep 1 18:46:28 2005 @@ -41,8 +41,15 @@ #ifndef node_to_first_cpu #define node_to_first_cpu(node) (0) #endif +#ifndef pcibus_to_node +#define pcibus_to_node(node) (-1) +#endif + #ifndef pcibus_to_cpumask -#define pcibus_to_cpumask(bus) (cpu_online_map) +#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \ + CPU_MASK_ALL : \ + node_to_cpumask(pcibus_to_node(bus)) \ + ) #endif #endif /* _ASM_GENERIC_TOPOLOGY_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h --- a/xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h Thu Sep 1 18:46:28 2005 @@ -73,7 +73,7 @@ } #define SECURITY_INIT \ - .security_initcall.init : { \ + .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__security_initcall_start) = .; \ *(.security_initcall.init) \ VMLINUX_SYMBOL(__security_initcall_end) = .; \ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/acpi.h --- a/xen/include/asm-ia64/linux/asm/acpi.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/acpi.h Thu Sep 1 18:46:28 2005 @@ -98,6 +98,15 @@ int acpi_request_vector (u32 int_type); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); +/* + * Record the cpei override flag and current logical cpu. This is + * useful for CPU removal. 
+ */ +extern unsigned int can_cpei_retarget(void); +extern unsigned int is_cpu_cpei_target(unsigned int cpu); +extern void set_cpei_target_cpu(unsigned int cpu); +extern unsigned int get_cpei_target_cpu(void); + #ifdef CONFIG_ACPI_NUMA /* Proximity bitmap length; _PXM is at most 255 (8 bit)*/ #define MAX_PXM_DOMAINS (256) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/bitops.h --- a/xen/include/asm-ia64/linux/asm/bitops.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/bitops.h Thu Sep 1 18:46:28 2005 @@ -314,8 +314,8 @@ #ifdef __KERNEL__ /* - * find_last_zero_bit - find the last zero bit in a 64 bit quantity - * @x: The value to search + * Return bit number of last (most-significant) bit set. Undefined + * for x==0. Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3). */ static inline unsigned long ia64_fls (unsigned long x) @@ -327,10 +327,23 @@ return exp - 0xffff; } +/* + * Find the last (most significant) bit set. Returns 0 for x==0 and + * bits are numbered from 1..32 (e.g., fls(9) == 4). + */ static inline int -fls (int x) -{ - return ia64_fls((unsigned int) x); +fls (int t) +{ + unsigned long x = t & 0xffffffffu; + + if (!x) + return 0; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return ia64_popcnt(x); } /* @@ -353,9 +366,9 @@ return result; } -#define hweight32(x) hweight64 ((x) & 0xfffffffful) -#define hweight16(x) hweight64 ((x) & 0xfffful) -#define hweight8(x) hweight64 ((x) & 0xfful) +#define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful) +#define hweight16(x) (unsigned int) hweight64((x) & 0xfffful) +#define hweight8(x) (unsigned int) hweight64((x) & 0xfful) #endif /* __KERNEL__ */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/break.h --- a/xen/include/asm-ia64/linux/asm/break.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/break.h Thu Sep 1 18:46:28 2005 @@ -12,6 +12,8 @@ * OS-specific debug break numbers: */ #define __IA64_BREAK_KDB 0x80100 +#define __IA64_BREAK_KPROBE 0x80200 +#define __IA64_BREAK_JPROBE 0x80300 /* * OS-specific break numbers: diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/bug.h --- a/xen/include/asm-ia64/linux/asm/bug.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/bug.h Thu Sep 1 18:46:28 2005 @@ -1,6 +1,7 @@ #ifndef _ASM_IA64_BUG_H #define _ASM_IA64_BUG_H +#ifdef CONFIG_BUG #if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) # define ia64_abort() __builtin_trap() #else @@ -8,8 +9,10 @@ #endif #define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) -/* should this BUG should be made generic? */ +/* should this BUG be made generic? 
*/ #define HAVE_ARCH_BUG +#endif + #include <asm-generic/bug.h> #endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/cacheflush.h --- a/xen/include/asm-ia64/linux/asm/cacheflush.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/cacheflush.h Thu Sep 1 18:46:28 2005 @@ -19,7 +19,7 @@ #define flush_cache_all() do { } while (0) #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) -#define flush_cache_page(vma, vmaddr) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) #define flush_icache_page(vma,page) do { } while (0) #define flush_cache_vmap(start, end) do { } while (0) #define flush_cache_vunmap(start, end) do { } while (0) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/param.h --- a/xen/include/asm-ia64/linux/asm/param.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/param.h Thu Sep 1 18:46:28 2005 @@ -27,7 +27,7 @@ */ # define HZ 32 # else -# define HZ 1024 +# define HZ CONFIG_HZ # endif # define USER_HZ HZ # define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/pci.h --- a/xen/include/asm-ia64/linux/asm/pci.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/pci.h Thu Sep 1 18:46:28 2005 @@ -47,7 +47,7 @@ } static inline void -pcibios_penalize_isa_irq (int irq) +pcibios_penalize_isa_irq (int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } @@ -82,6 +82,25 @@ #define sg_dma_len(sg) ((sg)->dma_length) #define sg_dma_address(sg) ((sg)->dma_address) +#ifdef CONFIG_PCI +static inline void pci_dma_burst_advice(struct pci_dev *pdev, + enum pci_dma_burst_strategy *strat, + unsigned long *strategy_parameter) +{ + unsigned long cacheline_size; + u8 byte; + + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); + if (byte == 0) + cacheline_size = 1024; + else + cacheline_size = (int) byte * 4; + + *strat = PCI_DMA_BURST_MULTIPLE; + *strategy_parameter = cacheline_size; +} +#endif + #define HAVE_PCI_MMAP extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); @@ -109,6 +128,7 @@ void *acpi_handle; void *iommu; int segment; + int node; /* nearest node with memory or -1 for global allocation */ unsigned int windows; struct pci_window *window; @@ -121,14 +141,9 @@ extern struct pci_ops pci_root_ops; -static inline int pci_name_bus(char *name, struct pci_bus *bus) +static inline int pci_proc_domain(struct pci_bus *bus) { - if (pci_domain_nr(bus) == 0) { - sprintf(name, "%02x", bus->number); - } else { - sprintf(name, "%04x:%02x", pci_domain_nr(bus), bus->number); - } - return 0; + return (pci_domain_nr(bus) != 0); } static inline void pcibios_add_platform_entries(struct pci_dev *dev) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/percpu.h --- a/xen/include/asm-ia64/linux/asm/percpu.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/percpu.h Thu Sep 1 18:46:28 2005 @@ -50,7 +50,7 @@ #else /* ! 
SMP */ -#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) +#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var #define per_cpu_init() (__phys_per_cpu_start) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/sections.h --- a/xen/include/asm-ia64/linux/asm/sections.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/sections.h Thu Sep 1 18:46:28 2005 @@ -17,6 +17,7 @@ extern char __start_gate_fsyscall_patchlist[], __end_gate_fsyscall_patchlist[]; extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_bubble_down_patchlist[]; extern char __start_unwind[], __end_unwind[]; +extern char __start_ivt_text[], __end_ivt_text[]; #endif /* _ASM_IA64_SECTIONS_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/signal.h --- a/xen/include/asm-ia64/linux/asm/signal.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/signal.h Thu Sep 1 18:46:28 2005 @@ -114,27 +114,11 @@ #define _NSIG_BPW 64 #define _NSIG_WORDS (_NSIG / _NSIG_BPW) -/* - * These values of sa_flags are used only by the kernel as part of the - * irq handling routines. - * - * SA_INTERRUPT is also used by the irq handling routines. - * SA_SHIRQ is for shared interrupt support on PCI and EISA. - */ -#define SA_PROBE SA_ONESHOT -#define SA_SAMPLE_RANDOM SA_RESTART -#define SA_SHIRQ 0x04000000 #define SA_PERCPU_IRQ 0x02000000 #endif /* __KERNEL__ */ -#define SIG_BLOCK 0 /* for blocking signals */ -#define SIG_UNBLOCK 1 /* for unblocking signals */ -#define SIG_SETMASK 2 /* for setting the signal mask */ - -#define SIG_DFL ((__sighandler_t)0) /* default signal handling */ -#define SIG_IGN ((__sighandler_t)1) /* ignore signal */ -#define SIG_ERR ((__sighandler_t)-1) /* error return from signal */ +#include <asm-generic/signal.h> # ifndef __ASSEMBLY__ @@ -142,9 +126,6 @@ /* Avoid too many header ordering problems. */ struct siginfo; - -/* Type of a signal handler. */ -typedef void __user (*__sighandler_t)(int); typedef struct sigaltstack { void __user *ss_sp; diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/smp.h --- a/xen/include/asm-ia64/linux/asm/smp.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/smp.h Thu Sep 1 18:46:28 2005 @@ -3,16 +3,14 @@ * * Copyright (C) 1999 VA Linux Systems * Copyright (C) 1999 Walt Drummond <drummond@xxxxxxxxxxx> - * Copyright (C) 2001-2003 Hewlett-Packard Co + * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P. 
* David Mosberger-Tang <davidm@xxxxxxxxxx> + * Bjorn Helgaas <bjorn.helgaas@xxxxxx> */ #ifndef _ASM_IA64_SMP_H #define _ASM_IA64_SMP_H #include <linux/config.h> - -#ifdef CONFIG_SMP - #include <linux/init.h> #include <linux/threads.h> #include <linux/kernel.h> @@ -24,12 +22,31 @@ #include <asm/processor.h> #include <asm/ptrace.h> +static inline unsigned int +ia64_get_lid (void) +{ + union { + struct { + unsigned long reserved : 16; + unsigned long eid : 8; + unsigned long id : 8; + unsigned long ignored : 32; + } f; + unsigned long bits; + } lid; + + lid.bits = ia64_getreg(_IA64_REG_CR_LID); + return lid.f.id << 8 | lid.f.eid; +} + +#ifdef CONFIG_SMP + #define XTP_OFFSET 0x1e0008 #define SMP_IRQ_REDIRECTION (1 << 0) #define SMP_IPI_REDIRECTION (1 << 1) -#define smp_processor_id() (current_thread_info()->cpu) +#define raw_smp_processor_id() (current_thread_info()->cpu) extern struct smp_boot_data { int cpu_count; @@ -39,6 +56,10 @@ extern char no_int_routing __devinitdata; extern cpumask_t cpu_online_map; +extern cpumask_t cpu_core_map[NR_CPUS]; +extern cpumask_t cpu_sibling_map[NR_CPUS]; +extern int smp_num_siblings; +extern int smp_num_cpucores; extern void __iomem *ipi_base_addr; extern unsigned char smp_int_redirect; @@ -90,22 +111,7 @@ writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */ } -static inline unsigned int -hard_smp_processor_id (void) -{ - union { - struct { - unsigned long reserved : 16; - unsigned long eid : 8; - unsigned long id : 8; - unsigned long ignored : 32; - } f; - unsigned long bits; - } lid; - - lid.bits = ia64_getreg(_IA64_REG_CR_LID); - return lid.f.id << 8 | lid.f.eid; -} +#define hard_smp_processor_id() ia64_get_lid() /* Upping and downing of CPUs */ extern int __cpu_disable (void); @@ -122,10 +128,12 @@ extern void smp_send_reschedule (int cpu); extern void lock_ipi_calllock(void); extern void unlock_ipi_calllock(void); +extern void identify_siblings (struct cpuinfo_ia64 *); #else -#define cpu_logical_id(cpuid) 0 +#define cpu_logical_id(i) 0 +#define cpu_physical_id(i) ia64_get_lid() #endif /* CONFIG_SMP */ #endif /* _ASM_IA64_SMP_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/thread_info.h --- a/xen/include/asm-ia64/linux/asm/thread_info.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/thread_info.h Thu Sep 1 18:46:28 2005 @@ -25,7 +25,7 @@ __u32 flags; /* thread_info flags (see TIF_*) */ __u32 cpu; /* current CPU */ mm_segment_t addr_limit; /* user-level address space limit */ - __s32 preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ + int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ struct restart_block restart_block; struct { int signo; diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/topology.h --- a/xen/include/asm-ia64/linux/asm/topology.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/topology.h Thu Sep 1 18:46:28 2005 @@ -40,27 +40,61 @@ */ #define node_to_first_cpu(node) (__ffs(node_to_cpumask(node))) +/* + * Determines the node for a given pci bus + */ +#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node + void build_cpu_to_node_map(void); + +#define SD_CPU_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_hot_time = (10*1000000), \ + .per_cpu_gain = 100, \ + .cache_nice_tries = 2, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ + .wake_idx = 1, \ + 
.forkexec_idx = 1, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_AFFINE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} /* sched_domains SD_NODE_INIT for IA64 NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ .span = CPU_MASK_NONE, \ .parent = NULL, \ .groups = NULL, \ - .min_interval = 80, \ - .max_interval = 320, \ - .busy_factor = 320, \ + .min_interval = 8, \ + .max_interval = 8*(min(num_online_cpus(), 32)), \ + .busy_factor = 64, \ .imbalance_pct = 125, \ .cache_hot_time = (10*1000000), \ - .cache_nice_tries = 1, \ + .cache_nice_tries = 2, \ + .busy_idx = 3, \ + .idle_idx = 2, \ + .newidle_idx = 0, /* unused */ \ + .wake_idx = 1, \ + .forkexec_idx = 1, \ .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ - | SD_BALANCE_NEWIDLE \ - | SD_WAKE_IDLE \ + | SD_BALANCE_FORK \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ - .balance_interval = 1, \ + .balance_interval = 64, \ .nr_balance_failed = 0, \ } @@ -69,17 +103,21 @@ .span = CPU_MASK_NONE, \ .parent = NULL, \ .groups = NULL, \ - .min_interval = 80, \ - .max_interval = 320, \ - .busy_factor = 320, \ - .imbalance_pct = 125, \ + .min_interval = 64, \ + .max_interval = 64*num_online_cpus(), \ + .busy_factor = 128, \ + .imbalance_pct = 133, \ .cache_hot_time = (10*1000000), \ .cache_nice_tries = 1, \ + .busy_idx = 3, \ + .idle_idx = 3, \ + .newidle_idx = 0, /* unused */ \ + .wake_idx = 0, /* unused */ \ + .forkexec_idx = 0, /* unused */ \ .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC, \ + .flags = SD_LOAD_BALANCE, \ .last_balance = jiffies, \ - .balance_interval = 100*(63+num_online_cpus())/64, \ + .balance_interval = 64, \ .nr_balance_failed = 0, \ } diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/unaligned.h --- a/xen/include/asm-ia64/linux/asm/unaligned.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/unaligned.h Thu Sep 1 18:46:28 2005 @@ -1,121 +1,6 @@ #ifndef _ASM_IA64_UNALIGNED_H #define _ASM_IA64_UNALIGNED_H -#include <linux/types.h> - -/* - * The main single-value unaligned transfer routines. - * - * Based on <asm-alpha/unaligned.h>. 
- * - * Copyright (C) 1998, 1999, 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@xxxxxxxxxx> - */ -#define get_unaligned(ptr) \ - ((__typeof__(*(ptr)))ia64_get_unaligned((ptr), sizeof(*(ptr)))) - -#define put_unaligned(x,ptr) \ - ia64_put_unaligned((unsigned long)(x), (ptr), sizeof(*(ptr))) - -struct __una_u64 { __u64 x __attribute__((packed)); }; -struct __una_u32 { __u32 x __attribute__((packed)); }; -struct __una_u16 { __u16 x __attribute__((packed)); }; - -static inline unsigned long -__uld8 (const unsigned long * addr) -{ - const struct __una_u64 *ptr = (const struct __una_u64 *) addr; - return ptr->x; -} - -static inline unsigned long -__uld4 (const unsigned int * addr) -{ - const struct __una_u32 *ptr = (const struct __una_u32 *) addr; - return ptr->x; -} - -static inline unsigned long -__uld2 (const unsigned short * addr) -{ - const struct __una_u16 *ptr = (const struct __una_u16 *) addr; - return ptr->x; -} - -static inline void -__ust8 (unsigned long val, unsigned long * addr) -{ - struct __una_u64 *ptr = (struct __una_u64 *) addr; - ptr->x = val; -} - -static inline void -__ust4 (unsigned long val, unsigned int * addr) -{ - struct __una_u32 *ptr = (struct __una_u32 *) addr; - ptr->x = val; -} - -static inline void -__ust2 (unsigned long val, unsigned short * addr) -{ - struct __una_u16 *ptr = (struct __una_u16 *) addr; - ptr->x = val; -} - - -/* - * This function doesn't actually exist. The idea is that when someone uses the macros - * below with an unsupported size (datatype), the linker will alert us to the problem via - * an unresolved reference error. - */ -extern unsigned long ia64_bad_unaligned_access_length (void); - -#define ia64_get_unaligned(_ptr,size) \ -({ \ - const void *__ia64_ptr = (_ptr); \ - unsigned long __ia64_val; \ - \ - switch (size) { \ - case 1: \ - __ia64_val = *(const unsigned char *) __ia64_ptr; \ - break; \ - case 2: \ - __ia64_val = __uld2((const unsigned short *)__ia64_ptr); \ - break; \ - case 4: \ - __ia64_val = __uld4((const unsigned int *)__ia64_ptr); \ - break; \ - case 8: \ - __ia64_val = __uld8((const unsigned long *)__ia64_ptr); \ - break; \ - default: \ - __ia64_val = ia64_bad_unaligned_access_length(); \ - } \ - __ia64_val; \ -}) - -#define ia64_put_unaligned(_val,_ptr,size) \ -do { \ - const void *__ia64_ptr = (_ptr); \ - unsigned long __ia64_val = (_val); \ - \ - switch (size) { \ - case 1: \ - *(unsigned char *)__ia64_ptr = (__ia64_val); \ - break; \ - case 2: \ - __ust2(__ia64_val, (unsigned short *)__ia64_ptr); \ - break; \ - case 4: \ - __ust4(__ia64_val, (unsigned int *)__ia64_ptr); \ - break; \ - case 8: \ - __ust8(__ia64_val, (unsigned long *)__ia64_ptr); \ - break; \ - default: \ - ia64_bad_unaligned_access_length(); \ - } \ -} while (0) +#include <asm-generic/unaligned.h> #endif /* _ASM_IA64_UNALIGNED_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/unistd.h --- a/xen/include/asm-ia64/linux/asm/unistd.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/unistd.h Thu Sep 1 18:46:28 2005 @@ -263,6 +263,12 @@ #define __NR_add_key 1271 #define __NR_request_key 1272 #define __NR_keyctl 1273 +#define __NR_ioprio_set 1274 +#define __NR_ioprio_get 1275 +#define __NR_set_zone_reclaim 1276 +#define __NR_inotify_init 1277 +#define __NR_inotify_add_watch 1278 +#define __NR_inotify_rm_watch 1279 #ifdef __KERNEL__ @@ -392,7 +398,7 @@ * proper prototype, but we can't use __typeof__ either, because not all cond_syscall() * declarations have prototypes at the moment. 
*/ -#define cond_syscall(x) asmlinkage long x (void) __attribute__((weak,alias("sys_ni_syscall"))); +#define cond_syscall(x) asmlinkage long x (void) __attribute__((weak,alias("sys_ni_syscall"))) #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/bitmap.h --- a/xen/include/asm-ia64/linux/bitmap.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/bitmap.h Thu Sep 1 18:46:28 2005 @@ -41,7 +41,9 @@ * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf - * bitmap_parse(ubuf, ulen, dst, nbits) Parse bitmap dst from buf + * bitmap_parse(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf + * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf + * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list */ /* @@ -98,6 +100,10 @@ const unsigned long *src, int nbits); extern int bitmap_parse(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); +extern int bitmap_scnlistprintf(char *buf, unsigned int len, + const unsigned long *src, int nbits); +extern int bitmap_parselist(const char *buf, unsigned long *maskp, + int nmaskbits); extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/bitops.h --- a/xen/include/asm-ia64/linux/bitops.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/bitops.h Thu Sep 1 18:46:28 2005 @@ -134,4 +134,26 @@ return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w); } +/* + * rol32 - rotate a 32-bit value left + * + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> (32 - shift)); +} + +/* + * ror32 - rotate a 32-bit value right + * + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u32 ror32(__u32 word, unsigned int shift) +{ + return (word >> shift) | (word << (32 - shift)); +} + #endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/dma-mapping.h --- a/xen/include/asm-ia64/linux/dma-mapping.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/dma-mapping.h Thu Sep 1 18:46:28 2005 @@ -14,7 +14,12 @@ }; #define DMA_64BIT_MASK 0xffffffffffffffffULL +#define DMA_40BIT_MASK 0x000000ffffffffffULL +#define DMA_39BIT_MASK 0x0000007fffffffffULL #define DMA_32BIT_MASK 0x00000000ffffffffULL +#define DMA_31BIT_MASK 0x000000007fffffffULL +#define DMA_30BIT_MASK 0x000000003fffffffULL +#define DMA_29BIT_MASK 0x000000001fffffffULL #include <asm/dma-mapping.h> diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/efi.h --- a/xen/include/asm-ia64/linux/efi.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/efi.h Thu Sep 1 18:46:28 2005 @@ -301,7 +301,6 @@ extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource); -extern efi_status_t phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc); extern unsigned long __init efi_get_time(void); extern int __init efi_set_rtc_mmss(unsigned long nowtime); extern struct efi_memory_map memmap; @@ -316,7 +315,7 @@ */ static inline int efi_range_is_wc(unsigned long start, unsigned long len) { - 
int i; + unsigned long i; for (i = 0; i < len; i += (1UL << EFI_PAGE_SHIFT)) { unsigned long paddr = __pa(start + i); diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/err.h --- a/xen/include/asm-ia64/linux/err.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/err.h Thu Sep 1 18:46:28 2005 @@ -13,6 +13,8 @@ * This should be a per-architecture thing, to allow different * error and pointer decisions. */ +#define IS_ERR_VALUE(x) unlikely((x) > (unsigned long)-1000L) + static inline void *ERR_PTR(long error) { return (void *) error; @@ -25,7 +27,7 @@ static inline long IS_ERR(const void *ptr) { - return unlikely((unsigned long)ptr > (unsigned long)-1000L); + return IS_ERR_VALUE((unsigned long)ptr); } #endif /* _LINUX_ERR_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/gfp.h --- a/xen/include/asm-ia64/linux/gfp.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/gfp.h Thu Sep 1 18:46:28 2005 @@ -12,8 +12,8 @@ * GFP bitmasks.. */ /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */ -#define __GFP_DMA 0x01 -#define __GFP_HIGHMEM 0x02 +#define __GFP_DMA 0x01u +#define __GFP_HIGHMEM 0x02u /* * Action modifiers - doesn't change the zoning @@ -26,26 +26,29 @@ * * __GFP_NORETRY: The VM implementation must not retry indefinitely. */ -#define __GFP_WAIT 0x10 /* Can wait and reschedule? */ -#define __GFP_HIGH 0x20 /* Should access emergency pools? */ -#define __GFP_IO 0x40 /* Can start physical IO? */ -#define __GFP_FS 0x80 /* Can call down to low-level FS? */ -#define __GFP_COLD 0x100 /* Cache-cold page required */ -#define __GFP_NOWARN 0x200 /* Suppress page allocation failure warning */ -#define __GFP_REPEAT 0x400 /* Retry the allocation. Might fail */ -#define __GFP_NOFAIL 0x800 /* Retry for ever. Cannot fail */ -#define __GFP_NORETRY 0x1000 /* Do not retry. Might fail */ -#define __GFP_NO_GROW 0x2000 /* Slab internal usage */ -#define __GFP_COMP 0x4000 /* Add compound page metadata */ -#define __GFP_ZERO 0x8000 /* Return zeroed page on success */ +#define __GFP_WAIT 0x10u /* Can wait and reschedule? */ +#define __GFP_HIGH 0x20u /* Should access emergency pools? */ +#define __GFP_IO 0x40u /* Can start physical IO? */ +#define __GFP_FS 0x80u /* Can call down to low-level FS? */ +#define __GFP_COLD 0x100u /* Cache-cold page required */ +#define __GFP_NOWARN 0x200u /* Suppress page allocation failure warning */ +#define __GFP_REPEAT 0x400u /* Retry the allocation. Might fail */ +#define __GFP_NOFAIL 0x800u /* Retry for ever. Cannot fail */ +#define __GFP_NORETRY 0x1000u /* Do not retry. 
Might fail */ +#define __GFP_NO_GROW 0x2000u /* Slab internal usage */ +#define __GFP_COMP 0x4000u /* Add compound page metadata */ +#define __GFP_ZERO 0x8000u /* Return zeroed page on success */ +#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ +#define __GFP_NORECLAIM 0x20000u /* No realy zone reclaim during allocation */ -#define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) /* if you forget to add the bitmask here kernel will crash, period */ #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ - __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP) + __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ + __GFP_NOMEMALLOC|__GFP_NORECLAIM) #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) @@ -82,7 +85,7 @@ extern struct page * FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *)); -static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask, +static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) @@ -93,17 +96,17 @@ } #ifdef CONFIG_NUMA -extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order); +extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order); static inline struct page * -alloc_pages(unsigned int gfp_mask, unsigned int order) +alloc_pages(unsigned int __nocast gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) return NULL; return alloc_pages_current(gfp_mask, order); } -extern struct page *alloc_page_vma(unsigned gfp_mask, +extern struct page *alloc_page_vma(unsigned __nocast gfp_mask, struct vm_area_struct *vma, unsigned long addr); #else #define alloc_pages(gfp_mask, order) \ @@ -112,8 +115,8 @@ #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order)); -extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask)); +extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order)); +extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask)); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask),0) @@ -130,5 +133,10 @@ #define free_page(addr) free_pages((addr),0) void page_alloc_init(void); +#ifdef CONFIG_NUMA +void drain_remote_pages(void); +#else +static inline void drain_remote_pages(void) { }; +#endif #endif /* __LINUX_GFP_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/mmzone.h --- a/xen/include/asm-ia64/linux/mmzone.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/mmzone.h Thu Sep 1 18:46:28 2005 @@ -11,6 +11,7 @@ #include <linux/cache.h> #include <linux/threads.h> #include <linux/numa.h> +#include <linux/init.h> #include <asm/atomic.h> /* Free memory management - zoned buddy allocator. 
*/ @@ -61,6 +62,12 @@ unsigned long other_node; /* allocation from other node */ #endif } ____cacheline_aligned_in_smp; + +#ifdef CONFIG_NUMA +#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)]) +#else +#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) +#endif #define ZONE_DMA 0 #define ZONE_NORMAL 1 @@ -121,8 +128,11 @@ */ unsigned long lowmem_reserve[MAX_NR_ZONES]; +#ifdef CONFIG_NUMA + struct per_cpu_pageset *pageset[NR_CPUS]; +#else struct per_cpu_pageset pageset[NR_CPUS]; - +#endif /* * free areas of different sizes */ @@ -144,6 +154,14 @@ int all_unreclaimable; /* All pages pinned */ /* + * Does the allocator try to reclaim pages from the zone as soon + * as it fails a watermark_ok() in __alloc_pages? + */ + int reclaim_pages; + /* A count of how many reclaimers are scanning this zone */ + atomic_t reclaim_in_progress; + + /* * prev_priority holds the scanning priority for this zone. It is * defined as the scanning priority at which we achieved our reclaim * target at the previous try_to_free_pages() or balance_pgdat() @@ -251,7 +269,9 @@ struct zone node_zones[MAX_NR_ZONES]; struct zonelist node_zonelists[GFP_ZONETYPES]; int nr_zones; +#ifdef CONFIG_FLAT_NODE_MEM_MAP struct page *node_mem_map; +#endif struct bootmem_data *bdata; unsigned long node_start_pfn; unsigned long node_present_pages; /* total number of physical pages */ @@ -266,6 +286,12 @@ #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) +#ifdef CONFIG_FLAT_NODE_MEM_MAP +#define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr)) +#else +#define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr)) +#endif +#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) extern struct pglist_data *pgdat_list; @@ -278,6 +304,16 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, int alloc_type, int can_try_harder, int gfp_high); +#ifdef CONFIG_HAVE_MEMORY_PRESENT +void memory_present(int nid, unsigned long start, unsigned long end); +#else +static inline void memory_present(int nid, unsigned long start, unsigned long end) {} +#endif + +#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE +unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); +#endif + /* * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. */ @@ -370,9 +406,9 @@ #include <linux/topology.h> /* Returns the number of the current Node. */ -#define numa_node_id() (cpu_to_node(_smp_processor_id())) - -#ifndef CONFIG_DISCONTIGMEM +#define numa_node_id() (cpu_to_node(raw_smp_processor_id())) + +#ifndef CONFIG_NEED_MULTIPLE_NODES extern struct pglist_data contig_page_data; #define NODE_DATA(nid) (&contig_page_data) @@ -380,35 +416,176 @@ #define MAX_NODES_SHIFT 1 #define pfn_to_nid(pfn) (0) -#else /* CONFIG_DISCONTIGMEM */ +#else /* CONFIG_NEED_MULTIPLE_NODES */ #include <asm/mmzone.h> + +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ + +#ifdef CONFIG_SPARSEMEM +#include <asm/sparsemem.h> +#endif #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) /* * with 32 bit page->flags field, we reserve 8 bits for node/zone info. * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes. */ -#define MAX_NODES_SHIFT 6 +#define FLAGS_RESERVED 8 + #elif BITS_PER_LONG == 64 /* * with 64 bit flags field, there's plenty of room. 
*/ -#define MAX_NODES_SHIFT 10 -#endif - -#endif /* !CONFIG_DISCONTIGMEM */ - -#if NODES_SHIFT > MAX_NODES_SHIFT -#error NODES_SHIFT > MAX_NODES_SHIFT -#endif - -/* There are currently 3 zones: DMA, Normal & Highmem, thus we need 2 bits */ -#define MAX_ZONES_SHIFT 2 - -#if ZONES_SHIFT > MAX_ZONES_SHIFT -#error ZONES_SHIFT > MAX_ZONES_SHIFT -#endif +#define FLAGS_RESERVED 32 + +#else + +#error BITS_PER_LONG not defined + +#endif + +#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +#define early_pfn_to_nid(nid) (0UL) +#endif + +#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) +#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) + +#ifdef CONFIG_SPARSEMEM + +/* + * SECTION_SHIFT #bits space required to store a section # + * + * PA_SECTION_SHIFT physical address to/from section number + * PFN_SECTION_SHIFT pfn to/from section number + */ +#define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) + +#define PA_SECTION_SHIFT (SECTION_SIZE_BITS) +#define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) + +#define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) + +#define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) +#define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) + +#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS +#error Allocator MAX_ORDER exceeds SECTION_SIZE +#endif + +struct page; +struct mem_section { + /* + * This is, logically, a pointer to an array of struct + * pages. However, it is stored with some other magic. + * (see sparse.c::sparse_init_one_section()) + * + * Making it a UL at least makes someone do a cast + * before using it wrong. + */ + unsigned long section_mem_map; +}; + +extern struct mem_section mem_section[NR_MEM_SECTIONS]; + +static inline struct mem_section *__nr_to_section(unsigned long nr) +{ + return &mem_section[nr]; +} + +/* + * We use the lower bits of the mem_map pointer to store + * a little bit of information. There should be at least + * 3 bits here due to 32-bit alignment. + */ +#define SECTION_MARKED_PRESENT (1UL<<0) +#define SECTION_HAS_MEM_MAP (1UL<<1) +#define SECTION_MAP_LAST_BIT (1UL<<2) +#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) + +static inline struct page *__section_mem_map_addr(struct mem_section *section) +{ + unsigned long map = section->section_mem_map; + map &= SECTION_MAP_MASK; + return (struct page *)map; +} + +static inline int valid_section(struct mem_section *section) +{ + return (section->section_mem_map & SECTION_MARKED_PRESENT); +} + +static inline int section_has_mem_map(struct mem_section *section) +{ + return (section->section_mem_map & SECTION_HAS_MEM_MAP); +} + +static inline int valid_section_nr(unsigned long nr) +{ + return valid_section(__nr_to_section(nr)); +} + +/* + * Given a kernel address, find the home node of the underlying memory. + */ +#define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) + +static inline struct mem_section *__pfn_to_section(unsigned long pfn) +{ + return __nr_to_section(pfn_to_section_nr(pfn)); +} + +#define pfn_to_page(pfn) \ +({ \ + unsigned long __pfn = (pfn); \ + __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn; \ +}) +#define page_to_pfn(page) \ +({ \ + page - __section_mem_map_addr(__nr_to_section( \ + page_to_section(page))); \ +}) + +static inline int pfn_valid(unsigned long pfn) +{ + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) + return 0; + return valid_section(__nr_to_section(pfn_to_section_nr(pfn))); +} + +/* + * These are _only_ used during initialisation, therefore they + * can use __initdata ... 
They could have names to indicate + * this restriction. + */ +#ifdef CONFIG_NUMA +#define pfn_to_nid early_pfn_to_nid +#endif + +#define pfn_to_pgdat(pfn) \ +({ \ + NODE_DATA(pfn_to_nid(pfn)); \ +}) + +#define early_pfn_valid(pfn) pfn_valid(pfn) +void sparse_init(void); +#else +#define sparse_init() do {} while (0) +#endif /* CONFIG_SPARSEMEM */ + +#ifdef CONFIG_NODES_SPAN_OTHER_NODES +#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid)) +#else +#define early_pfn_in_nid(pfn, nid) (1) +#endif + +#ifndef early_pfn_valid +#define early_pfn_valid(pfn) (1) +#endif + +void memory_present(int nid, unsigned long start, unsigned long end); +unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/numa.h --- a/xen/include/asm-ia64/linux/numa.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/numa.h Thu Sep 1 18:46:28 2005 @@ -3,7 +3,7 @@ #include <linux/config.h> -#ifdef CONFIG_DISCONTIGMEM +#ifndef CONFIG_FLATMEM #include <asm/numnodes.h> #endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/page-flags.h --- a/xen/include/asm-ia64/linux/page-flags.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/page-flags.h Thu Sep 1 18:46:28 2005 @@ -61,21 +61,20 @@ #define PG_active 6 #define PG_slab 7 /* slab debug (Suparna wants this) */ -#define PG_highmem 8 -#define PG_checked 9 /* kill me in 2.5.<early>. */ -#define PG_arch_1 10 -#define PG_reserved 11 - -#define PG_private 12 /* Has something at ->private */ -#define PG_writeback 13 /* Page is under writeback */ -#define PG_nosave 14 /* Used for system suspend/resume */ -#define PG_compound 15 /* Part of a compound page */ - -#define PG_swapcache 16 /* Swap page: swp_entry_t in private */ -#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ -#define PG_reclaim 18 /* To be reclaimed asap */ -#define PG_nosave_free 19 /* Free, should not be written */ - +#define PG_checked 8 /* kill me in 2.5.<early>. */ +#define PG_arch_1 9 +#define PG_reserved 10 +#define PG_private 11 /* Has something at ->private */ + +#define PG_writeback 12 /* Page is under writeback */ +#define PG_nosave 13 /* Used for system suspend/resume */ +#define PG_compound 14 /* Part of a compound page */ +#define PG_swapcache 15 /* Swap page: swp_entry_t in private */ + +#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ +#define PG_reclaim 17 /* To be reclaimed asap */ +#define PG_nosave_free 18 /* Free, should not be written */ +#define PG_uncached 19 /* Page has been mapped as uncached */ /* * Global page accounting. One instance per CPU. 
Only unsigned longs are @@ -131,12 +130,13 @@ unsigned long allocstall; /* direct reclaim calls */ unsigned long pgrotated; /* pages rotated to tail of the LRU */ + unsigned long nr_bounce; /* pages for bounce buffers */ }; extern void get_page_state(struct page_state *ret); extern void get_full_page_state(struct page_state *ret); -extern unsigned long __read_page_state(unsigned offset); -extern void __mod_page_state(unsigned offset, unsigned long delta); +extern unsigned long __read_page_state(unsigned long offset); +extern void __mod_page_state(unsigned long offset, unsigned long delta); #define read_page_state(member) \ __read_page_state(offsetof(struct page_state, member)) @@ -214,7 +214,7 @@ #define TestSetPageSlab(page) test_and_set_bit(PG_slab, &(page)->flags) #ifdef CONFIG_HIGHMEM -#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags) +#define PageHighMem(page) is_highmem(page_zone(page)) #else #define PageHighMem(page) 0 /* needed to optimize away at compile time */ #endif @@ -301,10 +301,13 @@ #define PageSwapCache(page) 0 #endif +#define PageUncached(page) test_bit(PG_uncached, &(page)->flags) +#define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags) +#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags) + struct page; /* forward declaration */ int test_clear_page_dirty(struct page *page); -int __clear_page_dirty(struct page *page); int test_clear_page_writeback(struct page *page); int test_set_page_writeback(struct page *page); diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/slab.h --- a/xen/include/asm-ia64/linux/slab.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/slab.h Thu Sep 1 18:46:28 2005 @@ -1,3 +1,137 @@ -#include <xen/xmalloc.h> -#include <linux/gfp.h> -#include <asm/delay.h> +/* + * linux/mm/slab.h + * Written by Mark Hemment, 1996. + * (markhe@xxxxxxxxxxxxxxxxx) + */ + +#ifndef _LINUX_SLAB_H +#define _LINUX_SLAB_H + +#if defined(__KERNEL__) + +typedef struct kmem_cache_s kmem_cache_t; + +#include <linux/config.h> /* kmalloc_sizes.h needs CONFIG_ options */ +#include <linux/gfp.h> +#include <linux/init.h> +#include <linux/types.h> +#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ +#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ + +/* flags for kmem_cache_alloc() */ +#define SLAB_NOFS GFP_NOFS +#define SLAB_NOIO GFP_NOIO +#define SLAB_ATOMIC GFP_ATOMIC +#define SLAB_USER GFP_USER +#define SLAB_KERNEL GFP_KERNEL +#define SLAB_DMA GFP_DMA + +#define SLAB_LEVEL_MASK GFP_LEVEL_MASK + +#define SLAB_NO_GROW __GFP_NO_GROW /* don't grow a cache */ + +/* flags to pass to kmem_cache_create(). + * The first 3 are only valid when the allocator as been build + * SLAB_DEBUG_SUPPORT. 
+ */ +#define SLAB_DEBUG_FREE 0x00000100UL /* Peform (expensive) checks on free */ +#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */ +#define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */ +#define SLAB_POISON 0x00000800UL /* Poison objects */ +#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */ +#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ +#define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ +#define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ +#define SLAB_STORE_USER 0x00010000UL /* store the last owner for bug hunting */ +#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* track pages allocated to indicate + what is reclaimable later*/ +#define SLAB_PANIC 0x00040000UL /* panic if kmem_cache_create() fails */ +#define SLAB_DESTROY_BY_RCU 0x00080000UL /* defer freeing pages to RCU */ + +/* flags passed to a constructor func */ +#define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ +#define SLAB_CTOR_ATOMIC 0x002UL /* tell constructor it can't sleep */ +#define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */ + +/* prototypes */ +extern void __init kmem_cache_init(void); + +extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long, + void (*)(void *, kmem_cache_t *, unsigned long), + void (*)(void *, kmem_cache_t *, unsigned long)); +extern int kmem_cache_destroy(kmem_cache_t *); +extern int kmem_cache_shrink(kmem_cache_t *); +extern void *kmem_cache_alloc(kmem_cache_t *, unsigned int __nocast); +extern void kmem_cache_free(kmem_cache_t *, void *); +extern unsigned int kmem_cache_size(kmem_cache_t *); +extern const char *kmem_cache_name(kmem_cache_t *); +extern kmem_cache_t *kmem_find_general_cachep(size_t size, unsigned int __nocast gfpflags); + +/* Size description struct for general caches. */ +struct cache_sizes { + size_t cs_size; + kmem_cache_t *cs_cachep; + kmem_cache_t *cs_dmacachep; +}; +extern struct cache_sizes malloc_sizes[]; +extern void *__kmalloc(size_t, unsigned int __nocast); + +static inline void *kmalloc(size_t size, unsigned int __nocast flags) +{ + if (__builtin_constant_p(size)) { + int i = 0; +#define CACHE(x) \ + if (size <= x) \ + goto found; \ + else \ + i++; +#include "kmalloc_sizes.h" +#undef CACHE + { + extern void __you_cannot_kmalloc_that_much(void); + __you_cannot_kmalloc_that_much(); + } +found: + return kmem_cache_alloc((flags & GFP_DMA) ? 
+ malloc_sizes[i].cs_dmacachep : + malloc_sizes[i].cs_cachep, flags); + } + return __kmalloc(size, flags); +} + +extern void *kcalloc(size_t, size_t, unsigned int __nocast); +extern void kfree(const void *); +extern unsigned int ksize(const void *); + +#ifdef CONFIG_NUMA +extern void *kmem_cache_alloc_node(kmem_cache_t *, int flags, int node); +extern void *kmalloc_node(size_t size, unsigned int __nocast flags, int node); +#else +static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, int flags, int node) +{ + return kmem_cache_alloc(cachep, flags); +} +static inline void *kmalloc_node(size_t size, unsigned int __nocast flags, int node) +{ + return kmalloc(size, flags); +} +#endif + +extern int FASTCALL(kmem_cache_reap(int)); +extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); + +/* System wide caches */ +extern kmem_cache_t *vm_area_cachep; +extern kmem_cache_t *names_cachep; +extern kmem_cache_t *files_cachep; +extern kmem_cache_t *filp_cachep; +extern kmem_cache_t *fs_cachep; +extern kmem_cache_t *signal_cachep; +extern kmem_cache_t *sighand_cachep; +extern kmem_cache_t *bio_cachep; + +extern atomic_t slab_reclaim_pages; + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_SLAB_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/threads.h --- a/xen/include/asm-ia64/linux/threads.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/threads.h Thu Sep 1 18:46:28 2005 @@ -7,7 +7,7 @@ * The default limit for the nr of threads is now in * /proc/sys/kernel/threads-max. */ - + /* * Maximum supported processors that can run under SMP. This value is * set via configure setting. The maximum is equal to the size of the @@ -25,11 +25,12 @@ /* * This controls the default maximum pid allocated to a process */ -#define PID_MAX_DEFAULT 0x8000 +#define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000) /* * A maximum of 4 million PIDs should be enough for a while: */ -#define PID_MAX_LIMIT (sizeof(long) > 4 ? 4*1024*1024 : PID_MAX_DEFAULT) +#define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \ + (sizeof(long) > 4 ? 
4 * 1024 * 1024 : PID_MAX_DEFAULT)) #endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/timex.h --- a/xen/include/asm-ia64/linux/timex.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/timex.h Thu Sep 1 18:46:28 2005 @@ -240,9 +240,7 @@ extern long time_maxerror; /* maximum error */ extern long time_esterror; /* estimated error */ -extern long time_phase; /* phase offset (scaled us) */ extern long time_freq; /* frequency offset (scaled ppm) */ -extern long time_adj; /* tick adjust (scaled 1 / HZ) */ extern long time_reftime; /* time at last adjustment (s) */ extern long time_adjust; /* The amount of adjtime left */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/topology.h --- a/xen/include/asm-ia64/linux/topology.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/topology.h Thu Sep 1 18:46:28 2005 @@ -31,8 +31,11 @@ #include <linux/bitops.h> #include <linux/mmzone.h> #include <linux/smp.h> +#include <asm/topology.h> -#include <asm/topology.h> +#ifndef node_has_online_mem +#define node_has_online_mem(nid) (1) +#endif #ifndef nr_cpus_node #define nr_cpus_node(node) \ @@ -86,6 +89,11 @@ .cache_hot_time = 0, \ .cache_nice_tries = 0, \ .per_cpu_gain = 25, \ + .busy_idx = 0, \ + .idle_idx = 0, \ + .newidle_idx = 1, \ + .wake_idx = 0, \ + .forkexec_idx = 0, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ @@ -112,12 +120,15 @@ .cache_hot_time = (5*1000000/2), \ .cache_nice_tries = 1, \ .per_cpu_gain = 100, \ + .busy_idx = 2, \ + .idle_idx = 1, \ + .newidle_idx = 2, \ + .wake_idx = 1, \ + .forkexec_idx = 1, \ .flags = SD_LOAD_BALANCE \ | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ - | SD_WAKE_AFFINE \ - | SD_WAKE_IDLE \ - | SD_WAKE_BALANCE, \ + | SD_WAKE_AFFINE, \ .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/wait.h --- a/xen/include/asm-ia64/linux/wait.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/wait.h Thu Sep 1 18:46:28 2005 @@ -33,7 +33,7 @@ struct __wait_queue { unsigned int flags; #define WQ_FLAG_EXCLUSIVE 0x01 - struct task_struct * task; + void *private; wait_queue_func_t func; struct list_head task_list; }; @@ -60,7 +60,7 @@ */ #define __WAITQUEUE_INITIALIZER(name, tsk) { \ - .task = tsk, \ + .private = tsk, \ .func = default_wake_function, \ .task_list = { NULL, NULL } } @@ -79,14 +79,14 @@ static inline void init_waitqueue_head(wait_queue_head_t *q) { - q->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&q->lock); INIT_LIST_HEAD(&q->task_list); } static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) { q->flags = 0; - q->task = p; + q->private = p; q->func = default_wake_function; } @@ -94,7 +94,7 @@ wait_queue_func_t func) { q->flags = 0; - q->task = NULL; + q->private = NULL; q->func = func; } @@ -110,7 +110,7 @@ * aio specifies a wait queue entry with an async notification * callback routine, not associated with any task. 
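
As an illustrative aside, not part of this changeset: the slab.h hunk above replaces the old three-include stub with the full Linux slab interface -- kmem_cache_create()/kmem_cache_alloc()/kmem_cache_free(), kmalloc()/kfree(), and the inline kmalloc() that resolves a constant size to one of the malloc_sizes[] general caches at compile time via the CACHE() expansion. A minimal, hedged sketch of how a caller uses that interface; the cache name, object type and error handling below are invented for the example:

        #include <linux/slab.h>
        #include <linux/errno.h>

        struct my_obj { int id; char name[32]; };      /* hypothetical object type */
        static kmem_cache_t *my_cachep;

        static int my_cache_init(void)
        {
                /* arguments: name, object size, alignment, flags, ctor, dtor */
                my_cachep = kmem_cache_create("my_obj", sizeof(struct my_obj),
                                              0, SLAB_HWCACHE_ALIGN, NULL, NULL);
                return my_cachep ? 0 : -ENOMEM;
        }

        static int my_cache_use(void)
        {
                struct my_obj *o = kmem_cache_alloc(my_cachep, GFP_KERNEL);
                char *scratch = kmalloc(512, GFP_KERNEL);  /* constant size: resolved to a
                                                              malloc_sizes[] cache at
                                                              compile time */
                if (!o || !scratch) {
                        kfree(scratch);                    /* kfree(NULL) is a no-op */
                        if (o)
                                kmem_cache_free(my_cachep, o);
                        return -ENOMEM;
                }
                /* ... use o and scratch ... */
                kfree(scratch);
                kmem_cache_free(my_cachep, o);
                return 0;
        }
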
*/ -#define is_sync_wait(wait) (!(wait) || ((wait)->task)) +#define is_sync_wait(wait) (!(wait) || ((wait)->private)) extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); @@ -169,6 +169,18 @@ finish_wait(&wq, &__wait); \ } while (0) +/** + * wait_event - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + */ #define wait_event(wq, condition) \ do { \ if (condition) \ @@ -191,6 +203,22 @@ finish_wait(&wq, &__wait); \ } while (0) +/** + * wait_event_timeout - sleep until a condition gets true or a timeout elapses + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function returns 0 if the @timeout elapsed, and the remaining + * jiffies if the condition evaluated to true before the timeout elapsed. + */ #define wait_event_timeout(wq, condition, timeout) \ ({ \ long __ret = timeout; \ @@ -217,6 +245,21 @@ finish_wait(&wq, &__wait); \ } while (0) +/** + * wait_event_interruptible - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function will return -ERESTARTSYS if it was interrupted by a + * signal and 0 if @condition evaluated to true. + */ #define wait_event_interruptible(wq, condition) \ ({ \ int __ret = 0; \ @@ -245,6 +288,23 @@ finish_wait(&wq, &__wait); \ } while (0) +/** + * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it + * was interrupted by a signal, and the remaining jiffies otherwise + * if the condition evaluated to true before the timeout elapsed. 
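
The kernel-doc blocks added above all state the same contract: the condition is re-evaluated each time the waitqueue is woken, and wake_up() must be called after any variable in the condition changes. A minimal, hedged sketch of that pairing (illustrative only, not part of the changeset; the queue, flag and function names are invented, and DECLARE_WAIT_QUEUE_HEAD comes from the same header outside this hunk):

        #include <linux/wait.h>
        #include <linux/sched.h>

        static DECLARE_WAIT_QUEUE_HEAD(my_wq);   /* hypothetical waitqueue */
        static int my_data_ready;                /* condition variable     */

        static int my_consumer(void)
        {
                /* Sleeps in TASK_INTERRUPTIBLE until my_data_ready != 0 or a
                 * signal arrives; returns -ERESTARTSYS on a signal, 0 otherwise,
                 * exactly as the wait_event_interruptible() comment describes. */
                return wait_event_interruptible(my_wq, my_data_ready != 0);
        }

        static void my_producer(void)
        {
                my_data_ready = 1;   /* change the condition first ...            */
                wake_up(&my_wq);     /* ... then wake the sleepers, as documented */
        }
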
+ */ #define wait_event_interruptible_timeout(wq, condition, timeout) \ ({ \ long __ret = timeout; \ @@ -324,18 +384,16 @@ #define DEFINE_WAIT(name) \ wait_queue_t name = { \ - .task = current, \ + .private = current, \ .func = autoremove_wake_function, \ - .task_list = { .next = &(name).task_list, \ - .prev = &(name).task_list, \ - }, \ + .task_list = LIST_HEAD_INIT((name).task_list), \ } #define DEFINE_WAIT_BIT(name, word, bit) \ struct wait_bit_queue name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ .wait = { \ - .task = current, \ + .private = current, \ .func = wake_bit_function, \ .task_list = \ LIST_HEAD_INIT((name).wait.task_list), \ @@ -344,7 +402,7 @@ #define init_wait(wait) \ do { \ - (wait)->task = current; \ + (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ INIT_LIST_HEAD(&(wait)->task_list); \ } while (0) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/mm.h Thu Sep 1 18:46:28 2005 @@ -316,6 +316,7 @@ #define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT) #endif +#if 0 /* removed when rebasing to 2.6.13 */ /* * The zone field is never updated after free_area_init_core() * sets it, so none of the operations on it need to be atomic. @@ -347,6 +348,7 @@ page->flags &= ~(~0UL << NODEZONE_SHIFT); page->flags |= nodezone_num << NODEZONE_SHIFT; } +#endif #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/time.h --- a/xen/include/asm-ia64/time.h Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/time.h Thu Sep 1 18:46:28 2005 @@ -1,1 +1,1 @@ -#include <xen/linuxtime.h> +#include <asm/linux/time.h> diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/bitop.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/bitop.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,88 @@ +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/intrinsics.h> +#include <linux/module.h> +#include <linux/bitops.h> + +/* + * Find next zero bit in a bitmap reasonably efficiently.. + */ + +int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset) +{ + unsigned long *p = ((unsigned long *) addr) + (offset >> 6); + unsigned long result = offset & ~63UL; + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 63UL; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (64-offset); + if (size < 64) + goto found_first; + if (~tmp) + goto found_middle; + size -= 64; + result += 64; + } + while (size & ~63UL) { + if (~(tmp = *(p++))) + goto found_middle; + result += 64; + size -= 64; + } + if (!size) + return result; + tmp = *p; +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* any bits zero? */ + return result + size; /* nope */ +found_middle: + return result + ffz(tmp); +} +EXPORT_SYMBOL(__find_next_zero_bit); + +/* + * Find next bit in a bitmap reasonably efficiently.. 
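
As a hedged usage note (not part of the changeset): __find_next_zero_bit() above returns the index of the next clear bit at or after offset, or size when none remains, which gives the usual scan idiom shown below; real callers typically go through the find_next_zero_bit()/find_first_zero_bit() wrappers in the bitops headers. Kernel context is assumed and the bitmap contents are invented:

        unsigned long map[2] = { ~0x5UL, ~0UL };   /* bits 0 and 2 of word 0 are clear */
        unsigned long size = 128;
        int bit;

        /* Terminates because the function returns `size` once no clear bit remains. */
        for (bit = __find_next_zero_bit(map, size, 0);
             bit < size;
             bit = __find_next_zero_bit(map, size, bit + 1))
                printk("bit %d is clear\n", bit);
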
+ */ +int __find_next_bit(const void *addr, unsigned long size, unsigned long offset) +{ + unsigned long *p = ((unsigned long *) addr) + (offset >> 6); + unsigned long result = offset & ~63UL; + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 63UL; + if (offset) { + tmp = *(p++); + tmp &= ~0UL << offset; + if (size < 64) + goto found_first; + if (tmp) + goto found_middle; + size -= 64; + result += 64; + } + while (size & ~63UL) { + if ((tmp = *(p++))) + goto found_middle; + result += 64; + size -= 64; + } + if (!size) + return result; + tmp = *p; + found_first: + tmp &= ~0UL >> (64-size); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ + found_middle: + return result + __ffs(tmp); +} +EXPORT_SYMBOL(__find_next_bit); diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/clear_page.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/clear_page.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,77 @@ +/* + * Copyright (C) 1999-2002 Hewlett-Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx> + * + * 1/06/01 davidm Tuned for Itanium. + * 2/12/02 kchen Tuned for both Itanium and McKinley + * 3/08/02 davidm Some more tweaking + */ +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/page.h> + +#ifdef CONFIG_ITANIUM +# define L3_LINE_SIZE 64 // Itanium L3 line size +# define PREFETCH_LINES 9 // magic number +#else +# define L3_LINE_SIZE 128 // McKinley L3 line size +# define PREFETCH_LINES 12 // magic number +#endif + +#define saved_lc r2 +#define dst_fetch r3 +#define dst1 r8 +#define dst2 r9 +#define dst3 r10 +#define dst4 r11 + +#define dst_last r31 + +GLOBAL_ENTRY(clear_page) + .prologue + .regstk 1,0,0,0 + mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until + .save ar.lc, saved_lc + mov saved_lc = ar.lc + + .body + mov ar.lc = (PREFETCH_LINES - 1) + mov dst_fetch = in0 + adds dst1 = 16, in0 + adds dst2 = 32, in0 + ;; +.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE + adds dst3 = 48, in0 // executing this multiple times is harmless + br.cloop.sptk.few .fetch + ;; + addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch + mov ar.lc = r16 // one L3 line per iteration + adds dst4 = 64, in0 + ;; +#ifdef CONFIG_ITANIUM + // Optimized for Itanium +1: stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 + cmp.lt p8,p0=dst_fetch, dst_last + ;; +#else + // Optimized for McKinley +1: stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 + stf.spill.nta [dst3] = f0, 64 + stf.spill.nta [dst4] = f0, 128 + cmp.lt p8,p0=dst_fetch, dst_last + ;; + stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 +#endif + stf.spill.nta [dst3] = f0, 64 +(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE + br.cloop.sptk.few 1b + ;; + mov ar.lc = saved_lc // restore lc + br.ret.sptk.many rp +END(clear_page) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/copy_page_mck.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/copy_page_mck.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,185 @@ +/* + * McKinley-optimized version of copy_page(). 
+ * + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger <davidm@xxxxxxxxxx> + * + * Inputs: + * in0: address of target page + * in1: address of source page + * Output: + * no return value + * + * General idea: + * - use regular loads and stores to prefetch data to avoid consuming M-slot just for + * lfetches => good for in-cache performance + * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single + * cycle + * + * Principle of operation: + * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes. + * To avoid secondary misses in L2, we prefetch both source and destination with a line-size + * of 128 bytes. When both of these lines are in the L2 and the first half of the + * source line is in L1, we start copying the remaining words. The second half of the + * source line is prefetched in an earlier iteration, so that by the time we start + * accessing it, it's also present in the L1. + * + * We use a software-pipelined loop to control the overall operation. The pipeline + * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching + * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination + * cache-lines, the last K stages are used to copy the cache-line words not copied by + * the prefetches. The four relevant points in the pipelined are called A, B, C, D: + * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line + * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought + * into L1D and p[D] is TRUE if a cacheline needs to be copied. + * + * This all sounds very complicated, but thanks to the modulo-scheduled loop support, + * the resulting code is very regular and quite easy to follow (once you get the idea). + * + * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented + * as the separate .prefetch_loop. Logically, this loop performs exactly like the + * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed, + * so that each loop iteration is faster (again, good for cached case). + * + * When reading the code, it helps to keep the following picture in mind: + * + * word 0 word 1 + * +------+------+--- + * | v[x] | t1 | ^ + * | t2 | t3 | | + * | t4 | t5 | | + * | t6 | t7 | | 128 bytes + * | n[y] | t9 | | (L2 cache line) + * | t10 | t11 | | + * | t12 | t13 | | + * | t14 | t15 | v + * +------+------+--- + * + * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C] + * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in + * an order that avoids bank conflicts. + */ +#include <asm/asmmacro.h> +#include <asm/page.h> + +#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st) + +#define src0 r2 +#define src1 r3 +#define dst0 r9 +#define dst1 r10 +#define src_pre_mem r11 +#define dst_pre_mem r14 +#define src_pre_l2 r15 +#define dst_pre_l2 r16 +#define t1 r17 +#define t2 r18 +#define t3 r19 +#define t4 r20 +#define t5 t1 // alias! +#define t6 t2 // alias! +#define t7 t3 // alias! +#define t9 t5 // alias! +#define t10 t4 // alias! +#define t11 t7 // alias! +#define t12 t6 // alias! +#define t14 t10 // alias! 
+#define t13 r21 +#define t15 r22 + +#define saved_lc r23 +#define saved_pr r24 + +#define A 0 +#define B (PREFETCH_DIST) +#define C (B + PREFETCH_DIST) +#define D (C + 3) +#define N (D + 1) +#define Nrot ((N + 7) & ~7) + +GLOBAL_ENTRY(copy_page) + .prologue + alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot + + .rotr v[2*PREFETCH_DIST], n[D-C+1] + .rotp p[N] + + .save ar.lc, saved_lc + mov saved_lc = ar.lc + .save pr, saved_pr + mov saved_pr = pr + .body + + mov src_pre_mem = in1 + mov pr.rot = 0x10000 + mov ar.ec = 1 // special unrolled loop + + mov dst_pre_mem = in0 + mov ar.lc = 2*PREFETCH_DIST - 1 + + add src_pre_l2 = 8*8, in1 + add dst_pre_l2 = 8*8, in0 + add src0 = 8, in1 // first t1 src + add src1 = 3*8, in1 // first t3 src + add dst0 = 8, in0 // first t1 dst + add dst1 = 3*8, in0 // first t3 dst + mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1 + nop.m 0 + nop.i 0 + ;; + // same as .line_copy loop, but with all predicated-off instructions removed: +.prefetch_loop: +(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 +(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 + br.ctop.sptk .prefetch_loop + ;; + cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero) + mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits! + mov ar.ec = N // # of stages in pipeline + ;; +.line_copy: +(p[D]) ld8 t2 = [src0], 3*8 // M0 +(p[D]) ld8 t4 = [src1], 3*8 // M1 +(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory +(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2 + ;; +(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory +(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2 +(p[D]) st8 [dst0] = t1, 8 // M2 +(p[D]) st8 [dst1] = t3, 8 // M3 + ;; +(p[D]) ld8 t5 = [src0], 8 +(p[D]) ld8 t7 = [src1], 3*8 +(p[D]) st8 [dst0] = t2, 3*8 +(p[D]) st8 [dst1] = t4, 3*8 + ;; +(p[D]) ld8 t6 = [src0], 3*8 +(p[D]) ld8 t10 = [src1], 8 +(p[D]) st8 [dst0] = t5, 8 +(p[D]) st8 [dst1] = t7, 3*8 + ;; +(p[D]) ld8 t9 = [src0], 3*8 +(p[D]) ld8 t11 = [src1], 3*8 +(p[D]) st8 [dst0] = t6, 3*8 +(p[D]) st8 [dst1] = t10, 8 + ;; +(p[D]) ld8 t12 = [src0], 8 +(p[D]) ld8 t14 = [src1], 8 +(p[D]) st8 [dst0] = t9, 3*8 +(p[D]) st8 [dst1] = t11, 3*8 + ;; +(p[D]) ld8 t13 = [src0], 4*8 +(p[D]) ld8 t15 = [src1], 4*8 +(p[D]) st8 [dst0] = t12, 8 +(p[D]) st8 [dst1] = t14, 8 + ;; +(p[D-1])ld8 t1 = [src0], 8 +(p[D-1])ld8 t3 = [src1], 8 +(p[D]) st8 [dst0] = t13, 4*8 +(p[D]) st8 [dst1] = t15, 4*8 + br.ctop.sptk .line_copy + ;; + mov ar.lc = saved_lc + mov pr = saved_pr, -1 + br.ret.sptk.many rp +END(copy_page) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/flush.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/flush.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,61 @@ +/* + * Cache flushing routines. + * + * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * + * 05/28/05 Zoltan Menyhart Dynamic stride size + */ + +#include <asm/asmmacro.h> + + + /* + * flush_icache_range(start,end) + * + * Make i-cache(s) coherent with d-caches. + * + * Must deal with range from start to end-1 but nothing else (need to + * be careful not to touch addresses that may be unmapped). + * + * Note: "in0" and "in1" are preserved for debugging purposes. 
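
To make the flush_icache_range(start, end) semantics documented above concrete ("deal with range from start to end-1", one flush per i-cache stride), here is a hedged user-space model of the address arithmetic only, not part of the changeset. The real routine determines the stride dynamically and issues fc.i/sync.i/srlz.i, which plain C cannot express, so the shift value and addresses below are invented:

        #include <stdio.h>

        int main(void)
        {
                unsigned long start = 0x1234, end = 0x1310;   /* flush [start, end) */
                unsigned long shift = 6;                      /* e.g. a 64-byte stride */
                unsigned long stride = 1UL << shift;
                unsigned long last = end - 1;                 /* last byte address */
                unsigned long n = (last >> shift) - (start >> shift) + 1;
                unsigned long addr = (start >> shift) << shift;   /* round down to stride */
                unsigned long i;

                for (i = 0; i < n; i++, addr += stride)
                        printf("fc.i 0x%lx\n", addr);   /* one flush per i-cache stride */
                /* ... followed by sync.i and srlz.i in the real routine */
                return 0;
        }
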
+ */ +GLOBAL_ENTRY(flush_icache_range) + + .prologue + alloc r2=ar.pfs,2,0,0,0 + movl r3=ia64_i_cache_stride_shift + mov r21=1 + ;; + ld8 r20=[r3] // r20: stride shift + sub r22=in1,r0,1 // last byte address + ;; + shr.u r23=in0,r20 // start / (stride size) + shr.u r22=r22,r20 // (last byte address) / (stride size) + shl r21=r21,r20 // r21: stride size of the i-cache(s) + ;; + sub r8=r22,r23 // number of strides - 1 + shl r24=r23,r20 // r24: addresses for "fc.i" = + // "start" rounded down to stride boundary + .save ar.lc,r3 + mov r3=ar.lc // save ar.lc + ;; + + .body + mov ar.lc=r8 + ;; + /* + * 32 byte aligned loop, even number of (actually 2) bundles + */ +.Loop: fc.i r24 // issuable on M0 only + add r24=r21,r24 // we flush "stride size" bytes per iteration + nop.i 0 + br.cloop.sptk.few .Loop + ;; + sync.i + ;; + srlz.i + ;; + mov ar.lc=r3 // restore ar.lc + br.ret.sptk.many rp +END(flush_icache_range) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/idiv32.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/idiv32.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang <davidm@xxxxxxxxxx> + * + * 32-bit integer division. + * + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture". This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ + * + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.hp.com/go/retailbooks/) + */ + +#include <asm/asmmacro.h> + +#ifdef MODULO +# define OP mod +#else +# define OP div +#endif + +#ifdef UNSIGNED +# define SGN u +# define EXTEND zxt4 +# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b +#else +# define SGN +# define EXTEND sxt4 +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3)) + +GLOBAL_ENTRY(NAME) + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias) + EXTEND in0 = in0 // in0 = a + EXTEND in1 = in1 // in1 = b + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. 
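
The idiv32.S file above (and idiv64.S after it) computes a/b from frcpa's rough reciprocal estimate plus a few fma refinement steps, as annotated in the instruction comments. A hedged user-space model of that dataflow, not part of the changeset: the truncated initial estimate stands in for frcpa, and the 2^-34 guard term and exact fma ordering of the real code are omitted, so this illustrates the idea rather than proving exactness:

        #include <stdio.h>
        #include <math.h>

        int main(void)
        {
                double a = 1000003.0, b = 17.0;
                /* ~8-bit reciprocal estimate, standing in for frcpa */
                double y0 = ldexp(floor(ldexp(1.0 / b, 8)), -8);
                double q0 = a * y0;              /* q0 = a*y0           */
                double e0 = 1.0 - b * y0;        /* e0 = -b*y0 + 1      */
                double q1 = e0 * q0 + q0;        /* q1 = e0*q0 + q0     */
                double e1 = e0 * e0;             /* e1 = e0*e0 (real code adds 2^-34) */
                double q2 = e1 * q1 + q1;        /* q2 = e1*q1 + q1     */
                long   q  = (long)q2;            /* q  = trunc(q2)      */
                long   r  = (long)a - q * (long)b;   /* remainder, as in the MODULO variant */

                printf("q=%ld r=%ld (check: %ld, %ld)\n",
                       q, r, (long)(a / b), (long)a % (long)b);
                return 0;
        }
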
+ INT_TO_FP(f8, f8) + INT_TO_FP(f9, f9) + ;; + setf.exp f7 = r2 // f7 = 2^-34 + frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b) + ;; +(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1 + ;; +#ifdef MODULO + setf.sig f9 = in1 // f9 = -b +#endif +(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0 +(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34 + ;; +#ifdef MODULO + setf.sig f7 = in0 +#endif +(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1 + ;; + FP_TO_INT(f6, f6) // q = trunc(q2) + ;; +#ifdef MODULO + xma.l f6 = f6, f9, f7 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f6 // transfer result to result register + br.ret.sptk.many rp +END(NAME) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/idiv64.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/idiv64.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,80 @@ +/* + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@xxxxxxxxxx> + * + * 64-bit integer division. + * + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture". This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ + * + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.hp.com/go/retailbooks/) + */ + +#include <asm/asmmacro.h> + +#ifdef MODULO +# define OP mod +#else +# define OP div +#endif + +#ifdef UNSIGNED +# define SGN u +# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b +#else +# define SGN +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3)) + +GLOBAL_ENTRY(NAME) + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + INT_TO_FP(f8, f8) + INT_TO_FP(f9, f9) + ;; + frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b) + ;; +(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1 + ;; +(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0 +(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0 + ;; +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif +(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1 +(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0 + ;; +(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1 +(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a + ;; +#ifdef MODULO + setf.sig f8 = in0 // f8 = a + setf.sig f9 = in1 // f9 = -b +#endif +(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2 + ;; + FP_TO_INT(f11, f11) // q = trunc(q3) + ;; +#ifdef MODULO + xma.l f11 = f11, f9, f8 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f11 // transfer result to result register + br.ret.sptk.many rp +END(NAME) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/memcpy_mck.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/memcpy_mck.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,661 @@ +/* + * Itanium 2-optimized version of memcpy and copy_user function + * + * Inputs: + * in0: destination address + * in1: source address + * in2: number of bytes to copy + * Output: + * 0 if success, or number of byte NOT copied if error occurred. + * + * Copyright (C) 2002 Intel Corp. 
+ * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx> + */ +#include <linux/config.h> +#include <asm/asmmacro.h> +#include <asm/page.h> + +#define EK(y...) EX(y) + +/* McKinley specific optimization */ + +#define retval r8 +#define saved_pfs r31 +#define saved_lc r10 +#define saved_pr r11 +#define saved_in0 r14 +#define saved_in1 r15 +#define saved_in2 r16 + +#define src0 r2 +#define src1 r3 +#define dst0 r17 +#define dst1 r18 +#define cnt r9 + +/* r19-r30 are temp for each code section */ +#define PREFETCH_DIST 8 +#define src_pre_mem r19 +#define dst_pre_mem r20 +#define src_pre_l2 r21 +#define dst_pre_l2 r22 +#define t1 r23 +#define t2 r24 +#define t3 r25 +#define t4 r26 +#define t5 t1 // alias! +#define t6 t2 // alias! +#define t7 t3 // alias! +#define n8 r27 +#define t9 t5 // alias! +#define t10 t4 // alias! +#define t11 t7 // alias! +#define t12 t6 // alias! +#define t14 t10 // alias! +#define t13 r28 +#define t15 r29 +#define tmp r30 + +/* defines for long_copy block */ +#define A 0 +#define B (PREFETCH_DIST) +#define C (B + PREFETCH_DIST) +#define D (C + 1) +#define N (D + 1) +#define Nrot ((N + 7) & ~7) + +/* alias */ +#define in0 r32 +#define in1 r33 +#define in2 r34 + +GLOBAL_ENTRY(memcpy) + and r28=0x7,in0 + and r29=0x7,in1 + mov f6=f0 + br.cond.sptk .common_code + ;; +END(memcpy) +GLOBAL_ENTRY(__copy_user) + .prologue +// check dest alignment + and r28=0x7,in0 + and r29=0x7,in1 + mov f6=f1 + mov saved_in0=in0 // save dest pointer + mov saved_in1=in1 // save src pointer + mov saved_in2=in2 // save len + ;; +.common_code: + cmp.gt p15,p0=8,in2 // check for small size + cmp.ne p13,p0=0,r28 // check dest alignment + cmp.ne p14,p0=0,r29 // check src alignment + add src0=0,in1 + sub r30=8,r28 // for .align_dest + mov retval=r0 // initialize return value + ;; + add dst0=0,in0 + add dst1=1,in0 // dest odd index + cmp.le p6,p0 = 1,r30 // for .align_dest +(p15) br.cond.dpnt .memcpy_short +(p13) br.cond.dpnt .align_dest +(p14) br.cond.dpnt .unaligned_src + ;; + +// both dest and src are aligned on 8-byte boundary +.aligned_src: + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot + .save pr, saved_pr + mov saved_pr=pr + + shr.u cnt=in2,7 // this much cache line + ;; + cmp.lt p6,p0=2*PREFETCH_DIST,cnt + cmp.lt p7,p8=1,cnt + .save ar.lc, saved_lc + mov saved_lc=ar.lc + .body + add cnt=-1,cnt + add src_pre_mem=0,in1 // prefetch src pointer + add dst_pre_mem=0,in0 // prefetch dest pointer + ;; +(p7) mov ar.lc=cnt // prefetch count +(p8) mov ar.lc=r0 +(p6) br.cond.dpnt .long_copy + ;; + +.prefetch: + lfetch.fault [src_pre_mem], 128 + lfetch.fault.excl [dst_pre_mem], 128 + br.cloop.dptk.few .prefetch + ;; + +.medium_copy: + and tmp=31,in2 // copy length after iteration + shr.u r29=in2,5 // number of 32-byte iteration + add dst1=8,dst0 // 2nd dest pointer + ;; + add cnt=-1,r29 // ctop iteration adjustment + cmp.eq p10,p0=r29,r0 // do we really need to loop? 
+ add src1=8,src0 // 2nd src pointer + cmp.le p6,p0=8,tmp + ;; + cmp.le p7,p0=16,tmp + mov ar.lc=cnt // loop setup + cmp.eq p16,p17 = r0,r0 + mov ar.ec=2 +(p10) br.dpnt.few .aligned_src_tail + ;; + TEXT_ALIGN(32) +1: +EX(.ex_handler, (p16) ld8 r34=[src0],16) +EK(.ex_handler, (p16) ld8 r38=[src1],16) +EX(.ex_handler, (p17) st8 [dst0]=r33,16) +EK(.ex_handler, (p17) st8 [dst1]=r37,16) + ;; +EX(.ex_handler, (p16) ld8 r32=[src0],16) +EK(.ex_handler, (p16) ld8 r36=[src1],16) +EX(.ex_handler, (p16) st8 [dst0]=r34,16) +EK(.ex_handler, (p16) st8 [dst1]=r38,16) + br.ctop.dptk.few 1b + ;; + +.aligned_src_tail: +EX(.ex_handler, (p6) ld8 t1=[src0]) + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs +EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8) + cmp.le p8,p0=24,tmp + and r21=-8,tmp + ;; +EX(.ex_hndlr_s, (p8) ld8 t3=[src1]) +EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1 + and in2=7,tmp // remaining length +EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2 + add src0=src0,r21 // setting up src pointer + add dst0=dst0,r21 // setting up dest pointer + ;; +EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3 + mov pr=saved_pr,-1 + br.dptk.many .memcpy_short + ;; + +/* code taken from copy_page_mck */ +.long_copy: + .rotr v[2*PREFETCH_DIST] + .rotp p[N] + + mov src_pre_mem = src0 + mov pr.rot = 0x10000 + mov ar.ec = 1 // special unrolled loop + + mov dst_pre_mem = dst0 + + add src_pre_l2 = 8*8, src0 + add dst_pre_l2 = 8*8, dst0 + ;; + add src0 = 8, src_pre_mem // first t1 src + mov ar.lc = 2*PREFETCH_DIST - 1 + shr.u cnt=in2,7 // number of lines + add src1 = 3*8, src_pre_mem // first t3 src + add dst0 = 8, dst_pre_mem // first t1 dst + add dst1 = 3*8, dst_pre_mem // first t3 dst + ;; + and tmp=127,in2 // remaining bytes after this block + add cnt = -(2*PREFETCH_DIST) - 1, cnt + // same as .line_copy loop, but with all predicated-off instructions removed: +.prefetch_loop: +EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 +EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 + br.ctop.sptk .prefetch_loop + ;; + cmp.eq p16, p0 = r0, r0 // reset p16 to 1 + mov ar.lc = cnt + mov ar.ec = N // # of stages in pipeline + ;; +.line_copy: +EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0 +EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1 +EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory +EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2 + ;; +EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory +EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2 +EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2 +EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3 + ;; +EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8) +EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8) +EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8) +EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8) +EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8) + ;; +EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8) +EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8) +EK(.ex_handler, (p[D]) 
ld8 t15 = [src1], 4*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8) +EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8) + ;; +EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8) +EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) + br.ctop.sptk .line_copy + ;; + + add dst0=-8,dst0 + add src0=-8,src0 + mov in2=tmp + .restore sp + br.sptk.many .medium_copy + ;; + +#define BLOCK_SIZE 128*32 +#define blocksize r23 +#define curlen r24 + +// dest is on 8-byte boundary, src is not. We need to do +// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle. +.unaligned_src: + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,5,0,8 + .save ar.lc, saved_lc + mov saved_lc=ar.lc + .save pr, saved_pr + mov saved_pr=pr + .body +.4k_block: + mov saved_in0=dst0 // need to save all input arguments + mov saved_in2=in2 + mov blocksize=BLOCK_SIZE + ;; + cmp.lt p6,p7=blocksize,in2 + mov saved_in1=src0 + ;; +(p6) mov in2=blocksize + ;; + shr.u r21=in2,7 // this much cache line + shr.u r22=in2,4 // number of 16-byte iteration + and curlen=15,in2 // copy length after iteration + and r30=7,src0 // source alignment + ;; + cmp.lt p7,p8=1,r21 + add cnt=-1,r21 + ;; + + add src_pre_mem=0,src0 // prefetch src pointer + add dst_pre_mem=0,dst0 // prefetch dest pointer + and src0=-8,src0 // 1st src pointer +(p7) mov ar.lc = cnt +(p8) mov ar.lc = r0 + ;; + TEXT_ALIGN(32) +1: lfetch.fault [src_pre_mem], 128 + lfetch.fault.excl [dst_pre_mem], 128 + br.cloop.dptk.few 1b + ;; + + shladd dst1=r22,3,dst0 // 2nd dest pointer + shladd src1=r22,3,src0 // 2nd src pointer + cmp.eq p8,p9=r22,r0 // do we really need to loop? + cmp.le p6,p7=8,curlen; // have at least 8 byte remaining? + add cnt=-1,r22 // ctop iteration adjustment + ;; +EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer +EK(.ex_handler, (p9) ld8 r37=[src1],8) +(p8) br.dpnt.few .noloop + ;; + +// The jump address is calculated based on src alignment. The COPYU +// macro below need to confine its size to power of two, so an entry +// can be caulated using shl instead of an expensive multiply. The +// size is then hard coded by the following #define to match the +// actual size. This make it somewhat tedious when COPYU macro gets +// changed and this need to be adjusted to match. +#define LOOP_SIZE 6 +1: + mov r29=ip // jmp_table thread + mov ar.lc=cnt + ;; + add r29=.jump_table - 1b - (.jmp1-.jump_table), r29 + shl r28=r30, LOOP_SIZE // jmp_table thread + mov ar.ec=2 // loop setup + ;; + add r29=r29,r28 // jmp_table thread + cmp.eq p16,p17=r0,r0 + ;; + mov b6=r29 // jmp_table thread + ;; + br.cond.sptk.few b6 + +// for 8-15 byte case +// We will skip the loop, but need to replicate the side effect +// that the loop produces. +.noloop: +EX(.ex_handler, (p6) ld8 r37=[src1],8) + add src0=8,src0 +(p6) shl r25=r30,3 + ;; +EX(.ex_handler, (p6) ld8 r27=[src1]) +(p6) shr.u r28=r37,r25 +(p6) sub r26=64,r25 + ;; +(p6) shl r27=r27,r26 + ;; +(p6) or r21=r28,r27 + +.unaligned_src_tail: +/* check if we have more than blocksize to copy, if so go back */ + cmp.gt p8,p0=saved_in2,blocksize + ;; +(p8) add dst0=saved_in0,blocksize +(p8) add src0=saved_in1,blocksize +(p8) sub in2=saved_in2,blocksize +(p8) br.dpnt .4k_block + ;; + +/* we have up to 15 byte to copy in the tail. + * part of work is already done in the jump table code + * we are at the following state. 
+ * src side: + * + * xxxxxx xx <----- r21 has xxxxxxxx already + * -------- -------- -------- + * 0 8 16 + * ^ + * | + * src1 + * + * dst + * -------- -------- -------- + * ^ + * | + * dst1 + */ +EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy +(p6) add curlen=-8,curlen // update length + mov ar.pfs=saved_pfs + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov in2=curlen // remaining length + mov dst0=dst1 // dest pointer + add src0=src1,r30 // forward by src alignment + ;; + +// 7 byte or smaller. +.memcpy_short: + cmp.le p8,p9 = 1,in2 + cmp.le p10,p11 = 2,in2 + cmp.le p12,p13 = 3,in2 + cmp.le p14,p15 = 4,in2 + add src1=1,src0 // second src pointer + add dst1=1,dst0 // second dest pointer + ;; + +EX(.ex_handler_short, (p8) ld1 t1=[src0],2) +EK(.ex_handler_short, (p10) ld1 t2=[src1],2) +(p9) br.ret.dpnt rp // 0 byte copy + ;; + +EX(.ex_handler_short, (p8) st1 [dst0]=t1,2) +EK(.ex_handler_short, (p10) st1 [dst1]=t2,2) +(p11) br.ret.dpnt rp // 1 byte copy + +EX(.ex_handler_short, (p12) ld1 t3=[src0],2) +EK(.ex_handler_short, (p14) ld1 t4=[src1],2) +(p13) br.ret.dpnt rp // 2 byte copy + ;; + + cmp.le p6,p7 = 5,in2 + cmp.le p8,p9 = 6,in2 + cmp.le p10,p11 = 7,in2 + +EX(.ex_handler_short, (p12) st1 [dst0]=t3,2) +EK(.ex_handler_short, (p14) st1 [dst1]=t4,2) +(p15) br.ret.dpnt rp // 3 byte copy + ;; + +EX(.ex_handler_short, (p6) ld1 t5=[src0],2) +EK(.ex_handler_short, (p8) ld1 t6=[src1],2) +(p7) br.ret.dpnt rp // 4 byte copy + ;; + +EX(.ex_handler_short, (p6) st1 [dst0]=t5,2) +EK(.ex_handler_short, (p8) st1 [dst1]=t6,2) +(p9) br.ret.dptk rp // 5 byte copy + +EX(.ex_handler_short, (p10) ld1 t7=[src0],2) +(p11) br.ret.dptk rp // 6 byte copy + ;; + +EX(.ex_handler_short, (p10) st1 [dst0]=t7,2) + br.ret.dptk rp // done all cases + + +/* Align dest to nearest 8-byte boundary. We know we have at + * least 7 bytes to copy, enough to crawl to 8-byte boundary. + * Actual number of byte to crawl depend on the dest alignment. 
+ * 7 byte or less is taken care at .memcpy_short + + * src0 - source even index + * src1 - source odd index + * dst0 - dest even index + * dst1 - dest odd index + * r30 - distance to 8-byte boundary + */ + +.align_dest: + add src1=1,in1 // source odd index + cmp.le p7,p0 = 2,r30 // for .align_dest + cmp.le p8,p0 = 3,r30 // for .align_dest +EX(.ex_handler_short, (p6) ld1 t1=[src0],2) + cmp.le p9,p0 = 4,r30 // for .align_dest + cmp.le p10,p0 = 5,r30 + ;; +EX(.ex_handler_short, (p7) ld1 t2=[src1],2) +EK(.ex_handler_short, (p8) ld1 t3=[src0],2) + cmp.le p11,p0 = 6,r30 +EX(.ex_handler_short, (p6) st1 [dst0] = t1,2) + cmp.le p12,p0 = 7,r30 + ;; +EX(.ex_handler_short, (p9) ld1 t4=[src1],2) +EK(.ex_handler_short, (p10) ld1 t5=[src0],2) +EX(.ex_handler_short, (p7) st1 [dst1] = t2,2) +EK(.ex_handler_short, (p8) st1 [dst0] = t3,2) + ;; +EX(.ex_handler_short, (p11) ld1 t6=[src1],2) +EK(.ex_handler_short, (p12) ld1 t7=[src0],2) + cmp.eq p6,p7=r28,r29 +EX(.ex_handler_short, (p9) st1 [dst1] = t4,2) +EK(.ex_handler_short, (p10) st1 [dst0] = t5,2) + sub in2=in2,r30 + ;; +EX(.ex_handler_short, (p11) st1 [dst1] = t6,2) +EK(.ex_handler_short, (p12) st1 [dst0] = t7) + add dst0=in0,r30 // setup arguments + add src0=in1,r30 +(p6) br.cond.dptk .aligned_src +(p7) br.cond.dpnt .unaligned_src + ;; + +/* main loop body in jump table format */ +#define COPYU(shift) \ +1: \ +EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \ +EK(.ex_handler, (p16) ld8 r36=[src1],8); \ + (p17) shrp r35=r33,r34,shift;; /* 1 */ \ +EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \ + nop.m 0; \ + (p16) shrp r38=r36,r37,shift; \ +EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \ +EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ + br.ctop.dptk.few 1b;; \ + (p7) add src1=-8,src1; /* back out for <8 byte case */ \ + shrp r21=r22,r38,shift; /* speculative work */ \ + br.sptk.few .unaligned_src_tail /* branch out of jump table */ \ + ;; + TEXT_ALIGN(32) +.jump_table: + COPYU(8) // unaligned cases +.jmp1: + COPYU(16) + COPYU(24) + COPYU(32) + COPYU(40) + COPYU(48) + COPYU(56) + +#undef A +#undef B +#undef C +#undef D + +/* + * Due to lack of local tag support in gcc 2.x assembler, it is not clear which + * instruction failed in the bundle. The exception algorithm is that we + * first figure out the faulting address, then detect if there is any + * progress made on the copy, if so, redo the copy from last known copied + * location up to the faulting address (exclusive). In the copy_from_user + * case, remaining byte in kernel buffer will be zeroed. + * + * Take copy_from_user as an example, in the code there are multiple loads + * in a bundle and those multiple loads could span over two pages, the + * faulting address is calculated as page_round_down(max(src0, src1)). + * This is based on knowledge that if we can access one byte in a page, we + * can access any byte in that page. 
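
To make the recovery arithmetic described above concrete, here is a hedged user-space model for the copy_from_user direction, not part of the changeset: the faulting address is the larger of the two source pointers rounded down to a page boundary, everything below it can be re-copied, and the tail of the kernel buffer is zeroed. The page size, addresses and names are invented; they correspond to the A/B/C/D bookkeeping spelled out in the exception-handler comment further down in this file:

        #include <stdio.h>

        #define EXAMPLE_PAGE_SHIFT 14UL                               /* 16KB pages, say */
        #define page_round_down(a) ((a) & ~((1UL << EXAMPLE_PAGE_SHIFT) - 1))

        int main(void)
        {
                unsigned long orig_src = 0x10000, orig_dst = 0x80000, orig_len = 0x9000;
                unsigned long src0 = 0x13fc0, src1 = 0x14008;   /* src1 ran into a bad page */
                unsigned long cur_dst = 0x83f00;                /* last known store pointer */

                unsigned long max_src = src0 > src1 ? src0 : src1;
                unsigned long fault   = page_round_down(max_src);   /* first unreadable byte */

                unsigned long up_to_fault    = fault - orig_src;        /* "A"             */
                unsigned long already_stored = cur_dst - orig_dst;      /* "B"             */
                unsigned long redo           = up_to_fault - already_stored;   /* "C"      */
                unsigned long zero_tail      = orig_len - up_to_fault;         /* "D"      */

                printf("redo %lu bytes, zero %lu bytes, report %lu bytes uncopied\n",
                       redo, zero_tail, zero_tail);
                return 0;
        }
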
+ * + * predicate used in the exception handler: + * p6-p7: direction + * p10-p11: src faulting addr calculation + * p12-p13: dst faulting addr calculation + */ + +#define A r19 +#define B r20 +#define C r21 +#define D r22 +#define F r28 + +#define memset_arg0 r32 +#define memset_arg2 r33 + +#define saved_retval loc0 +#define saved_rtlink loc1 +#define saved_pfs_stack loc2 + +.ex_hndlr_s: + add src0=8,src0 + br.sptk .ex_handler + ;; +.ex_hndlr_d: + add dst0=8,dst0 + br.sptk .ex_handler + ;; +.ex_hndlr_lcpy_1: + mov src1=src_pre_mem + mov dst1=dst_pre_mem + cmp.gtu p10,p11=src_pre_mem,saved_in1 + cmp.gtu p12,p13=dst_pre_mem,saved_in0 + ;; +(p10) add src0=8,saved_in1 +(p11) mov src0=saved_in1 +(p12) add dst0=8,saved_in0 +(p13) mov dst0=saved_in0 + br.sptk .ex_handler +.ex_handler_lcpy: + // in line_copy block, the preload addresses should always ahead + // of the other two src/dst pointers. Furthermore, src1/dst1 should + // always ahead of src0/dst0. + mov src1=src_pre_mem + mov dst1=dst_pre_mem +.ex_handler: + mov pr=saved_pr,-1 // first restore pr, lc, and pfs + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + ;; +.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs + cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction + cmp.ltu p10,p11=src0,src1 + cmp.ltu p12,p13=dst0,dst1 + fcmp.eq p8,p0=f6,f0 // is it memcpy? + mov tmp = dst0 + ;; +(p11) mov src1 = src0 // pick the larger of the two +(p13) mov dst0 = dst1 // make dst0 the smaller one +(p13) mov dst1 = tmp // and dst1 the larger one + ;; +(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary +(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary + ;; +(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store +(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load + mov retval=saved_in2 +(p8) ld1 tmp=[src1] // force an oops for memcpy call +(p8) st1 [dst1]=r0 // force an oops for memcpy call +(p14) br.ret.sptk.many rp + +/* + * The remaining byte to copy is calculated as: + * + * A = (faulting_addr - orig_src) -> len to faulting ld address + * or + * (faulting_addr - orig_dst) -> len to faulting st address + * B = (cur_dst - orig_dst) -> len copied so far + * C = A - B -> len need to be copied + * D = orig_len - A -> len need to be zeroed + */ +(p6) sub A = F, saved_in0 +(p7) sub A = F, saved_in1 + clrrrb + ;; + alloc saved_pfs_stack=ar.pfs,3,3,3,0 + sub B = dst0, saved_in0 // how many byte copied so far + ;; + sub C = A, B + sub D = saved_in2, A + ;; + cmp.gt p8,p0=C,r0 // more than 1 byte? + add memset_arg0=saved_in0, A +(p6) mov memset_arg2=0 // copy_to_user should not call memset +(p7) mov memset_arg2=D // copy_from_user need to have kbuf zeroed + mov r8=0 + mov saved_retval = D + mov saved_rtlink = b0 + + add out0=saved_in0, B + add out1=saved_in1, B + mov out2=C +(p8) br.call.sptk.few b0=__copy_user // recursive call + ;; + + add saved_retval=saved_retval,r8 // above might return non-zero value + cmp.gt p8,p0=memset_arg2,r0 // more than 1 byte? 
+ mov out0=memset_arg0 // *s + mov out1=r0 // c + mov out2=memset_arg2 // n +(p8) br.call.sptk.few b0=memset + ;; + + mov retval=saved_retval + mov ar.pfs=saved_pfs_stack + mov b0=saved_rtlink + br.ret.sptk.many rp + +/* end of McKinley specific optimization */ +END(__copy_user) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/memset.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/memset.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,362 @@ +/* Optimized version of the standard memset() function. + + Copyright (c) 2002 Hewlett-Packard Co/CERN + Sverre Jarp <Sverre.Jarp@xxxxxxx> + + Return: dest + + Inputs: + in0: dest + in1: value + in2: count + + The algorithm is fairly straightforward: set byte by byte until we + we get to a 16B-aligned address, then loop on 128 B chunks using an + early store as prefetching, then loop on 32B chucks, then clear remaining + words, finally clear remaining bytes. + Since a stf.spill f0 can store 16B in one go, we use this instruction + to get peak speed when value = 0. */ + +#include <asm/asmmacro.h> +#undef ret + +#define dest in0 +#define value in1 +#define cnt in2 + +#define tmp r31 +#define save_lc r30 +#define ptr0 r29 +#define ptr1 r28 +#define ptr2 r27 +#define ptr3 r26 +#define ptr9 r24 +#define loopcnt r23 +#define linecnt r22 +#define bytecnt r21 + +#define fvalue f6 + +// This routine uses only scratch predicate registers (p6 - p15) +#define p_scr p6 // default register for same-cycle branches +#define p_nz p7 +#define p_zr p8 +#define p_unalgn p9 +#define p_y p11 +#define p_n p12 +#define p_yy p13 +#define p_nn p14 + +#define MIN1 15 +#define MIN1P1HALF 8 +#define LINE_SIZE 128 +#define LSIZE_SH 7 // shift amount +#define PREF_AHEAD 8 + +GLOBAL_ENTRY(memset) +{ .mmi + .prologue + alloc tmp = ar.pfs, 3, 0, 0, 0 + lfetch.nt1 [dest] // + .save ar.lc, save_lc + mov.i save_lc = ar.lc + .body +} { .mmi + mov ret0 = dest // return value + cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero + cmp.eq p_scr, p0 = cnt, r0 +;; } +{ .mmi + and ptr2 = -(MIN1+1), dest // aligned address + and tmp = MIN1, dest // prepare to check for correct alignment + tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) +} { .mib + mov ptr1 = dest + mux1 value = value, @brcst // create 8 identical bytes in word +(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 +;; } +{ .mib + cmp.ne p_unalgn, p0 = tmp, r0 // +} { .mib + sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt + cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? +(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +;; } +{ .mmi +(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +;; } +{ .mib +(p_y) add cnt = -8, cnt // +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +} { .mib +(p_y) st8 [ptr2] = value,-4 // +(p_n) add ptr2 = 4, ptr2 // +;; } +{ .mib +(p_yy) add cnt = -4, cnt // +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +} { .mib +(p_yy) st4 [ptr2] = value,-2 // +(p_nn) add ptr2 = 2, ptr2 // +;; } +{ .mmi + mov tmp = LINE_SIZE+1 // for compare +(p_y) add cnt = -2, cnt // +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? 
+} { .mmi + setf.sig fvalue=value // transfer value to FLP side +(p_y) st2 [ptr2] = value,-1 // +(p_n) add ptr2 = 1, ptr2 // +;; } + +{ .mmi +(p_yy) st1 [ptr2] = value // + cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? +} { .mbb +(p_yy) add cnt = -1, cnt // +(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +;; } + +{ .mib + nop.m 0 + shr.u linecnt = cnt, LSIZE_SH +(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill +;; } + + TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later +{ .mmi + and tmp = -(LINE_SIZE), cnt // compute end of range + mov ptr9 = ptr1 // used for prefetching + and cnt = (LINE_SIZE-1), cnt // remainder +} { .mmi + mov loopcnt = PREF_AHEAD-1 // default prefetch loop + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value +;; } +{ .mmi +(p_scr) add loopcnt = -1, linecnt // + add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores) + add ptr1 = tmp, ptr1 // first address beyond total range +;; } +{ .mmi + add tmp = -1, linecnt // next loop count + mov.i ar.lc = loopcnt // +;; } +.pref_l1a: +{ .mib + stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart + nop.i 0 + br.cloop.dptk.few .pref_l1a +;; } +{ .mmi + add ptr0 = 16, ptr2 // Two stores in parallel + mov.i ar.lc = tmp // +;; } +.l1ax: + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 32 + cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + ;; } +{ .mmb + stf8 [ptr2] = fvalue, 24 +(p_scr) stf8 [ptr9] = fvalue, 128 + br.cloop.dptk.few .l1ax +;; } +{ .mbb + cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 + br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 +;; } + + TEXT_ALIGN(32) +.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later +{ .mmi + and tmp = -(LINE_SIZE), cnt // compute end of range + mov ptr9 = ptr1 // used for prefetching + and cnt = (LINE_SIZE-1), cnt // remainder +} { .mmi + mov loopcnt = PREF_AHEAD-1 // default prefetch loop + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value +;; } +{ .mmi +(p_scr) add loopcnt = -1, linecnt + add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) + add ptr1 = tmp, ptr1 // first address beyond total range +;; } +{ .mmi + add tmp = -1, linecnt // next loop count + mov.i ar.lc = loopcnt +;; } +.pref_l1b: +{ .mib + stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + nop.i 0 + br.cloop.dptk.few .pref_l1b +;; } +{ .mmi + add ptr0 = 16, ptr2 // Two stores in parallel + mov.i ar.lc = tmp +;; } +.l1bx: + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 32 + ;; } + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 32 + ;; } + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 64 + cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + ;; } +{ .mmb + stf.spill [ptr2] = f0, 32 +(p_scr) stf.spill [ptr9] = f0, 128 + br.cloop.dptk.few .l1bx +;; } +{ .mib + cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? 
+(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // +;; } + +.fraction_of_line: +{ .mib + add ptr2 = 16, ptr1 + shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 +;; } +{ .mib + cmp.eq p_scr, p0 = loopcnt, r0 + add loopcnt = -1, loopcnt +(p_scr) br.cond.dpnt.many .store_words +;; } +{ .mib + and cnt = 0x1f, cnt // compute the remaining cnt + mov.i ar.lc = loopcnt +;; } + TEXT_ALIGN(32) +.l2: // ------------------------------------ // L2A: store 32B in 2 cycles +{ .mmb + stf8 [ptr1] = fvalue, 8 + stf8 [ptr2] = fvalue, 8 +;; } { .mmb + stf8 [ptr1] = fvalue, 24 + stf8 [ptr2] = fvalue, 24 + br.cloop.dptk.many .l2 +;; } +.store_words: +{ .mib + cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch +;; } + +{ .mmi + stf8 [ptr1] = fvalue, 8 // store + cmp.le p_y, p_n = 16, cnt + add cnt = -8, cnt // subtract +;; } +{ .mmi +(p_y) stf8 [ptr1] = fvalue, 8 // store +(p_y) cmp.le.unc p_yy, p_nn = 16, cnt +(p_y) add cnt = -8, cnt // subtract +;; } +{ .mmi // store +(p_yy) stf8 [ptr1] = fvalue, 8 +(p_yy) add cnt = -8, cnt // subtract +;; } + +.move_bytes_from_alignment: +{ .mib + cmp.eq p_scr, p0 = cnt, r0 + tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? +(p_scr) br.cond.dpnt.few .restore_and_exit +;; } +{ .mib +(p_y) st4 [ptr1] = value,4 + tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? +;; } +{ .mib +(p_yy) st2 [ptr1] = value,2 + tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? +;; } + +{ .mib +(p_y) st1 [ptr1] = value +;; } +.restore_and_exit: +{ .mib + nop.m 0 + mov.i ar.lc = save_lc + br.ret.sptk.many rp +;; } + +.move_bytes_unaligned: +{ .mmi + .pred.rel "mutex",p_y, p_n + .pred.rel "mutex",p_yy, p_nn +(p_n) cmp.le p_yy, p_nn = 4, cnt +(p_y) cmp.le p_yy, p_nn = 5, cnt +(p_n) add ptr2 = 2, ptr1 +} { .mmi +(p_y) add ptr2 = 3, ptr1 +(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left] +(p_y) add cnt = -1, cnt +;; } +{ .mmi +(p_yy) cmp.le.unc p_y, p0 = 8, cnt + add ptr3 = ptr1, cnt // prepare last store + mov.i ar.lc = save_lc +} { .mmi +(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (o less) left] +(p_yy) add cnt = -4, cnt +;; } +{ .mmi +(p_y) cmp.le.unc p_yy, p0 = 8, cnt + add ptr3 = -1, ptr3 // last store + tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? +} { .mmi +(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left] +(p_y) add cnt = -4, cnt +;; } +{ .mmi +(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left] + tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? 
+} { .mmi +(p_yy) add cnt = -4, cnt +;; } +{ .mmb +(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes +(p_y) st1 [ptr3] = value // fill last byte (using ptr3) + br.ret.sptk.many rp +} +END(memset) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/strlen.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/linux/strlen.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,192 @@ +/* + * + * Optimized version of the standard strlen() function + * + * + * Inputs: + * in0 address of string + * + * Outputs: + * ret0 the number of characters in the string (0 if empty string) + * does not count the \0 + * + * Copyright (C) 1999, 2001 Hewlett-Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + * + * 09/24/99 S.Eranian add speculation recovery code + */ + +#include <asm/asmmacro.h> + +// +// +// This is an enhanced version of the basic strlen. it includes a combination +// of compute zero index (czx), parallel comparisons, speculative loads and +// loop unroll using rotating registers. +// +// General Ideas about the algorithm: +// The goal is to look at the string in chunks of 8 bytes. +// so we need to do a few extra checks at the beginning because the +// string may not be 8-byte aligned. In this case we load the 8byte +// quantity which includes the start of the string and mask the unused +// bytes with 0xff to avoid confusing czx. +// We use speculative loads and software pipelining to hide memory +// latency and do read ahead safely. This way we defer any exception. +// +// Because we don't want the kernel to be relying on particular +// settings of the DCR register, we provide recovery code in case +// speculation fails. The recovery code is going to "redo" the work using +// only normal loads. If we still get a fault then we generate a +// kernel panic. Otherwise we return the strlen as usual. +// +// The fact that speculation may fail can be caused, for instance, by +// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., +// a NaT bit will be set if the translation is not present. The normal +// load, on the other hand, will cause the translation to be inserted +// if the mapping exists. +// +// It should be noted that we execute recovery code only when we need +// to use the data that has been speculatively loaded: we don't execute +// recovery code on pure read ahead data. +// +// Remarks: +// - the cmp r0,r0 is used as a fast way to initialize a predicate +// register to 1. This is required to make sure that we get the parallel +// compare correct. +// +// - we don't use the epilogue counter to exit the loop but we need to set +// it to zero beforehand. +// +// - after the loop we must test for Nat values because neither the +// czx nor cmp instruction raise a NaT consumption fault. We must be +// careful not to look too far for a Nat for which we don't care. +// For instance we don't need to look at a NaT in val2 if the zero byte +// was in val1. +// +// - Clearly performance tuning is required. 
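
A hedged, portable C model of the word-at-a-time scan described above, not part of the changeset: align the start down to 8 bytes, OR the leading pre-string bytes with 0xff so they cannot look like a terminator (the "or v[1]=v[1],mask" step), then test 8 bytes per load. Little-endian byte order is assumed as on ia64, czx1.r and the speculation/recovery machinery are modelled by a plain byte loop, and the deliberate read of the whole aligned word mirrors the real trick rather than strictly conforming C:

        #include <stdio.h>
        #include <stdint.h>
        #include <string.h>

        static size_t strlen_model(const char *s)
        {
                uintptr_t misalign = (uintptr_t)s & 7;
                const uint64_t *p = (const uint64_t *)((uintptr_t)s - misalign);
                uint64_t w = *p++ | (misalign ? (~0ULL >> (64 - 8 * misalign)) : 0);
                const char *base = (const char *)(p - 1);

                for (;;) {
                        int i;
                        for (i = 0; i < 8; i++)          /* stands in for czx1.r */
                                if (((w >> (8 * i)) & 0xff) == 0)
                                        return (size_t)((base + i) - s);
                        base = (const char *)p;
                        w = *p++;                        /* ld8.s in the real code */
                }
        }

        int main(void)
        {
                const char *t = "Xen/ia64";
                printf("model=%zu libc=%zu\n", strlen_model(t), strlen(t));  /* both 8 */
                return 0;
        }
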
+// +// +// +#define saved_pfs r11 +#define tmp r10 +#define base r16 +#define orig r17 +#define saved_pr r18 +#define src r19 +#define mask r20 +#define val r21 +#define val1 r22 +#define val2 r23 + +GLOBAL_ENTRY(strlen) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8 + + .rotr v[2], w[2] // declares our 4 aliases + + extr.u tmp=in0,0,3 // tmp=least significant 3 bits + mov orig=in0 // keep trackof initial byte address + dep src=0,in0,0,3 // src=8byte-aligned in0 address + .save pr, saved_pr + mov saved_pr=pr // preserve predicates (rotation) + ;; + + .body + + ld8 v[1]=[src],8 // must not speculate: can fail here + shl tmp=tmp,3 // multiply by 8bits/byte + mov mask=-1 // our mask + ;; + ld8.s w[1]=[src],8 // speculatively load next + cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and + sub tmp=64,tmp // how many bits to shift our mask on the right + ;; + shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part + mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) + ;; + add base=-16,src // keep track of aligned base + or v[1]=v[1],mask // now we have a safe initial byte pattern + ;; +1: + ld8.s v[0]=[src],8 // speculatively load next + czx1.r val1=v[1] // search 0 byte from right + czx1.r val2=w[1] // search 0 byte from right following 8bytes + ;; + ld8.s w[0]=[src],8 // speculatively load next to next + cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 + cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 +(p6) br.wtop.dptk 1b // loop until p6 == 0 + ;; + // + // We must return try the recovery code iff + // val1_is_nat || (val1==8 && val2_is_nat) + // + // XXX Fixme + // - there must be a better way of doing the test + // + cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) + tnat.nz p6,p7=val1 // test NaT on val1 +(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT + ;; + // + // if we come here p7 is true, i.e., initialized for // cmp + // + cmp.eq.and p7,p0=8,val1// val1==8? + tnat.nz.and p7,p0=val2 // test NaT if val2 +(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT + ;; +(p8) mov val1=val2 // the other test got us out of the loop +(p8) adds src=-16,src // correct position when 3 ahead +(p9) adds src=-24,src // correct position when 4 ahead + ;; + sub ret0=src,orig // distance from base + sub tmp=8,val1 // which byte in word + mov pr=saved_pr,0xffffffffffff0000 + ;; + sub ret0=ret0,tmp // adjust + mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what + br.ret.sptk.many rp // end of normal execution + + // + // Outlined recovery code when speculation failed + // + // This time we don't use speculation and rely on the normal exception + // mechanism. that's why the loop is not as good as the previous one + // because read ahead is not possible + // + // IMPORTANT: + // Please note that in the case of strlen() as opposed to strlen_user() + // we don't use the exception mechanism, as this function is not + // supposed to fail. If that happens it means we have a bug and the + // code will cause of kernel fault. + // + // XXX Fixme + // - today we restart from the beginning of the string instead + // of trying to continue where we left off. 
+ // +.recover: + ld8 val=[base],8 // will fail if unrecoverable fault + ;; + or val=val,mask // remask first bytes + cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop + ;; + // + // ar.ec is still zero here + // +2: +(p6) ld8 val=[base],8 // will fail if unrecoverable fault + ;; + czx1.r val1=val // search 0 byte from right + ;; + cmp.eq p6,p0=8,val1 // val1==8 ? +(p6) br.wtop.dptk 2b // loop until p6 == 0 + ;; // (avoid WAW on p63) + sub ret0=base,orig // distance from base + sub tmp=8,val1 + mov pr=saved_pr,0xffffffffffff0000 + ;; + sub ret0=ret0,tmp // length=now - back -1 + mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what + br.ret.sptk.many rp // end of successful recovery code +END(strlen) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/mm.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/mm.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,152 @@ +/****************************************************************************** + * arch/ia64/mm.c + * + * Copyright (c) 2002-2005 K A Fraser + * Copyright (c) 2004 Christian Limpach + * Copyright (c) 2005, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * A description of the x86 page table API: + * + * Domains trap to do_mmu_update with a list of update requests. + * This is a list of (ptr, val) pairs, where the requested operation + * is *ptr = val. + * + * Reference counting of pages: + * ---------------------------- + * Each page has two refcounts: tot_count and type_count. + * + * TOT_COUNT is the obvious reference count. It counts all uses of a + * physical page frame by a domain, including uses as a page directory, + * a page table, or simple mappings via a PTE. This count prevents a + * domain from releasing a frame back to the free pool when it still holds + * a reference to it. + * + * TYPE_COUNT is more subtle. A frame can be put to one of three + * mutually-exclusive uses: it might be used as a page directory, or a + * page table, or it may be mapped writable by the domain [of course, a + * frame may not be used in any of these three ways!]. + * So, type_count is a count of the number of times a frame is being + * referred to in its current incarnation. Therefore, a page can only + * change its type when its type count is zero. + * + * Pinning the page type: + * ---------------------- + * The type of a page can be pinned/unpinned with the commands + * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is, + * pinning is not reference counted, so it can't be nested). + * This is useful to prevent a page's type count falling to zero, at which + * point safety checks would need to be carried out next time the count + * is increased again. 
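
The two-counter scheme described above fits in a few lines of C. Everything below (the struct, the type enum, the function names) is invented for illustration and is not the real Xen page_info interface; the point is only that a frame may change type only while its type count is zero.

    #include <stdbool.h>

    enum toy_type { TOY_NONE, TOY_PAGE_TABLE, TOY_PAGE_DIR, TOY_WRITABLE };

    struct toy_page {
        unsigned long tot_count;    /* every use of the frame                  */
        unsigned long type_count;   /* uses in the frame's current incarnation */
        enum toy_type type;
    };

    static bool toy_get_page_type(struct toy_page *pg, enum toy_type want)
    {
        if (pg->type != want) {
            if (pg->type_count != 0)
                return false;       /* still in use under its old type            */
            pg->type = want;        /* safety checks on the new use would go here */
        }
        pg->type_count++;
        return true;
    }

    static void toy_put_page_type(struct toy_page *pg)
    {
        pg->type_count--;           /* type may change again once this reaches 0 */
    }
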
+ * + * A further note on writable page mappings: + * ----------------------------------------- + * For simplicity, the count of writable mappings for a page may not + * correspond to reality. The 'writable count' is incremented for every + * PTE which maps the page with the _PAGE_RW flag set. However, for + * write access to be possible the page directory entry must also have + * its _PAGE_RW bit set. We do not check this as it complicates the + * reference counting considerably [consider the case of multiple + * directory entries referencing a single page table, some with the RW + * bit set, others not -- it starts getting a bit messy]. + * In normal use, this simplification shouldn't be a problem. + * However, the logic can be added if required. + * + * One more note on read-only page mappings: + * ----------------------------------------- + * We want domains to be able to map pages for read-only access. The + * main reason is that page tables and directories should be readable + * by a domain, but it would not be safe for them to be writable. + * However, domains have free access to rings 1 & 2 of the Intel + * privilege model. In terms of page protection, these are considered + * to be part of 'supervisor mode'. The WP bit in CR0 controls whether + * read-only restrictions are respected in supervisor mode -- if the + * bit is clear then any mapped page is writable. + * + * We get round this by always setting the WP bit and disallowing + * updates to it. This is very unlikely to cause a problem for guest + * OS's, which will generally use the WP bit to simplify copy-on-write + * implementation (in that case, OS wants a fault when it writes to + * an application-supplied buffer). + */ + +#include <xen/config.h> +#include <public/xen.h> +#include <xen/init.h> +#include <xen/lib.h> +#include <xen/mm.h> +#include <xen/errno.h> +#include <asm/vmx_vcpu.h> +#include <asm/vmmu.h> +#include <asm/regionreg.h> +#include <asm/vmx_mm_def.h> +/* + uregs->ptr is virtual address + uregs->val is pte value + */ +#ifdef CONFIG_VTI +int do_mmu_update(mmu_update_t *ureqs,u64 count,u64 *pdone,u64 foreigndom) +{ + int i,cmd; + u64 mfn, gpfn; + VCPU *vcpu; + mmu_update_t req; + ia64_rr rr; + thash_cb_t *hcb; + thash_data_t entry={0},*ovl; + vcpu = current; + search_section_t sections; + hcb = vmx_vcpu_get_vtlb(vcpu); + for ( i = 0; i < count; i++ ) + { + copy_from_user(&req, ureqs, sizeof(req)); + cmd = req.ptr&3; + req.ptr &= ~3; + if(cmd ==MMU_NORMAL_PT_UPDATE){ + entry.page_flags = req.val; + entry.locked = 1; + entry.tc = 1; + entry.cl = DSIDE_TLB; + rr = vmx_vcpu_rr(vcpu, req.ptr); + entry.ps = rr.ps; + entry.key = redistribute_rid(rr.rid); + entry.rid = rr.rid; + entry.vadr = PAGEALIGN(req.ptr,entry.ps); + sections.tr = 1; + sections.tc = 0; + ovl = thash_find_overlap(hcb, &entry, sections); + if (ovl) { + // generate MCA. + panic("Tlb conflict!!"); + return; + } + thash_purge_and_insert(hcb, &entry); + }else if(cmd == MMU_MACHPHYS_UPDATE){ + mfn = req.ptr >>PAGE_SHIFT; + gpfn = req.val; + set_machinetophys(mfn,gpfn); + }else{ + printf("Unkown command of mmu_update:ptr: %lx,val: %lx \n",req.ptr,req.val); + while(1); + } + ureqs ++; + } + return 0; +} +#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/mmio.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/mmio.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,515 @@ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * mmio.c: MMIO emulation components. + * Copyright (c) 2004, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) + * Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx) + */ + +#include <linux/sched.h> +#include <asm/tlb.h> +#include <asm/vmx_mm_def.h> +#include <asm/gcc_intrin.h> +#include <linux/interrupt.h> +#include <asm/vmx_vcpu.h> +#include <asm/privop.h> +#include <asm/types.h> +#include <public/io/ioreq.h> +#include <asm/mm.h> +#include <asm/vmx.h> + +/* +struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base) +{ + int i; + for (i=0; mio_base[i].iot != NOT_IO; i++ ) { + if ( gpa >= mio_base[i].start && gpa <= mio_base[i].end ) + return &mio_base[i]; + } + return NULL; +} +*/ + +#define PIB_LOW_HALF(ofst) !(ofst&(1<<20)) +#define PIB_OFST_INTA 0x1E0000 +#define PIB_OFST_XTP 0x1E0008 + +static void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma) +{ + switch (pib_off) { + case PIB_OFST_INTA: + panic("Undefined write on PIB INTA\n"); + break; + case PIB_OFST_XTP: + if ( s == 1 && ma == 4 /* UC */) { + vmx_vcpu_get_plat(vcpu)->xtp = *(uint8_t *)src; + } + else { + panic("Undefined write on PIB XTP\n"); + } + break; + default: + if ( PIB_LOW_HALF(pib_off) ) { // lower half + if ( s != 8 || ma != 0x4 /* UC */ ) { + panic("Undefined IPI-LHF write with s %d, ma %d!\n", s, ma); + } + else { + write_ipi(vcpu, pib_off, *(uint64_t *)src); + // TODO for SM-VP + } + } + else { // upper half + printf("IPI-UHF write %lx\n",pib_off); + panic("Not support yet for SM-VP\n"); + } + break; + } +} + +static void pib_read(VCPU *vcpu, uint64_t pib_off, void *dest, size_t s, int ma) +{ + switch (pib_off) { + case PIB_OFST_INTA: + // todo --- emit on processor system bus. 
+ if ( s == 1 && ma == 4) { // 1 byte load + // TODO: INTA read from IOSAPIC + } + else { + panic("Undefined read on PIB INTA\n"); + } + break; + case PIB_OFST_XTP: + if ( s == 1 && ma == 4) { + *((uint8_t*)dest) = vmx_vcpu_get_plat(vcpu)->xtp; + } + else { + panic("Undefined read on PIB XTP\n"); + } + break; + default: + if ( PIB_LOW_HALF(pib_off) ) { // lower half + if ( s != 8 || ma != 4 ) { + panic("Undefined IPI-LHF read!\n"); + } + else { +#ifdef IPI_DEBUG + printf("IPI-LHF read %lx\n",pib_off); +#endif + *(uint64_t *)dest = 0; // TODO for SM-VP + } + } + else { // upper half + if ( s != 1 || ma != 4 ) { + panic("Undefined PIB-UHF read!\n"); + } + else { +#ifdef IPI_DEBUG + printf("IPI-UHF read %lx\n",pib_off); +#endif + *(uint8_t *)dest = 0; // TODO for SM-VP + } + } + break; + } +} + +static void low_mmio_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir) +{ + struct vcpu *v = current; + vcpu_iodata_t *vio; + ioreq_t *p; + unsigned long addr; + + vio = get_vio(v->domain, v->vcpu_id); + if (vio == 0) { + panic("bad shared page: %lx", (unsigned long)vio); + } + p = &vio->vp_ioreq; + p->addr = pa; + p->size = s; + p->count = 1; + p->dir = dir; + if(dir==IOREQ_WRITE) //write; + p->u.data = *val; + p->pdata_valid = 0; + p->port_mm = 1; + p->df = 0; + + set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); + p->state = STATE_IOREQ_READY; + evtchn_send(iopacket_port(v->domain)); + vmx_wait_io(); + if(dir==IOREQ_READ){ //read + *val=p->u.data; + } + return; +} +#define TO_LEGACY_IO(pa) (((pa)>>12<<2)|((pa)&0x3)) + +static void legacy_io_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir) +{ + struct vcpu *v = current; + vcpu_iodata_t *vio; + ioreq_t *p; + unsigned long addr; + + vio = get_vio(v->domain, v->vcpu_id); + if (vio == 0) { + panic("bad shared page: %lx"); + } + p = &vio->vp_ioreq; + p->addr = TO_LEGACY_IO(pa&0x3ffffffUL); + p->size = s; + p->count = 1; + p->dir = dir; + if(dir==IOREQ_WRITE) //write; + p->u.data = *val; + p->pdata_valid = 0; + p->port_mm = 0; + p->df = 0; + + set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); + p->state = STATE_IOREQ_READY; + evtchn_send(iopacket_port(v->domain)); + + vmx_wait_io(); + if(dir==IOREQ_READ){ //read + *val=p->u.data; + } +#ifdef DEBUG_PCI + if(dir==IOREQ_WRITE) + if(p->addr == 0xcf8UL) + printk("Write 0xcf8, with val [0x%lx]\n", p->u.data); + else + if(p->addr == 0xcfcUL) + printk("Read 0xcfc, with val [0x%lx]\n", p->u.data); +#endif //DEBUG_PCI + return; +} + +static void mmio_access(VCPU *vcpu, u64 src_pa, u64 *dest, size_t s, int ma, int dir) +{ + struct virutal_platform_def *v_plat; + //mmio_type_t iot; + unsigned long iot; + iot=__gpfn_is_io(vcpu->domain, src_pa>>PAGE_SHIFT); + v_plat = vmx_vcpu_get_plat(vcpu); + + switch (iot) { + case GPFN_PIB: + if(!dir) + pib_write(vcpu, dest, src_pa - v_plat->pib_base, s, ma); + else + pib_read(vcpu, src_pa - v_plat->pib_base, dest, s, ma); + break; + case GPFN_GFW: + break; + case GPFN_IOSAPIC: + case GPFN_FRAME_BUFFER: + case GPFN_LOW_MMIO: + low_mmio_access(vcpu, src_pa, dest, s, dir); + break; + case GPFN_LEGACY_IO: + legacy_io_access(vcpu, src_pa, dest, s, dir); + break; + default: + panic("Bad I/O access\n"); + break; + } + return; +} + +/* + * Read or write data in guest virtual address mode. 
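
low_mmio_access() and legacy_io_access() above share one request/response pattern against the shared I/O page. The sketch below restates that shape with made-up types and stubs (toy_ioreq, notify_device_model() and wait_for_completion() stand in for ioreq_t, evtchn_send() and vmx_wait_io()); it is not the real interface.

    #include <stdint.h>

    struct toy_ioreq {
        uint64_t addr;              /* guest physical address or port        */
        uint64_t data;              /* value written, or value read back     */
        uint32_t size;
        int      dir;               /* 0 = write, 1 = read (assumed here)    */
        volatile int state;         /* 0 = idle, 1 = ready for the emulator  */
    };

    static void notify_device_model(void) { /* evtchn_send() in the patch */ }
    static void wait_for_completion(void)  { /* vmx_wait_io() in the patch */ }

    static uint64_t toy_io_access(struct toy_ioreq *req, uint64_t pa,
                                  uint64_t val, uint32_t size, int dir)
    {
        req->addr = pa;
        req->size = size;
        req->dir  = dir;
        if (dir == 0)               /* write: pass the data along */
            req->data = val;

        req->state = 1;             /* mark the request ready     */
        notify_device_model();      /* kick the external emulator */
        wait_for_completion();      /* block until it responds    */

        return (dir == 1) ? req->data : val;  /* read: result from the emulator */
    }
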
+ */ +/* +void +memwrite_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s) +{ + uint64_t pa; + + if (!vtlb->nomap) + panic("Normal memory write shouldn't go to this point!"); + pa = PPN_2_PA(vtlb->ppn); + pa += POFFSET((u64)dest, vtlb->ps); + mmio_write (vcpu, src, pa, s, vtlb->ma); +} + + +void +memwrite_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s) +{ + uint64_t pa = (uint64_t)dest; + int ma; + + if ( pa & (1UL <<63) ) { + // UC + ma = 4; + pa <<=1; + pa >>=1; + } + else { + // WBL + ma = 0; // using WB for WBL + } + mmio_write (vcpu, src, pa, s, ma); +} + +void +memread_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s) +{ + uint64_t pa; + + if (!vtlb->nomap) + panic("Normal memory write shouldn't go to this point!"); + pa = PPN_2_PA(vtlb->ppn); + pa += POFFSET((u64)src, vtlb->ps); + + mmio_read(vcpu, pa, dest, s, vtlb->ma); +} + +void +memread_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s) +{ + uint64_t pa = (uint64_t)src; + int ma; + + if ( pa & (1UL <<63) ) { + // UC + ma = 4; + pa <<=1; + pa >>=1; + } + else { + // WBL + ma = 0; // using WB for WBL + } + mmio_read(vcpu, pa, dest, s, ma); +} +*/ + + +/* + * Deliver IPI message. (Only U-VP is supported now) + * offset: address offset to IPI space. + * value: deliver value. + */ +static void deliver_ipi (VCPU *vcpu, uint64_t dm, uint64_t vector) +{ +#ifdef IPI_DEBUG + printf ("deliver_ipi %lx %lx\n",dm,vector); +#endif + switch ( dm ) { + case 0: // INT + vmx_vcpu_pend_interrupt (vcpu, vector); + break; + case 2: // PMI + // TODO -- inject guest PMI + panic ("Inject guest PMI!\n"); + break; + case 4: // NMI + vmx_vcpu_pend_interrupt (vcpu, 2); + break; + case 5: // INIT + // TODO -- inject guest INIT + panic ("Inject guest INIT!\n"); + break; + case 7: // ExtINT + vmx_vcpu_pend_interrupt (vcpu, 0); + break; + case 1: + case 3: + case 6: + default: + panic ("Deliver reserved IPI!\n"); + break; + } +} + +/* + * TODO: Use hash table for the lookup. + */ +static inline VCPU *lid_2_vcpu (struct domain *d, u64 id, u64 eid) +{ + int i; + VCPU *vcpu; + LID lid; + for (i=0; i<MAX_VIRT_CPUS; i++) { + vcpu = d->vcpu[i]; + if (!vcpu) + continue; + lid.val = VPD_CR(vcpu, lid); + if ( lid.id == id && lid.eid == eid ) { + return vcpu; + } + } + return NULL; +} + +/* + * execute write IPI op. 
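
For reference, the delivery-mode dispatch in deliver_ipi() above reduces to the mapping sketched below; PMI (2), INIT (5) and the reserved modes are still rejected by the code in this patch. The helper name is invented.

    /* dm field of the IPI data word -> vector pended on the virtual LSAPIC */
    static int toy_ipi_vector(unsigned int dm, unsigned int vector)
    {
        switch (dm) {
        case 0:  return (int)vector;   /* fixed interrupt: pend `vector`     */
        case 4:  return 2;             /* NMI maps to vector 2               */
        case 7:  return 0;             /* ExtINT maps to vector 0            */
        default: return -1;            /* PMI / INIT / reserved: unsupported */
        }
    }
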
+ */ +static int write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value) +{ + VCPU *target_cpu; + + target_cpu = lid_2_vcpu(vcpu->domain, + ((ipi_a_t)addr).id, ((ipi_a_t)addr).eid); + if ( target_cpu == NULL ) panic("Unknown IPI cpu\n"); + if ( target_cpu == vcpu ) { + // IPI to self + deliver_ipi (vcpu, ((ipi_d_t)value).dm, + ((ipi_d_t)value).vector); + return 1; + } + else { + // TODO: send Host IPI to inject guest SMP IPI interruption + panic ("No SM-VP supported!\n"); + return 0; + } +} + + +/* + dir 1: read 0:write + inst_type 0:integer 1:floating point + */ +extern IA64_BUNDLE __vmx_get_domain_bundle(u64 iip); +#define SL_INTEGER 0 // store/load interger +#define SL_FLOATING 1 // store/load floating + +void emulate_io_inst(VCPU *vcpu, u64 padr, u64 ma) +{ + REGS *regs; + IA64_BUNDLE bundle; + int slot, dir, inst_type; + size_t size; + u64 data, value,post_update, slot1a, slot1b, temp; + INST64 inst; + regs=vcpu_regs(vcpu); + bundle = __vmx_get_domain_bundle(regs->cr_iip); + slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; + if (!slot) inst.inst = bundle.slot0; + else if (slot == 1){ + slot1a=bundle.slot1a; + slot1b=bundle.slot1b; + inst.inst =slot1a + (slot1b<<18); + } + else if (slot == 2) inst.inst = bundle.slot2; + + + // Integer Load/Store + if(inst.M1.major==4&&inst.M1.m==0&&inst.M1.x==0){ + inst_type = SL_INTEGER; // + size=(inst.M1.x6&0x3); + if((inst.M1.x6>>2)>0xb){ // write + dir=IOREQ_WRITE; //write + vmx_vcpu_get_gr(vcpu,inst.M4.r2,&data); + }else if((inst.M1.x6>>2)<0xb){ // read + dir=IOREQ_READ; + vmx_vcpu_get_gr(vcpu,inst.M1.r1,&value); + } + } + // Integer Load + Reg update + else if(inst.M2.major==4&&inst.M2.m==1&&inst.M2.x==0){ + inst_type = SL_INTEGER; + dir = IOREQ_READ; //write + size = (inst.M2.x6&0x3); + vmx_vcpu_get_gr(vcpu,inst.M2.r1,&value); + vmx_vcpu_get_gr(vcpu,inst.M2.r3,&temp); + vmx_vcpu_get_gr(vcpu,inst.M2.r2,&post_update); + temp += post_update; + vmx_vcpu_set_gr(vcpu,inst.M2.r3,temp,0); + } + // Integer Load/Store + Imm update + else if(inst.M3.major==5){ + inst_type = SL_INTEGER; // + size=(inst.M3.x6&0x3); + if((inst.M5.x6>>2)>0xb){ // write + dir=IOREQ_WRITE; //write + vmx_vcpu_get_gr(vcpu,inst.M5.r2,&data); + vmx_vcpu_get_gr(vcpu,inst.M5.r3,&temp); + post_update = (inst.M5.i<<7)+inst.M5.imm7; + if(inst.M5.s) + temp -= post_update; + else + temp += post_update; + vmx_vcpu_set_gr(vcpu,inst.M5.r3,temp,0); + + }else if((inst.M3.x6>>2)<0xb){ // read + dir=IOREQ_READ; + vmx_vcpu_get_gr(vcpu,inst.M3.r1,&value); + vmx_vcpu_get_gr(vcpu,inst.M3.r3,&temp); + post_update = (inst.M3.i<<7)+inst.M3.imm7; + if(inst.M3.s) + temp -= post_update; + else + temp += post_update; + vmx_vcpu_set_gr(vcpu,inst.M3.r3,temp,0); + + } + } + // Floating-point Load/Store +// else if(inst.M6.major==6&&inst.M6.m==0&&inst.M6.x==0&&inst.M6.x6==3){ +// inst_type=SL_FLOATING; //fp +// dir=IOREQ_READ; +// size=3; //ldfd +// } + else{ + printf("This memory access instruction can't be emulated two: %lx\n ",inst.inst); + while(1); + } + + size = 1 << size; + if(dir==IOREQ_WRITE){ + mmio_access(vcpu, padr, &data, size, ma, dir); + }else{ + mmio_access(vcpu, padr, &data, size, ma, dir); + if(size==0) + data = (value & 0xffffffffffffff00U) | (data & 0xffU); + else if(size==1) + data = (value & 0xffffffffffff0000U) | (data & 0xffffU); + else if(size==2) + data = (value & 0xffffffff00000000U) | (data & 0xffffffffU); + + if(inst_type==SL_INTEGER){ //gp + vmx_vcpu_set_gr(vcpu,inst.M1.r1,data,0); + }else{ + panic("Don't support ldfd now !"); +/* switch(inst.M6.f1){ + + case 6: + regs->f6=(struct 
ia64_fpreg)data; + case 7: + regs->f7=(struct ia64_fpreg)data; + case 8: + regs->f8=(struct ia64_fpreg)data; + case 9: + regs->f9=(struct ia64_fpreg)data; + case 10: + regs->f10=(struct ia64_fpreg)data; + case 11: + regs->f11=(struct ia64_fpreg)data; + default : + ia64_ldfs(inst.M6.f1,&data); + } +*/ + } + } + vmx_vcpu_increment_iip(vcpu); +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/pal_emul.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/pal_emul.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,280 @@ +/* + * PAL/SAL call delegation + * + * Copyright (c) 2004 Li Susie <susie.li@xxxxxxxxx> + * Copyright (c) 2005 Yu Ke <ke.yu@xxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <asm/vmx_vcpu.h> + +static void +get_pal_parameters (VCPU *vcpu, UINT64 *gr29, + UINT64 *gr30, UINT64 *gr31) { + + vmx_vcpu_get_gr(vcpu,29,gr29); + vmx_vcpu_get_gr(vcpu,30,gr30); + vmx_vcpu_get_gr(vcpu,31,gr31); +} + +static void +set_pal_result (VCPU *vcpu,struct ia64_pal_retval result) { + + vmx_vcpu_set_gr(vcpu,8, result.status,0); + vmx_vcpu_set_gr(vcpu,9, result.v0,0); + vmx_vcpu_set_gr(vcpu,10, result.v1,0); + vmx_vcpu_set_gr(vcpu,11, result.v2,0); +} + + +static struct ia64_pal_retval +pal_cache_flush (VCPU *vcpu) { + UINT64 gr28,gr29, gr30, gr31; + struct ia64_pal_retval result; + + get_pal_parameters (vcpu, &gr29, &gr30, &gr31); + vmx_vcpu_get_gr(vcpu,28,&gr28); + + /* Always call Host Pal in int=1 */ + gr30 = gr30 &(~(0x2UL)); + + /* call Host PAL cache flush */ + result=ia64_pal_call_static(gr28 ,gr29, gr30,gr31,1); // Clear psr.ic when call PAL_CACHE_FLUSH + + /* If host PAL call is interrupted, then loop to complete it */ +// while (result.status == 1) { +// ia64_pal_call_static(gr28 ,gr29, gr30, +// result.v1,1LL); +// } + while (result.status != 0) { + panic("PAL_CACHE_FLUSH ERROR, status %d", result.status); + } + + return result; +} + +static struct ia64_pal_retval +pal_vm_tr_read (VCPU *vcpu ) { +#warning pal_vm_tr_read: to be implemented + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + + return result; +} + + +static struct ia64_pal_retval +pal_prefetch_visibility (VCPU *vcpu) { + /* Due to current MM virtualization algorithm, + * We do not allow guest to change mapping attribute. + * Thus we will not support PAL_PREFETCH_VISIBILITY + */ + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + + return result; +} + +static struct ia64_pal_retval +pal_platform_addr(VCPU *vcpu) { + struct ia64_pal_retval result; + + result.status= 0; //success + + return result; +} + +static struct ia64_pal_retval +pal_halt (VCPU *vcpu) { +#warning pal_halt: to be implemented + //bugbug: to be implement. 
+ struct ia64_pal_retval result; + + result.status= -1; //unimplemented + + return result; +} + + +static struct ia64_pal_retval +pal_halt_light (VCPU *vcpu) { + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + + return result; +} + +static struct ia64_pal_retval +pal_cache_read (VCPU *vcpu) { + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + + return result; +} + +static struct ia64_pal_retval +pal_cache_write (VCPU *vcpu) { + struct ia64_pal_retval result; + + result.status= -1; //unimplemented + + return result; +} + +static struct ia64_pal_retval +pal_bus_get_features(VCPU *vcpu){ + +} + +static struct ia64_pal_retval +pal_cache_summary(VCPU *vcpu){ + +} + +static struct ia64_pal_retval +pal_cache_init(VCPU *vcpu){ + struct ia64_pal_retval result; + result.status=0; + return result; +} + +static struct ia64_pal_retval +pal_cache_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_cache_prot_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_cache_shared_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_mem_attrib(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_debug_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_fixed_addr(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_freq_base(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_freq_ratios(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_halt_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_logical_to_physica(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_perf_mon_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_proc_get_features(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_ptce_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_register_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_rse_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_test_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_vm_summary(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_vm_info(VCPU *vcpu){ +} + +static struct ia64_pal_retval +pal_vm_page_size(VCPU *vcpu){ +} + +void +pal_emul( VCPU *vcpu) { + UINT64 gr28; + struct ia64_pal_retval result; + + + vmx_vcpu_get_gr(vcpu,28,&gr28); //bank1 + + switch (gr28) { + case PAL_CACHE_FLUSH: + result = pal_cache_flush (vcpu); + break; + + case PAL_PREFETCH_VISIBILITY: + result = pal_prefetch_visibility (vcpu); + break; + + case PAL_VM_TR_READ: + result = pal_vm_tr_read (vcpu); + break; + + case PAL_HALT: + result = pal_halt (vcpu); + break; + + case PAL_HALT_LIGHT: + result = pal_halt_light (vcpu); + break; + + case PAL_CACHE_READ: + result = pal_cache_read (vcpu); + break; + + case PAL_CACHE_WRITE: + result = pal_cache_write (vcpu); + break; + + case PAL_PLATFORM_ADDR: + result = pal_platform_addr (vcpu); + break; + + default: + panic("pal_emul(): guest call unsupported pal" ); + } + set_pal_result (vcpu, result); +} + + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vlsapic.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vlsapic.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,620 @@ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vlsapic.c: virtual lsapic model including ITC timer. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) + */ + +#include <linux/sched.h> +#include <public/arch-ia64.h> +#include <asm/ia64_int.h> +#include <asm/vcpu.h> +#include <asm/regionreg.h> +#include <asm/tlb.h> +#include <asm/processor.h> +#include <asm/delay.h> +#include <asm/vmx_vcpu.h> +#include <asm/vmx_vcpu.h> +#include <asm/regs.h> +#include <asm/gcc_intrin.h> +#include <asm/vmx_mm_def.h> +#include <asm/vmx.h> +#include <asm/hw_irq.h> +#include <asm/vmx_pal_vsa.h> +#include <asm/kregs.h> + +#define SHARED_VLAPIC_INF +#ifdef V_IOSAPIC_READY +static inline vl_apic_info* get_psapic(VCPU *vcpu) +{ + shared_iopage_t *sp = get_sp(vcpu->domain); + return &(sp->vcpu_iodata[vcpu->vcpu_id].apic_intr); +} +#endif +//u64 fire_itc; +//u64 fire_itc2; +//u64 fire_itm; +//u64 fire_itm2; +/* + * Update the checked last_itc. + */ +static void update_last_itc(vtime_t *vtm, uint64_t cur_itc) +{ + vtm->last_itc = cur_itc; +} + +/* + * ITC value saw in guest (host+offset+drift). + */ +static uint64_t now_itc(vtime_t *vtm) +{ + uint64_t guest_itc=vtm->vtm_offset+ia64_get_itc(); + + if ( vtm->vtm_local_drift ) { +// guest_itc -= vtm->vtm_local_drift; + } + if ( (long)(guest_itc - vtm->last_itc) > 0 ) { + return guest_itc; + + } + else { + /* guest ITC backwarded due after LP switch */ + return vtm->last_itc; + } +} + +/* + * Interval time components reset. + */ +static void vtm_reset(VCPU *vcpu) +{ + uint64_t cur_itc; + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + vtm->vtm_offset = 0; + vtm->vtm_local_drift = 0; + VPD_CR(vcpu, itm) = 0; + VPD_CR(vcpu, itv) = 0x10000; + cur_itc = ia64_get_itc(); + vtm->last_itc = vtm->vtm_offset + cur_itc; +} + +/* callback function when vtm_timer expires */ +static void vtm_timer_fn(void *data) +{ + vtime_t *vtm; + VCPU *vcpu = data; + u64 cur_itc,vitm; + + UINT64 vec; + + vec = VPD_CR(vcpu, itv) & 0xff; + vmx_vcpu_pend_interrupt(vcpu, vec); + + vtm=&(vcpu->arch.arch_vmx.vtm); + cur_itc = now_itc(vtm); + vitm =VPD_CR(vcpu, itm); + //fire_itc2 = cur_itc; + //fire_itm2 = vitm; + update_last_itc(vtm,cur_itc); // pseudo read to update vITC +} + +void vtm_init(VCPU *vcpu) +{ + vtime_t *vtm; + uint64_t itc_freq; + + vtm=&(vcpu->arch.arch_vmx.vtm); + + itc_freq = local_cpu_data->itc_freq; + vtm->cfg_max_jump=itc_freq*MAX_JUMP_STEP/1000; + vtm->cfg_min_grun=itc_freq*MIN_GUEST_RUNNING_TIME/1000; + init_ac_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0); + vtm_reset(vcpu); +} + +/* + * Action when guest read ITC. + */ +uint64_t vtm_get_itc(VCPU *vcpu) +{ + uint64_t guest_itc, spsr; + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + // FIXME: should use local_irq_disable & local_irq_enable ?? 
+ local_irq_save(spsr); + guest_itc = now_itc(vtm); +// update_last_itc(vtm, guest_itc); + + local_irq_restore(spsr); + return guest_itc; +} + +void vtm_set_itc(VCPU *vcpu, uint64_t new_itc) +{ + uint64_t spsr; + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + local_irq_save(spsr); + vtm->vtm_offset = new_itc - ia64_get_itc(); + vtm->last_itc = new_itc; + vtm_interruption_update(vcpu, vtm); + local_irq_restore(spsr); +} + +void vtm_set_itv(VCPU *vcpu) +{ + uint64_t spsr,itv; + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + local_irq_save(spsr); + itv = VPD_CR(vcpu, itv); + if ( ITV_IRQ_MASK(itv) ) + rem_ac_timer(&vtm->vtm_timer); + vtm_interruption_update(vcpu, vtm); + local_irq_restore(spsr); +} + + +/* + * Update interrupt or hook the vtm ac_timer for fire + * At this point vtm_timer should be removed if itv is masked. + */ +/* Interrupt must be disabled at this point */ + +extern u64 tick_to_ns(u64 tick); +#define TIMER_SLOP (50*1000) /* ns */ /* copy from ac_timer.c */ +void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm) +{ + uint64_t cur_itc,vitm,vitv; + uint64_t expires; + long diff_now, diff_last; + uint64_t spsr; + + vitv = VPD_CR(vcpu, itv); + if ( ITV_IRQ_MASK(vitv) ) { + return; + } + + vitm =VPD_CR(vcpu, itm); + local_irq_save(spsr); + cur_itc =now_itc(vtm); + diff_last = vtm->last_itc - vitm; + diff_now = cur_itc - vitm; + update_last_itc (vtm,cur_itc); + + if ( diff_last >= 0 ) { + // interrupt already fired. + rem_ac_timer(&vtm->vtm_timer); + } + else if ( diff_now >= 0 ) { + // ITV is fired. + vmx_vcpu_pend_interrupt(vcpu, vitv&0xff); + } + /* Both last_itc & cur_itc < itm, wait for fire condition */ + else { + expires = NOW() + tick_to_ns(0-diff_now) + TIMER_SLOP; + set_ac_timer(&vtm->vtm_timer, expires); + } + local_irq_restore(spsr); +} + +/* + * Action for vtm when the domain is scheduled out. + * Remove the ac_timer for vtm. + */ +void vtm_domain_out(VCPU *vcpu) +{ + if(!is_idle_task(vcpu->domain)) + rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer); +} + +/* + * Action for vtm when the domain is scheduled in. + * Fire vtm IRQ or add the ac_timer for vtm. 
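
A minimal model of the guest ITC handling above (now_itc(), update_last_itc(), vtm_set_itc()): the guest sees host ITC plus a per-vcpu offset, clamped so that it never appears to run backwards after a processor switch. Names below are invented; the real code also guards everything with interrupt disabling.

    #include <stdint.h>

    struct toy_vtime {
        uint64_t offset;     /* guest ITC = host ITC + offset   */
        uint64_t last_itc;   /* last value handed to the guest  */
    };

    static uint64_t toy_now_itc(struct toy_vtime *vtm, uint64_t host_itc)
    {
        uint64_t guest = host_itc + vtm->offset;

        /* never report a value older than the guest has already seen */
        if ((int64_t)(guest - vtm->last_itc) > 0)
            vtm->last_itc = guest;
        return vtm->last_itc;
    }

    static void toy_set_itc(struct toy_vtime *vtm, uint64_t host_itc, uint64_t new_itc)
    {
        vtm->offset   = new_itc - host_itc;
        vtm->last_itc = new_itc;
    }
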
+ */ +void vtm_domain_in(VCPU *vcpu) +{ + vtime_t *vtm; + + if(!is_idle_task(vcpu->domain)) { + vtm=&(vcpu->arch.arch_vmx.vtm); + vtm_interruption_update(vcpu, vtm); + } +} + +/* + * Next for vLSapic + */ + +#define NMI_VECTOR 2 +#define ExtINT_VECTOR 0 +#define NULL_VECTOR -1 +#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.arch_vmx.in_service[i]) +static void update_vhpi(VCPU *vcpu, int vec) +{ + u64 vhpi; + if ( vec == NULL_VECTOR ) { + vhpi = 0; + } + else if ( vec == NMI_VECTOR ) { // NMI + vhpi = 32; + } else if (vec == ExtINT_VECTOR) { //ExtINT + vhpi = 16; + } + else { + vhpi = vec / 16; + } + + VMX_VPD(vcpu,vhpi) = vhpi; + // TODO: Add support for XENO + if ( VMX_VPD(vcpu,vac).a_int ) { + ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT, + (uint64_t) &(vcpu->arch.arch_vmx.vpd), 0, 0,0,0,0,0); + } +} + +#ifdef V_IOSAPIC_READY +void vlapic_update_shared_info(VCPU *vcpu) +{ + //int i; + + vl_apic_info *ps; + + if (vcpu->domain == dom0) + return; + + ps = get_psapic(vcpu); + ps->vl_lapic_id = ((VPD_CR(vcpu, lid) >> 16) & 0xffff) << 16; + printf("vl_lapic_id = %x\n", ps->vl_lapic_id); + ps->vl_apr = 0; + // skip ps->vl_logical_dest && ps->vl_dest_format + // IPF support physical destination mode only + ps->vl_arb_id = 0; + /* + for ( i=0; i<4; i++ ) { + ps->tmr[i] = 0; // edge trigger + } + */ +} + +void vlapic_update_ext_irq(VCPU *vcpu) +{ + int vec; + + vl_apic_info *ps = get_psapic(vcpu); + while ( (vec = highest_bits(ps->irr)) != NULL_VECTOR ) { + clear_bit (vec, ps->irr); + vmx_vcpu_pend_interrupt(vcpu, vec); + } +} +#endif + +void vlsapic_reset(VCPU *vcpu) +{ + int i; +#ifdef V_IOSAPIC_READY + vl_apic_info *psapic; // shared lapic inf. +#endif + + VPD_CR(vcpu, lid) = ia64_getreg(_IA64_REG_CR_LID); + VPD_CR(vcpu, ivr) = 0; + VPD_CR(vcpu,tpr) = 0x10000; + VPD_CR(vcpu, eoi) = 0; + VPD_CR(vcpu, irr[0]) = 0; + VPD_CR(vcpu, irr[1]) = 0; + VPD_CR(vcpu, irr[2]) = 0; + VPD_CR(vcpu, irr[3]) = 0; + VPD_CR(vcpu, pmv) = 0x10000; + VPD_CR(vcpu, cmcv) = 0x10000; + VPD_CR(vcpu, lrr0) = 0x10000; // default reset value? + VPD_CR(vcpu, lrr1) = 0x10000; // default reset value? + update_vhpi(vcpu, NULL_VECTOR); + for ( i=0; i<4; i++) { + VLSAPIC_INSVC(vcpu,i) = 0; + } +#ifdef V_IOSAPIC_READY + vlapic_update_shared_info(vcpu); + //vlapic_update_shared_irr(vcpu); +#endif + DPRINTK("VLSAPIC inservice base=%lp\n", &VLSAPIC_INSVC(vcpu,0) ); +} + +/* + * Find highest signaled bits in 4 words (long). + * + * return 0-255: highest bits. + * -1 : Not found. + */ +static __inline__ int highest_bits(uint64_t *dat) +{ + uint64_t bits, bitnum; + int i; + + /* loop for all 256 bits */ + for ( i=3; i >= 0 ; i -- ) { + bits = dat[i]; + if ( bits ) { + bitnum = ia64_fls(bits); + return i*64+bitnum; + } + } + return NULL_VECTOR; +} + +/* + * Return 0-255 for pending irq. + * NULL_VECTOR: when no pending. + */ +static int highest_pending_irq(VCPU *vcpu) +{ + if ( VPD_CR(vcpu, irr[0]) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR; + if ( VPD_CR(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR; + return highest_bits(&VPD_CR(vcpu, irr[0])); +} + +static int highest_inservice_irq(VCPU *vcpu) +{ + if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR; + if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR; + return highest_bits(&(VLSAPIC_INSVC(vcpu, 0))); +} + +/* + * The pending irq is higher than the inservice one. 
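
An equivalent of highest_bits() above written with a GCC builtin instead of ia64_fls(), for readers less familiar with the ia64 intrinsics: scan the four 64-bit IRR words from the top and return the highest set bit as a vector number, or -1 (NULL_VECTOR) when nothing is set.

    #include <stdint.h>

    static int toy_highest_bits(const uint64_t dat[4])
    {
        for (int i = 3; i >= 0; i--)
            if (dat[i])
                return i * 64 + (63 - __builtin_clzll(dat[i]));
        return -1;      /* NULL_VECTOR: nothing pending */
    }
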
+ * + */ +static int is_higher_irq(int pending, int inservice) +{ + return ( (pending >> 4) > (inservice>>4) || + ((pending != NULL_VECTOR) && (inservice == NULL_VECTOR)) ); +} + +static int is_higher_class(int pending, int mic) +{ + return ( (pending >> 4) > mic ); +} + +static int is_invalid_irq(int vec) +{ + return (vec == 1 || ((vec <= 14 && vec >= 3))); +} + +#define IRQ_NO_MASKED 0 +#define IRQ_MASKED_BY_VTPR 1 +#define IRQ_MASKED_BY_INSVC 2 // masked by inservice IRQ + +/* See Table 5-8 in SDM vol2 for the definition */ +static int +_xirq_masked(VCPU *vcpu, int h_pending, int h_inservice) +{ + tpr_t vtpr; + uint64_t mmi; + + vtpr.val = VPD_CR(vcpu, tpr); + + if ( h_inservice == NMI_VECTOR ) { + return IRQ_MASKED_BY_INSVC; + } + if ( h_pending == NMI_VECTOR ) { + // Non Maskable Interrupt + return IRQ_NO_MASKED; + } + if ( h_inservice == ExtINT_VECTOR ) { + return IRQ_MASKED_BY_INSVC; + } + mmi = vtpr.mmi; + if ( h_pending == ExtINT_VECTOR ) { + if ( mmi ) { + // mask all external IRQ + return IRQ_MASKED_BY_VTPR; + } + else { + return IRQ_NO_MASKED; + } + } + + if ( is_higher_irq(h_pending, h_inservice) ) { + if ( !mmi && is_higher_class(h_pending, vtpr.mic) ) { + return IRQ_NO_MASKED; + } + else { + return IRQ_MASKED_BY_VTPR; + } + } + else { + return IRQ_MASKED_BY_INSVC; + } +} + +static int irq_masked(VCPU *vcpu, int h_pending, int h_inservice) +{ + int mask; + + mask = _xirq_masked(vcpu, h_pending, h_inservice); + return mask; +} + + +/* + * May come from virtualization fault or + * nested host interrupt. + */ +void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector) +{ + uint64_t spsr; + + if (vector & ~0xff) { + DPRINTK("vmx_vcpu_pend_interrupt: bad vector\n"); + return; + } + local_irq_save(spsr); + VPD_CR(vcpu,irr[vector>>6]) |= 1UL<<(vector&63); + //vlapic_update_shared_irr(vcpu); + local_irq_restore(spsr); + vcpu->arch.irq_new_pending = 1; +} + +/* + * Add batch of pending interrupt. + * The interrupt source is contained in pend_irr[0-3] with + * each bits stand for one interrupt. + */ +void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, UINT64 *pend_irr) +{ + uint64_t spsr; + int i; + + local_irq_save(spsr); + for (i=0 ; i<4; i++ ) { + VPD_CR(vcpu,irr[i]) |= pend_irr[i]; + } + //vlapic_update_shared_irr(vcpu); + local_irq_restore(spsr); + vcpu->arch.irq_new_pending = 1; +} + +/* + * If the new pending interrupt is enabled and not masked, we directly inject + * it into the guest. Otherwise, we set the VHPI if vac.a_int=1 so that when + * the interrupt becomes unmasked, it gets injected. + * RETURN: + * TRUE: Interrupt is injected. + * FALSE: Not injected but may be in VHPI when vac.a_int=1 + * + * Optimization: We defer setting the VHPI until the EOI time, if a higher + * priority interrupt is in-service. The idea is to reduce the + * number of unnecessary calls to inject_vhpi. 
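
The masking decision made by _xirq_masked() above can be restated as the toy function below (vector constants as in this file: NMI = 2, ExtINT = 0, "none in service" = -1). It is a simplification of the same Table 5-8 logic, not a replacement for it.

    enum { TOY_NOT_MASKED, TOY_MASKED_BY_TPR, TOY_MASKED_BY_INSVC };

    static int toy_irq_masked(int pending, int inservice, int mmi, int mic)
    {
        if (inservice == 2)                 /* NMI in service masks everything */
            return TOY_MASKED_BY_INSVC;
        if (pending == 2)                   /* NMI is never masked             */
            return TOY_NOT_MASKED;
        if (inservice == 0)                 /* ExtINT in service               */
            return TOY_MASKED_BY_INSVC;
        if (pending == 0)                   /* ExtINT masked only by TPR.mmi   */
            return mmi ? TOY_MASKED_BY_TPR : TOY_NOT_MASKED;

        /* ordinary vectors compete on their 16-vector priority class */
        if ((pending >> 4) > (inservice >> 4) || inservice < 0) {
            if (!mmi && (pending >> 4) > mic)
                return TOY_NOT_MASKED;
            return TOY_MASKED_BY_TPR;
        }
        return TOY_MASKED_BY_INSVC;
    }
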
+ */ +int vmx_check_pending_irq(VCPU *vcpu) +{ + uint64_t spsr, mask; + int h_pending, h_inservice; + int injected=0; + uint64_t isr; + IA64_PSR vpsr; + + local_irq_save(spsr); + h_pending = highest_pending_irq(vcpu); + if ( h_pending == NULL_VECTOR ) goto chk_irq_exit; + h_inservice = highest_inservice_irq(vcpu); + + vpsr.val = vmx_vcpu_get_psr(vcpu); + mask = irq_masked(vcpu, h_pending, h_inservice); + if ( vpsr.i && IRQ_NO_MASKED == mask ) { + isr = vpsr.val & IA64_PSR_RI; + if ( !vpsr.ic ) + panic("Interrupt when IC=0\n"); + vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ + injected = 1; + } + else if ( mask == IRQ_MASKED_BY_INSVC ) { + // cann't inject VHPI +// DPRINTK("IRQ masked by higher inservice\n"); + } + else { + // masked by vpsr.i or vtpr. + update_vhpi(vcpu,h_pending); + } + +chk_irq_exit: + local_irq_restore(spsr); + return injected; +} + +/* + * Only coming from virtualization fault. + */ +void guest_write_eoi(VCPU *vcpu) +{ + int vec; + uint64_t spsr; + + vec = highest_inservice_irq(vcpu); + if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n"); + local_irq_save(spsr); + VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63)); + local_irq_restore(spsr); + VPD_CR(vcpu, eoi)=0; // overwrite the data + vmx_check_pending_irq(vcpu); +} + +uint64_t guest_read_vivr(VCPU *vcpu) +{ + int vec, next, h_inservice; + uint64_t spsr; + + local_irq_save(spsr); + vec = highest_pending_irq(vcpu); + h_inservice = highest_inservice_irq(vcpu); + if ( vec == NULL_VECTOR || + irq_masked(vcpu, vec, h_inservice) != IRQ_NO_MASKED ) { + local_irq_restore(spsr); + return IA64_SPURIOUS_INT_VECTOR; + } + + VLSAPIC_INSVC(vcpu,vec>>6) |= (1UL <<(vec&63)); + VPD_CR(vcpu, irr[vec>>6]) &= ~(1UL <<(vec&63)); + update_vhpi(vcpu, NULL_VECTOR); // clear VHPI till EOI or IRR write + //vlapic_update_shared_irr(vcpu); + local_irq_restore(spsr); + return (uint64_t)vec; +} + +static void generate_exirq(VCPU *vcpu) +{ + IA64_PSR vpsr; + uint64_t isr; + + vpsr.val = vmx_vcpu_get_psr(vcpu); + update_vhpi(vcpu, NULL_VECTOR); + isr = vpsr.val & IA64_PSR_RI; + if ( !vpsr.ic ) + panic("Interrupt when IC=0\n"); + vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ +} + +vhpi_detection(VCPU *vcpu) +{ + uint64_t threshold,vhpi; + tpr_t vtpr; + IA64_PSR vpsr; + + vpsr.val = vmx_vcpu_get_psr(vcpu); + vtpr.val = VPD_CR(vcpu, tpr); + + threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; + vhpi = VMX_VPD(vcpu,vhpi); + if ( vhpi > threshold ) { + // interrupt actived + generate_exirq (vcpu); + } +} + +vmx_vexirq(VCPU *vcpu) +{ + static uint64_t vexirq_count=0; + + vexirq_count ++; + printk("Virtual ex-irq %ld\n", vexirq_count); + generate_exirq (vcpu); +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmmu.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmmu.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,846 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmmu.c: virtual memory management unit components. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <asm/tlb.h> +#include <asm/gcc_intrin.h> +#include <asm/vcpu.h> +#include <linux/interrupt.h> +#include <asm/vmx_vcpu.h> +#include <asm/vmx_mm_def.h> +#include <asm/vmx.h> +#include <asm/hw_irq.h> +#include <asm/vmx_pal_vsa.h> +#include <asm/kregs.h> + +/* + * Architecture ppn is in 4KB unit while XEN + * page may be different(1<<PAGE_SHIFT). + */ +static inline u64 arch_ppn_to_xen_ppn(u64 appn) +{ + return (appn << ARCH_PAGE_SHIFT) >> PAGE_SHIFT; +} + +static inline u64 xen_ppn_to_arch_ppn(u64 xppn) +{ + return (xppn << PAGE_SHIFT) >> ARCH_PAGE_SHIFT; +} + + +/* + * Get the machine page frame number in 16KB unit + * Input: + * d: + */ +u64 get_mfn(domid_t domid, u64 gpfn, u64 pages) +{ + struct domain *d; + u64 i, xen_gppn, xen_mppn, mpfn; + + if ( domid == DOMID_SELF ) { + d = current->domain; + } + else { + d = find_domain_by_id(domid); + } + xen_gppn = arch_ppn_to_xen_ppn(gpfn); + xen_mppn = __gpfn_to_mfn(d, xen_gppn); +/* + for (i=0; i<pages; i++) { + if ( __gpfn_to_mfn(d, gpfn+i) == INVALID_MFN ) { + return INVALID_MFN; + } + } +*/ + mpfn= xen_ppn_to_arch_ppn(xen_mppn); + mpfn = mpfn | (((1UL <<(PAGE_SHIFT-12))-1)&gpfn); + return mpfn; + +} + +/* + * The VRN bits of va stand for which rr to get. + */ +ia64_rr vmmu_get_rr(VCPU *vcpu, u64 va) +{ + ia64_rr vrr; + vmx_vcpu_get_rr(vcpu, va, &vrr.rrval); + return vrr; +} + + +void recycle_message(thash_cb_t *hcb, u64 para) +{ + printk("hcb=%p recycled with %lx\n",hcb,para); +} + + +/* + * Purge all guest TCs in logical processor. + * Instead of purging all LP TCs, we should only purge + * TCs that belong to this guest. + */ +void +purge_machine_tc_by_domid(domid_t domid) +{ +#ifndef PURGE_GUEST_TC_ONLY + // purge all TCs + struct ia64_pal_retval result; + u64 addr; + u32 count1,count2; + u32 stride1,stride2; + u32 i,j; + u64 psr; + + + result = ia64_pal_call_static(PAL_PTCE_INFO,0,0,0, 0); + if ( result.status != 0 ) { + panic ("PAL_PTCE_INFO failed\n"); + } + addr = result.v0; + count1 = HIGH_32BITS(result.v1); + count2 = LOW_32BITS (result.v1); + stride1 = HIGH_32BITS(result.v2); + stride2 = LOW_32BITS (result.v2); + + local_irq_save(psr); + for (i=0; i<count1; i++) { + for (j=0; j<count2; j++) { + ia64_ptce(addr); + addr += stride2; + } + addr += stride1; + } + local_irq_restore(psr); +#else + // purge all TCs belong to this guest. 
+#endif +} + +static thash_cb_t *init_domain_vhpt(struct vcpu *d) +{ + struct pfn_info *page; + void *vbase,*vcur; + vhpt_special *vs; + thash_cb_t *vhpt; + PTA pta_value; + + page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0); + if ( page == NULL ) { + panic("No enough contiguous memory for init_domain_mm\n"); + } + vbase = page_to_virt(page); + printk("Allocate domain vhpt at 0x%lx\n", (u64)vbase); + memset(vbase, 0, VCPU_TLB_SIZE); + vcur = (void*)((u64)vbase + VCPU_TLB_SIZE); + vhpt = --((thash_cb_t*)vcur); + vhpt->ht = THASH_VHPT; + vhpt->vcpu = d; + vhpt->hash_func = machine_thash; + vs = --((vhpt_special *)vcur); + + /* Setup guest pta */ + pta_value.val = 0; + pta_value.ve = 1; + pta_value.vf = 1; + pta_value.size = VCPU_TLB_SHIFT - 1; /* 2M */ + pta_value.base = ((u64)vbase) >> PTA_BASE_SHIFT; + d->arch.arch_vmx.mpta = pta_value.val; + + vhpt->vs = vs; + vhpt->vs->get_mfn = get_mfn; + vhpt->vs->tag_func = machine_ttag; + vhpt->hash = vbase; + vhpt->hash_sz = VCPU_TLB_SIZE/2; + vhpt->cch_buf = (u64)vbase + vhpt->hash_sz; + vhpt->cch_sz = (u64)vcur - (u64)vhpt->cch_buf; + vhpt->recycle_notifier = recycle_message; + thash_init(vhpt,VCPU_TLB_SHIFT-1); + return vhpt; +} + + +thash_cb_t *init_domain_tlb(struct vcpu *d) +{ + struct pfn_info *page; + void *vbase,*vcur; + tlb_special_t *ts; + thash_cb_t *tlb; + + page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0); + if ( page == NULL ) { + panic("No enough contiguous memory for init_domain_mm\n"); + } + vbase = page_to_virt(page); + printk("Allocate domain tlb at 0x%lx\n", (u64)vbase); + memset(vbase, 0, VCPU_TLB_SIZE); + vcur = (void*)((u64)vbase + VCPU_TLB_SIZE); + tlb = --((thash_cb_t*)vcur); + tlb->ht = THASH_TLB; + tlb->vcpu = d; + ts = --((tlb_special_t *)vcur); + tlb->ts = ts; + tlb->ts->vhpt = init_domain_vhpt(d); + tlb->hash_func = machine_thash; + tlb->hash = vbase; + tlb->hash_sz = VCPU_TLB_SIZE/2; + tlb->cch_buf = (u64)vbase + tlb->hash_sz; + tlb->cch_sz = (u64)vcur - (u64)tlb->cch_buf; + tlb->recycle_notifier = recycle_message; + thash_init(tlb,VCPU_TLB_SHIFT-1); + return tlb; +} + +/* Allocate physical to machine mapping table for domN + * FIXME: Later this interface may be removed, if that table is provided + * by control panel. Dom0 has gpfn identical to mfn, which doesn't need + * this interface at all. + */ +void +alloc_pmt(struct domain *d) +{ + struct pfn_info *page; + + /* Only called once */ + ASSERT(d->arch.pmt); + + page = alloc_domheap_pages(NULL, get_order(d->max_pages), 0); + ASSERT(page); + + d->arch.pmt = page_to_virt(page); + memset(d->arch.pmt, 0x55, d->max_pages * 8); +} + +/* + * Insert guest TLB to machine TLB. 
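
The memory layout set up by init_domain_vhpt()/init_domain_tlb() above, sketched with invented names: one contiguous allocation holds the hash table in its lower half, the collision-chain buffer above that, and the control structure(s) carved from the very top of the block.

    #include <stdint.h>
    #include <stdlib.h>

    struct toy_thash {
        void    *hash;       /* hash table (lower half of the block) */
        uint64_t hash_sz;
        void    *cch_buf;    /* collision-chain buffer               */
        uint64_t cch_sz;
    };

    static struct toy_thash *toy_thash_alloc(size_t size)
    {
        char *base = calloc(1, size);
        char *top;
        struct toy_thash *t;

        if (base == NULL)
            return NULL;
        top = base + size;
        t = (struct toy_thash *)top - 1;        /* carve from the top */

        t->hash    = base;
        t->hash_sz = size / 2;
        t->cch_buf = base + t->hash_sz;
        t->cch_sz  = (uint64_t)((char *)t - (char *)t->cch_buf);
        return t;
    }
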
+ * data: In TLB format + */ +void machine_tlb_insert(struct vcpu *d, thash_data_t *tlb) +{ + u64 saved_itir, saved_ifa, saved_rr; + u64 pages; + thash_data_t mtlb; + ia64_rr vrr; + unsigned int cl = tlb->cl; + + mtlb.ifa = tlb->vadr; + mtlb.itir = tlb->itir & ~ITIR_RV_MASK; + vrr = vmmu_get_rr(d,mtlb.ifa); + //vmx_vcpu_get_rr(d, mtlb.ifa, &vrr.value); + pages = PSIZE(vrr.ps) >> PAGE_SHIFT; + mtlb.page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK; + mtlb.ppn = get_mfn(DOMID_SELF,tlb->ppn, pages); + if (mtlb.ppn == INVALID_MFN) + panic("Machine tlb insert with invalid mfn number.\n"); + + __asm __volatile("rsm psr.ic|psr.i;; srlz.i" ); + + saved_itir = ia64_getreg(_IA64_REG_CR_ITIR); + saved_ifa = ia64_getreg(_IA64_REG_CR_IFA); + saved_rr = ia64_get_rr(mtlb.ifa); + + ia64_setreg(_IA64_REG_CR_ITIR, mtlb.itir); + ia64_setreg(_IA64_REG_CR_IFA, mtlb.ifa); + /* Only access memory stack which is mapped by TR, + * after rr is switched. + */ + ia64_set_rr(mtlb.ifa, vmx_vrrtomrr(d, vrr.rrval)); + ia64_srlz_d(); + if ( cl == ISIDE_TLB ) { + ia64_itci(mtlb.page_flags); + ia64_srlz_i(); + } + else { + ia64_itcd(mtlb.page_flags); + ia64_srlz_d(); + } + ia64_set_rr(mtlb.ifa,saved_rr); + ia64_srlz_d(); + ia64_setreg(_IA64_REG_CR_IFA, saved_ifa); + ia64_setreg(_IA64_REG_CR_ITIR, saved_itir); + __asm __volatile("ssm psr.ic|psr.i;; srlz.i" ); +} + +u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps) +{ + u64 saved_pta, saved_rr0; + u64 hash_addr, tag; + unsigned long psr; + struct vcpu *v = current; + ia64_rr vrr; + + + saved_pta = ia64_getreg(_IA64_REG_CR_PTA); + saved_rr0 = ia64_get_rr(0); + vrr.rrval = saved_rr0; + vrr.rid = rid; + vrr.ps = ps; + + va = (va << 3) >> 3; // set VRN to 0. + // TODO: Set to enforce lazy mode + local_irq_save(psr); + ia64_setreg(_IA64_REG_CR_PTA, pta.val); + ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval)); + ia64_srlz_d(); + + hash_addr = ia64_thash(va); + ia64_setreg(_IA64_REG_CR_PTA, saved_pta); + + ia64_set_rr(0, saved_rr0); + ia64_srlz_d(); + local_irq_restore(psr); + return hash_addr; +} + +u64 machine_ttag(PTA pta, u64 va, u64 rid, u64 ps) +{ + u64 saved_pta, saved_rr0; + u64 hash_addr, tag; + u64 psr; + struct vcpu *v = current; + ia64_rr vrr; + + // TODO: Set to enforce lazy mode + saved_pta = ia64_getreg(_IA64_REG_CR_PTA); + saved_rr0 = ia64_get_rr(0); + vrr.rrval = saved_rr0; + vrr.rid = rid; + vrr.ps = ps; + + va = (va << 3) >> 3; // set VRN to 0. + local_irq_save(psr); + ia64_setreg(_IA64_REG_CR_PTA, pta.val); + ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval)); + ia64_srlz_d(); + + tag = ia64_ttag(va); + ia64_setreg(_IA64_REG_CR_PTA, saved_pta); + + ia64_set_rr(0, saved_rr0); + ia64_srlz_d(); + local_irq_restore(psr); + return tag; +} + +/* + * Purge machine tlb. + * INPUT + * rr: guest rr. + * va: only bits 0:60 is valid + * size: bits format (1<<size) for the address range to purge. + * + */ +void machine_tlb_purge(u64 rid, u64 va, u64 ps) +{ + u64 saved_rr0; + u64 psr; + ia64_rr vrr; + + va = (va << 3) >> 3; // set VRN to 0. 
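+    /* Note: VRN was cleared above because the purge goes through region 0:
+     * the guest's rid/ps are loaded into rr[0] below, ptc.l is issued for
+     * the aligned range, and rr[0] is then restored. */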
+ saved_rr0 = ia64_get_rr(0); + vrr.rrval = saved_rr0; + vrr.rid = rid; + vrr.ps = ps; + local_irq_save(psr); + ia64_set_rr( 0, vmx_vrrtomrr(current,vrr.rrval) ); + ia64_srlz_d(); + ia64_ptcl(va, ps << 2); + ia64_set_rr( 0, saved_rr0 ); + ia64_srlz_d(); + local_irq_restore(psr); +} + + +int vhpt_enabled(VCPU *vcpu, uint64_t vadr, vhpt_ref_t ref) +{ + ia64_rr vrr; + PTA vpta; + IA64_PSR vpsr; + + vpsr.val = vmx_vcpu_get_psr(vcpu); + vrr = vmx_vcpu_rr(vcpu, vadr); + vmx_vcpu_get_pta(vcpu,&vpta.val); + + if ( vrr.ve & vpta.ve ) { + switch ( ref ) { + case DATA_REF: + case NA_REF: + return vpsr.dt; + case INST_REF: + return vpsr.dt && vpsr.it && vpsr.ic; + case RSE_REF: + return vpsr.dt && vpsr.rt; + + } + } + return 0; +} + + +int unimplemented_gva(VCPU *vcpu,u64 vadr) +{ + int bit=vcpu->domain->arch.imp_va_msb; + u64 ladr =(vadr<<3)>>(3+bit); + if(!ladr||ladr==(1U<<(61-bit))-1){ + return 0; + }else{ + return 1; + } +} + + +/* + * Prefetch guest bundle code. + * INPUT: + * code: buffer pointer to hold the read data. + * num: number of dword (8byts) to read. + */ +int +fetch_code(VCPU *vcpu, u64 gip, u64 *code) +{ + u64 gpip; // guest physical IP + u64 mpa; + thash_data_t *tlb; + ia64_rr vrr; + u64 mfn; + + if ( !(VMX_VPD(vcpu, vpsr) & IA64_PSR_IT) ) { // I-side physical mode + gpip = gip; + } + else { + vmx_vcpu_get_rr(vcpu, gip, &vrr.rrval); + tlb = vtlb_lookup_ex (vmx_vcpu_get_vtlb(vcpu), + vrr.rid, gip, ISIDE_TLB ); + if ( tlb == NULL ) panic("No entry found in ITLB\n"); + gpip = (tlb->ppn << 12) | ( gip & (PSIZE(tlb->ps)-1) ); + } + mfn = __gpfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT); + if ( mfn == INVALID_MFN ) return 0; + + mpa = (gpip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT); + *code = *(u64*)__va(mpa); + return 1; +} + +IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) +{ + + thash_data_t data, *ovl; + thash_cb_t *hcb; + search_section_t sections; + ia64_rr vrr; + + hcb = vmx_vcpu_get_vtlb(vcpu); + data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; + data.itir=itir; + data.vadr=PAGEALIGN(ifa,data.ps); + data.tc = 1; + data.cl=ISIDE_TLB; + vmx_vcpu_get_rr(vcpu, ifa, &vrr); + data.rid = vrr.rid; + + sections.tr = 1; + sections.tc = 0; + + ovl = thash_find_overlap(hcb, &data, sections); + while (ovl) { + // generate MCA. + panic("Tlb conflict!!"); + return; + } + thash_purge_and_insert(hcb, &data); + return IA64_NO_FAULT; +} + + + + +IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) +{ + + thash_data_t data, *ovl; + thash_cb_t *hcb; + search_section_t sections; + ia64_rr vrr; + + hcb = vmx_vcpu_get_vtlb(vcpu); + data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; + data.itir=itir; + data.vadr=PAGEALIGN(ifa,data.ps); + data.tc = 1; + data.cl=DSIDE_TLB; + vmx_vcpu_get_rr(vcpu, ifa, &vrr); + data.rid = vrr.rid; + sections.tr = 1; + sections.tc = 0; + + ovl = thash_find_overlap(hcb, &data, sections); + if (ovl) { + // generate MCA. 
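+        // (an inserted TC that collides with an existing TR mapping cannot be
+        //  honoured transparently; the intent is to reflect an MCA to the
+        //  guest, but for now the conflict is simply fatal)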
+ panic("Tlb conflict!!"); + return; + } + thash_purge_and_insert(hcb, &data); + return IA64_NO_FAULT; +} + +/* + * Return TRUE/FALSE for success of lock operation + */ +int vmx_lock_guest_dtc (VCPU *vcpu, UINT64 va, int lock) +{ + + thash_cb_t *hcb; + ia64_rr vrr; + u64 preferred_size; + + vmx_vcpu_get_rr(vcpu, va, &vrr); + hcb = vmx_vcpu_get_vtlb(vcpu); + va = PAGEALIGN(va,vrr.ps); + preferred_size = PSIZE(vrr.ps); + return thash_lock_tc(hcb, va, preferred_size, vrr.rid, DSIDE_TLB, lock); +} + +IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx) +{ + + thash_data_t data, *ovl; + thash_cb_t *hcb; + search_section_t sections; + ia64_rr vrr; + + hcb = vmx_vcpu_get_vtlb(vcpu); + data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; + data.itir=itir; + data.vadr=PAGEALIGN(ifa,data.ps); + data.tc = 0; + data.cl=ISIDE_TLB; + vmx_vcpu_get_rr(vcpu, ifa, &vrr); + data.rid = vrr.rid; + sections.tr = 1; + sections.tc = 0; + + ovl = thash_find_overlap(hcb, &data, sections); + if (ovl) { + // generate MCA. + panic("Tlb conflict!!"); + return; + } + sections.tr = 0; + sections.tc = 1; + thash_purge_entries(hcb, &data, sections); + thash_tr_insert(hcb, &data, ifa, idx); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx) +{ + + thash_data_t data, *ovl; + thash_cb_t *hcb; + search_section_t sections; + ia64_rr vrr; + + + hcb = vmx_vcpu_get_vtlb(vcpu); + data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; + data.itir=itir; + data.vadr=PAGEALIGN(ifa,data.ps); + data.tc = 0; + data.cl=DSIDE_TLB; + vmx_vcpu_get_rr(vcpu, ifa, &vrr); + data.rid = vrr.rid; + sections.tr = 1; + sections.tc = 0; + + ovl = thash_find_overlap(hcb, &data, sections); + while (ovl) { + // generate MCA. + panic("Tlb conflict!!"); + return; + } + sections.tr = 0; + sections.tc = 1; + thash_purge_entries(hcb, &data, sections); + thash_tr_insert(hcb, &data, ifa, idx); + return IA64_NO_FAULT; +} + + + +IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps) +{ + thash_cb_t *hcb; + ia64_rr rr; + search_section_t sections; + + hcb = vmx_vcpu_get_vtlb(vcpu); + rr=vmx_vcpu_rr(vcpu,vadr); + sections.tr = 1; + sections.tc = 1; + thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,DSIDE_TLB); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps) +{ + thash_cb_t *hcb; + ia64_rr rr; + search_section_t sections; + hcb = vmx_vcpu_get_vtlb(vcpu); + rr=vmx_vcpu_rr(vcpu,vadr); + sections.tr = 1; + sections.tc = 1; + thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,ISIDE_TLB); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps) +{ + thash_cb_t *hcb; + ia64_rr vrr; + search_section_t sections; + thash_data_t data, *ovl; + hcb = vmx_vcpu_get_vtlb(vcpu); + vrr=vmx_vcpu_rr(vcpu,vadr); + sections.tr = 0; + sections.tc = 1; + vadr = PAGEALIGN(vadr, ps); + + thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,DSIDE_TLB); + thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,ISIDE_TLB); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr) +{ + thash_cb_t *hcb; + hcb = vmx_vcpu_get_vtlb(vcpu); + thash_purge_all(hcb); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps) +{ + vmx_vcpu_ptc_l(vcpu, vadr, ps); + return IA64_ILLOP_FAULT; +} + +IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 ps) +{ + vmx_vcpu_ptc_l(vcpu, vadr, ps); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval) +{ + PTA vpta; + 
ia64_rr vrr; + u64 vhpt_offset,tmp; + vmx_vcpu_get_pta(vcpu, &vpta.val); + vrr=vmx_vcpu_rr(vcpu, vadr); + if(vpta.vf){ + panic("THASH,Don't support long format VHPT"); + *pval = ia64_call_vsa(PAL_VPS_THASH,vadr,vrr.rrval,vpta.val,0,0,0,0); + }else{ + vhpt_offset=((vadr>>vrr.ps)<<3)&((1UL<<(vpta.size))-1); + *pval = (vadr&VRN_MASK)| + (vpta.val<<3>>(vpta.size+3)<<(vpta.size))| + vhpt_offset; + } + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *pval) +{ + ia64_rr vrr; + PTA vpta; + vmx_vcpu_get_pta(vcpu, &vpta.val); + vrr=vmx_vcpu_rr(vcpu, vadr); + if(vpta.vf){ + panic("THASH,Don't support long format VHPT"); + *pval = ia64_call_vsa(PAL_VPS_TTAG,vadr,vrr.rrval,0,0,0,0,0); + }else{ + *pval = 1; + } + return IA64_NO_FAULT; +} + + + +IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr) +{ + thash_data_t *data; + thash_cb_t *hcb; + ia64_rr vrr; + ISR visr,pt_isr; + REGS *regs; + u64 vhpt_adr; + IA64_PSR vpsr; + hcb = vmx_vcpu_get_vtlb(vcpu); + vrr=vmx_vcpu_rr(vcpu,vadr); + regs=vcpu_regs(vcpu); + pt_isr.val=regs->cr_isr; + visr.val=0; + visr.ei=pt_isr.ei; + visr.ir=pt_isr.ir; + vpsr.val = vmx_vcpu_get_psr(vcpu); + if(vpsr.ic==0){ + visr.ni=1; + } + visr.na=1; + data = vtlb_lookup_ex(hcb, vrr.rid, vadr, DSIDE_TLB); + if(data){ + if(data->p==0){ + visr.na=1; + vmx_vcpu_set_isr(vcpu,visr.val); + page_not_present(vcpu, vadr); + return IA64_FAULT; + }else if(data->ma == VA_MATTR_NATPAGE){ + visr.na = 1; + vmx_vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + }else{ + *padr = (data->ppn<<12) | (vadr&(PSIZE(data->ps)-1)); + return IA64_NO_FAULT; + } + }else{ + if(!vhpt_enabled(vcpu, vadr, NA_REF)){ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, visr.val); + alt_dtlb(vcpu, vadr); + return IA64_FAULT; + } + else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + else{ + vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); + vrr=vmx_vcpu_rr(vcpu,vhpt_adr); + data = vtlb_lookup_ex(hcb, vrr.rid, vhpt_adr, DSIDE_TLB); + if(data){ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, visr.val); + dtlb_fault(vcpu, vadr); + return IA64_FAULT; + } + else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + else{ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, visr.val); + dvhpt_fault(vcpu, vadr); + return IA64_FAULT; + } + else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + } + } +} + +IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key) +{ + thash_data_t *data; + thash_cb_t *hcb; + ia64_rr rr; + PTA vpta; + vmx_vcpu_get_pta(vcpu, &vpta.val); + if(vpta.vf==0 || unimplemented_gva(vcpu, vadr)){ + *key=1; + return IA64_NO_FAULT; + } + hcb = vmx_vcpu_get_vtlb(vcpu); + rr=vmx_vcpu_rr(vcpu,vadr); + data = vtlb_lookup_ex(hcb, rr.rid, vadr, DSIDE_TLB); + if(!data||!data->p){ + *key=1; + }else{ + *key=data->key; + } + return IA64_NO_FAULT; +} + +/* + * [FIXME] Is there any effective way to move this routine + * into vmx_uaccess.h? struct exec_domain is incomplete type + * in that way... + * + * This is the interface to lookup virtual TLB, and then + * return corresponding machine address in 2nd parameter. + * The 3rd parameter contains how many bytes mapped by + * matched vTLB entry, thus to allow caller copy more once. + * + * If failed to lookup, -EFAULT is returned. Or else reutrn + * 0. All upper domain access utilities rely on this routine + * to determine the real machine address. + * + * Yes, put_user and get_user seems to somhow slow upon it. 
+ * However it's the necessary steps for any vmx domain virtual + * address, since that's difference address space as HV's one. + * Later some short-circuit may be created for special case + */ +long +__domain_va_to_ma(unsigned long va, unsigned long* ma, unsigned long *len) +{ + unsigned long mpfn, gpfn, m, n = *len; + thash_cb_t *vtlb; + unsigned long end; /* end of the area mapped by current entry */ + thash_data_t *entry; + struct vcpu *v = current; + ia64_rr vrr; + + vtlb = vmx_vcpu_get_vtlb(v); + vrr = vmx_vcpu_rr(v, va); + entry = vtlb_lookup_ex(vtlb, vrr.rid, va, DSIDE_TLB); + if (entry == NULL) + return -EFAULT; + + gpfn =(entry->ppn>>(PAGE_SHIFT-12)); + gpfn =PAGEALIGN(gpfn,(entry->ps-PAGE_SHIFT)); + gpfn = gpfn | POFFSET(va>>PAGE_SHIFT,(entry->ps-PAGE_SHIFT)); + + mpfn = __gpfn_to_mfn(v->domain, gpfn); + m = (mpfn<<PAGE_SHIFT) | (va & (PAGE_SIZE - 1)); + /* machine address may be not continuous */ + end = PAGEALIGN(m, PAGE_SHIFT) + PAGE_SIZE; + /*end = PAGEALIGN(m, entry->ps) + PSIZE(entry->ps);*/ + /* Current entry can't map all requested area */ + if ((m + n) > end) + n = end - m; + + *ma = m; + *len = n; + return 0; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_entry.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_entry.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,611 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_entry.S: + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx) + * Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx) + */ + +#ifndef VCPU_TLB_SHIFT +#define VCPU_TLB_SHIFT 22 +#endif +#include <linux/config.h> +#include <asm/asmmacro.h> +#include <asm/cache.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/percpu.h> +#include <asm/processor.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> + +#include "vmx_minstate.h" + +/* + * prev_task <- vmx_ia64_switch_to(struct task_struct *next) + * With Ingo's new scheduler, interrupts are disabled when this routine gets + * called. The code starting at .map relies on this. The rest of the code + * doesn't care about the interrupt masking status. + * + * Since we allocate domain stack in xenheap, there's no need to map new + * domain's stack since all xenheap is mapped by TR. Another different task + * for vmx_ia64_switch_to is to switch to bank0 and change current pointer. + */ +GLOBAL_ENTRY(vmx_ia64_switch_to) + .prologue + alloc r16=ar.pfs,1,0,0,0 + DO_SAVE_SWITCH_STACK + .body + + bsw.0 // Switch to bank0, because bank0 r21 is current pointer + ;; + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + movl r25=init_task + adds r26=IA64_TASK_THREAD_KSP_OFFSET,in0 + ;; + st8 [r22]=sp // save kernel stack pointer of old task + ;; + /* + * TR always mapped this task's page, we can skip doing it again. 
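+ * In short: save the old task's ksp, load the new one's, and repoint
+ * r21/r13 at the new "current"; no TLB insertion is needed because
+ * xenheap stacks are already covered by the TR mapping.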
+ */ + ld8 sp=[r26] // load kernel stack pointer of new task + mov r21=in0 // update "current" application register + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer + ;; + bsw.1 + ;; + DO_LOAD_SWITCH_STACK + +#ifdef CONFIG_SMP + sync.i // ensure "fc"s done by this CPU are visible on other CPUs +#endif + br.ret.sptk.many rp // boogie on out in new context +END(vmx_ia64_switch_to) + +GLOBAL_ENTRY(ia64_leave_nested) + rsm psr.i + ;; + adds r21=PT(PR)+16,r12 + ;; + + lfetch [r21],PT(CR_IPSR)-PT(PR) + adds r2=PT(B6)+16,r12 + adds r3=PT(R16)+16,r12 + ;; + lfetch [r21] + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + + ld8.fill r16=[r3] + adds r3=PT(AR_CSD)-PT(R16),r3 + adds r30=PT(AR_CCV)+16,r12 + ;; + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv + ;; + ld8 r29=[r2],16 // load b7 + ld8 r30=[r3],16 // load ar.csd + ;; + ld8 r31=[r2],16 // load ar.ssd + ld8.fill r8=[r3],16 + ;; + ld8.fill r9=[r2],16 + ld8.fill r10=[r3],PT(R17)-PT(R10) + ;; + ld8.fill r11=[r2],PT(R18)-PT(R11) + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + ld8.fill r22=[r2],24 + ld8.fill r23=[r3],24 + mov b6=r28 + ;; + ld8.fill r25=[r2],16 + ld8.fill r26=[r3],16 + mov b7=r29 + ;; + ld8.fill r27=[r2],16 + ld8.fill r28=[r3],16 + ;; + ld8.fill r29=[r2],16 + ld8.fill r30=[r3],24 + ;; + ld8.fill r31=[r2],PT(F9)-PT(R31) + adds r3=PT(F10)-PT(F6),r3 + ;; + ldf.fill f9=[r2],PT(F6)-PT(F9) + ldf.fill f10=[r3],PT(F8)-PT(F10) + ;; + ldf.fill f6=[r2],PT(F7)-PT(F6) + ;; + ldf.fill f7=[r2],PT(F11)-PT(F7) + ldf.fill f8=[r3],32 + ;; + srlz.i // ensure interruption collection is off + mov ar.ccv=r15 + ;; + bsw.0 // switch back to bank 0 (no stop bit required beforehand...) + ;; + ldf.fill f11=[r2] +// mov r18=r13 +// mov r21=r13 + adds r16=PT(CR_IPSR)+16,r12 + adds r17=PT(CR_IIP)+16,r12 + ;; + ld8 r29=[r16],16 // load cr.ipsr + ld8 r28=[r17],16 // load cr.iip + ;; + ld8 r30=[r16],16 // load cr.ifs + ld8 r25=[r17],16 // load ar.unat + ;; + ld8 r26=[r16],16 // load ar.pfs + ld8 r27=[r17],16 // load ar.rsc + cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r24=[r16],16 // load ar.rnat (may be garbage) + ld8 r23=[r17],16// load ar.bspstore (may be garbage) + ;; + ld8 r31=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r19=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 + ;; + ld8 r20=[r16],16 // ar.fpsr + ld8.fill r15=[r17],16 + ;; + ld8.fill r14=[r16],16 + ld8.fill r2=[r17] + ;; + ld8.fill r3=[r16] + ;; + mov r16=ar.bsp // get existing backing store pointer + ;; + mov b0=r22 + mov ar.pfs=r26 + mov cr.ifs=r30 + mov cr.ipsr=r29 + mov ar.fpsr=r20 + mov cr.iip=r28 + ;; + mov ar.rsc=r27 + mov ar.unat=r25 + mov pr=r31,-1 + rfi +END(ia64_leave_nested) + + + +GLOBAL_ENTRY(ia64_leave_hypervisor) + PT_REGS_UNWIND_INFO(0) + /* + * work.need_resched etc. 
mustn't get changed by this CPU before it returns to + ;; + * user- or fsys-mode, hence we disable interrupts early on: + */ + rsm psr.i + ;; + alloc loc0=ar.pfs,0,1,1,0 + adds out0=16,r12 + ;; + br.call.sptk.many b0=leave_hypervisor_tail + mov ar.pfs=loc0 + adds r8=IA64_VPD_BASE_OFFSET,r13 + ;; + ld8 r8=[r8] + ;; + adds r9=VPD(VPSR),r8 + ;; + ld8 r9=[r9] + ;; + tbit.z pBN0,pBN1=r9,IA64_PSR_BN_BIT + ;; +(pBN0) add r7=VPD(VBNAT),r8; +(pBN1) add r7=VPD(VNAT),r8; + ;; + ld8 r7=[r7] + ;; + mov ar.unat=r7 +(pBN0) add r4=VPD(VBGR),r8; +(pBN1) add r4=VPD(VGR),r8; +(pBN0) add r5=VPD(VBGR)+0x8,r8; +(pBN1) add r5=VPD(VGR)+0x8,r8; + ;; + ld8.fill r16=[r4],16 + ld8.fill r17=[r5],16 + ;; + ld8.fill r18=[r4],16 + ld8.fill r19=[r5],16 + ;; + ld8.fill r20=[r4],16 + ld8.fill r21=[r5],16 + ;; + ld8.fill r22=[r4],16 + ld8.fill r23=[r5],16 + ;; + ld8.fill r24=[r4],16 + ld8.fill r25=[r5],16 + ;; + ld8.fill r26=[r4],16 + ld8.fill r27=[r5],16 + ;; + ld8.fill r28=[r4],16 + ld8.fill r29=[r5],16 + ;; + ld8.fill r30=[r4],16 + ld8.fill r31=[r5],16 + ;; + bsw.0 + ;; + mov r18=r8 //vpd + mov r19=r9 //vpsr + adds r20=PT(PR)+16,r12 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r16=PT(B6)+16,r12 + adds r17=PT(B7)+16,r12 + ;; + lfetch [r20] + mov r21=r13 // get current + ;; + ld8 r30=[r16],16 // load b6 + ld8 r31=[r17],16 // load b7 + add r20=PT(EML_UNAT)+16,r12 + ;; + ld8 r29=[r20] //load ar_unat + mov b6=r30 + mov b7=r31 + ld8 r30=[r16],16 //load ar_csd + ld8 r31=[r17],16 //load ar_ssd + ;; + mov ar.unat=r29 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + ld8.fill r8=[r16],16 //load r8 + ld8.fill r9=[r17],16 //load r9 + ;; + ld8.fill r10=[r16],PT(R1)-PT(R10) //load r10 + ld8.fill r11=[r17],PT(R12)-PT(R11) //load r11 + ;; + ld8.fill r1=[r16],16 //load r1 + ld8.fill r12=[r17],16 //load r12 + ;; + ld8.fill r13=[r16],16 //load r13 + ld8 r30=[r17],16 //load ar_fpsr + ;; + ld8.fill r15=[r16],16 //load r15 + ld8.fill r14=[r17],16 //load r14 + mov ar.fpsr=r30 + ;; + ld8.fill r2=[r16],16 //load r2 + ld8.fill r3=[r17],16 //load r3 + ;; +/* +(pEml) ld8.fill r4=[r16],16 //load r4 +(pEml) ld8.fill r5=[r17],16 //load r5 + ;; +(pEml) ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6 +(pEml) ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7 + ;; +(pNonEml) adds r16=PT(AR_CCV)-PT(R4),r16 +(pNonEml) adds r17=PT(F7)-PT(R5),r17 + ;; +*/ + ld8.fill r4=[r16],16 //load r4 + ld8.fill r5=[r17],16 //load r5 + ;; + ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6 + ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7 + ;; + + ld8 r30=[r16],PT(F6)-PT(AR_CCV) + rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection + ;; + srlz.i // ensure interruption collection is off + ;; + invala // invalidate ALAT + ;; + ldf.fill f6=[r16],32 + ldf.fill f7=[r17],32 + ;; + ldf.fill f8=[r16],32 + ldf.fill f9=[r17],32 + ;; + ldf.fill f10=[r16] + ldf.fill f11=[r17] + ;; + mov ar.ccv=r30 + adds r16=PT(CR_IPSR)-PT(F10),r16 + adds r17=PT(CR_IIP)-PT(F11),r17 + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat (may be garbage) + ld8 r24=[r17],16// load ar.bspstore (may be garbage) + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],PT(RFI_PFS)-PT(B0) // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ;; +//rbs_switch + // loadrs has already been shifted + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + mov ar.rsc=r20 + ;; + 
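+	// Roughly: the ar.rsc value written above carries the pre-shifted
+	// loadrs field, and the "loadrs" below drains/invalidates the
+	// stacked registers accordingly, so that ar.bspstore and ar.rnat
+	// can then be switched back without picking up stale kernel
+	// backing-store data.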
loadrs + ;; + mov ar.bspstore=r24 + ;; + ld8 r24=[r17] //load rfi_pfs + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + cmp.ne p6,p0=r24,r0 +(p6)br.sptk vmx_dorfirfi + ;; +vmx_dorfirfi_back: + mov ar.pfs=r27 + +//vsa_sync_write_start + movl r20=__vsa_base + ;; + ld8 r20=[r20] // read entry point + mov r25=r18 + ;; + add r16=PAL_VPS_SYNC_WRITE,r20 + movl r24=switch_rr7 // calculate return address + ;; + mov b0=r16 + br.cond.sptk b0 // call the service + ;; +// switch rr7 and rr5 +switch_rr7: + adds r24=SWITCH_MRR5_OFFSET, r21 + adds r26=SWITCH_MRR6_OFFSET, r21 + adds r16=SWITCH_MRR7_OFFSET ,r21 + movl r25=(5<<61) + movl r27=(6<<61) + movl r17=(7<<61) + ;; + ld8 r24=[r24] + ld8 r26=[r26] + ld8 r16=[r16] + ;; + mov rr[r25]=r24 + mov rr[r27]=r26 + mov rr[r17]=r16 + ;; + srlz.i + ;; + add r24=SWITCH_MPTA_OFFSET, r21 + ;; + ld8 r24=[r24] + ;; + mov cr.pta=r24 + ;; + srlz.i + ;; +// fall through +GLOBAL_ENTRY(ia64_vmm_entry) +/* + * must be at bank 0 + * parameter: + * r18:vpd + * r19:vpsr + * r20:__vsa_base + * r22:b0 + * r23:predicate + */ + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 + ;; + mov pr=r23,-2 + mov b0=r29 + ;; + br.cond.sptk b0 // call pal service +END(ia64_leave_hypervisor) + +//r24 rfi_pfs +//r17 address of rfi_pfs +GLOBAL_ENTRY(vmx_dorfirfi) + mov r16=ar.ec + movl r20 = vmx_dorfirfi_back + ;; +// clean rfi_pfs + st8 [r17]=r0 + mov b0=r20 +// pfs.pec=ar.ec + dep r24 = r16, r24, 52, 6 + ;; + mov ar.pfs=r24 + ;; + br.ret.sptk b0 + ;; +END(vmx_dorfirfi) + + +#define VMX_PURGE_RR7 0 +#define VMX_INSERT_RR7 1 +/* + * in0: old rr7 + * in1: virtual address of xen image + * in2: virtual address of vhpt table + */ +GLOBAL_ENTRY(vmx_purge_double_mapping) + alloc loc1 = ar.pfs,5,9,0,0 + mov loc0 = rp + movl r8 = 1f + ;; + movl loc4 = KERNEL_TR_PAGE_SHIFT + movl loc5 = VCPU_TLB_SHIFT + mov loc6 = psr + movl loc7 = XEN_RR7_SWITCH_STUB + mov loc8 = (1<<VMX_PURGE_RR7) + ;; + srlz.i + ;; + rsm psr.i | psr.ic + ;; + srlz.i + ;; + mov ar.rsc = 0 + mov b6 = loc7 + mov rp = r8 + ;; + br.sptk b6 +1: + mov ar.rsc = 3 + mov rp = loc0 + ;; + mov psr.l = loc6 + ;; + srlz.i + ;; + br.ret.sptk rp +END(vmx_purge_double_mapping) + +/* + * in0: new rr7 + * in1: virtual address of xen image + * in2: virtual address of vhpt table + * in3: pte entry of xen image + * in4: pte entry of vhpt table + */ +GLOBAL_ENTRY(vmx_insert_double_mapping) + alloc loc1 = ar.pfs,5,9,0,0 + mov loc0 = rp + movl loc2 = IA64_TR_XEN_IN_DOM // TR number for xen image + ;; + movl loc3 = IA64_TR_VHPT_IN_DOM // TR number for vhpt table + movl r8 = 1f + movl loc4 = KERNEL_TR_PAGE_SHIFT + ;; + movl loc5 = VCPU_TLB_SHIFT + mov loc6 = psr + movl loc7 = XEN_RR7_SWITCH_STUB + ;; + srlz.i + ;; + rsm psr.i | psr.ic + mov loc8 = (1<<VMX_INSERT_RR7) + ;; + srlz.i + ;; + mov ar.rsc = 0 + mov b6 = loc7 + mov rp = r8 + ;; + br.sptk b6 +1: + mov ar.rsc = 3 + mov rp = loc0 + ;; + mov psr.l = loc6 + ;; + srlz.i + ;; + br.ret.sptk rp +END(vmx_insert_double_mapping) + + .align PAGE_SIZE +/* + * Stub to add double mapping for new domain, which shouldn't + * access any memory when active. Before reaching this point, + * both psr.i/ic is cleared and rse is set in lazy mode. 
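+ * The stub is entered through vmx_purge_double_mapping (loc8 has the
+ * VMX_PURGE_RR7 bit set) to drop the old rr7 translations, and through
+ * vmx_insert_double_mapping (loc8 has VMX_INSERT_RR7 set) to pin the
+ * xen image and the VHPT with itr/dtr entries under the new rr7.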
+ * + * in0: new rr7 + * in1: virtual address of xen image + * in2: virtual address of vhpt table + * in3: pte entry of xen image + * in4: pte entry of vhpt table + * loc2: TR number for xen image + * loc3: TR number for vhpt table + * loc4: page size for xen image + * loc5: page size of vhpt table + * loc7: free to use + * loc8: purge or insert + * r8: will contain old rid value + */ +GLOBAL_ENTRY(vmx_switch_rr7) + movl loc7 = (7<<61) + dep.z loc4 = loc4, 2, 6 + dep.z loc5 = loc5, 2, 6 + ;; + tbit.nz p6,p7=loc8, VMX_INSERT_RR7 + mov r8 = rr[loc7] + ;; + mov rr[loc7] = in0 +(p6)mov cr.ifa = in1 +(p6)mov cr.itir = loc4 + ;; + srlz.i + ;; +(p6)itr.i itr[loc2] = in3 +(p7)ptr.i in1, loc4 + ;; +(p6)itr.d dtr[loc2] = in3 +(p7)ptr.d in1, loc4 + ;; + srlz.i + ;; +(p6)mov cr.ifa = in2 +(p6)mov cr.itir = loc5 + ;; +(p6)itr.d dtr[loc3] = in4 +(p7)ptr.d in2, loc5 + ;; + srlz.i + ;; + mov rr[loc7] = r8 + ;; + srlz.i + br.sptk rp +END(vmx_switch_rr7) + .align PAGE_SIZE diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_hypercall.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,235 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_hyparcall.c: handling hypercall from domain + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + */ + +#include <xen/config.h> +#include <xen/errno.h> +#include <asm/vmx_vcpu.h> +#include <public/xen.h> +#include <public/event_channel.h> +#include <asm/vmmu.h> +#include <asm/tlb.h> +#include <asm/regionreg.h> +#include <asm/page.h> +#include <xen/mm.h> +#include <xen/multicall.h> + + +void hyper_not_support(void) +{ + VCPU *vcpu=current; + vmx_vcpu_set_gr(vcpu, 8, -1, 0); + vmx_vcpu_increment_iip(vcpu); +} + +void hyper_mmu_update(void) +{ + VCPU *vcpu=current; + u64 r32,r33,r34,r35,ret; + vmx_vcpu_get_gr(vcpu,16,&r32); + vmx_vcpu_get_gr(vcpu,17,&r33); + vmx_vcpu_get_gr(vcpu,18,&r34); + vmx_vcpu_get_gr(vcpu,19,&r35); + ret=do_mmu_update((mmu_update_t*)r32,r33,r34,r35); + vmx_vcpu_set_gr(vcpu, 8, ret, 0); + vmx_vcpu_increment_iip(vcpu); +} + +unsigned long __hypercall_create_continuation( + unsigned int op, unsigned int nr_args, ...) 
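+/*
+ * Continuation scheme (used by hyper_dom_mem_op below): instead of a
+ * magic return value, the guest's r15 (op) and r16-r20 (args) are
+ * rewritten and arch.hypercall_continuation is set, so the hypercall
+ * break is simply re-executed; IIP is only advanced once no
+ * continuation is pending.
+ */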
+{ + struct mc_state *mcs = &mc_state[smp_processor_id()]; + VCPU *vcpu = current; + struct cpu_user_regs *regs = vcpu_regs(vcpu); + unsigned int i; + va_list args; + + va_start(args, nr_args); + if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) { + panic("PREEMPT happen in multicall\n"); // Not support yet + } else { + vmx_vcpu_set_gr(vcpu, 15, op, 0); + for ( i = 0; i < nr_args; i++) { + switch (i) { + case 0: vmx_vcpu_set_gr(vcpu, 16, va_arg(args, unsigned long), 0); + break; + case 1: vmx_vcpu_set_gr(vcpu, 17, va_arg(args, unsigned long), 0); + break; + case 2: vmx_vcpu_set_gr(vcpu, 18, va_arg(args, unsigned long), 0); + break; + case 3: vmx_vcpu_set_gr(vcpu, 19, va_arg(args, unsigned long), 0); + break; + case 4: vmx_vcpu_set_gr(vcpu, 20, va_arg(args, unsigned long), 0); + break; + default: panic("Too many args for hypercall continuation\n"); + break; + } + } + } + vcpu->arch.hypercall_continuation = 1; + va_end(args); + return op; +} + +void hyper_dom_mem_op(void) +{ + VCPU *vcpu=current; + u64 r32,r33,r34,r35,r36; + u64 ret; + vmx_vcpu_get_gr(vcpu,16,&r32); + vmx_vcpu_get_gr(vcpu,17,&r33); + vmx_vcpu_get_gr(vcpu,18,&r34); + vmx_vcpu_get_gr(vcpu,19,&r35); + vmx_vcpu_get_gr(vcpu,20,&r36); + ret=do_dom_mem_op(r32,(u64 *)r33,r34,r35,r36); + printf("do_dom_mem return value: %lx\n", ret); + vmx_vcpu_set_gr(vcpu, 8, ret, 0); + + /* Hard to define a special return value to indicate hypercall restart. + * So just add a new mark, which is SMP safe + */ + if (vcpu->arch.hypercall_continuation == 1) + vcpu->arch.hypercall_continuation = 0; + else + vmx_vcpu_increment_iip(vcpu); +} + + +void hyper_sched_op(void) +{ + VCPU *vcpu=current; + u64 r32,ret; + vmx_vcpu_get_gr(vcpu,16,&r32); + ret=do_sched_op(r32); + vmx_vcpu_set_gr(vcpu, 8, ret, 0); + + vmx_vcpu_increment_iip(vcpu); +} + +void hyper_dom0_op(void) +{ + VCPU *vcpu=current; + u64 r32,ret; + vmx_vcpu_get_gr(vcpu,16,&r32); + ret=do_dom0_op((dom0_op_t *)r32); + vmx_vcpu_set_gr(vcpu, 8, ret, 0); + + vmx_vcpu_increment_iip(vcpu); +} + +void hyper_event_channel_op(void) +{ + VCPU *vcpu=current; + u64 r32,ret; + vmx_vcpu_get_gr(vcpu,16,&r32); + ret=do_event_channel_op((evtchn_op_t *)r32); + vmx_vcpu_set_gr(vcpu, 8, ret, 0); + vmx_vcpu_increment_iip(vcpu); +} + +void hyper_xen_version(void) +{ + VCPU *vcpu=current; + u64 r32,ret; + vmx_vcpu_get_gr(vcpu,16,&r32); + ret=do_xen_version((int )r32); + vmx_vcpu_set_gr(vcpu, 8, ret, 0); + vmx_vcpu_increment_iip(vcpu); +} + +static int do_lock_page(VCPU *vcpu, u64 va, u64 lock) +{ + int i; + ia64_rr rr; + thash_cb_t *hcb; + hcb = vmx_vcpu_get_vtlb(vcpu); + rr = vmx_vcpu_rr(vcpu, va); + return thash_lock_tc(hcb, va ,1U<<rr.ps, rr.rid, DSIDE_TLB, lock); +} + +/* + * Lock guest page in vTLB, so that it's not relinquished by recycle + * session when HV is servicing that hypercall. 
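+ * do_lock_page above resolves the region register for va and forwards
+ * to thash_lock_tc with that rid and page size; the lock flag itself
+ * comes straight from guest r17.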
+ */ +void hyper_lock_page(void) +{ +//TODO: + VCPU *vcpu=current; + u64 va,lock, ret; + vmx_vcpu_get_gr(vcpu,16,&va); + vmx_vcpu_get_gr(vcpu,17,&lock); + ret=do_lock_page(vcpu, va, lock); + vmx_vcpu_set_gr(vcpu, 8, ret, 0); + + vmx_vcpu_increment_iip(vcpu); +} + +static int do_set_shared_page(VCPU *vcpu, u64 gpa) +{ + u64 shared_info, o_info; + struct domain *d = vcpu->domain; + struct vcpu *v; + if(vcpu->domain!=dom0) + return -EPERM; + shared_info = __gpa_to_mpa(vcpu->domain, gpa); + o_info = (u64)vcpu->domain->shared_info; + d->shared_info= (shared_info_t *)__va(shared_info); + + /* Copy existing shared info into new page */ + if (o_info) { + memcpy((void*)d->shared_info, (void*)o_info, PAGE_SIZE); + for_each_vcpu(d, v) { + v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id]; + } + /* If original page belongs to xen heap, then relinguish back + * to xen heap. Or else, leave to domain itself to decide. + */ + if (likely(IS_XEN_HEAP_FRAME(virt_to_page(o_info)))) + free_xenheap_page(o_info); + } else + memset(d->shared_info, 0, PAGE_SIZE); + return 0; +} + +void hyper_set_shared_page(void) +{ + VCPU *vcpu=current; + u64 gpa,ret; + vmx_vcpu_get_gr(vcpu,16,&gpa); + + ret=do_set_shared_page(vcpu, gpa); + vmx_vcpu_set_gr(vcpu, 8, ret, 0); + + vmx_vcpu_increment_iip(vcpu); +} + +/* +void hyper_grant_table_op(void) +{ + VCPU *vcpu=current; + u64 r32,r33,r34,ret; + vmx_vcpu_get_gr(vcpu,16,&r32); + vmx_vcpu_get_gr(vcpu,17,&r33); + vmx_vcpu_get_gr(vcpu,18,&r34); + + ret=do_grant_table_op((unsigned int)r32, (void *)r33, (unsigned int)r34); + vmx_vcpu_set_gr(vcpu, 8, ret, 0); +} +*/ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_init.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_init.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,375 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_init.c: initialization work for vt specific domain + * Copyright (c) 2005, Intel Corporation. + * Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx> + * Xuefei Xu (Anthony Xu) <anthony.xu@xxxxxxxxx> + * Fred Yang <fred.yang@xxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + */ + +/* + * 05/08/16 Kun tian (Kevin Tian) <kevin.tian@xxxxxxxxx>: + * Disable doubling mapping + * + * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>: + * Simplied design in first step: + * - One virtual environment + * - Domain is bound to one LP + * Later to support guest SMP: + * - Need interface to handle VP scheduled to different LP + */ +#include <xen/config.h> +#include <xen/types.h> +#include <xen/sched.h> +#include <asm/pal.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/vmx_vcpu.h> +#include <xen/lib.h> +#include <asm/vmmu.h> +#include <public/arch-ia64.h> +#include <public/io/ioreq.h> +#include <asm/vmx_phy_mode.h> +#include <asm/processor.h> +#include <asm/vmx.h> +#include <xen/mm.h> + +/* Global flag to identify whether Intel vmx feature is on */ +u32 vmx_enabled = 0; +static u32 vm_order; +static u64 buffer_size; +static u64 vp_env_info; +static u64 vm_buffer = 0; /* Buffer required to bring up VMX feature */ +u64 __vsa_base = 0; /* Run-time service base of VMX */ + +/* Check whether vt feature is enabled or not. */ +void +identify_vmx_feature(void) +{ + pal_status_t ret; + u64 avail = 1, status = 1, control = 1; + + vmx_enabled = 0; + /* Check VT-i feature */ + ret = ia64_pal_proc_get_features(&avail, &status, &control); + if (ret != PAL_STATUS_SUCCESS) { + printk("Get proc features failed.\n"); + goto no_vti; + } + + /* FIXME: do we need to check status field, to see whether + * PSR.vm is actually enabled? If yes, aonther call to + * ia64_pal_proc_set_features may be reuqired then. + */ + printk("avail:0x%lx, status:0x%lx,control:0x%lx, vm?0x%lx\n", + avail, status, control, avail & PAL_PROC_VM_BIT); + if (!(avail & PAL_PROC_VM_BIT)) { + printk("No VT feature supported.\n"); + goto no_vti; + } + + ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info); + if (ret != PAL_STATUS_SUCCESS) { + printk("Get vp environment info failed.\n"); + goto no_vti; + } + + /* Does xen has ability to decode itself? */ + if (!(vp_env_info & VP_OPCODE)) + printk("WARNING: no opcode provided from hardware(%lx)!!!\n", vp_env_info); + vm_order = get_order(buffer_size); + printk("vm buffer size: %d, order: %d\n", buffer_size, vm_order); + + vmx_enabled = 1; +no_vti: + return; +} + +/* + * Init virtual environment on current LP + * vsa_base is the indicator whether it's first LP to be initialized + * for current domain. + */ +void +vmx_init_env(void) +{ + u64 status, tmp_base; + + if (!vm_buffer) { + vm_buffer = alloc_xenheap_pages(vm_order); + ASSERT(vm_buffer); + printk("vm_buffer: 0x%lx\n", vm_buffer); + } + + status=ia64_pal_vp_init_env(__vsa_base ? 
VP_INIT_ENV : VP_INIT_ENV_INITALIZE, + __pa(vm_buffer), + vm_buffer, + &tmp_base); + + if (status != PAL_STATUS_SUCCESS) { + printk("ia64_pal_vp_init_env failed.\n"); + return -1; + } + + if (!__vsa_base) + __vsa_base = tmp_base; + else + ASSERT(tmp_base != __vsa_base); + +#ifdef XEN_DBL_MAPPING + /* Init stub for rr7 switch */ + vmx_init_double_mapping_stub(); +#endif +} + +void vmx_setup_platform(struct vcpu *v, struct vcpu_guest_context *c) +{ + struct domain *d = v->domain; + shared_iopage_t *sp; + + ASSERT(d != dom0); /* only for non-privileged vti domain */ + d->arch.vmx_platform.shared_page_va = __va(c->share_io_pg); + sp = get_sp(d); + memset((char *)sp,0,PAGE_SIZE); + /* FIXME: temp due to old CP */ + sp->sp_global.eport = 2; +#ifdef V_IOSAPIC_READY + sp->vcpu_number = 1; +#endif + /* TEMP */ + d->arch.vmx_platform.pib_base = 0xfee00000UL; + + /* One more step to enable interrupt assist */ + set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags); + /* Only open one port for I/O and interrupt emulation */ + if (v == d->vcpu[0]) { + memset(&d->shared_info->evtchn_mask[0], 0xff, + sizeof(d->shared_info->evtchn_mask)); + clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]); + } + + /* FIXME: only support PMT table continuously by far */ + d->arch.pmt = __va(c->pt_base); + d->arch.max_pfn = c->pt_max_pfn; + + vmx_final_setup_domain(d); +} + +typedef union { + u64 value; + struct { + u64 number : 8; + u64 revision : 8; + u64 model : 8; + u64 family : 8; + u64 archrev : 8; + u64 rv : 24; + }; +} cpuid3_t; + +/* Allocate vpd from xenheap */ +static vpd_t *alloc_vpd(void) +{ + int i; + cpuid3_t cpuid3; + vpd_t *vpd; + + vpd = alloc_xenheap_pages(get_order(VPD_SIZE)); + if (!vpd) { + printk("VPD allocation failed.\n"); + return NULL; + } + + printk("vpd base: 0x%lx, vpd size:%d\n", vpd, sizeof(vpd_t)); + memset(vpd, 0, VPD_SIZE); + /* CPUID init */ + for (i = 0; i < 5; i++) + vpd->vcpuid[i] = ia64_get_cpuid(i); + + /* Limit the CPUID number to 5 */ + cpuid3.value = vpd->vcpuid[3]; + cpuid3.number = 4; /* 5 - 1 */ + vpd->vcpuid[3] = cpuid3.value; + + vpd->vdc.d_vmsw = 1; + return vpd; +} + + +#ifdef CONFIG_VTI +/* + * Create a VP on intialized VMX environment. + */ +static void +vmx_create_vp(struct vcpu *v) +{ + u64 ret; + vpd_t *vpd = v->arch.arch_vmx.vpd; + u64 ivt_base; + extern char vmx_ia64_ivt; + /* ia64_ivt is function pointer, so need this tranlation */ + ivt_base = (u64) &vmx_ia64_ivt; + printk("ivt_base: 0x%lx\n", ivt_base); + ret = ia64_pal_vp_create(vpd, ivt_base, 0); + if (ret != PAL_STATUS_SUCCESS) + panic("ia64_pal_vp_create failed. \n"); +} + +#ifdef XEN_DBL_MAPPING +void vmx_init_double_mapping_stub(void) +{ + u64 base, psr; + extern void vmx_switch_rr7(void); + + base = (u64) &vmx_switch_rr7; + base = *((u64*)base); + + psr = ia64_clear_ic(); + ia64_itr(0x1, IA64_TR_RR7_SWITCH_STUB, XEN_RR7_SWITCH_STUB, + pte_val(pfn_pte(__pa(base) >> PAGE_SHIFT, PAGE_KERNEL)), + RR7_SWITCH_SHIFT); + ia64_set_psr(psr); + ia64_srlz_i(); + printk("Add TR mapping for rr7 switch stub, with physical: 0x%lx\n", (u64)(__pa(base))); +} +#endif + +/* Other non-context related tasks can be done in context switch */ +void +vmx_save_state(struct vcpu *v) +{ + u64 status, psr; + u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt; + + /* FIXME: about setting of pal_proc_vector... 
time consuming */ + status = ia64_pal_vp_save(v->arch.arch_vmx.vpd, 0); + if (status != PAL_STATUS_SUCCESS) + panic("Save vp status failed\n"); + +#ifdef XEN_DBL_MAPPING + /* FIXME: Do we really need purge double mapping for old vcpu? + * Since rid is completely different between prev and next, + * it's not overlap and thus no MCA possible... */ + dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7])); + vmx_purge_double_mapping(dom_rr7, KERNEL_START, + (u64)v->arch.vtlb->ts->vhpt->hash); +#endif + + /* Need to save KR when domain switch, though HV itself doesn;t + * use them. + */ + v->arch.arch_vmx.vkr[0] = ia64_get_kr(0); + v->arch.arch_vmx.vkr[1] = ia64_get_kr(1); + v->arch.arch_vmx.vkr[2] = ia64_get_kr(2); + v->arch.arch_vmx.vkr[3] = ia64_get_kr(3); + v->arch.arch_vmx.vkr[4] = ia64_get_kr(4); + v->arch.arch_vmx.vkr[5] = ia64_get_kr(5); + v->arch.arch_vmx.vkr[6] = ia64_get_kr(6); + v->arch.arch_vmx.vkr[7] = ia64_get_kr(7); +} + +/* Even guest is in physical mode, we still need such double mapping */ +void +vmx_load_state(struct vcpu *v) +{ + u64 status, psr; + u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt; + u64 pte_xen, pte_vhpt; + int i; + + status = ia64_pal_vp_restore(v->arch.arch_vmx.vpd, 0); + if (status != PAL_STATUS_SUCCESS) + panic("Restore vp status failed\n"); + +#ifdef XEN_DBL_MAPPING + dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7])); + pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL)); + pte_vhpt = pte_val(pfn_pte((__pa(v->arch.vtlb->ts->vhpt->hash) >> PAGE_SHIFT), PAGE_KERNEL)); + vmx_insert_double_mapping(dom_rr7, KERNEL_START, + (u64)v->arch.vtlb->ts->vhpt->hash, + pte_xen, pte_vhpt); +#endif + + ia64_set_kr(0, v->arch.arch_vmx.vkr[0]); + ia64_set_kr(1, v->arch.arch_vmx.vkr[1]); + ia64_set_kr(2, v->arch.arch_vmx.vkr[2]); + ia64_set_kr(3, v->arch.arch_vmx.vkr[3]); + ia64_set_kr(4, v->arch.arch_vmx.vkr[4]); + ia64_set_kr(5, v->arch.arch_vmx.vkr[5]); + ia64_set_kr(6, v->arch.arch_vmx.vkr[6]); + ia64_set_kr(7, v->arch.arch_vmx.vkr[7]); + /* Guest vTLB is not required to be switched explicitly, since + * anchored in vcpu */ +} + +#ifdef XEN_DBL_MAPPING +/* Purge old double mapping and insert new one, due to rr7 change */ +void +vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7) +{ + u64 pte_xen, pte_vhpt, vhpt_base; + + vhpt_base = (u64)v->arch.vtlb->ts->vhpt->hash; + vmx_purge_double_mapping(oldrr7, KERNEL_START, + vhpt_base); + + pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL)); + pte_vhpt = pte_val(pfn_pte((__pa(vhpt_base) >> PAGE_SHIFT), PAGE_KERNEL)); + vmx_insert_double_mapping(newrr7, KERNEL_START, + vhpt_base, + pte_xen, pte_vhpt); +} +#endif // XEN_DBL_MAPPING +#endif // CONFIG_VTI + +/* + * Initialize VMX envirenment for guest. Only the 1st vp/vcpu + * is registered here. 
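+ * It allocates the vpd from xenheap, points virt_env_vaddr at vm_buffer,
+ * and (under CONFIG_VTI) creates the VP through PAL and initializes
+ * physical-mode emulation, the vlsapic and the virtual timer (vtm).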
+ */ +void +vmx_final_setup_domain(struct domain *d) +{ + struct vcpu *v = d->vcpu[0]; + vpd_t *vpd; + + /* Allocate resources for vcpu 0 */ + //memset(&v->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct)); + + vpd = alloc_vpd(); + ASSERT(vpd); + + v->arch.arch_vmx.vpd = vpd; + vpd->virt_env_vaddr = vm_buffer; + +#ifdef CONFIG_VTI + /* v->arch.schedule_tail = arch_vmx_do_launch; */ + vmx_create_vp(v); + + /* Set this ed to be vmx */ + set_bit(ARCH_VMX_VMCS_LOADED, &v->arch.arch_vmx.flags); + + /* Physical mode emulation initialization, including + * emulation ID allcation and related memory request + */ + physical_mode_init(v); + + vlsapic_reset(v); + vtm_init(v); +#endif + + /* Other vmx specific initialization work */ +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_interrupt.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_interrupt.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,388 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_interrupt.c: handle inject interruption. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx> + * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx> + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + */ + + +#include <xen/types.h> +#include <asm/vmx_vcpu.h> +#include <asm/vmx_mm_def.h> +#include <asm/vmx_pal_vsa.h> +/* SDM vol2 5.5 - IVA based interruption handling */ +#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034 +void +collect_interruption(VCPU *vcpu) +{ + u64 ipsr; + u64 vdcr; + u64 vifs; + IA64_PSR vpsr; + REGS * regs = vcpu_regs(vcpu); + vpsr.val = vmx_vcpu_get_psr(vcpu); + + if(vpsr.ic){ + extern void vmx_dorfirfi(void); + if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi) + panic("COLLECT interruption for vmx_dorfirfi\n"); + + /* Sync mpsr id/da/dd/ss/ed bits to vipsr + * since after guest do rfi, we still want these bits on in + * mpsr + */ + + ipsr = regs->cr_ipsr; + vpsr.val = vpsr.val | (ipsr & (IA64_PSR_ID | IA64_PSR_DA + | IA64_PSR_DD |IA64_PSR_SS |IA64_PSR_ED)); + vmx_vcpu_set_ipsr(vcpu, vpsr.val); + + /* Currently, for trap, we do not advance IIP to next + * instruction. 
That's because we assume caller already + * set up IIP correctly + */ + + vmx_vcpu_set_iip(vcpu , regs->cr_iip); + + /* set vifs.v to zero */ + vifs = VPD_CR(vcpu,ifs); + vifs &= ~IA64_IFS_V; + vmx_vcpu_set_ifs(vcpu, vifs); + + vmx_vcpu_set_iipa(vcpu, regs->cr_iipa); + } + + vdcr = VPD_CR(vcpu,dcr); + + /* Set guest psr + * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged + * be: set to the value of dcr.be + * pp: set to the value of dcr.pp + */ + vpsr.val &= INITIAL_PSR_VALUE_AT_INTERRUPTION; + vpsr.val |= ( vdcr & IA64_DCR_BE); + + /* VDCR pp bit position is different from VPSR pp bit */ + if ( vdcr & IA64_DCR_PP ) { + vpsr.val |= IA64_PSR_PP; + } else { + vpsr.val &= ~IA64_PSR_PP;; + } + + vmx_vcpu_set_psr(vcpu, vpsr.val); + +} +int +inject_guest_interruption(VCPU *vcpu, u64 vec) +{ + u64 viva; + REGS *regs; + regs=vcpu_regs(vcpu); + + collect_interruption(vcpu); + + vmx_vcpu_get_iva(vcpu,&viva); + regs->cr_iip = viva + vec; +} + + +/* + * Set vIFA & vITIR & vIHA, when vPSR.ic =1 + * Parameter: + * set_ifa: if true, set vIFA + * set_itir: if true, set vITIR + * set_iha: if true, set vIHA + */ +void +set_ifa_itir_iha (VCPU *vcpu, u64 vadr, + int set_ifa, int set_itir, int set_iha) +{ + IA64_PSR vpsr; + u64 value; + vpsr.val = vmx_vcpu_get_psr(vcpu); + /* Vol2, Table 8-1 */ + if ( vpsr.ic ) { + if ( set_ifa){ + vmx_vcpu_set_ifa(vcpu, vadr); + } + if ( set_itir) { + value = vmx_vcpu_get_itir_on_fault(vcpu, vadr); + vmx_vcpu_set_itir(vcpu, value); + } + + if ( set_iha) { + vmx_vcpu_thash(vcpu, vadr, &value); + vmx_vcpu_set_iha(vcpu, value); + } + } + + +} + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +dtlb_fault (VCPU *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha (vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu,IA64_DATA_TLB_VECTOR); +} + +/* + * Instruction TLB Fault + * @ Instruction TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +itlb_fault (VCPU *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha (vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu,IA64_INST_TLB_VECTOR); +} + + + +/* + * Data Nested TLB Fault + * @ Data Nested TLB Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +nested_dtlb (VCPU *vcpu) +{ + inject_guest_interruption(vcpu,IA64_DATA_NESTED_TLB_VECTOR); +} + +/* + * Alternate Data TLB Fault + * @ Alternate Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +alt_dtlb (VCPU *vcpu, u64 vadr) +{ + set_ifa_itir_iha (vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu,IA64_ALT_DATA_TLB_VECTOR); +} + + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +alt_itlb (VCPU *vcpu, u64 vadr) +{ + set_ifa_itir_iha (vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu,IA64_ALT_INST_TLB_VECTOR); +} + +/* Deal with: + * VHPT Translation Vector + */ +static void +_vhpt_fault(VCPU *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA*/ + set_ifa_itir_iha (vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu,IA64_VHPT_TRANS_VECTOR); + + +} + +/* + * VHPT Instruction Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +ivhpt_fault (VCPU *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + +/* + * VHPT Data Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +dvhpt_fault (VCPU *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + + +/* + * Deal with: + * General Exception vector + */ +void +_general_exception (VCPU *vcpu) +{ + 
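+    /*
+     * Common path for the wrappers below (illegal_op, illegal_dep,
+     * rsv_reg_field, privilege_op, unimpl_daddr, privilege_reg): they
+     * all inject through the General Exception vector, with no IFA/ITIR
+     * setup needed.
+     */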
inject_guest_interruption(vcpu,IA64_GENEX_VECTOR); +} + + +/* + * Illegal Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +illegal_op (VCPU *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Illegal Dependency Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +illegal_dep (VCPU *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Reserved Register/Field Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +rsv_reg_field (VCPU *vcpu) +{ + _general_exception(vcpu); +} +/* + * Privileged Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ + +void +privilege_op (VCPU *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Unimplement Data Address Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +unimpl_daddr (VCPU *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Privileged Register Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +privilege_reg (VCPU *vcpu) +{ + _general_exception(vcpu); +} + +/* Deal with + * Nat consumption vector + * Parameter: + * vaddr: Optional, if t == REGISTER + */ +static void +_nat_consumption_fault(VCPU *vcpu, u64 vadr, miss_type t) +{ + /* If vPSR.ic && t == DATA/INST, IFA */ + if ( t == DATA || t == INSTRUCTION ) { + /* IFA */ + set_ifa_itir_iha (vcpu, vadr, 1, 0, 0); + } + + inject_guest_interruption(vcpu,IA64_NAT_CONSUMPTION_VECTOR); +} + +/* + * IR Data Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +static void +ir_nat_page_consumption (VCPU *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, DATA); +} + +/* + * Instruction Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +inat_page_consumption (VCPU *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, INSTRUCTION); +} + +/* + * Register Nat Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +rnat_consumption (VCPU *vcpu) +{ + _nat_consumption_fault(vcpu, 0, REGISTER); +} + +/* + * Data Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +dnat_page_consumption (VCPU *vcpu, uint64_t vadr) +{ + _nat_consumption_fault(vcpu, vadr, DATA); +} + +/* Deal with + * Page not present vector + */ +void +page_not_present(VCPU *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha (vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); +} + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_irq_ia64.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_irq_ia64.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,127 @@ +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/jiffies.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/kernel_stat.h> +#include <linux/slab.h> +#include <linux/ptrace.h> +#include <linux/random.h> /* for rand_initialize_irq() */ +#include <linux/signal.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/threads.h> +#include <linux/bitops.h> + +#include <asm/delay.h> +#include <asm/intrinsics.h> +#include <asm/io.h> +#include <asm/hw_irq.h> +#include <asm/machvec.h> +#include <asm/pgtable.h> +#include <asm/system.h> + +#ifdef CONFIG_PERFMON +# include <asm/perfmon.h> +#endif + +#define IRQ_DEBUG 0 + 
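+/*
+ * External-interrupt path for VT-i domains, sketched from the handler
+ * below: cr.tpr is set to the vector being serviced to cap nesting,
+ * cr.ivr is polled in a loop, the timer vector is handled via __do_IRQ
+ * while every other vector is pended to dom0's vcpu0 (which is then
+ * woken), and an EOI is issued per vector; softirqs run later, on the
+ * way out of the hypervisor.
+ */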
+#ifdef CONFIG_VTI +#define vmx_irq_enter() \ + add_preempt_count(HARDIRQ_OFFSET); + +/* Now softirq will be checked when leaving hypervisor, or else + * scheduler irq will be executed too early. + */ +#define vmx_irq_exit(void) \ + sub_preempt_count(HARDIRQ_OFFSET); +/* + * That's where the IVT branches when we get an external + * interrupt. This branches to the correct hardware IRQ handler via + * function ptr. + */ +void +vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) +{ + unsigned long saved_tpr; + int wake_dom0 = 0; + + +#if IRQ_DEBUG + { + unsigned long bsp, sp; + + /* + * Note: if the interrupt happened while executing in + * the context switch routine (ia64_switch_to), we may + * get a spurious stack overflow here. This is + * because the register and the memory stack are not + * switched atomically. + */ + bsp = ia64_getreg(_IA64_REG_AR_BSP); + sp = ia64_getreg(_IA64_REG_AR_SP); + + if ((sp - bsp) < 1024) { + static unsigned char count; + static long last_time; + + if (jiffies - last_time > 5*HZ) + count = 0; + if (++count < 5) { + last_time = jiffies; + printk("ia64_handle_irq: DANGER: less than " + "1KB of free stack space!!\n" + "(bsp=0x%lx, sp=%lx)\n", bsp, sp); + } + } + } +#endif /* IRQ_DEBUG */ + + /* + * Always set TPR to limit maximum interrupt nesting depth to + * 16 (without this, it would be ~240, which could easily lead + * to kernel stack overflows). + */ + vmx_irq_enter(); + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); + while (vector != IA64_SPURIOUS_INT_VECTOR) { + if (!IS_RESCHEDULE(vector)) { + ia64_setreg(_IA64_REG_CR_TPR, vector); + ia64_srlz_d(); + + if (vector != IA64_TIMER_VECTOR) { + /* FIXME: Leave IRQ re-route later */ + vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector); + wake_dom0 = 1; + } + else { // FIXME: Handle Timer only now + __do_IRQ(local_vector_to_irq(vector), regs); + } + + /* + * Disable interrupts and send EOI: + */ + local_irq_disable(); + ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); + } + else { + printf("Oops: RESCHEDULE IPI absorbed by HV\n"); + } + ia64_eoi(); + vector = ia64_get_ivr(); + } + /* + * This must be done *after* the ia64_eoi(). For example, the keyboard softirq + * handler needs to be able to wait for further keyboard interrupts, which can't + * come through until ia64_eoi() has been done. + */ + vmx_irq_exit(); + if ( wake_dom0 && current != dom0 ) + vcpu_wake(dom0->vcpu[0]); +} +#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_ivt.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_ivt.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1085 @@ +/* + * arch/ia64/kernel/vmx_ivt.S + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + * David Mosberger <davidm@xxxxxxxxxx> + * Copyright (C) 2000, 2002-2003 Intel Co + * Asit Mallick <asit.k.mallick@xxxxxxxxx> + * Suresh Siddha <suresh.b.siddha@xxxxxxxxx> + * Kenneth Chen <kenneth.w.chen@xxxxxxxxx> + * Fenghua Yu <fenghua.yu@xxxxxxxxx> + * + * + * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP + * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now uses virtual PT. + * + * 05/3/20 Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx) + * Supporting Intel virtualization architecture + * + */ + +/* + * This file defines the interruption vector table used by the CPU. + * It does not include one entry per possible cause of interruption. 
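+ * (In this VMX copy of the table, vectors that are merely reflected to
+ *  the guest go through the VMX_REFLECT macro, and vectors with no
+ *  handler spin in VMX_FAULT.)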
+ * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for critical + * interruptions like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + * entry offset ----/ / / / / + * entry number ---------/ / / / + * size of the entry -------------/ / / + * vector name -------------------------------------/ / + * interruptions triggering this vector ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/break.h> +#include <asm/ia32.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> +#include <asm/vhpt.h> + + +#if 0 + /* + * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't + * needed for something else before enabling this... + */ +# define VMX_DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 +#else +# define VMX_DBG_FAULT(i) +#endif + +#include "vmx_minstate.h" + + + +#define VMX_FAULT(n) \ +vmx_fault_##n:; \ + br.sptk vmx_fault_##n; \ + ;; \ + + +#define VMX_REFLECT(n) \ + mov r31=pr; \ + mov r19=n; /* prepare to save predicates */ \ + mov r29=cr.ipsr; \ + ;; \ + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ +(p7) br.sptk.many vmx_dispatch_reflection; \ + VMX_FAULT(n); \ + + +GLOBAL_ENTRY(vmx_panic) + br.sptk.many vmx_panic + ;; +END(vmx_panic) + + + + + + .section .text.ivt,"ax" + + .align 32768 // align on 32KB boundary + .global vmx_ia64_ivt +vmx_ia64_ivt: +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(vmx_vhpt_miss) + VMX_FAULT(0) +END(vmx_vhpt_miss) + + .org vmx_ia64_ivt+0x400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(vmx_itlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6) br.sptk vmx_fault_1 + mov r16 = cr.ifa + ;; + thash r17 = r16 + ttag r20 = r16 + ;; +vmx_itlb_loop: + cmp.eq p6,p0 = r0, r17 +(p6) br vmx_itlb_out + ;; + adds r22 = VLE_TITAG_OFFSET, r17 + adds r23 = VLE_CCHAIN_OFFSET, r17 + ;; + ld8 r24 = [r22] + ld8 r25 = [r23] + ;; + lfetch [r25] + cmp.eq p6,p7 = r20, r24 + ;; +(p7) mov r17 = r25; +(p7) br.sptk vmx_itlb_loop + ;; + adds r23 = VLE_PGFLAGS_OFFSET, r17 + adds r24 = VLE_ITIR_OFFSET, r17 + ;; + ld8 r26 = [r23] + ld8 r25 = [r24] + ;; + mov cr.itir = r25 + ;; + itc.i r26 + ;; + srlz.i + ;; + mov r23=r31 + mov r22=b0 + adds r16=IA64_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r16] + ;; + adds r19=VPD(VPSR),r18 + movl r20=__vsa_base + ;; + ld8 r19=[r19] + ld8 r20=[r20] + ;; + br.sptk ia64_vmm_entry + ;; +vmx_itlb_out: + mov r19 = 1 + br.sptk vmx_dispatch_tlb_miss + VMX_FAULT(1); +END(vmx_itlb_miss) + + .org vmx_ia64_ivt+0x0800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(vmx_dtlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6)br.sptk vmx_fault_2 
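+    // Walk the vcpu's VHPT hash chain: thash/ttag give the bucket and
+    // tag for cr.ifa; on a tag match the entry's pgflags/itir are
+    // installed with itc.d, otherwise fall out to vmx_dispatch_tlb_miss.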
+ mov r16 = cr.ifa + ;; + thash r17 = r16 + ttag r20 = r16 + ;; +vmx_dtlb_loop: + cmp.eq p6,p0 = r0, r17 +(p6)br vmx_dtlb_out + ;; + adds r22 = VLE_TITAG_OFFSET, r17 + adds r23 = VLE_CCHAIN_OFFSET, r17 + ;; + ld8 r24 = [r22] + ld8 r25 = [r23] + ;; + lfetch [r25] + cmp.eq p6,p7 = r20, r24 + ;; +(p7)mov r17 = r25; +(p7)br.sptk vmx_dtlb_loop + ;; + adds r23 = VLE_PGFLAGS_OFFSET, r17 + adds r24 = VLE_ITIR_OFFSET, r17 + ;; + ld8 r26 = [r23] + ld8 r25 = [r24] + ;; + mov cr.itir = r25 + ;; + itc.d r26 + ;; + srlz.d; + ;; + mov r23=r31 + mov r22=b0 + adds r16=IA64_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r16] + ;; + adds r19=VPD(VPSR),r18 + movl r20=__vsa_base + ;; + ld8 r19=[r19] + ld8 r20=[r20] + ;; + br.sptk ia64_vmm_entry + ;; +vmx_dtlb_out: + mov r19 = 2 + br.sptk vmx_dispatch_tlb_miss + VMX_FAULT(2); +END(vmx_dtlb_miss) + + .org vmx_ia64_ivt+0x0c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(vmx_alt_itlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p7)br.sptk vmx_fault_3 + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + shr.u r18=r16,55 // move address bit 59 to bit 4 + ;; + and r18=0x10,r18 // bit 4=address-bit(61) + or r19=r17,r19 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi + VMX_FAULT(3); +END(vmx_alt_itlb_miss) + + + .org vmx_ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(vmx_alt_dtlb_miss) + mov r31=pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p7)br.sptk vmx_fault_4 + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=PAGE_KERNEL + mov r20=cr.isr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field + tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? + shr.u r18=r16,55 // move address bit 59 to bit 4 + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? 
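+    // Alt-DTLB policy in this handler: the PTE is built straight from
+    // the faulting address (physical address bits plus PAGE_KERNEL
+    // rights), with the uncacheable bit set when address bit 61 says
+    // the access hit region 6; speculative non-access references just
+    // get the ed bit set in ipsr instead of a translation.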
+ ;; + and r18=0x10,r18 // bit 4=address-bit(61) +(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field + dep r24=-1,r24,IA64_PSR_ED_BIT,1 + or r19=r19,r17 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p6) mov cr.ipsr=r24 + ;; +(p7) itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi + VMX_FAULT(4); +END(vmx_alt_dtlb_miss) + + .org vmx_ia64_ivt+0x1400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(vmx_nested_dtlb_miss) + VMX_FAULT(5) +END(vmx_nested_dtlb_miss) + + .org vmx_ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(vmx_ikey_miss) + VMX_REFLECT(6) +END(vmx_ikey_miss) + + .org vmx_ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(vmx_dkey_miss) + VMX_REFLECT(7) +END(vmx_dkey_miss) + + .org vmx_ia64_ivt+0x2000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(vmx_dirty_bit) + VMX_REFLECT(8) +END(vmx_idirty_bit) + + .org vmx_ia64_ivt+0x2400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(vmx_iaccess_bit) + VMX_REFLECT(9) +END(vmx_iaccess_bit) + + .org vmx_ia64_ivt+0x2800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(vmx_daccess_bit) + VMX_REFLECT(10) +END(vmx_daccess_bit) + + .org vmx_ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(vmx_break_fault) + mov r31=pr + mov r19=11 + mov r30=cr.iim + movl r29=0x1100 + ;; + cmp.eq p6,p7=r30,r0 + (p6) br.sptk vmx_fault_11 + ;; + cmp.eq p6,p7=r29,r30 + (p6) br.dptk.few vmx_hypercall_dispatch + (p7) br.sptk.many vmx_dispatch_break_fault + ;; + VMX_FAULT(11); +END(vmx_break_fault) + + .org vmx_ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) +ENTRY(vmx_interrupt) + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT + tbit.z p0,p15=r29,IA64_PSR_I_BIT + ;; +(p7) br.sptk vmx_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-IA64_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; +.mem.offset 0,0; st8.spill [r16]=r8,16 +.mem.offset 8,0; st8.spill [r17]=r9,16 + ;; +.mem.offset 0,0; st8.spill [r16]=r10,24 +.mem.offset 8,0; st8.spill 
[r17]=r11,24 + ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; /* avoid RAW on r16 & r17 */ + st8 [r17]=r31,16 /* save predicates */ + adds r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; +.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ +.mem.offset 8,0; st8.spill [r17]=r12,16 + adds r12=-16,r1 /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; +.mem.offset 0,0; st8.spill [r16]=r13,16 +.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ + mov r13=r21 /* establish `current' */ + ;; +.mem.offset 0,0; st8.spill [r16]=r15,16 +.mem.offset 8,0; st8.spill [r17]=r14,16 + dep r14=-1,r0,60,4 + ;; +.mem.offset 0,0; st8.spill [r16]=r2,16 +.mem.offset 8,0; st8.spill [r17]=r3,16 + adds r2=IA64_PT_REGS_R16_OFFSET,r1 + ;; + mov r8=ar.ccv + movl r1=__gp /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group + mov out0=cr.ivr // pass cr.ivr as first arg + add out1=16,sp // pass pointer to pt_regs as second arg + + ssm psr.ic + ;; + srlz.i + ;; + (p15) ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; +.mem.offset 0,0; st8.spill [r2]=r16,16 +.mem.offset 8,0; st8.spill [r3]=r17,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r18,16 +.mem.offset 8,0; st8.spill [r3]=r19,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r20,16 +.mem.offset 8,0; st8.spill [r3]=r21,16 + mov r18=b6 + ;; +.mem.offset 0,0; st8.spill [r2]=r22,16 +.mem.offset 8,0; st8.spill [r3]=r23,16 + mov r19=b7 + ;; +.mem.offset 0,0; st8.spill [r2]=r24,16 +.mem.offset 8,0; st8.spill [r3]=r25,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r26,16 +.mem.offset 8,0; st8.spill [r3]=r27,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r28,16 +.mem.offset 8,0; st8.spill [r3]=r29,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r30,16 +.mem.offset 8,0; st8.spill [r3]=r31,32 + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_nested + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_ia64_handle_irq + ;; +END(vmx_interrupt) + + .org vmx_ia64_ivt+0x3400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved +ENTRY(vmx_virtual_exirq) + VMX_DBG_FAULT(13) + mov r31=pr + mov r19=13 + br.sptk vmx_dispatch_vexirq +END(vmx_virtual_exirq) + + .org vmx_ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + VMX_DBG_FAULT(14) + VMX_FAULT(14) + + + .org vmx_ia64_ivt+0x3c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) 
Reserved + VMX_DBG_FAULT(15) + VMX_FAULT(15) + + + .org vmx_ia64_ivt+0x4000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + VMX_DBG_FAULT(16) + VMX_FAULT(16) + + .org vmx_ia64_ivt+0x4400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + VMX_DBG_FAULT(17) + VMX_FAULT(17) + + .org vmx_ia64_ivt+0x4800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + VMX_DBG_FAULT(18) + VMX_FAULT(18) + + .org vmx_ia64_ivt+0x4c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + VMX_DBG_FAULT(19) + VMX_FAULT(19) + + .org vmx_ia64_ivt+0x5000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present +ENTRY(vmx_page_not_present) + VMX_REFLECT(20) +END(vmx_page_not_present) + + .org vmx_ia64_ivt+0x5100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission vector +ENTRY(vmx_key_permission) + VMX_REFLECT(21) +END(vmx_key_permission) + + .org vmx_ia64_ivt+0x5200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(vmx_iaccess_rights) + VMX_REFLECT(22) +END(vmx_iaccess_rights) + + .org vmx_ia64_ivt+0x5300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(vmx_daccess_rights) + VMX_REFLECT(23) +END(vmx_daccess_rights) + + .org vmx_ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(vmx_general_exception) + VMX_FAULT(24) +// VMX_REFLECT(24) +END(vmx_general_exception) + + .org vmx_ia64_ivt+0x5500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(vmx_disabled_fp_reg) + VMX_REFLECT(25) +END(vmx_disabled_fp_reg) + + .org vmx_ia64_ivt+0x5600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(vmx_nat_consumption) + VMX_REFLECT(26) +END(vmx_nat_consumption) + + .org vmx_ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(vmx_speculation_vector) + VMX_REFLECT(27) +END(vmx_speculation_vector) + + .org vmx_ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + VMX_DBG_FAULT(28) + VMX_FAULT(28) + + .org vmx_ia64_ivt+0x5900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(vmx_debug_vector) + VMX_DBG_FAULT(29) + VMX_FAULT(29) +END(vmx_debug_vector) + + .org vmx_ia64_ivt+0x5a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) 
Unaligned Reference (57) +ENTRY(vmx_unaligned_access) + VMX_REFLECT(30) +END(vmx_unaligned_access) + + .org vmx_ia64_ivt+0x5b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) +ENTRY(vmx_unsupported_data_reference) + VMX_REFLECT(31) +END(vmx_unsupported_data_reference) + + .org vmx_ia64_ivt+0x5c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) +ENTRY(vmx_floating_point_fault) + VMX_REFLECT(32) +END(vmx_floating_point_fault) + + .org vmx_ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(vmx_floating_point_trap) + VMX_REFLECT(33) +END(vmx_floating_point_trap) + + .org vmx_ia64_ivt+0x5e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(vmx_lower_privilege_trap) + VMX_REFLECT(34) +END(vmx_lower_privilege_trap) + + .org vmx_ia64_ivt+0x5f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(vmx_taken_branch_trap) + VMX_REFLECT(35) +END(vmx_taken_branch_trap) + + .org vmx_ia64_ivt+0x6000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(vmx_single_step_trap) + VMX_REFLECT(36) +END(vmx_single_step_trap) + + .org vmx_ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault +ENTRY(vmx_virtualization_fault) + VMX_DBG_FAULT(37) + mov r31=pr + mov r19=37 + br.sptk vmx_dispatch_virtualization_fault +END(vmx_virtualization_fault) + + .org vmx_ia64_ivt+0x6200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + VMX_DBG_FAULT(38) + VMX_FAULT(38) + + .org vmx_ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + VMX_DBG_FAULT(39) + VMX_FAULT(39) + + .org vmx_ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + VMX_DBG_FAULT(40) + VMX_FAULT(40) + + .org vmx_ia64_ivt+0x6500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + VMX_DBG_FAULT(41) + VMX_FAULT(41) + + .org vmx_ia64_ivt+0x6600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + VMX_DBG_FAULT(42) + VMX_FAULT(42) + + .org vmx_ia64_ivt+0x6700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6700 Entry 43 (size 16 bundles) Reserved + VMX_DBG_FAULT(43) + VMX_FAULT(43) + + .org vmx_ia64_ivt+0x6800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + VMX_DBG_FAULT(44) + VMX_FAULT(44) + + .org vmx_ia64_ivt+0x6900 
+///////////////////////////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(vmx_ia32_exception) + VMX_DBG_FAULT(45) + VMX_FAULT(45) +END(vmx_ia32_exception) + + .org vmx_ia64_ivt+0x6a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(vmx_ia32_intercept) + VMX_DBG_FAULT(46) + VMX_FAULT(46) +END(vmx_ia32_intercept) + + .org vmx_ia64_ivt+0x6b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) +ENTRY(vmx_ia32_interrupt) + VMX_DBG_FAULT(47) + VMX_FAULT(47) +END(vmx_ia32_interrupt) + + .org vmx_ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + VMX_DBG_FAULT(48) + VMX_FAULT(48) + + .org vmx_ia64_ivt+0x6d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + VMX_DBG_FAULT(49) + VMX_FAULT(49) + + .org vmx_ia64_ivt+0x6e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + VMX_DBG_FAULT(50) + VMX_FAULT(50) + + .org vmx_ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + VMX_DBG_FAULT(51) + VMX_FAULT(51) + + .org vmx_ia64_ivt+0x7000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7000 Entry 52 (size 16 bundles) Reserved + VMX_DBG_FAULT(52) + VMX_FAULT(52) + + .org vmx_ia64_ivt+0x7100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + VMX_DBG_FAULT(53) + VMX_FAULT(53) + + .org vmx_ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + VMX_DBG_FAULT(54) + VMX_FAULT(54) + + .org vmx_ia64_ivt+0x7300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + VMX_DBG_FAULT(55) + VMX_FAULT(55) + + .org vmx_ia64_ivt+0x7400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + VMX_DBG_FAULT(56) + VMX_FAULT(56) + + .org vmx_ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + VMX_DBG_FAULT(57) + VMX_FAULT(57) + + .org vmx_ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + VMX_DBG_FAULT(58) + VMX_FAULT(58) + + .org vmx_ia64_ivt+0x7700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + VMX_DBG_FAULT(59) + VMX_FAULT(59) + + .org vmx_ia64_ivt+0x7800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + VMX_DBG_FAULT(60) + VMX_FAULT(60) + + .org vmx_ia64_ivt+0x7900 
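The .org directives above pin each handler at a fixed offset from vmx_ia64_ivt: vectors 0 through 19 get 64-bundle slots of 0x400 bytes each, and from 0x5000 onward every vector gets a 16-bundle slot of 0x100 bytes, which is exactly the mapping the vec2off[] table in vmx_process.c (later in this patch) spells out. A minimal C sketch of that layout rule follows; the helper name ivt_offset is ours, for illustration only.

    #include <stdint.h>

    /* Byte offset of IVT entry 'vec' from vmx_ia64_ivt, as implied by the
     * .org directives: vectors 0..19 occupy 0x400-byte (64-bundle) slots,
     * vectors 20..67 occupy 0x100-byte (16-bundle) slots starting at 0x5000.
     * This reproduces the vec2off[] table defined in vmx_process.c. */
    uint64_t ivt_offset(unsigned int vec)
    {
        if (vec < 20)
            return (uint64_t)vec * 0x400;
        return 0x5000 + (uint64_t)(vec - 20) * 0x100;
    }

For example, ivt_offset(11) gives 0x2c00 (the break-instruction vector above) and ivt_offset(37) gives 0x6100 (the virtualization fault).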
+///////////////////////////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + VMX_DBG_FAULT(61) + VMX_FAULT(61) + + .org vmx_ia64_ivt+0x7a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + VMX_DBG_FAULT(62) + VMX_FAULT(62) + + .org vmx_ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + VMX_DBG_FAULT(63) + VMX_FAULT(63) + + .org vmx_ia64_ivt+0x7c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + VMX_DBG_FAULT(64) + VMX_FAULT(64) + + .org vmx_ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + VMX_DBG_FAULT(65) + VMX_FAULT(65) + + .org vmx_ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + VMX_DBG_FAULT(66) + VMX_FAULT(66) + + .org vmx_ia64_ivt+0x7f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + VMX_DBG_FAULT(67) + VMX_FAULT(67) + + .org vmx_ia64_ivt+0x8000 + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + + +ENTRY(vmx_dispatch_reflection) + /* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + VMX_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,4,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + (p15) ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_reflect_interruption +END(vmx_dispatch_reflection) + +ENTRY(vmx_dispatch_virtualization_fault) + VMX_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,3,0 // now it's safe (must be first in insn group!) 
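One detail worth noting in these dispatch stubs: rp is loaded with ia64_leave_hypervisor before the br.call, and the call deposits its return link into the scratch branch register b6, so when the C handler returns normally it falls straight into the hypervisor exit path rather than back into the stub. Below is a rough, runnable C analogue of the resulting control flow; the function names are stand-ins, not code from this patch.

    #include <stdio.h>

    static void handler(void)          { puts("C handler runs on the saved state"); }
    static void leave_hypervisor(void) { puts("restore guest state and rfi"); }

    /* What "movl r14=ia64_leave_hypervisor; mov rp=r14; br.call b6=handler"
     * arranges: after the handler finishes, execution continues directly
     * in the exit path and never comes back to the dispatch stub. */
    static void dispatch(void)
    {
        handler();
        leave_hypervisor();
    }

    int main(void)
    {
        dispatch();
        return 0;
    }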
+ mov out0=r13 //vcpu + mov out1=r4 //cause + mov out2=r5 //opcode + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + (p15) ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_emulate +END(vmx_dispatch_virtualization_fault) + + +ENTRY(vmx_dispatch_vexirq) + VMX_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r13 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + (p15) ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_vexirq +END(vmx_dispatch_vexirq) + +ENTRY(vmx_dispatch_tlb_miss) + VMX_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=r13 + mov out1=r15 + mov out2=cr.ifa + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + (p15) ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_hpw_miss +END(vmx_dispatch_tlb_miss) + + +ENTRY(vmx_dispatch_break_fault) + VMX_SAVE_MIN_WITH_COVER_R19 + ;; + ;; + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + adds out1=16,sp + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + (p15)ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_ia64_handle_break + ;; +END(vmx_dispatch_break_fault) + + +ENTRY(vmx_hypercall_dispatch) + VMX_SAVE_MIN_WITH_COVER + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + (p15) ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + ;; + movl r14=ia64_leave_hypervisor + movl r2=hyper_call_table + ;; + mov rp=r14 + shladd r2=r15,3,r2 + ;; + ld8 r2=[r2] + ;; + mov b6=r2 + ;; + br.call.sptk.many b6=b6 + ;; +END(vmx_hypercall_dispatch) + + + +ENTRY(vmx_dispatch_interrupt) + VMX_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group + mov out0=cr.ivr // pass cr.ivr as first arg + add out1=16,sp // pass pointer to pt_regs as second arg + + ssm psr.ic + ;; + srlz.i + ;; + (p15) ssm psr.i + adds r3=16,r2 // set up second base pointer for SAVE_REST + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_ia64_handle_irq +END(vmx_dispatch_interrupt) + + + + .rodata + .align 8 + .globl hyper_call_table +hyper_call_table: + data8 hyper_not_support //hyper_set_trap_table /* 0 */ + data8 hyper_mmu_update + data8 hyper_not_support //hyper_set_gdt + data8 hyper_not_support //hyper_stack_switch + data8 hyper_not_support //hyper_set_callbacks + data8 hyper_not_support //hyper_fpu_taskswitch /* 5 */ + data8 hyper_sched_op + data8 hyper_dom0_op + data8 hyper_not_support //hyper_set_debugreg + data8 hyper_not_support //hyper_get_debugreg + data8 hyper_not_support //hyper_update_descriptor /* 10 */ + data8 hyper_not_support //hyper_set_fast_trap + data8 hyper_dom_mem_op + data8 hyper_not_support //hyper_multicall + data8 hyper_not_support //hyper_update_va_mapping + data8 
hyper_not_support //hyper_set_timer_op /* 15 */ + data8 hyper_event_channel_op + data8 hyper_xen_version + data8 hyper_not_support //hyper_console_io + data8 hyper_not_support //hyper_physdev_op + data8 hyper_not_support //hyper_grant_table_op /* 20 */ + data8 hyper_not_support //hyper_vm_assist + data8 hyper_not_support //hyper_update_va_mapping_otherdomain + data8 hyper_not_support //hyper_switch_vm86 + data8 hyper_not_support //hyper_boot_vcpu + data8 hyper_not_support //hyper_ni_hypercall /* 25 */ + data8 hyper_not_support //hyper_mmuext_op + data8 hyper_lock_page + data8 hyper_set_shared_page diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_minstate.h --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_minstate.h Thu Sep 1 18:46:28 2005 @@ -0,0 +1,333 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_minstate.h: + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/fpu.h> +#include <asm/mmu_context.h> +#include <asm/offsets.h> +#include <asm/pal.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/vmx_pal_vsa.h> +#include <asm/vmx_vpd.h> +#include <asm/cache.h> +#include "entry.h" + +#define VMX_MINSTATE_START_SAVE_MIN \ + mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ + ;; \ + mov.m r28=ar.rnat; \ + addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ + ;; \ + lfetch.fault.excl.nt1 [r22]; \ + addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ + mov r23=ar.bspstore; /* save ar.bspstore */ \ + ;; \ + mov ar.bspstore=r22; /* switch to kernel RBS */ \ + ;; \ + mov r18=ar.bsp; \ + mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + + + +#define VMX_MINSTATE_END_SAVE_MIN \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ + ;; + + +#define PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \ + /* begin to call pal vps sync_read and cleanup psr.pl */ \ + add r25=IA64_VPD_BASE_OFFSET, r21; \ + movl r20=__vsa_base; \ + ;; \ + ld8 r25=[r25]; /* read vpd base */ \ + ld8 r20=[r20]; /* read entry point */ \ + ;; \ + mov r6=r25; \ + add r20=PAL_VPS_SYNC_READ,r20; \ + ;; \ +{ .mii; \ + add r22=VPD(VPSR),r25; \ + mov r24=ip; \ + mov b0=r20; \ + ;; \ +}; \ +{ .mmb; \ + add r24 = 0x20, r24; \ + mov r16 = cr.ipsr; /* Temp workaround since psr.ic is off */ \ + br.cond.sptk b0; /* call the service */ \ + ;; \ +}; \ + ld8 r7=[r22]; \ + /* deposite ipsr bit cpl into vpd.vpsr, since epc will change */ \ + extr.u r30=r16, IA64_PSR_CPL0_BIT, 2; \ + ;; \ + dep r7=r30, r7, IA64_PSR_CPL0_BIT, 2; \ + ;; \ + extr.u r30=r16, IA64_PSR_BE_BIT, 5; \ + ;; \ + dep r7=r30, r7, IA64_PSR_BE_BIT, 5; \ + ;; \ + extr.u r30=r16, 
IA64_PSR_RI_BIT, 2; \ + ;; \ + dep r7=r30, r7, IA64_PSR_RI_BIT, 2; \ + ;; \ + st8 [r22]=r7; \ + ;; + + + +#define IA64_CURRENT_REG IA64_KR(CURRENT) /* r21 is reserved for current pointer */ +//#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=IA64_CURRENT_REG +#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=r21 + +/* + * VMX_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ +#define VMX_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ +/* switch rr7 */ \ + movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \ + movl r17=(7<<61); \ + movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \ + movl r22=(6<<61); \ + movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1); \ + movl r23=(5<<61); \ + ;; \ + mov rr[r17]=r16; \ + mov rr[r22]=r20; \ + mov rr[r23]=r18; \ + ;; \ + srlz.i; \ + ;; \ + VMX_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ + mov r27=ar.rsc; /* M */ \ + mov r20=r1; /* A */ \ + mov r26=ar.unat; /* M */ \ + mov r29=cr.ipsr; /* M */ \ + mov r18=cr.isr; \ + COVER; /* B;; (or nothing) */ \ + ;; \ + tbit.z p6,p0=r29,IA64_PSR_VM_BIT; \ + tbit.nz.or p6,p0 = r18,39; \ + ;; \ +(p6) br.sptk.few vmx_panic; \ + tbit.z p0,p15=r29,IA64_PSR_I_BIT; \ + mov r1=r16; \ +/* mov r21=r16; */ \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + SAVE_IFS; \ + ;; \ + VMX_MINSTATE_START_SAVE_MIN \ + adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ + adds r16=PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16]=r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ + mov r29=b0 \ + ;; \ + adds r16=PT(R8),r1; /* initialize first base pointer */ \ + adds r17=PT(R9),r1; /* initialize second base pointer */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r8,16; \ +.mem.offset 8,0; st8.spill [r17]=r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r10,24; \ +.mem.offset 8,0; st8.spill [r17]=r11,24; \ + ;; \ + mov r8=ar.pfs; /* I */ \ + mov r9=cr.iip; /* M */ \ + mov r10=ar.fpsr; /* M */ \ + ;; \ + st8 [r16]=r9,16; /* save cr.iip */ \ + st8 [r17]=r30,16; /* save cr.ifs */ \ + sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ + ;; \ + st8 [r16]=r26,16; /* save ar.unat */ \ + st8 [r17]=r8,16; /* save ar.pfs */ \ + shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ + ;; \ + st8 [r16]=r27,16; /* save ar.rsc */ \ + st8 [r17]=r28,16; /* save ar.rnat */ \ + ;; /* avoid RAW on r16 & r17 */ \ + st8 [r16]=r23,16; /* save ar.bspstore */ \ + st8 [r17]=r31,16; /* save predicates */ \ + ;; \ + st8 [r16]=r29,16; /* save b0 */ \ + st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ +.mem.offset 8,0; 
st8.spill [r17]=r12,16; \ + adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r13,16; \ +.mem.offset 8,0; st8.spill [r17]=r10,16; /* save ar.fpsr */ \ + mov r13=r21; /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r15,16; \ +.mem.offset 8,0; st8.spill [r17]=r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r2,16; \ +.mem.offset 8,0; st8.spill [r17]=r3,16; \ + adds r2=PT(F6),r1; \ + ;; \ + .mem.offset 0,0; st8.spill [r16]=r4,16; \ + .mem.offset 8,0; st8.spill [r17]=r5,16; \ + ;; \ + .mem.offset 0,0; st8.spill [r16]=r6,16; \ + .mem.offset 8,0; st8.spill [r17]=r7,16; \ + mov r20=ar.ccv; \ + ;; \ + mov r18=cr.iipa; \ + mov r4=cr.isr; \ + mov r22=ar.unat; \ + ;; \ + st8 [r16]=r18,16; \ + st8 [r17]=r4; \ + ;; \ + adds r16=PT(EML_UNAT),r1; \ + adds r17=PT(AR_CCV),r1; \ + ;; \ + st8 [r16]=r22,8; \ + st8 [r17]=r20; \ + mov r4=r24; \ + mov r5=r25; \ + ;; \ + st8 [r16]=r0; \ + EXTRA; \ + mov r9=ar.csd; \ + mov r10=ar.ssd; \ + movl r11=FPSR_DEFAULT; /* L-unit */ \ + movl r1=__gp; /* establish kernel global pointer */ \ + ;; \ + PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \ + VMX_MINSTATE_END_SAVE_MIN + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). + * + * Assumed state upon entry: + * psr.ic: on + * r2: points to &pt_regs.f6 + * r3: points to &pt_regs.f7 + * r4,r5,scrach + * r6: points to vpd + * r7: vpsr + * r9: contents of ar.csd + * r10: contents of ar.ssd + * r11: FPSR_DEFAULT + * + * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. + */ +#define VMX_SAVE_REST \ + tbit.z pBN0,pBN1=r7,IA64_PSR_BN_BIT; /* guest bank0 or bank1 ? */ \ + ;; \ +(pBN0) add r4=VPD(VBGR),r6; \ +(pBN0) add r5=VPD(VBGR)+0x8,r6; \ +(pBN0) add r7=VPD(VBNAT),r6; \ + ;; \ +(pBN1) add r5=VPD(VGR)+0x8,r6; \ +(pBN1) add r4=VPD(VGR),r6; \ +(pBN1) add r7=VPD(VNAT),r6; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r16,16; \ +.mem.offset 8,0; st8.spill [r5]=r17,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r18,16; \ +.mem.offset 8,0; st8.spill [r5]=r19,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r20,16; \ +.mem.offset 8,0; st8.spill [r5]=r21,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r22,16; \ +.mem.offset 8,0; st8.spill [r5]=r23,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r24,16; \ +.mem.offset 8,0; st8.spill [r5]=r25,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r26,16; \ +.mem.offset 8,0; st8.spill [r5]=r27,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r28,16; \ +.mem.offset 8,0; st8.spill [r5]=r29,16; \ + mov r26=b6; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r30,16; \ +.mem.offset 8,0; st8.spill [r5]=r31,16; \ + mov r27=b7; \ + ;; \ + mov r30=ar.unat; \ + ;; \ + st8 [r7]=r30; \ + mov ar.fpsr=r11; /* M-unit */ \ + ;; \ + stf.spill [r2]=f6,32; \ + stf.spill [r3]=f7,32; \ + ;; \ + stf.spill [r2]=f8,32; \ + stf.spill [r3]=f9,32; \ + ;; \ + stf.spill [r2]=f10; \ + stf.spill [r3]=f11; \ + ;; \ + adds r2=PT(B6)-PT(F10),r2; \ + adds r3=PT(B7)-PT(F11),r3; \ + ;; \ + st8 [r2]=r26,16; /* b6 */ \ + st8 [r3]=r27,16; /* b7 */ \ + ;; \ + st8 [r2]=r9; /* ar.csd */ \ + st8 [r3]=r10; /* ar.ssd */ \ + ;; + +#define VMX_SAVE_MIN_WITH_COVER VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs,) +#define VMX_SAVE_MIN_WITH_COVER_R19 VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19) +#define VMX_SAVE_MIN VMX_DO_SAVE_MIN( , mov r30=r0, ) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_phy_mode.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_phy_mode.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,433 @@ +/* -*- Mode:C; 
c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_phy_mode.c: emulating domain physical mode. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Arun Sharma (arun.sharma@xxxxxxxxx) + * Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx) + * Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx) + */ + + +#include <asm/processor.h> +#include <asm/gcc_intrin.h> +#include <asm/vmx_phy_mode.h> +#include <xen/sched.h> +#include <asm/pgtable.h> + + +int valid_mm_mode[8] = { + GUEST_PHYS, /* (it, dt, rt) -> (0, 0, 0) */ + INV_MODE, + INV_MODE, + GUEST_PHYS, /* (it, dt, rt) -> (0, 1, 1) */ + INV_MODE, + GUEST_PHYS, /* (it, dt, rt) -> (1, 0, 1) */ + INV_MODE, + GUEST_VIRT, /* (it, dt, rt) -> (1, 1, 1).*/ +}; + +/* + * Special notes: + * - Index by it/dt/rt sequence + * - Only existing mode transitions are allowed in this table + * - RSE is placed at lazy mode when emulating guest partial mode + * - If gva happens to be rr0 and rr4, only allowed case is identity + * mapping (gva=gpa), or panic! (How?) + */ +int mm_switch_table[8][8] = { + /* 2004/09/12(Kevin): Allow switch to self */ + /* + * (it,dt,rt): (0,0,0) -> (1,1,1) + * This kind of transition usually occurs in the very early + * stage of Linux boot up procedure. Another case is in efi + * and pal calls. (see "arch/ia64/kernel/head.S") + * + * (it,dt,rt): (0,0,0) -> (0,1,1) + * This kind of transition is found when OSYa exits efi boot + * service. Due to gva = gpa in this case (Same region), + * data access can be satisfied though itlb entry for physical + * emulation is hit. + */ + SW_SELF,0, 0, SW_NOP, 0, 0, 0, SW_P2V, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + /* + * (it,dt,rt): (0,1,1) -> (1,1,1) + * This kind of transition is found in OSYa. + * + * (it,dt,rt): (0,1,1) -> (0,0,0) + * This kind of transition is found in OSYa + */ + SW_NOP, 0, 0, SW_SELF,0, 0, 0, SW_P2V, + /* (1,0,0)->(1,1,1) */ + 0, 0, 0, 0, 0, 0, 0, SW_P2V, + /* + * (it,dt,rt): (1,0,1) -> (1,1,1) + * This kind of transition usually occurs when Linux returns + * from the low level TLB miss handlers. + * (see "arch/ia64/kernel/ivt.S") + */ + 0, 0, 0, 0, 0, SW_SELF,0, SW_P2V, + 0, 0, 0, 0, 0, 0, 0, 0, + /* + * (it,dt,rt): (1,1,1) -> (1,0,1) + * This kind of transition usually occurs in Linux low level + * TLB miss handler. (see "arch/ia64/kernel/ivt.S") + * + * (it,dt,rt): (1,1,1) -> (0,0,0) + * This kind of transition usually occurs in pal and efi calls, + * which requires running in physical mode. 
+ * (see "arch/ia64/kernel/head.S") + * (1,1,1)->(1,0,0) + */ + + SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF, +}; + +void +physical_mode_init(VCPU *vcpu) +{ + UINT64 psr; + struct domain * d = vcpu->domain; + + vcpu->arch.old_rsc = 0; + vcpu->arch.mode_flags = GUEST_IN_PHY; +} + +extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages); +#if 0 +void +physical_itlb_miss_domn(VCPU *vcpu, u64 vadr) +{ + u64 psr; + IA64_PSR vpsr; + u64 mppn,gppn,mpp1,gpp1; + struct domain *d; + static u64 test=0; + d=vcpu->domain; + if(test) + panic("domn physical itlb miss happen\n"); + else + test=1; + vpsr.val=vmx_vcpu_get_psr(vcpu); + gppn=(vadr<<1)>>13; + mppn = get_mfn(DOMID_SELF,gppn,1); + mppn=(mppn<<12)|(vpsr.cpl<<7); + gpp1=0; + mpp1 = get_mfn(DOMID_SELF,gpp1,1); + mpp1=(mpp1<<12)|(vpsr.cpl<<7); +// if(vadr>>63) +// mppn |= PHY_PAGE_UC; +// else +// mppn |= PHY_PAGE_WB; + mpp1 |= PHY_PAGE_WB; + psr=ia64_clear_ic(); + ia64_itr(0x1, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24); + ia64_srlz_i(); + ia64_itr(0x2, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24); + ia64_stop(); + ia64_srlz_i(); + ia64_itr(0x1, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24); + ia64_srlz_i(); + ia64_itr(0x2, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24); + ia64_stop(); + ia64_srlz_i(); + ia64_itr(0x1, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28); + ia64_srlz_i(); + ia64_itr(0x2, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28); + ia64_stop(); + ia64_srlz_i(); + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} +#endif + +void +physical_itlb_miss(VCPU *vcpu, u64 vadr) +{ + physical_itlb_miss_dom0(vcpu, vadr); +} + + +void +physical_itlb_miss_dom0(VCPU *vcpu, u64 vadr) +{ + u64 psr; + IA64_PSR vpsr; + u64 mppn,gppn; + vpsr.val=vmx_vcpu_get_psr(vcpu); + gppn=(vadr<<1)>>13; + mppn = get_mfn(DOMID_SELF,gppn,1); + mppn=(mppn<<12)|(vpsr.cpl<<7); +// if(vadr>>63) +// mppn |= PHY_PAGE_UC; +// else + mppn |= PHY_PAGE_WB; + + psr=ia64_clear_ic(); + ia64_itc(1,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT); + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} + + +void +physical_dtlb_miss(VCPU *vcpu, u64 vadr) +{ + u64 psr; + IA64_PSR vpsr; + u64 mppn,gppn; +// if(vcpu->domain!=dom0) +// panic("dom n physical dtlb miss happen\n"); + vpsr.val=vmx_vcpu_get_psr(vcpu); + gppn=(vadr<<1)>>13; + mppn = get_mfn(DOMID_SELF,gppn,1); + mppn=(mppn<<12)|(vpsr.cpl<<7); + if(vadr>>63) + mppn |= PHY_PAGE_UC; + else + mppn |= PHY_PAGE_WB; + + psr=ia64_clear_ic(); + ia64_itc(2,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT); + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} + +void +vmx_init_all_rr(VCPU *vcpu) +{ + VMX(vcpu,vrr[VRN0]) = 0x38; + VMX(vcpu,vrr[VRN1]) = 0x38; + VMX(vcpu,vrr[VRN2]) = 0x38; + VMX(vcpu,vrr[VRN3]) = 0x38; + VMX(vcpu,vrr[VRN4]) = 0x38; + VMX(vcpu,vrr[VRN5]) = 0x38; + VMX(vcpu,vrr[VRN6]) = 0x60; + VMX(vcpu,vrr[VRN7]) = 0x60; + + VMX(vcpu,mrr5) = vmx_vrrtomrr(vcpu, 0x38); + VMX(vcpu,mrr6) = vmx_vrrtomrr(vcpu, 0x60); + VMX(vcpu,mrr7) = vmx_vrrtomrr(vcpu, 0x60); +} + +void +vmx_load_all_rr(VCPU *vcpu) +{ + unsigned long psr; + ia64_rr phy_rr; + + psr = ia64_clear_ic(); + + phy_rr.ps = EMUL_PHY_PAGE_SHIFT; + phy_rr.ve = 1; + + /* WARNING: not allow co-exist of both virtual mode and physical + * mode in same region + */ + if (is_physical_mode(vcpu)) { + if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) + panic("Unexpected domain switch in phy emul\n"); + phy_rr.rid = vcpu->domain->arch.metaphysical_rr0; + ia64_set_rr((VRN0 << VRN_SHIFT), phy_rr.rrval); + phy_rr.rid = 
vcpu->domain->arch.metaphysical_rr4; + ia64_set_rr((VRN4 << VRN_SHIFT), phy_rr.rrval); + } else { + ia64_set_rr((VRN0 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0]))); + ia64_set_rr((VRN4 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN4]))); + } + +#if 1 + /* rr567 will be postponed to last point when resuming back to guest */ + ia64_set_rr((VRN1 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN1]))); + ia64_set_rr((VRN2 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN2]))); + ia64_set_rr((VRN3 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN3]))); +#endif + ia64_srlz_d(); + ia64_set_psr(psr); + ia64_srlz_i(); +} + +void +switch_to_physical_rid(VCPU *vcpu) +{ + UINT64 psr; + ia64_rr phy_rr; + + phy_rr.ps = EMUL_PHY_PAGE_SHIFT; + phy_rr.ve = 1; + + /* Save original virtual mode rr[0] and rr[4] */ + psr=ia64_clear_ic(); + phy_rr.rid = vcpu->domain->arch.metaphysical_rr0; + ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval); + ia64_srlz_d(); + phy_rr.rid = vcpu->domain->arch.metaphysical_rr4; + ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval); + ia64_srlz_d(); + + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} + + +void +switch_to_virtual_rid(VCPU *vcpu) +{ + UINT64 psr; + ia64_rr mrr; + + psr=ia64_clear_ic(); + + mrr=vmx_vcpu_rr(vcpu,VRN0<<VRN_SHIFT); + ia64_set_rr(VRN0<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval)); + ia64_srlz_d(); + mrr=vmx_vcpu_rr(vcpu,VRN4<<VRN_SHIFT); + ia64_set_rr(VRN4<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval)); + ia64_srlz_d(); + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} + +static int mm_switch_action(IA64_PSR opsr, IA64_PSR npsr) +{ + return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; +} + +void +switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr) +{ + int act; + REGS * regs=vcpu_regs(vcpu); + act = mm_switch_action(old_psr, new_psr); + switch (act) { + case SW_V2P: + vcpu->arch.old_rsc = regs->ar_rsc; + switch_to_physical_rid(vcpu); + /* + * Set rse to enforced lazy, to prevent active rse save/restor when + * guest physical mode. + */ + regs->ar_rsc &= ~(IA64_RSC_MODE); + vcpu->arch.mode_flags |= GUEST_IN_PHY; + break; + case SW_P2V: + switch_to_virtual_rid(vcpu); + /* + * recover old mode which is saved when entering + * guest physical mode + */ + regs->ar_rsc = vcpu->arch.old_rsc; + vcpu->arch.mode_flags &= ~GUEST_IN_PHY; + break; + case SW_SELF: + printf("Switch to self-0x%lx!!! MM mode doesn't change...\n", + old_psr.val); + break; + case SW_NOP: + printf("No action required for mode transition: (0x%lx -> 0x%lx)\n", + old_psr.val, new_psr.val); + break; + default: + /* Sanity check */ + printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val); + panic("Unexpected virtual <--> physical mode transition"); + break; + } + return; +} + + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. 
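Backing up to the transition machinery itself: mm_switch_action() simply indexes mm_switch_table with the (it,dt,rt) triples of the old and new PSR, and the comments on valid_mm_mode[] show the index packing those bits as it*4 + dt*2 + rt, so the (0,0,0) -> (1,1,1) transition described above lands on row 0, column 7, which holds SW_P2V. A small self-contained sketch of that indexing follows; the SW_* values and the mode_ind() helper are illustrative stand-ins, since the real constants and MODE_IND() macro live elsewhere in the tree.

    #include <stdio.h>

    /* Illustrative stand-ins for the real SW_* action codes. */
    enum { SW_NONE = 0, SW_SELF, SW_NOP, SW_P2V, SW_V2P };

    /* Pack (it, dt, rt) the way valid_mm_mode[] is commented:
     * it is the most significant of the three bits. */
    static int mode_ind(int it, int dt, int rt)
    {
        return (it << 2) | (dt << 1) | rt;
    }

    int main(void)
    {
        /* Same 8x8 shape as mm_switch_table; only the cells documented
         * above are filled in here. */
        int sw[8][8] = { { 0 } };

        sw[mode_ind(0,0,0)][mode_ind(1,1,1)] = SW_P2V;  /* early boot, PAL/EFI return  */
        sw[mode_ind(1,1,1)][mode_ind(0,0,0)] = SW_V2P;  /* entering PAL/EFI phys mode  */
        sw[mode_ind(1,1,1)][mode_ind(1,0,1)] = SW_V2P;  /* low-level TLB miss handlers */

        printf("(0,0,0)->(1,1,1) action: %d\n",
               sw[mode_ind(0,0,0)][mode_ind(1,1,1)]);
        return 0;
    }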
+ */ + +void +check_mm_mode_switch (VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr) +{ + + if ( (old_psr.dt != new_psr.dt ) || + (old_psr.it != new_psr.it ) || + (old_psr.rt != new_psr.rt ) + ) { + switch_mm_mode (vcpu, old_psr, new_psr); + } + + return 0; +} + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. + */ + +void +prepare_if_physical_mode(VCPU *vcpu) +{ + if (is_physical_mode(vcpu)) { + vcpu->arch.mode_flags |= GUEST_PHY_EMUL; + switch_to_virtual_rid(vcpu); + } + return; +} + +/* Recover always follows prepare */ +void +recover_if_physical_mode(VCPU *vcpu) +{ + if (is_physical_mode(vcpu)) { + vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; + switch_to_physical_rid(vcpu); + } + return; +} + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_process.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_process.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,375 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_process.c: handling VMX architecture-related VM exits + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx> + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/errno.h> +#include <xen/sched.h> +#include <xen/smp.h> +#include <asm/ptrace.h> +#include <xen/delay.h> + +#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */ +#include <asm/sal.h> /* FOR struct ia64_sal_retval */ + +#include <asm/system.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/desc.h> +//#include <asm/ldt.h> +#include <xen/irq.h> +#include <xen/event.h> +#include <asm/regionreg.h> +#include <asm/privop.h> +#include <asm/ia64_int.h> +#include <asm/hpsim_ssc.h> +#include <asm/dom_fw.h> +#include <asm/vmx_vcpu.h> +#include <asm/kregs.h> +#include <asm/vmx.h> +#include <asm/vmx_mm_def.h> +#include <xen/mm.h> +/* reset all PSR field to 0, except up,mfl,mfh,pk,dt,rt,mc,it */ +#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034 + + +extern struct ia64_sal_retval pal_emulator_static(UINT64); +extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64); +extern void rnat_consumption (VCPU *vcpu); +#define DOMN_PAL_REQUEST 0x110000 +IA64FAULT +vmx_ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim) +{ + static int first_time = 1; + struct domain *d = (struct domain *) current->domain; + struct vcpu *v = (struct domain *) current; + extern unsigned long running_on_sim; + unsigned long i, sal_param[8]; + +#if 0 + if (first_time) { + if (platform_is_hp_ski()) running_on_sim = 1; + else running_on_sim = 0; + first_time = 0; + } + if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant + if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs); + else do_ssc(vcpu_get_gr(current,36), regs); + } +#endif + if (iim == d->arch.breakimm) { + struct ia64_sal_retval x; + switch (regs->r2) { + case FW_HYPERCALL_PAL_CALL: + //printf("*** PAL hypercall: index=%d\n",regs->r28); + //FIXME: This should call a C routine + x = pal_emulator_static(VMX_VPD(v, vgr[12])); + regs->r8 = x.status; regs->r9 = x.v0; + regs->r10 = x.v1; regs->r11 = x.v2; +#if 0 + if (regs->r8) + printk("Failed vpal emulation, with index:0x%lx\n", + VMX_VPD(v, vgr[12])); +#endif + break; + case FW_HYPERCALL_SAL_CALL: + for (i = 0; i < 8; i++) + vmx_vcpu_get_gr(v, 32+i, &sal_param[i]); + x = sal_emulator(sal_param[0], sal_param[1], + sal_param[2], sal_param[3], + sal_param[4], sal_param[5], + sal_param[6], sal_param[7]); + regs->r8 = x.status; regs->r9 = x.v0; + regs->r10 = x.v1; regs->r11 = x.v2; +#if 0 + if (regs->r8) + printk("Failed vsal emulation, with index:0x%lx\n", + sal_param[0]); +#endif + break; + case FW_HYPERCALL_EFI_RESET_SYSTEM: + printf("efi.reset_system called "); + if (current->domain == dom0) { + printf("(by dom0)\n "); + (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL); + } + printf("(not supported for non-0 domain)\n"); + regs->r8 = EFI_UNSUPPORTED; + break; + case FW_HYPERCALL_EFI_GET_TIME: + { + unsigned long *tv, *tc; + vmx_vcpu_get_gr(v, 32, &tv); + vmx_vcpu_get_gr(v, 33, &tc); + printf("efi_get_time(%p,%p) called...",tv,tc); + tv = __va(translate_domain_mpaddr(tv)); + if (tc) tc = __va(translate_domain_mpaddr(tc)); + regs->r8 = (*efi.get_time)(tv,tc); + printf("and returns %lx\n",regs->r8); + } + break; + case FW_HYPERCALL_EFI_SET_TIME: + case FW_HYPERCALL_EFI_GET_WAKEUP_TIME: + case FW_HYPERCALL_EFI_SET_WAKEUP_TIME: + // FIXME: need fixes in efi.h from 2.6.9 + case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP: + // 
FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED + // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS + // POINTER ARGUMENTS WILL BE VIRTUAL!! + case FW_HYPERCALL_EFI_GET_VARIABLE: + // FIXME: need fixes in efi.h from 2.6.9 + case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE: + case FW_HYPERCALL_EFI_SET_VARIABLE: + case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT: + // FIXME: need fixes in efi.h from 2.6.9 + regs->r8 = EFI_UNSUPPORTED; + break; + } +#if 0 + if (regs->r8) + printk("Failed vgfw emulation, with index:0x%lx\n", + regs->r2); +#endif + vmx_vcpu_increment_iip(current); + }else if(iim == DOMN_PAL_REQUEST){ + pal_emul(current); + vmx_vcpu_increment_iip(current); + } else + vmx_reflect_interruption(ifa,isr,iim,11); +} + +static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800, + 0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,0x4000, + 0x4400,0x4800,0x4c00,0x5000,0x5100,0x5200,0x5300,0x5400,0x5500,0x5600, + 0x5700,0x5800,0x5900,0x5a00,0x5b00,0x5c00,0x5d00,0x5e00,0x5f00,0x6000, + 0x6100,0x6200,0x6300,0x6400,0x6500,0x6600,0x6700,0x6800,0x6900,0x6a00, + 0x6b00,0x6c00,0x6d00,0x6e00,0x6f00,0x7000,0x7100,0x7200,0x7300,0x7400, + 0x7500,0x7600,0x7700,0x7800,0x7900,0x7a00,0x7b00,0x7c00,0x7d00,0x7e00, + 0x7f00, +}; + + + +void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim, + UINT64 vector) +{ + VCPU *vcpu = current; + REGS *regs=vcpu_regs(vcpu); + UINT64 viha,vpsr = vmx_vcpu_get_psr(vcpu); + if(!(vpsr&IA64_PSR_IC)&&(vector!=5)){ + panic("Guest nested fault!"); + } + VPD_CR(vcpu,isr)=isr; + VPD_CR(vcpu,iipa) = regs->cr_iip; + vector=vec2off[vector]; + if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) + VPD_CR(vcpu,iim) = iim; + else { + set_ifa_itir_iha(vcpu,ifa,1,1,1); + } + inject_guest_interruption(vcpu, vector); +} + +// ONLY gets called from ia64_leave_kernel +// ONLY call with interrupts disabled?? (else might miss one?) +// NEVER successful if already reflecting a trap/fault because psr.i==0 +void leave_hypervisor_tail(struct pt_regs *regs) +{ + struct domain *d = current->domain; + struct vcpu *v = current; + // FIXME: Will this work properly if doing an RFI??? + if (!is_idle_task(d) ) { // always comes from guest + extern void vmx_dorfirfi(void); + struct pt_regs *user_regs = vcpu_regs(current); + + if (local_softirq_pending()) + do_softirq(); + local_irq_disable(); + + if (user_regs != regs) + printk("WARNING: checking pending interrupt in nested interrupt!!!\n"); + + /* VMX Domain N has other interrupt source, saying DM */ + if (test_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags)) + vmx_intr_assist(v); + + /* FIXME: Check event pending indicator, and set + * pending bit if necessary to inject back to guest. + * Should be careful about window between this check + * and above assist, since IOPACKET_PORT shouldn't be + * injected into vmx domain. 
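For the bookkeeping this comment describes, recall that the architectural IRR is four 64-bit registers covering 256 vectors, so a vector v lives in word v/64, bit v%64; the check and injection just below use that layout for the temporarily hardcoded vector 0x10 (word 0, bit 16). A small sketch with illustrative helper names, assuming in_service[] follows the same four-word layout as the IRR.

    #include <stdint.h>

    /* Vector-to-bit mapping behind "VPD_CR(v, irr[0]) |= 1UL << 0x10":
     * four 64-bit words cover vectors 0..255. */
    static void pend_vector(uint64_t irr[4], unsigned int vec)
    {
        irr[vec >> 6] |= (uint64_t)1 << (vec & 63);
    }

    static int vector_in_service(const uint64_t in_service[4], unsigned int vec)
    {
        return (int)((in_service[vec >> 6] >> (vec & 63)) & 1);
    }

For vec == 0x10 both helpers touch word 0, bit 16, which is why the hardcoded test below only needs to look at in_service[0].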
+ * + * Now hardcode the vector as 0x10 temporarily + */ + if (event_pending(v)&&(!((v->arch.arch_vmx.in_service[0])&(1UL<<0x10)))) { + VPD_CR(v, irr[0]) |= 1UL << 0x10; + v->arch.irq_new_pending = 1; + } + + if ( v->arch.irq_new_pending ) { + v->arch.irq_new_pending = 0; + vmx_check_pending_irq(v); + } + } +} + +extern ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr); + +/* We came here because the H/W VHPT walker failed to find an entry */ +void vmx_hpw_miss(VCPU *vcpu, u64 vec, u64 vadr) +{ + IA64_PSR vpsr; + CACHE_LINE_TYPE type; + u64 vhpt_adr; + ISR misr; + ia64_rr vrr; + REGS *regs; + thash_cb_t *vtlb, *vhpt; + thash_data_t *data, me; + vtlb=vmx_vcpu_get_vtlb(vcpu); +#ifdef VTLB_DEBUG + check_vtlb_sanity(vtlb); + dump_vtlb(vtlb); +#endif + vpsr.val = vmx_vcpu_get_psr(vcpu); + regs = vcpu_regs(vcpu); + misr.val=regs->cr_isr; +/* TODO + if(vcpu->domain->id && vec == 2 && + vpsr.dt == 0 && is_gpa_io(MASK_PMA(vaddr))){ + emulate_ins(&v); + return; + } +*/ + + if((vec==1)&&(!vpsr.it)){ + physical_itlb_miss(vcpu, vadr); + return; + } + if((vec==2)&&(!vpsr.dt)){ + if(vcpu->domain!=dom0&&__gpfn_is_io(vcpu->domain,(vadr<<1)>>(PAGE_SHIFT+1))){ + emulate_io_inst(vcpu,((vadr<<1)>>1),4); // UC + }else{ + physical_dtlb_miss(vcpu, vadr); + } + return; + } + vrr = vmx_vcpu_rr(vcpu,vadr); + if(vec == 1) type = ISIDE_TLB; + else if(vec == 2) type = DSIDE_TLB; + else panic("wrong vec\n"); + +// prepare_if_physical_mode(vcpu); + + if(data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type)){ + if(vcpu->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(vcpu->domain, data->ppn>>(PAGE_SHIFT-12))){ + vadr=(vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps); + emulate_io_inst(vcpu, vadr, data->ma); + return IA64_FAULT; + } + if ( data->ps != vrr.ps ) { + machine_tlb_insert(vcpu, data); + } + else { + thash_insert(vtlb->ts->vhpt,data,vadr); + } + }else if(type == DSIDE_TLB){ + if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, misr.val); + alt_dtlb(vcpu, vadr); + return IA64_FAULT; + } else{ + if(misr.sp){ + //TODO lds emulation + panic("Don't support speculation load"); + }else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + } else{ + vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); + vrr=vmx_vcpu_rr(vcpu,vhpt_adr); + data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB); + if(data){ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, misr.val); + dtlb_fault(vcpu, vadr); + return IA64_FAULT; + }else{ + if(misr.sp){ + //TODO lds emulation + panic("Don't support speculation load"); + }else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + }else{ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, misr.val); + dvhpt_fault(vcpu, vadr); + return IA64_FAULT; + }else{ + if(misr.sp){ + //TODO lds emulation + panic("Don't support speculation load"); + }else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + } + } + }else if(type == ISIDE_TLB){ + if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){ + if(!vpsr.ic){ + misr.ni=1; + } + vmx_vcpu_set_isr(vcpu, misr.val); + alt_itlb(vcpu, vadr); + return IA64_FAULT; + } else{ + vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); + vrr=vmx_vcpu_rr(vcpu,vhpt_adr); + data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB); + if(data){ + if(!vpsr.ic){ + misr.ni=1; + } + vmx_vcpu_set_isr(vcpu, misr.val); + itlb_fault(vcpu, vadr); + return IA64_FAULT; + }else{ + if(!vpsr.ic){ + misr.ni=1; + } + vmx_vcpu_set_isr(vcpu, misr.val); + ivhpt_fault(vcpu, vadr); + return IA64_FAULT; + } + } + } +} + + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_support.c --- 
/dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_support.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,164 @@ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_support.c: vmx specific support interface. + * Copyright (c) 2005, Intel Corporation. + * Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ +#include <xen/config.h> +#include <xen/sched.h> +#include <public/io/ioreq.h> +#include <asm/vmx.h> +#include <asm/vmx_vcpu.h> + +/* + * I/O emulation should be atomic from domain point of view. However, + * when emulation code is waiting for I/O completion by do_block, + * other events like DM interrupt, VBD, etc. may come and unblock + * current exection flow. So we have to prepare for re-block if unblocked + * by non I/O completion event. + */ +void vmx_wait_io(void) +{ + struct vcpu *v = current; + struct domain *d = v->domain; + extern void do_block(); + int port = iopacket_port(d); + + do { + if (!test_bit(port, + &d->shared_info->evtchn_pending[0])) + do_block(); + + /* Unblocked when some event is coming. Clear pending indication + * immediately if deciding to go for io assist + */ + if (test_and_clear_bit(port, + &d->shared_info->evtchn_pending[0])) { + clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel); + clear_bit(0, &v->vcpu_info->evtchn_upcall_pending); + vmx_io_assist(v); + } + + + if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) { + /* + * Latest event is not I/O completion, so clear corresponding + * selector and pending indication, to allow real event coming + */ + clear_bit(0, &v->vcpu_info->evtchn_upcall_pending); + + /* Here atually one window is leaved before selector is cleared. + * However this window only delay the indication to coming event, + * nothing losed. Next loop will check I/O channel to fix this + * window. + */ + clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel); + } + else + break; + } while (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)); +} + +/* + * Only place to call vmx_io_assist is mmio/legacy_io emulation. + * Since I/O emulation is synchronous, it shouldn't be called in + * other places. This is not like x86, since IA-64 implements a + * per-vp stack without continuation. + */ +void vmx_io_assist(struct vcpu *v) +{ + vcpu_iodata_t *vio; + ioreq_t *p; + + /* + * This shared page contains I/O request between emulation code + * and device model. + */ + vio = get_vio(v->domain, v->vcpu_id); + if (!vio) + panic("Corruption: bad shared page: %lx\n", (unsigned long)vio); + + p = &vio->vp_ioreq; + + if (p->state == STATE_IORESP_HOOK) + panic("Not supported: No hook available for DM request\n"); + + if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) { + if (p->state != STATE_IORESP_READY) { + /* Can't do_block here, for the same reason as other places to + * use vmx_wait_io. 
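The bit twiddling in vmx_wait_io() above follows the usual event-channel layout: evtchn_pending[] in the shared info page is a flat bitmap of ports, evtchn_pending_sel in the vcpu info holds one selector bit per 32-port word (hence the port>>5), and evtchn_upcall_pending is the single "anything pending" flag. A compact sketch of that two-level test, assuming 32-bit bitmap words as port>>5 implies; the struct and helper names are simplified stand-ins, not the real public headers.

    #include <stdint.h>

    struct shared_info_sketch { uint32_t evtchn_pending[32]; };    /* per-domain port bitmap */
    struct vcpu_info_sketch   { uint32_t evtchn_pending_sel;       /* one bit per 32 ports   */
                                uint8_t  evtchn_upcall_pending; }; /* master flag            */

    static int port_pending(const struct shared_info_sketch *s, unsigned int port)
    {
        return (int)((s->evtchn_pending[port >> 5] >> (port & 31)) & 1);
    }

    /* Mirrors the clear sequence in vmx_wait_io(): drop the port bit, its
     * selector bit (index port>>5, as in the patch) and the master flag. */
    static void clear_port(struct shared_info_sketch *s, struct vcpu_info_sketch *v,
                           unsigned int port)
    {
        s->evtchn_pending[port >> 5] &= ~(1u << (port & 31));
        v->evtchn_pending_sel        &= ~(1u << (port >> 5));
        v->evtchn_upcall_pending      = 0;
    }

vmx_intr_assist(), later in this file, is more careful and only drops the selector bit once the whole 32-port word has gone idle.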
Simple return is safe since vmx_wait_io will + * try to block again + */ + return; + } else + p->state = STATE_INVALID; + + clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); + } else + return; /* Spurous event? */ +} + +/* + * VMX domainN has two types of interrupt source: lsapic model within + * HV, and device model within domain 0 (service OS). There're another + * pending array in share page, manipulated by device model directly. + * To conform to VT-i spec, we have to sync pending bits in shared page + * into VPD. This has to be done before checking pending interrupt at + * resume to guest. For domain 0, all the interrupt sources come from + * HV, which then doesn't require this assist. + */ +void vmx_intr_assist(struct vcpu *v) +{ + vcpu_iodata_t *vio; + struct domain *d = v->domain; + extern void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, + unsigned long *pend_irr); + int port = iopacket_port(d); + + /* I/O emulation is atomic, so it's impossible to see execution flow + * out of vmx_wait_io, when guest is still waiting for response. + */ + if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) + panic("!!!Bad resume to guest before I/O emulation is done.\n"); + + /* Clear indicator specific to interrupt delivered from DM */ + if (test_and_clear_bit(port, + &d->shared_info->evtchn_pending[0])) { + if (!d->shared_info->evtchn_pending[port >> 5]) + clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel); + + if (!v->vcpu_info->evtchn_pending_sel) + clear_bit(0, &v->vcpu_info->evtchn_upcall_pending); + } + + /* Even without event pending, we still need to sync pending bits + * between DM and vlsapic. The reason is that interrupt delivery + * shares same event channel as I/O emulation, with corresponding + * indicator possibly cleared when vmx_wait_io(). + */ + vio = get_vio(v->domain, v->vcpu_id); + if (!vio) + panic("Corruption: bad shared page: %lx\n", (unsigned long)vio); + +#ifdef V_IOSAPIC_READY + vlapic_update_ext_irq(v); +#else + panic("IOSAPIC model is missed in qemu\n"); +#endif + return; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_utility.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_utility.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,659 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_utility.c: + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx> + * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx> + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + */ + +#include <xen/types.h> +#include <asm/vmx_vcpu.h> +#include <asm/processor.h> +#include <asm/vmx_mm_def.h> + + +/* + * Return: + * 0: Not reserved indirect registers + * 1: Is reserved indirect registers + */ +int +is_reserved_indirect_register ( + int type, + int index ) +{ + switch (type) { + case IA64_CPUID: + if ( index >= 5 ) { + return 1; + } + + case IA64_DBR: + case IA64_IBR: + //bugbugbug:check with pal about the max ibr/dbr!!!! + break; + + case IA64_PMC: + //bugbugbug:check with pal about the max ibr/dbr!!!! + break; + + case IA64_PMD: + //bugbugbug:check with pal about the max ibr/dbr!!!! + break; + + case IA64_PKR: + //bugbugbug:check with pal about the max pkr!!!! + break; + + case IA64_RR: + //bugbugbug:check with pal about the max rr!!!! + break; + + default: + panic ("Unsupported instruction!"); + } + + return 0; + +} + +/* + * Return: + * Set all ignored fields in value to 0 and return + */ +u64 +indirect_reg_igfld_MASK ( + int type, + int index, + u64 value + ) +{ + u64 nvalue; + + nvalue = value; + switch ( type ) { + case IA64_CPUID: + if ( index == 2 ) { + nvalue = 0; + } + break; + + case IA64_DBR: + case IA64_IBR: + /* Refer to SDM Vol2 Table 7-1,7-2 */ + if ( index % 2 != 0) { + /* Ignore field: {61:60} */ + nvalue = value & (~MASK (60, 2)); + } + break; + case IA64_PMC: + if ( index == 0 ) { + /* Ignore field: 3:1 */ + nvalue = value & (~MASK (1, 3)); + } + break; + case IA64_PMD: + if ( index >= 4 ) { + /* Ignore field: 7:7 */ + /* bugbug: this code is correct for generic + * PMD. However, for implementation specific + * PMD, it's WRONG. need more info to judge + * what's implementation specific PMD. 
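 * (Illustration, assuming MASK(offset, len) builds a field of `len' one-bits
 *  starting at bit `offset', as the surrounding "Ignore field" comments imply:
 *  for a generic PMD the statement below behaves like
 *      nvalue = value & ~(1UL << 7);
 *  clearing only the architecturally ignored bit 7 and passing every other
 *  bit of the caller-supplied value through unchanged.)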
+ */ + nvalue = value & (~MASK (7, 1)); + } + break; + case IA64_PKR: + case IA64_RR: + break; + default: + panic ("Unsupported instruction!"); + } + + return nvalue; +} + +/* + * Return: + * Set all ignored fields in value to 0 and return + */ +u64 +cr_igfld_mask (int index, u64 value) +{ + u64 nvalue; + + nvalue = value; + + switch ( index ) { + case IA64_REG_CR_IVA: + /* Ignore filed: 14:0 */ + nvalue = value & (~MASK (0, 15)); + break; + + case IA64_REG_CR_IHA: + /* Ignore filed: 1:0 */ + nvalue = value & (~MASK (0, 2)); + break; + + case IA64_REG_CR_LID: + /* Ignore filed: 63:32 */ + nvalue = value & (~MASK (32, 32)); + break; + + case IA64_REG_CR_TPR: + /* Ignore filed: 63:17,3:0 */ + nvalue = value & (~MASK (17, 47)); + nvalue = nvalue & (~MASK (0, 4)); + break; + + case IA64_REG_CR_EOI: + /* Ignore filed: 63:0 */ + nvalue = 0; + break; + + case IA64_REG_CR_ITV: + case IA64_REG_CR_PMV: + case IA64_REG_CR_CMCV: + case IA64_REG_CR_LRR0: + case IA64_REG_CR_LRR1: + /* Ignore filed: 63:17,12:12 */ + nvalue = value & (~MASK (17, 47)); + nvalue = nvalue & (~MASK (12, 1)); + break; + } + + return nvalue; +} + + +/* + * Return: + * 1: PSR reserved fields are not zero + * 0: PSR reserved fields are all zero + */ +int +check_psr_rsv_fields (u64 value) +{ + /* PSR reserved fields: 0, 12~6, 16, 31~28, 63~46 + * These reserved fields shall all be zero + * Otherwise we will panic + */ + + if ( value & MASK (0, 1) || + value & MASK (6, 7) || + value & MASK (16, 1) || + value & MASK (28, 4) || + value & MASK (46, 18) + ) { + return 1; + } + + return 0; +} + + + +/* + * Return: + * 1: CR reserved fields are not zero + * 0: CR reserved fields are all zero + */ +int +check_cr_rsv_fields (int index, u64 value) +{ + switch (index) { + case IA64_REG_CR_DCR: + if ( (value & MASK ( 3, 5 )) || + (value & MASK (15, 49))) { + return 1; + } + return 0; + + case IA64_REG_CR_ITM: + case IA64_REG_CR_IVA: + case IA64_REG_CR_IIP: + case IA64_REG_CR_IFA: + case IA64_REG_CR_IIPA: + case IA64_REG_CR_IIM: + case IA64_REG_CR_IHA: + case IA64_REG_CR_EOI: + return 0; + + case IA64_REG_CR_PTA: + if ( (value & MASK ( 1, 1 )) || + (value & MASK (9, 6))) { + return 1; + } + return 0; + + case IA64_REG_CR_IPSR: + return check_psr_rsv_fields (value); + + + case IA64_REG_CR_ISR: + if ( (value & MASK ( 24, 8 )) || + (value & MASK (44, 20))) { + return 1; + } + return 0; + + case IA64_REG_CR_ITIR: + if ( (value & MASK ( 0, 2 )) || + (value & MASK (32, 32))) { + return 1; + } + return 0; + + case IA64_REG_CR_IFS: + if ( (value & MASK ( 38, 25 ))) { + return 1; + } + return 0; + + case IA64_REG_CR_LID: + if ( (value & MASK ( 0, 16 ))) { + return 1; + } + return 0; + + case IA64_REG_CR_IVR: + if ( (value & MASK ( 8, 56 ))) { + return 1; + } + return 0; + + case IA64_REG_CR_TPR: + if ( (value & MASK ( 8, 8 ))) { + return 1; + } + return 0; + + case IA64_REG_CR_IRR0: + if ( (value & MASK ( 1, 1 )) || + (value & MASK (3, 13))) { + return 1; + } + return 0; + + case IA64_REG_CR_ITV: + case IA64_REG_CR_PMV: + case IA64_REG_CR_CMCV: + if ( (value & MASK ( 8, 4 )) || + (value & MASK (13, 3))) { + return 1; + } + return 0; + + case IA64_REG_CR_LRR0: + case IA64_REG_CR_LRR1: + if ( (value & MASK ( 11, 1 )) || + (value & MASK (14, 1))) { + return 1; + } + return 0; + } + + + panic ("Unsupported CR"); +} + + + +/* + * Return: + * 0: Indirect Reg reserved fields are not zero + * 1: Indirect Reg reserved fields are all zero + */ +int +check_indirect_reg_rsv_fields ( int type, int index, u64 value ) +{ + + switch ( type ) { + case IA64_CPUID: + if 
( index == 3 ) { + if ( value & MASK (40, 24 )) { + return 0; + } + } else if ( index == 4 ) { + if ( value & MASK (2, 62 )) { + return 0; + } + } + break; + + case IA64_DBR: + case IA64_IBR: + case IA64_PMC: + case IA64_PMD: + break; + + case IA64_PKR: + if ( value & MASK (4, 4) || + value & MASK (32, 32 )) { + return 0; + } + break; + + case IA64_RR: + if ( value & MASK (1, 1) || + value & MASK (32, 32 )) { + return 0; + } + break; + + default: + panic ("Unsupported instruction!"); + } + + return 1; +} + + + + +/* Return + * Same format as isr_t + * Only ei/ni bits are valid, all other bits are zero + */ +u64 +set_isr_ei_ni (VCPU *vcpu) +{ + + IA64_PSR vpsr,ipsr; + ISR visr; + REGS *regs; + + regs=vcpu_regs(vcpu); + + visr.val = 0; + + vpsr.val = vmx_vcpu_get_psr (vcpu); + + if (!vpsr.ic == 1 ) { + /* Set ISR.ni */ + visr.ni = 1; + } + ipsr.val = regs->cr_ipsr; + + visr.ei = ipsr.ri; + return visr.val; +} + + +/* Set up ISR.na/code{3:0}/r/w for no-access instructions + * Refer to SDM Vol Table 5-1 + * Parameter: + * setr: if 1, indicates this function will set up ISR.r + * setw: if 1, indicates this function will set up ISR.w + * Return: + * Same format as ISR. All fields are zero, except na/code{3:0}/r/w + */ +u64 +set_isr_for_na_inst(VCPU *vcpu, int op) +{ + ISR visr; + visr.val = 0; + switch (op) { + case IA64_INST_TPA: + visr.na = 1; + visr.code = 0; + break; + case IA64_INST_TAK: + visr.na = 1; + visr.code = 3; + break; + } + return visr.val; +} + + + +/* + * Set up ISR for registe Nat consumption fault + * Parameters: + * read: if 1, indicates this is a read access; + * write: if 1, indicates this is a write access; + */ +void +set_rnat_consumption_isr (VCPU *vcpu,int inst,int read,int write) +{ + ISR visr; + u64 value; + /* Need set up ISR: code, ei, ni, na, r/w */ + visr.val = 0; + + /* ISR.code{7:4} =1, + * Set up ISR.code{3:0}, ISR.na + */ + visr.code = (1 << 4); + if (inst) { + + value = set_isr_for_na_inst (vcpu,inst); + visr.val = visr.val | value; + } + + /* Set up ISR.r/w */ + visr.r = read; + visr.w = write; + + /* Set up ei/ni */ + value = set_isr_ei_ni (vcpu); + visr.val = visr.val | value; + + vmx_vcpu_set_isr (vcpu,visr.val); +} + + + +/* + * Set up ISR for break fault + */ +void set_break_isr (VCPU *vcpu) +{ + ISR visr; + u64 value; + + /* Need set up ISR: ei, ni */ + + visr.val = 0; + + /* Set up ei/ni */ + value = set_isr_ei_ni (vcpu); + visr.val = visr.val | value; + + vmx_vcpu_set_isr(vcpu, visr.val); +} + + + + + + +/* + * Set up ISR for Priviledged Operation fault + */ +void set_privileged_operation_isr (VCPU *vcpu,int inst) +{ + ISR visr; + u64 value; + + /* Need set up ISR: code, ei, ni, na */ + + visr.val = 0; + + /* Set up na, code{3:0} for no-access instruction */ + value = set_isr_for_na_inst (vcpu, inst); + visr.val = visr.val | value; + + + /* ISR.code{7:4} =1 */ + visr.code = (1 << 4) | visr.code; + + /* Set up ei/ni */ + value = set_isr_ei_ni (vcpu); + visr.val = visr.val | value; + + vmx_vcpu_set_isr (vcpu, visr.val); +} + + + + +/* + * Set up ISR for Priviledged Register fault + */ +void set_privileged_reg_isr (VCPU *vcpu, int inst) +{ + ISR visr; + u64 value; + + /* Need set up ISR: code, ei, ni */ + + visr.val = 0; + + /* ISR.code{7:4} =2 */ + visr.code = 2 << 4; + + /* Set up ei/ni */ + value = set_isr_ei_ni (vcpu); + visr.val = visr.val | value; + + vmx_vcpu_set_isr (vcpu, visr.val); +} + + + + + +/* + * Set up ISR for Reserved Register/Field fault + */ +void set_rsv_reg_field_isr (VCPU *vcpu) +{ + ISR visr; + u64 value; + + /* Need set up ISR: 
code, ei, ni */ + + visr.val = 0; + + /* ISR.code{7:4} =4 */ + visr.code = (3 << 4) | visr.code; + + /* Set up ei/ni */ + value = set_isr_ei_ni (vcpu); + visr.val = visr.val | value; + + vmx_vcpu_set_isr (vcpu, visr.val); +} + + + +/* + * Set up ISR for Illegal Operation fault + */ +void set_illegal_op_isr (VCPU *vcpu) +{ + ISR visr; + u64 value; + + /* Need set up ISR: ei, ni */ + + visr.val = 0; + + /* Set up ei/ni */ + value = set_isr_ei_ni (vcpu); + visr.val = visr.val | value; + + vmx_vcpu_set_isr (vcpu, visr.val); +} + + +void set_isr_reg_nat_consumption(VCPU *vcpu, u64 flag, u64 non_access) +{ + ISR isr; + + isr.val = 0; + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_REG_NAT_CONSUMPTION_FAULT | flag; + isr.na = non_access; + isr.r = 1; + isr.w = 0; + vmx_vcpu_set_isr(vcpu, isr.val); + return; +} + +void set_isr_for_priv_fault(VCPU *vcpu, u64 non_access) +{ + u64 value; + ISR isr; + + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_PRIV_OP_FAULT; + isr.na = non_access; + vmx_vcpu_set_isr(vcpu, isr.val); + + return; +} + + +IA64FAULT check_target_register(VCPU *vcpu, u64 reg_index) +{ + u64 sof; + REGS *regs; + regs=vcpu_regs(vcpu); + sof = regs->cr_ifs & 0x7f; + if(reg_index >= sof + 32) + return IA64_FAULT; + return IA64_NO_FAULT;; +} + + +int is_reserved_rr_register(VCPU* vcpu, int reg_index) +{ + return (reg_index >= 8); +} + +#define ITIR_RSV_MASK (0x3UL | (((1UL<<32)-1) << 32)) +int is_reserved_itir_field(VCPU* vcpu, u64 itir) +{ + if ( itir & ITIR_RSV_MASK ) { + return 1; + } + return 0; +} + +int is_reserved_rr_field(VCPU* vcpu, u64 reg_value) +{ + ia64_rr rr; + rr.rrval = reg_value; + + if(rr.reserved0 != 0 || rr.reserved1 != 0){ + return 1; + } + if(rr.ps < 12 || rr.ps > 28){ + // page too big or small. + return 1; + } + if(rr.ps > 15 && rr.ps % 2 != 0){ + // unsupported page size. + return 1; + } + return 0; +} + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_vcpu.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_vcpu.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,446 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_vcpu.c: handling all virtual cpu related thing. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Fred yang (fred.yang@xxxxxxxxx) + * Arun Sharma (arun.sharma@xxxxxxxxx) + * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx> + * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + */ + +#include <xen/sched.h> +#include <public/arch-ia64.h> +#include <asm/ia64_int.h> +#include <asm/vmx_vcpu.h> +#include <asm/regionreg.h> +#include <asm/tlb.h> +#include <asm/processor.h> +#include <asm/delay.h> +#include <asm/regs.h> +#include <asm/gcc_intrin.h> +#include <asm/vmx_mm_def.h> +#include <asm/vmx.h> + +//u64 fire_itc; +//u64 fire_itc2; +//u64 fire_itm; +//u64 fire_itm2; +/* + * Copyright (c) 2005 Intel Corporation. + * Anthony Xu (anthony.xu@xxxxxxxxx) + * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +/************************************************************************** + VCPU general register access routines +**************************************************************************/ +#include <asm/hw_irq.h> +#include <asm/vmx_pal_vsa.h> +#include <asm/kregs.h> + +//unsigned long last_guest_rsm = 0x0; +struct guest_psr_bundle{ + unsigned long ip; + unsigned long psr; +}; + +struct guest_psr_bundle guest_psr_buf[100]; +unsigned long guest_psr_index = 0; + +void +vmx_vcpu_set_psr(VCPU *vcpu, unsigned long value) +{ + + UINT64 mask; + REGS *regs; + IA64_PSR old_psr, new_psr; + old_psr.val=vmx_vcpu_get_psr(vcpu); + + regs=vcpu_regs(vcpu); + /* We only support guest as: + * vpsr.pk = 0 + * vpsr.is = 0 + * Otherwise panic + */ + if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) { + panic ("Setting unsupport guest psr!"); + } + + /* + * For those IA64_PSR bits: id/da/dd/ss/ed/ia + * Since these bits will become 0, after success execution of each + * instruction, we will change set them to mIA64_PSR + */ + VMX_VPD(vcpu,vpsr) = value & + (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD | + IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA + )); + + if ( !old_psr.i && (value & IA64_PSR_I) ) { + // vpsr.i 0->1 + vcpu->arch.irq_new_condition = 1; + } + new_psr.val=vmx_vcpu_get_psr(vcpu); + { + struct pt_regs *regs = vcpu_regs(vcpu); + guest_psr_buf[guest_psr_index].ip = regs->cr_iip; + guest_psr_buf[guest_psr_index].psr = new_psr.val; + if (++guest_psr_index >= 100) + guest_psr_index = 0; + } +#if 0 + if (old_psr.i != new_psr.i) { + if (old_psr.i) + last_guest_rsm = vcpu_regs(vcpu)->cr_iip; + else + last_guest_rsm = 0; + } +#endif + + /* + * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) + * , except for the following bits: + * ic/i/dt/si/rt/mc/it/bn/vm + */ + mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + + IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + + IA64_PSR_VM; + + regs->cr_ipsr = (regs->cr_ipsr & mask ) | ( value & (~mask) ); + + check_mm_mode_switch(vcpu, old_psr, new_psr); + return IA64_NO_FAULT; +} + +/* Adjust slot both in pt_regs and 
vpd, upon vpsr.ri which + * should have sync with ipsr in entry. + * + * Clear some bits due to successfully emulation. + */ +IA64FAULT vmx_vcpu_increment_iip(VCPU *vcpu) +{ + // TODO: trap_bounce?? Eddie + REGS *regs = vcpu_regs(vcpu); + IA64_PSR vpsr; + IA64_PSR *ipsr = (IA64_PSR *)®s->cr_ipsr; + + vpsr.val = vmx_vcpu_get_psr(vcpu); + if (vpsr.ri == 2) { + vpsr.ri = 0; + regs->cr_iip += 16; + } else { + vpsr.ri++; + } + + ipsr->ri = vpsr.ri; + vpsr.val &= + (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD | + IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA + )); + + VMX_VPD(vcpu, vpsr) = vpsr.val; + + ipsr->val &= + (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD | + IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA + )); + + return (IA64_NO_FAULT); +} + + +IA64FAULT vmx_vcpu_cover(VCPU *vcpu) +{ + REGS *regs = vcpu_regs(vcpu); + IA64_PSR vpsr; + vpsr.val = vmx_vcpu_get_psr(vcpu); + + if(!vpsr.ic) + VPD_CR(vcpu,ifs) = regs->cr_ifs; + regs->cr_ifs = IA64_IFS_V; + return (IA64_NO_FAULT); +} + + +thash_cb_t * +vmx_vcpu_get_vtlb(VCPU *vcpu) +{ + return vcpu->arch.vtlb; +} + + +struct virutal_platform_def * +vmx_vcpu_get_plat(VCPU *vcpu) +{ + return &(vcpu->domain->arch.vmx_platform); +} + + +ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr) +{ + return (ia64_rr)VMX(vcpu,vrr[vadr>>61]); +} + + +IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + ia64_rr oldrr,newrr; + thash_cb_t *hcb; + oldrr=vmx_vcpu_rr(vcpu,reg); + newrr.rrval=val; +#if 1 + if(oldrr.ps!=newrr.ps){ + hcb = vmx_vcpu_get_vtlb(vcpu); + thash_purge_all(hcb); + } +#endif + VMX(vcpu,vrr[reg>>61]) = val; + switch((u64)(reg>>61)) { + case VRN5: + VMX(vcpu,mrr5)=vmx_vrrtomrr(vcpu,val); + break; + case VRN6: + VMX(vcpu,mrr6)=vmx_vrrtomrr(vcpu,val); + break; + case VRN7: + VMX(vcpu,mrr7)=vmx_vrrtomrr(vcpu,val); + /* Change double mapping for this domain */ +#ifdef XEN_DBL_MAPPING + vmx_change_double_mapping(vcpu, + vmx_vrrtomrr(vcpu,oldrr.rrval), + vmx_vrrtomrr(vcpu,newrr.rrval)); +#endif + break; + default: + ia64_set_rr(reg,vmx_vrrtomrr(vcpu,val)); + break; + } + + return (IA64_NO_FAULT); +} + + + +/************************************************************************** + VCPU protection key register access routines +**************************************************************************/ + +IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + UINT64 val = (UINT64)ia64_get_pkr(reg); + *pval = val; + return (IA64_NO_FAULT); +} + +IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + ia64_set_pkr(reg,val); + return (IA64_NO_FAULT); +} + +#if 0 +int tlb_debug=0; +check_entry(u64 va, u64 ps, char *str) +{ + va &= ~ (PSIZE(ps)-1); + if ( va == 0x2000000002908000UL || + va == 0x600000000000C000UL ) { + stop(); + } + if (tlb_debug) printf("%s at %lx %lx\n", str, va, 1UL<<ps); +} +#endif + + +u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa) +{ + ia64_rr rr,rr1; + rr=vmx_vcpu_rr(vcpu,ifa); + rr1.rrval=0; + rr1.ps=rr.ps; + rr1.rid=rr.rid; + return (rr1.rrval); +} + + + + +IA64FAULT vmx_vcpu_rfi(VCPU *vcpu) +{ + // TODO: Only allowed for current vcpu + UINT64 ifs, psr; + REGS *regs = vcpu_regs(vcpu); + psr = VPD_CR(vcpu,ipsr); + vmx_vcpu_set_psr(vcpu,psr); + ifs=VPD_CR(vcpu,ifs); + if((ifs>>63)&&(ifs<<1)){ + ifs=(regs->cr_ifs)&0x7f; + regs->rfi_pfs = (ifs<<7)|ifs; + regs->cr_ifs = VPD_CR(vcpu,ifs); + } + regs->cr_iip = VPD_CR(vcpu,iip); + return (IA64_NO_FAULT); +} + + +UINT64 +vmx_vcpu_get_psr(VCPU *vcpu) +{ + return VMX_VPD(vcpu,vpsr); +} + + +IA64FAULT +vmx_vcpu_get_bgr(VCPU *vcpu, unsigned int reg, UINT64 *val) +{ + 
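    /* gr16-gr31 are the banked general registers: PSR.bn selects between
     * bank 1 (the normal bank) and bank 0 (the one switched in across
     * interruption handlers via bsw).  Depending on the guest's vpsr.bn this
     * routine reads the register image from vgr[]/vnat or from vbgr[]/vbnat
     * in the VPD, and reports a NaT consumption (IA64_FAULT) when the
     * corresponding NaT bit is set.
     */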
IA64_PSR vpsr; + + vpsr.val = vmx_vcpu_get_psr(vcpu); + if ( vpsr.bn ) { + *val=VMX_VPD(vcpu,vgr[reg-16]); + // Check NAT bit + if ( VMX_VPD(vcpu,vnat) & (1UL<<(reg-16)) ) { + // TODO + //panic ("NAT consumption fault\n"); + return IA64_FAULT; + } + + } + else { + *val=VMX_VPD(vcpu,vbgr[reg-16]); + if ( VMX_VPD(vcpu,vbnat) & (1UL<<reg) ) { + //panic ("NAT consumption fault\n"); + return IA64_FAULT; + } + + } + return IA64_NO_FAULT; +} + +IA64FAULT +vmx_vcpu_set_bgr(VCPU *vcpu, unsigned int reg, u64 val,int nat) +{ + IA64_PSR vpsr; + vpsr.val = vmx_vcpu_get_psr(vcpu); + if ( vpsr.bn ) { + VMX_VPD(vcpu,vgr[reg-16]) = val; + if(nat){ + VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg-16) ); + }else{ + VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg-16) ); + } + } + else { + VMX_VPD(vcpu,vbgr[reg-16]) = val; + if(nat){ + VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg) ); + }else{ + VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg) ); + } + } + return IA64_NO_FAULT; +} + + + +IA64FAULT +vmx_vcpu_get_gr(VCPU *vcpu, unsigned reg, UINT64 * val) +{ + REGS *regs=vcpu_regs(vcpu); + int nat; + //TODO, Eddie + if (!regs) return 0; + if (reg >= 16 && reg < 32) { + return vmx_vcpu_get_bgr(vcpu,reg,val); + } + getreg(reg,val,&nat,regs); // FIXME: handle NATs later + if(nat){ + return IA64_FAULT; + } + return IA64_NO_FAULT; +} + +// returns: +// IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault +// IA64_NO_FAULT otherwise + +IA64FAULT +vmx_vcpu_set_gr(VCPU *vcpu, unsigned reg, u64 value, int nat) +{ + REGS *regs = vcpu_regs(vcpu); + long sof = (regs->cr_ifs) & 0x7f; + //TODO Eddie + + if (!regs) return IA64_ILLOP_FAULT; + if (reg >= sof + 32) return IA64_ILLOP_FAULT; + if ( reg >= 16 && reg < 32 ) { + return vmx_vcpu_set_bgr(vcpu,reg, value, nat); + } + setreg(reg,value,nat,regs); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24) +{ + UINT64 vpsr; + vpsr = vmx_vcpu_get_psr(vcpu); + vpsr &= (~imm24); + vmx_vcpu_set_psr(vcpu, vpsr); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24) +{ + UINT64 vpsr; + vpsr = vmx_vcpu_get_psr(vcpu); + vpsr |= imm24; + vmx_vcpu_set_psr(vcpu, vpsr); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_set_psr_l(VCPU *vcpu, UINT64 val) +{ + vmx_vcpu_set_psr(vcpu, val); + return IA64_NO_FAULT; +} + +IA64FAULT +vmx_vcpu_set_tpr(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,tpr)=val; + vcpu->arch.irq_new_condition = 1; + return IA64_NO_FAULT; +} + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_virt.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_virt.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1511 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_virt.c: + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Fred yang (fred.yang@xxxxxxxxx) + * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx> + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + */ + + + +#include <asm/privop.h> +#include <asm/vmx_vcpu.h> +#include <asm/processor.h> +#include <asm/delay.h> // Debug only +#include <asm/vmmu.h> +#include <asm/vmx_mm_def.h> +#include <asm/smp.h> + +#include <asm/virt_event.h> +extern UINT64 privop_trace; + +void +ia64_priv_decoder(IA64_SLOT_TYPE slot_type, INST64 inst, UINT64 * cause) +{ + *cause=0; + switch (slot_type) { + case M: + if (inst.generic.major==0){ + if(inst.M28.x3==0){ + if(inst.M44.x4==6){ + *cause=EVENT_SSM; + }else if(inst.M44.x4==7){ + *cause=EVENT_RSM; + }else if(inst.M30.x4==8&&inst.M30.x2==2){ + *cause=EVENT_MOV_TO_AR_IMM; + } + } + } + else if(inst.generic.major==1){ + if(inst.M28.x3==0){ + if(inst.M32.x6==0x2c){ + *cause=EVENT_MOV_TO_CR; + }else if(inst.M33.x6==0x24){ + *cause=EVENT_MOV_FROM_CR; + }else if(inst.M35.x6==0x2d){ + *cause=EVENT_MOV_TO_PSR; + }else if(inst.M36.x6==0x25){ + *cause=EVENT_MOV_FROM_PSR; + }else if(inst.M29.x6==0x2A){ + *cause=EVENT_MOV_TO_AR; + }else if(inst.M31.x6==0x22){ + *cause=EVENT_MOV_FROM_AR; + }else if(inst.M45.x6==0x09){ + *cause=EVENT_PTC_L; + }else if(inst.M45.x6==0x0A){ + *cause=EVENT_PTC_G; + }else if(inst.M45.x6==0x0B){ + *cause=EVENT_PTC_GA; + }else if(inst.M45.x6==0x0C){ + *cause=EVENT_PTR_D; + }else if(inst.M45.x6==0x0D){ + *cause=EVENT_PTR_I; + }else if(inst.M46.x6==0x1A){ + *cause=EVENT_THASH; + }else if(inst.M46.x6==0x1B){ + *cause=EVENT_TTAG; + }else if(inst.M46.x6==0x1E){ + *cause=EVENT_TPA; + }else if(inst.M46.x6==0x1F){ + *cause=EVENT_TAK; + }else if(inst.M47.x6==0x34){ + *cause=EVENT_PTC_E; + }else if(inst.M41.x6==0x2E){ + *cause=EVENT_ITC_D; + }else if(inst.M41.x6==0x2F){ + *cause=EVENT_ITC_I; + }else if(inst.M42.x6==0x00){ + *cause=EVENT_MOV_TO_RR; + }else if(inst.M42.x6==0x01){ + *cause=EVENT_MOV_TO_DBR; + }else if(inst.M42.x6==0x02){ + *cause=EVENT_MOV_TO_IBR; + }else if(inst.M42.x6==0x03){ + *cause=EVENT_MOV_TO_PKR; + }else if(inst.M42.x6==0x04){ + *cause=EVENT_MOV_TO_PMC; + }else if(inst.M42.x6==0x05){ + *cause=EVENT_MOV_TO_PMD; + }else if(inst.M42.x6==0x0E){ + *cause=EVENT_ITR_D; + }else if(inst.M42.x6==0x0F){ + *cause=EVENT_ITR_I; + }else if(inst.M43.x6==0x10){ + *cause=EVENT_MOV_FROM_RR; + }else if(inst.M43.x6==0x11){ + *cause=EVENT_MOV_FROM_DBR; + }else if(inst.M43.x6==0x12){ + *cause=EVENT_MOV_FROM_IBR; + }else if(inst.M43.x6==0x13){ + *cause=EVENT_MOV_FROM_PKR; + }else if(inst.M43.x6==0x14){ + *cause=EVENT_MOV_FROM_PMC; +/* + }else if(inst.M43.x6==0x15){ + *cause=EVENT_MOV_FROM_PMD; +*/ + }else if(inst.M43.x6==0x17){ + *cause=EVENT_MOV_FROM_CPUID; + } + } + } + break; + case B: + if(inst.generic.major==0){ + if(inst.B8.x6==0x02){ + *cause=EVENT_COVER; + }else if(inst.B8.x6==0x08){ + *cause=EVENT_RFI; + }else if(inst.B8.x6==0x0c){ + *cause=EVENT_BSW_0; + }else if(inst.B8.x6==0x0d){ + *cause=EVENT_BSW_1; + } + } + } +} + +IA64FAULT vmx_emul_rsm(VCPU *vcpu, INST64 inst) +{ + UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; + return vmx_vcpu_reset_psr_sm(vcpu,imm24); +} + +IA64FAULT vmx_emul_ssm(VCPU *vcpu, INST64 inst) +{ + UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; + return vmx_vcpu_set_psr_sm(vcpu,imm24); +} + +unsigned long last_guest_psr = 0x0; +IA64FAULT vmx_emul_mov_from_psr(VCPU *vcpu, INST64 inst) +{ + UINT64 tgt = inst.M33.r1; + UINT64 val; + IA64FAULT fault; + +/* + if ((fault = vmx_vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT) + return vmx_vcpu_set_gr(vcpu, tgt, val); + 
else return fault; + */ + val = vmx_vcpu_get_psr(vcpu); + val = (val & MASK(0, 32)) | (val & MASK(35, 2)); + last_guest_psr = val; + return vmx_vcpu_set_gr(vcpu, tgt, val, 0); +} + +/** + * @todo Check for reserved bits and return IA64_RSVDREG_FAULT. + */ +IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst) +{ + UINT64 val; + IA64FAULT fault; + if(vmx_vcpu_get_gr(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT) + panic(" get_psr nat bit fault\n"); + + val = (val & MASK(0, 32)) | (VMX_VPD(vcpu, vpsr) & MASK(32, 32)); +#if 0 + if (last_mov_from_psr && (last_guest_psr != (val & MASK(0,32)))) + while(1); + else + last_mov_from_psr = 0; +#endif + return vmx_vcpu_set_psr_l(vcpu,val); +} + + +/************************************************************************** +Privileged operation emulation routines +**************************************************************************/ + +IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST64 inst) +{ + IA64_PSR vpsr; + REGS *regs; +#ifdef CHECK_FAULT + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + regs=vcpu_regs(vcpu); + vpsr.val=regs->cr_ipsr; + if ( vpsr.is == 1 ) { + panic ("We do not support IA32 instruction yet"); + } + + return vmx_vcpu_rfi(vcpu); +} + +IA64FAULT vmx_emul_bsw0(VCPU *vcpu, INST64 inst) +{ +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + return vmx_vcpu_bsw0(vcpu); +} + +IA64FAULT vmx_emul_bsw1(VCPU *vcpu, INST64 inst) +{ +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + return vmx_vcpu_bsw1(vcpu); +} + +IA64FAULT vmx_emul_cover(VCPU *vcpu, INST64 inst) +{ + return vmx_vcpu_cover(vcpu); +} + +IA64FAULT vmx_emul_ptc_l(VCPU *vcpu, INST64 inst) +{ + u64 r2,r3; + ISR isr; + IA64_PSR vpsr; + + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&r2)){ +#ifdef VMAL_NO_FAULT_CHECK + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif // VMAL_NO_FAULT_CHECK + } +#ifdef VMAL_NO_FAULT_CHECK + if (unimplemented_gva(vcpu,r3) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + return vmx_vcpu_ptc_l(vcpu,r3,bits(r2,2,7)); +} + +IA64FAULT vmx_emul_ptc_e(VCPU *vcpu, INST64 inst) +{ + u64 r3; + ISR isr; + IA64_PSR vpsr; + + vpsr.val=vmx_vcpu_get_psr(vcpu); +#ifdef VMAL_NO_FAULT_CHECK + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + if(vmx_vcpu_get_gr(vcpu,inst.M47.r3,&r3)){ +#ifdef VMAL_NO_FAULT_CHECK + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif // VMAL_NO_FAULT_CHECK + } 
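    /* Architecturally, ptc.e purges the entire local translation cache; on
     * bare hardware the r3 operand is stepped through the loop parameters
     * returned by PAL_PTCE_INFO.  Here the guest-computed r3 is simply handed
     * to vmx_vcpu_ptc_e(), which is expected to drop the guest's cached
     * translations wholesale.
     */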
+ return vmx_vcpu_ptc_e(vcpu,r3); +} + +IA64FAULT vmx_emul_ptc_g(VCPU *vcpu, INST64 inst) +{ + return vmx_emul_ptc_l(vcpu, inst); +} + +IA64FAULT vmx_emul_ptc_ga(VCPU *vcpu, INST64 inst) +{ + return vmx_emul_ptc_l(vcpu, inst); +} + +IA64FAULT ptr_fault_check(VCPU *vcpu, INST64 inst, u64 *pr2, u64 *pr3) +{ + ISR isr; + IA64FAULT ret1, ret2; + +#ifdef VMAL_NO_FAULT_CHECK + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r3,pr3); + ret2 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pr2); +#ifdef VMAL_NO_FAULT_CHECK + if ( ret1 != IA64_NO_FAULT || ret2 != IA64_NO_FAULT ) { + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; + } + if (unimplemented_gva(vcpu,r3) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + return IA64_NO_FAULT; +} + +IA64FAULT vmx_emul_ptr_d(VCPU *vcpu, INST64 inst) +{ + u64 r2,r3; + if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT ) + return IA64_FAULT; + return vmx_vcpu_ptr_d(vcpu,r3,bits(r2,2,7)); +} + +IA64FAULT vmx_emul_ptr_i(VCPU *vcpu, INST64 inst) +{ + u64 r2,r3; + if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT ) + return IA64_FAULT; + return vmx_vcpu_ptr_i(vcpu,r3,bits(r2,2,7)); +} + + +IA64FAULT vmx_emul_thash(VCPU *vcpu, INST64 inst) +{ + u64 r1,r3; + ISR visr; + IA64_PSR vpsr; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M46.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ +#ifdef CHECK_FAULT + vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); + return IA64_NO_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(unimplemented_gva(vcpu, r3)){ + vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); + return IA64_NO_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_thash(vcpu, r3, &r1); + vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + + +IA64FAULT vmx_emul_ttag(VCPU *vcpu, INST64 inst) +{ + u64 r1,r3; + ISR visr; + IA64_PSR vpsr; + #ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M46.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ +#ifdef CHECK_FAULT + vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); + return IA64_NO_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(unimplemented_gva(vcpu, r3)){ + vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); + return IA64_NO_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_ttag(vcpu, r3, &r1); + vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + + +IA64FAULT vmx_emul_tpa(VCPU *vcpu, INST64 inst) +{ + u64 r1,r3; + ISR visr; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M46.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if(vpsr.cpl!=0){ + visr.val=0; + vcpu_set_isr(vcpu, visr.val); + return IA64_FAULT; + } +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,1); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if (unimplemented_gva(vcpu,r3) ) { + // inject 
unimplemented_data_address_fault + visr.val = set_isr_ei_ni(vcpu); + visr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + // FAULT_UNIMPLEMENTED_DATA_ADDRESS. + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + + if(vmx_vcpu_tpa(vcpu, r3, &r1)){ + return IA64_FAULT; + } + vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + +IA64FAULT vmx_emul_tak(VCPU *vcpu, INST64 inst) +{ + u64 r1,r3; + ISR visr; + IA64_PSR vpsr; + int fault=IA64_NO_FAULT; +#ifdef CHECK_FAULT + visr.val=0; + if(check_target_register(vcpu, inst.M46.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + vpsr.val=vmx_vcpu_get_psr(vcpu); + if(vpsr.cpl!=0){ + vcpu_set_isr(vcpu, visr.val); + return IA64_FAULT; + } +#endif + if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,1); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif + } + if(vmx_vcpu_tak(vcpu, r3, &r1)){ + return IA64_FAULT; + } + vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + + +/************************************ + * Insert translation register/cache +************************************/ + +IA64FAULT vmx_emul_itr_d(VCPU *vcpu, INST64 inst) +{ + UINT64 fault, itir, ifa, pte, slot; + ISR isr; + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.ic ) { + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#ifdef VMAL_NO_FAULT_CHECK + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){ +#ifdef VMAL_NO_FAULT_CHECK + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif // VMAL_NO_FAULT_CHECK + } +#ifdef VMAL_NO_FAULT_CHECK + if(is_reserved_rr_register(vcpu, slot)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + if (vmx_vcpu_get_itir(vcpu,&itir)){ + return(IA64_FAULT); + } + if (vmx_vcpu_get_ifa(vcpu,&ifa)){ + return(IA64_FAULT); + } +#ifdef VMAL_NO_FAULT_CHECK + if (is_reserved_itir_field(vcpu, itir)) { + // TODO + return IA64_FAULT; + } + if (unimplemented_gva(vcpu,ifa) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + return (vmx_vcpu_itr_d(vcpu,pte,itir,ifa,slot)); +} + +IA64FAULT vmx_emul_itr_i(VCPU *vcpu, INST64 inst) +{ + UINT64 fault, itir, ifa, pte, slot; + ISR isr; + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.ic ) { + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#ifdef VMAL_NO_FAULT_CHECK + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){ +#ifdef VMAL_NO_FAULT_CHECK + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif // VMAL_NO_FAULT_CHECK + } +#ifdef VMAL_NO_FAULT_CHECK + if(is_reserved_rr_register(vcpu, slot)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + if (vmx_vcpu_get_itir(vcpu,&itir)){ + return(IA64_FAULT); + } + if 
(vmx_vcpu_get_ifa(vcpu,&ifa)){ + return(IA64_FAULT); + } +#ifdef VMAL_NO_FAULT_CHECK + if (is_reserved_itir_field(vcpu, itir)) { + // TODO + return IA64_FAULT; + } + if (unimplemented_gva(vcpu,ifa) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + return (vmx_vcpu_itr_i(vcpu,pte,itir,ifa,slot)); +} + +IA64FAULT itc_fault_check(VCPU *vcpu, INST64 inst, u64 *itir, u64 *ifa,u64 *pte) +{ + UINT64 fault; + ISR isr; + IA64_PSR vpsr; + IA64FAULT ret1; + + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.ic ) { + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + +#ifdef VMAL_NO_FAULT_CHECK + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pte); +#ifdef VMAL_NO_FAULT_CHECK + if( ret1 != IA64_NO_FAULT ){ + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + if (vmx_vcpu_get_itir(vcpu,itir)){ + return(IA64_FAULT); + } + if (vmx_vcpu_get_ifa(vcpu,ifa)){ + return(IA64_FAULT); + } +#ifdef VMAL_NO_FAULT_CHECK + if (unimplemented_gva(vcpu,ifa) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + return IA64_NO_FAULT; +} + +IA64FAULT vmx_emul_itc_d(VCPU *vcpu, INST64 inst) +{ + UINT64 itir, ifa, pte; + + if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) { + return IA64_FAULT; + } + + return (vmx_vcpu_itc_d(vcpu,pte,itir,ifa)); +} + +IA64FAULT vmx_emul_itc_i(VCPU *vcpu, INST64 inst) +{ + UINT64 itir, ifa, pte; + + if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) { + return IA64_FAULT; + } + + return (vmx_vcpu_itc_i(vcpu,pte,itir,ifa)); + +} + +/************************************* + * Moves to semi-privileged registers +*************************************/ + +IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *vcpu, INST64 inst) +{ + // I27 and M30 are identical for these fields + if(inst.M30.ar3!=44){ + panic("Can't support ar register other than itc"); + } +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + UINT64 imm; + if(inst.M30.s){ + imm = -inst.M30.imm; + }else{ + imm = inst.M30.imm; + } + return (vmx_vcpu_set_itc(vcpu, imm)); +} + +IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *vcpu, INST64 inst) +{ + // I26 and M29 are identical for these fields + u64 r2; + if(inst.M29.ar3!=44){ + panic("Can't support ar register other than itc"); + } + if(vmx_vcpu_get_gr(vcpu,inst.M29.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + return (vmx_vcpu_set_itc(vcpu, r2)); +} + + +IA64FAULT vmx_emul_mov_from_ar_reg(VCPU *vcpu, INST64 inst) +{ + // I27 and M30 are identical for these fields + 
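    // ar3 == 44 selects ar.itc, the interval time counter: it is the only
    // application register these mov-to/from-ar handlers virtualize, and any
    // other AR number falls into the panic below.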
if(inst.M31.ar3!=44){ + panic("Can't support ar register other than itc"); + } +#ifdef CHECK_FAULT + if(check_target_register(vcpu,inst.M31.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.si&& vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + u64 r1; + vmx_vcpu_get_itc(vcpu,&r1); + vmx_vcpu_set_gr(vcpu,inst.M31.r1,r1,0); + return IA64_NO_FAULT; +} + + +/******************************** + * Moves to privileged registers +********************************/ + +IA64FAULT vmx_emul_mov_to_pkr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_pkr(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_rr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_rr(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_dbr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_dbr(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_ibr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_ibr(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_pmc(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + 
rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_pmc(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_pmd(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_pmd(vcpu,r3,r2)); +} + + +/********************************** + * Moves from privileged registers + **********************************/ + +IA64FAULT vmx_emul_mov_from_rr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_rr_register(vcpu,r3>>VRN_SHIFT)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + } +#endif //CHECK_FAULT + vmx_vcpu_get_rr(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_pkr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_pkr(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_dbr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_dbr(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_ibr(VCPU *vcpu, INST64 inst) +{ + u64 
r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_ibr(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_pmc(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_pmc(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_cpuid(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_cpuid(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_to_cr(VCPU *vcpu, INST64 inst) +{ + u64 r2,cr3; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if(is_reserved_cr(inst.M32.cr3)||(vpsr.ic&&is_interruption_control_cr(inst.M32.cr3))){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu, inst.M32.r2, &r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if ( check_cr_rsv_fields (inst.M32.cr3, r2)) { + /* Inject Reserved Register/Field fault + * into guest */ + set_rsv_reg_field_isr (vcpu,0); + rsv_reg_field (vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + extern u64 cr_igfld_mask(int index, u64 value); + r2 = cr_igfld_mask(inst.M32.cr3,r2); + VMX_VPD(vcpu, vcr[inst.M32.cr3]) = r2; + switch (inst.M32.cr3) { + case 0: return vmx_vcpu_set_dcr(vcpu,r2); + case 1: return vmx_vcpu_set_itm(vcpu,r2); + case 
2: return vmx_vcpu_set_iva(vcpu,r2); + case 8: return vmx_vcpu_set_pta(vcpu,r2); + case 16:return vmx_vcpu_set_ipsr(vcpu,r2); + case 17:return vmx_vcpu_set_isr(vcpu,r2); + case 19:return vmx_vcpu_set_iip(vcpu,r2); + case 20:return vmx_vcpu_set_ifa(vcpu,r2); + case 21:return vmx_vcpu_set_itir(vcpu,r2); + case 22:return vmx_vcpu_set_iipa(vcpu,r2); + case 23:return vmx_vcpu_set_ifs(vcpu,r2); + case 24:return vmx_vcpu_set_iim(vcpu,r2); + case 25:return vmx_vcpu_set_iha(vcpu,r2); + case 64:printk("SET LID to 0x%lx\n", r2); + return vmx_vcpu_set_lid(vcpu,r2); + case 65:return IA64_NO_FAULT; + case 66:return vmx_vcpu_set_tpr(vcpu,r2); + case 67:return vmx_vcpu_set_eoi(vcpu,r2); + case 68:return IA64_NO_FAULT; + case 69:return IA64_NO_FAULT; + case 70:return IA64_NO_FAULT; + case 71:return IA64_NO_FAULT; + case 72:return vmx_vcpu_set_itv(vcpu,r2); + case 73:return vmx_vcpu_set_pmv(vcpu,r2); + case 74:return vmx_vcpu_set_cmcv(vcpu,r2); + case 80:return vmx_vcpu_set_lrr0(vcpu,r2); + case 81:return vmx_vcpu_set_lrr1(vcpu,r2); + default: return IA64_NO_FAULT; + } +} + + +#define cr_get(cr) \ + ((fault=vmx_vcpu_get_##cr(vcpu,&val))==IA64_NO_FAULT)?\ + vmx_vcpu_set_gr(vcpu, tgt, val,0):fault; + + +IA64FAULT vmx_emul_mov_from_cr(VCPU *vcpu, INST64 inst) +{ + UINT64 tgt = inst.M33.r1; + UINT64 val; + IA64FAULT fault; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if(is_reserved_cr(inst.M33.cr3)||is_read_only_cr(inst.M33.cr3|| + (vpsr.ic&&is_interruption_control_cr(inst.M33.cr3)))){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + +// from_cr_cnt[inst.M33.cr3]++; + switch (inst.M33.cr3) { + case 0: return cr_get(dcr); + case 1: return cr_get(itm); + case 2: return cr_get(iva); + case 8: return cr_get(pta); + case 16:return cr_get(ipsr); + case 17:return cr_get(isr); + case 19:return cr_get(iip); + case 20:return cr_get(ifa); + case 21:return cr_get(itir); + case 22:return cr_get(iipa); + case 23:return cr_get(ifs); + case 24:return cr_get(iim); + case 25:return cr_get(iha); +// case 64:val = ia64_getreg(_IA64_REG_CR_LID); +// return vmx_vcpu_set_gr(vcpu,tgt,val,0); + case 64:return cr_get(lid); + case 65: + vmx_vcpu_get_ivr(vcpu,&val); + return vmx_vcpu_set_gr(vcpu,tgt,val,0); + case 66:return cr_get(tpr); + case 67:return vmx_vcpu_set_gr(vcpu,tgt,0L,0); + case 68:return cr_get(irr0); + case 69:return cr_get(irr1); + case 70:return cr_get(irr2); + case 71:return cr_get(irr3); + case 72:return cr_get(itv); + case 73:return cr_get(pmv); + case 74:return cr_get(cmcv); + case 80:return cr_get(lrr0); + case 81:return cr_get(lrr1); + default: + panic("Read reserved cr register"); + } +} + + +static void post_emulation_action(VCPU *vcpu) +{ + if ( vcpu->arch.irq_new_condition ) { + vcpu->arch.irq_new_condition = 0; + vhpi_detection(vcpu); + } +} + +//#define BYPASS_VMAL_OPCODE +extern IA64_SLOT_TYPE slot_types[0x20][3]; +IA64_BUNDLE __vmx_get_domain_bundle(u64 iip) +{ + IA64_BUNDLE bundle; + + fetch_code( current,iip, &bundle.i64[0]); + fetch_code( current,iip+8, &bundle.i64[1]); + return bundle; +} + +/** Emulate a privileged operation. 
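 *
 * (`cause' is one of the EVENT_* codes produced by ia64_priv_decoder() for a
 *  privileged or virtualization-sensitive instruction, and `opcode' appears
 *  to be the raw instruction image captured at the virtualization fault; it
 *  is copied straight into inst.inst unless BYPASS_VMAL_OPCODE is defined,
 *  in which case the bundle is re-fetched from guest memory and re-decoded.)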
+ * + * + * @param vcpu virtual cpu + * @cause the reason cause virtualization fault + * @opcode the instruction code which cause virtualization fault + */ + +void +vmx_emulate(VCPU *vcpu, UINT64 cause, UINT64 opcode) +{ + IA64_BUNDLE bundle; + int slot; + IA64_SLOT_TYPE slot_type; + IA64FAULT status; + INST64 inst; + REGS * regs; + UINT64 iip; + regs = vcpu_regs(vcpu); + iip = regs->cr_iip; + IA64_PSR vpsr; +/* + if (privop_trace) { + static long i = 400; + //if (i > 0) printf("privop @%p\n",iip); + if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n", + iip,ia64_get_itc(),ia64_get_itm()); + i--; + } +*/ +#ifdef VTLB_DEBUG + check_vtlb_sanity(vmx_vcpu_get_vtlb(vcpu)); + dump_vtlb(vmx_vcpu_get_vtlb(vcpu)); +#endif +#if 0 +if ( (cause == 0xff && opcode == 0x1e000000000) || cause == 0 ) { + printf ("VMAL decode error: cause - %lx; op - %lx\n", + cause, opcode ); + return; +} +#endif +#ifdef BYPASS_VMAL_OPCODE + // make a local copy of the bundle containing the privop + bundle = __vmx_get_domain_bundle(iip); + slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; + if (!slot) inst.inst = bundle.slot0; + else if (slot == 1) + inst.inst = bundle.slot1a + (bundle.slot1b<<18); + else if (slot == 2) inst.inst = bundle.slot2; + else printf("priv_handle_op: illegal slot: %d\n", slot); + slot_type = slot_types[bundle.template][slot]; + ia64_priv_decoder(slot_type, inst, &cause); + if(cause==0){ + printf("This instruction at 0x%lx slot %d can't be virtualized", iip, slot); + panic("123456\n"); + } +#else + inst.inst=opcode; +#endif /* BYPASS_VMAL_OPCODE */ + + /* + * Switch to actual virtual rid in rr0 and rr4, + * which is required by some tlb related instructions. + */ + prepare_if_physical_mode(vcpu); + + switch(cause) { + case EVENT_RSM: + status=vmx_emul_rsm(vcpu, inst); + break; + case EVENT_SSM: + status=vmx_emul_ssm(vcpu, inst); + break; + case EVENT_MOV_TO_PSR: + status=vmx_emul_mov_to_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_PSR: + status=vmx_emul_mov_from_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_CR: + status=vmx_emul_mov_from_cr(vcpu, inst); + break; + case EVENT_MOV_TO_CR: + status=vmx_emul_mov_to_cr(vcpu, inst); + break; + case EVENT_BSW_0: + status=vmx_emul_bsw0(vcpu, inst); + break; + case EVENT_BSW_1: + status=vmx_emul_bsw1(vcpu, inst); + break; + case EVENT_COVER: + status=vmx_emul_cover(vcpu, inst); + break; + case EVENT_RFI: + status=vmx_emul_rfi(vcpu, inst); + break; + case EVENT_ITR_D: + status=vmx_emul_itr_d(vcpu, inst); + break; + case EVENT_ITR_I: + status=vmx_emul_itr_i(vcpu, inst); + break; + case EVENT_PTR_D: + status=vmx_emul_ptr_d(vcpu, inst); + break; + case EVENT_PTR_I: + status=vmx_emul_ptr_i(vcpu, inst); + break; + case EVENT_ITC_D: + status=vmx_emul_itc_d(vcpu, inst); + break; + case EVENT_ITC_I: + status=vmx_emul_itc_i(vcpu, inst); + break; + case EVENT_PTC_L: + status=vmx_emul_ptc_l(vcpu, inst); + break; + case EVENT_PTC_G: + status=vmx_emul_ptc_g(vcpu, inst); + break; + case EVENT_PTC_GA: + status=vmx_emul_ptc_ga(vcpu, inst); + break; + case EVENT_PTC_E: + status=vmx_emul_ptc_e(vcpu, inst); + break; + case EVENT_MOV_TO_RR: + status=vmx_emul_mov_to_rr(vcpu, inst); + break; + case EVENT_MOV_FROM_RR: + status=vmx_emul_mov_from_rr(vcpu, inst); + break; + case EVENT_THASH: + status=vmx_emul_thash(vcpu, inst); + break; + case EVENT_TTAG: + status=vmx_emul_ttag(vcpu, inst); + break; + case EVENT_TPA: + status=vmx_emul_tpa(vcpu, inst); + break; + case EVENT_TAK: + status=vmx_emul_tak(vcpu, inst); + break; + case EVENT_MOV_TO_AR_IMM: + 
status=vmx_emul_mov_to_ar_imm(vcpu, inst); + break; + case EVENT_MOV_TO_AR: + status=vmx_emul_mov_to_ar_reg(vcpu, inst); + break; + case EVENT_MOV_FROM_AR: + status=vmx_emul_mov_from_ar_reg(vcpu, inst); + break; + case EVENT_MOV_TO_DBR: + status=vmx_emul_mov_to_dbr(vcpu, inst); + break; + case EVENT_MOV_TO_IBR: + status=vmx_emul_mov_to_ibr(vcpu, inst); + break; + case EVENT_MOV_TO_PMC: + status=vmx_emul_mov_to_pmc(vcpu, inst); + break; + case EVENT_MOV_TO_PMD: + status=vmx_emul_mov_to_pmd(vcpu, inst); + break; + case EVENT_MOV_TO_PKR: + status=vmx_emul_mov_to_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_DBR: + status=vmx_emul_mov_from_dbr(vcpu, inst); + break; + case EVENT_MOV_FROM_IBR: + status=vmx_emul_mov_from_ibr(vcpu, inst); + break; + case EVENT_MOV_FROM_PMC: + status=vmx_emul_mov_from_pmc(vcpu, inst); + break; + case EVENT_MOV_FROM_PKR: + status=vmx_emul_mov_from_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_CPUID: + status=vmx_emul_mov_from_cpuid(vcpu, inst); + break; + case EVENT_VMSW: + printf ("Unimplemented instruction %d\n", cause); + status=IA64_FAULT; + break; + default: + printf("unknown cause %d, iip: %lx, ipsr: %lx\n", cause,regs->cr_iip,regs->cr_ipsr); + while(1); + /* For unknown cause, let hardware to re-execute */ + status=IA64_RETRY; + break; +// panic("unknown cause in virtualization intercept"); + }; + +#if 0 + if (status == IA64_FAULT) + panic("Emulation failed with cause %d:\n", cause); +#endif + + if ( status == IA64_NO_FAULT && cause !=EVENT_RFI ) { + vmx_vcpu_increment_iip(vcpu); + } + + recover_if_physical_mode(vcpu); + post_emulation_action (vcpu); +//TODO set_irq_check(v); + return; + +} + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vmx_vsa.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vmx_vsa.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,84 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_vsa.c: Call PAL virtualization services. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Arun Sharma <arun.sharma@xxxxxxxxx> + * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) + */ + +#include <asm/asmmacro.h> + + + .text + +/* + * extern UINT64 ia64_call_vsa(UINT64 proc,UINT64 arg1, UINT64 arg2, + * UINT64 arg3, UINT64 arg4, UINT64 arg5, + * UINT64 arg6, UINT64 arg7); + * + * XXX: The currently defined services use only 4 args at the max. The + * rest are not consumed. 
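The assembly stub that follows implements the C prototype quoted in its header comment: the first argument selects a service entry relative to __vsa_base, interrupts and interruption collection are masked around the branch, and the result comes back in r8. From C it is called like any other function. The fragment below only illustrates that caller-side view with a mock body that echoes its arguments; the service number is a placeholder, not a real PAL VSA encoding.

    /* Illustration of the C-side calling convention for ia64_call_vsa().
     * The mock body stands in for the GLOBAL_ENTRY stub in vmx_vsa.S. */
    #include <stdio.h>

    typedef unsigned long UINT64;

    static UINT64 ia64_call_vsa(UINT64 proc, UINT64 arg1, UINT64 arg2,
                                UINT64 arg3, UINT64 arg4, UINT64 arg5,
                                UINT64 arg6, UINT64 arg7)
    {
        printf("VSA service 0x%lx(%lx, %lx, %lx, %lx)\n",
               proc, arg1, arg2, arg3, arg4);
        (void)arg5; (void)arg6; (void)arg7;   /* no current service consumes these */
        return 0;
    }

    int main(void)
    {
        /* callers pass the service offset first, then at most four real arguments */
        ia64_call_vsa(0x400 /* placeholder offset */, 0x1000, 0x2000, 0, 0, 0, 0, 0);
        return 0;
    }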
+ */ +GLOBAL_ENTRY(ia64_call_vsa) + .regstk 4,4,0,0 + +rpsave = loc0 +pfssave = loc1 +psrsave = loc2 +entry = loc3 +hostret = r24 + + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + movl entry=@gprel(__vsa_base) +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + add entry=entry,gp + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + ld8 entry=[entry] // read entry point + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.d + mov b6=entry + br.cond.sptk b6 // call the service +2: + // Architectural sequence for enabling interrupts if necessary +(p7) ssm psr.ic + ;; +(p7) srlz.d + ;; +(p6) ssm psr.i + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp + +END(ia64_call_vsa) + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx/vtlb.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/vmx/vtlb.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1094 @@ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vtlb.c: guest virtual tlb handling module. + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) + * XiaoYan Feng (Fleming Feng) (Fleming.feng@xxxxxxxxx) + */ + +#include <linux/sched.h> +#include <asm/tlb.h> +#include <asm/mm.h> +#include <asm/vmx_mm_def.h> +#include <asm/gcc_intrin.h> +#include <linux/interrupt.h> +#include <asm/vmx_vcpu.h> +#define MAX_CCH_LENGTH 40 + + +static void cch_mem_init(thash_cb_t *hcb) +{ + thash_cch_mem_t *p, *q; + + hcb->cch_freelist = p = hcb->cch_buf; + + for ( q=p+1; (u64)(q + 1) <= (u64)hcb->cch_buf + hcb->cch_sz; + p++, q++ ) { + p->next = q; + } + p->next = NULL; +} + +static thash_data_t *cch_alloc(thash_cb_t *hcb) +{ + thash_cch_mem_t *p; + + if ( (p = hcb->cch_freelist) != NULL ) { + hcb->cch_freelist = p->next; + } + return &(p->data); +} + +static void cch_free(thash_cb_t *hcb, thash_data_t *cch) +{ + thash_cch_mem_t *p = (thash_cch_mem_t*)cch; + + p->next = hcb->cch_freelist; + hcb->cch_freelist = p; +} + +/* + * Check to see if the address rid:va is translated by the TLB + */ +static int __is_translated(thash_data_t *tlb, u64 rid, u64 va, CACHE_LINE_TYPE cl) +{ + u64 size1,sa1,ea1; + + if ( tlb->rid != rid || tlb->cl != cl ) + return 0; + size1 = PSIZE(tlb->ps); + sa1 = tlb->vadr & ~(size1-1); // mask the low address bits + ea1 = sa1 + size1; + + if ( va >= sa1 && (va < ea1 || ea1 == 0) ) + return 1; + else + return 0; +} + +/* + * Only for TLB format. 
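cch_mem_init(), cch_alloc() and cch_free() above manage the collision-chain buffer as a singly linked free list threaded through the buffer itself: initialization chains every slot to its neighbour, allocation pops the head, and freeing pushes the node back. The standalone sketch below shows the same technique with generic names; the thash/cch types are simplified and the pool size is arbitrary.

    /* Free list threaded through a fixed buffer, as used for collision-chain nodes. */
    #include <stdio.h>
    #include <stddef.h>

    #define NODES 8

    typedef struct node { struct node *next; long payload; } node_t;

    static node_t pool[NODES];
    static node_t *freelist;

    static void pool_init(void)
    {
        size_t i;
        freelist = &pool[0];
        for (i = 0; i + 1 < NODES; i++)
            pool[i].next = &pool[i + 1];   /* chain every slot to its neighbour */
        pool[NODES - 1].next = NULL;
    }

    static node_t *pool_alloc(void)
    {
        node_t *p = freelist;
        if (p)
            freelist = p->next;            /* pop the head; NULL means "must recycle" */
        return p;
    }

    static void pool_free(node_t *p)
    {
        p->next = freelist;                /* push back onto the head */
        freelist = p;
    }

    int main(void)
    {
        pool_init();
        node_t *a = pool_alloc(), *b = pool_alloc();
        pool_free(a);
        printf("allocated %p %p, freed %p\n", (void *)a, (void *)b, (void *)a);
        return 0;
    }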
+ */ +static int +__is_tlb_overlap(thash_cb_t *hcb,thash_data_t *entry,int rid, char cl, u64 sva, u64 eva) +{ + uint64_t size1,size2,sa1,ea1,ea2; + + if ( entry->invalid || entry->rid != rid || entry->cl != cl ) { + return 0; + } + size1=PSIZE(entry->ps); + sa1 = entry->vadr & ~(size1-1); // mask the low address bits + ea1 = sa1 + size1; + if ( (sva >= ea1 && ea1 != 0) || (eva <= sa1 && eva != 0) ) + return 0; + else + return 1; + +} + +static void __rem_tr (thash_cb_t *hcb, thash_data_t *tr) +{ + if ( hcb->remove_notifier ) { + (hcb->remove_notifier)(hcb,tr); + } + tr->invalid = 1; +} + +static inline void __set_tr (thash_data_t *tr, thash_data_t *data, int idx) +{ + *tr = *data; + tr->tr_idx = idx; +} + + +static void __init_tr(thash_cb_t *hcb) +{ + int i; + thash_data_t *tr; + + for ( i=0, tr = &ITR(hcb,0); i<NITRS; i++ ) { + tr[i].invalid = 1; + } + for ( i=0, tr = &DTR(hcb,0); i<NDTRS; i++ ) { + tr[i].invalid = 1; + } +} + +/* + * Replace TR entry. + */ +static void rep_tr(thash_cb_t *hcb,thash_data_t *insert, int idx) +{ + thash_data_t *tr; + + if ( insert->cl == ISIDE_TLB ) { + tr = &ITR(hcb,idx); + } + else { + tr = &DTR(hcb,idx); + } + if ( !INVALID_TLB(tr) ) { + __rem_tr(hcb, tr); + } + __set_tr (tr, insert, idx); +} + +/* + * remove TR entry. + */ +static void rem_tr(thash_cb_t *hcb,CACHE_LINE_TYPE cl, int idx) +{ + thash_data_t *tr; + + if ( cl == ISIDE_TLB ) { + tr = &ITR(hcb,idx); + } + else { + tr = &DTR(hcb,idx); + } + if ( !INVALID_TLB(tr) ) { + __rem_tr(hcb, tr); + } +} + +/* + * Delete an thash entry in collision chain. + * prev: the previous entry. + * rem: the removed entry. + */ +static void __rem_chain(thash_cb_t *hcb/*, thash_data_t *prev*/, thash_data_t *rem) +{ + //prev->next = rem->next; + if ( hcb->remove_notifier ) { + (hcb->remove_notifier)(hcb,rem); + } + cch_free (hcb, rem); +} + +/* + * Delete an thash entry leading collision chain. + */ +static void __rem_hash_head(thash_cb_t *hcb, thash_data_t *hash) +{ + thash_data_t *next=hash->next; + + if ( hcb->remove_notifier ) { + (hcb->remove_notifier)(hcb,hash); + } + if ( next != NULL ) { + *hash = *next; + cch_free (hcb, next); + } + else { + INVALIDATE_HASH(hcb, hash); + } +} + +thash_data_t *__vtr_lookup(thash_cb_t *hcb, + u64 rid, u64 va, + CACHE_LINE_TYPE cl) +{ + thash_data_t *tr; + int num,i; + + if ( cl == ISIDE_TLB ) { + tr = &ITR(hcb,0); + num = NITRS; + } + else { + tr = &DTR(hcb,0); + num = NDTRS; + } + for ( i=0; i<num; i++ ) { + if ( !INVALID_ENTRY(hcb,&tr[i]) && + __is_translated(&tr[i], rid, va, cl) ) + return &tr[i]; + } + return NULL; +} + + +/* + * Find overlap VHPT entry within current collision chain + * base on internal priv info. + */ +static inline thash_data_t* _vhpt_next_overlap_in_chain(thash_cb_t *hcb) +{ + thash_data_t *cch; + thash_internal_t *priv = &hcb->priv; + + + for (cch=priv->cur_cch; cch; cch = cch->next) { + if ( priv->tag == cch->etag ) { + return cch; + } + } + return NULL; +} + +/* + * Find overlap TLB/VHPT entry within current collision chain + * base on internal priv info. + */ +static thash_data_t *_vtlb_next_overlap_in_chain(thash_cb_t *hcb) +{ + thash_data_t *cch; + thash_internal_t *priv = &hcb->priv; + + /* Find overlap TLB entry */ + for (cch=priv->cur_cch; cch; cch = cch->next) { + if ( ( cch->tc ? priv->s_sect.tc : priv->s_sect.tr ) && + __is_tlb_overlap(hcb, cch, priv->rid, priv->cl, + priv->_curva, priv->_eva) ) { + return cch; + } + } + return NULL; +} + +/* + * Get the machine format of VHPT entry. 
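__is_tlb_overlap() boils down to comparing two half-open address ranges, with the twist that an end address of 0 means the range wrapped around the top of the 64-bit space. A small self-contained version of that predicate, with the rid/cl matching stripped out for clarity:

    /* Overlap test for [sa1, ea1) vs [sva, eva); an end of 0 means "extends to 2^64". */
    #include <stdint.h>
    #include <stdio.h>

    static int ranges_overlap(uint64_t sa1, uint64_t ea1, uint64_t sva, uint64_t eva)
    {
        /* disjoint only if one range ends at or before the other starts,
         * and that end did not wrap to 0 */
        if ((sva >= ea1 && ea1 != 0) || (eva <= sa1 && eva != 0))
            return 0;
        return 1;
    }

    int main(void)
    {
        uint64_t ps   = 14;                     /* a 16KB translation */
        uint64_t size = 1ULL << ps;
        uint64_t sa1  = 0x4000 & ~(size - 1);   /* align the entry to its page size */

        printf("%d\n", ranges_overlap(sa1, sa1 + size, 0x4000, 0x8000));  /* 1: overlaps */
        printf("%d\n", ranges_overlap(sa1, sa1 + size, 0x8000, 0xC000));  /* 0: disjoint */
        return 0;
    }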
+ * PARAS: + * 1: tlb: means the tlb format hash entry converting to VHPT. + * 2: va means the guest virtual address that must be coverd by + * the translated machine VHPT. + * 3: vhpt: means the machine format VHPT converting from tlb. + * NOTES: + * 1: In case of the machine address is discontiguous, + * "tlb" needs to be covered by several machine VHPT. va + * is used to choice one of them. + * 2: Foreign map is supported in this API. + * RETURN: + * 0/1: means successful or fail. + * + */ +int __tlb_to_vhpt(thash_cb_t *hcb, + thash_data_t *tlb, u64 va, + thash_data_t *vhpt) +{ + u64 pages,mfn; + ia64_rr vrr; + + ASSERT ( hcb->ht == THASH_VHPT ); + vrr = (hcb->get_rr_fn)(hcb->vcpu,va); + pages = PSIZE(vrr.ps) >> PAGE_SHIFT; + mfn = (hcb->vs->get_mfn)(DOMID_SELF,tlb->ppn, pages); + if ( mfn == INVALID_MFN ) return 0; + + // TODO with machine discontinuous address space issue. + vhpt->etag = (hcb->vs->tag_func)( hcb->pta, + tlb->vadr, tlb->rid, tlb->ps); + //vhpt->ti = 0; + vhpt->itir = tlb->itir & ~ITIR_RV_MASK; + vhpt->page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK; + vhpt->ppn = mfn; + vhpt->next = 0; + return 1; +} + + +/* + * Insert an entry to hash table. + * NOTES: + * 1: TLB entry may be TR, TC or Foreign Map. For TR entry, + * itr[]/dtr[] need to be updated too. + * 2: Inserting to collision chain may trigger recycling if + * the buffer for collision chain is empty. + * 3: The new entry is inserted at the next of hash table. + * (I.e. head of the collision chain) + * 4: The buffer holding the entry is allocated internally + * from cch_buf or just in the hash table. + * 5: Return the entry in hash table or collision chain. + * 6: Input parameter, entry, should be in TLB format. + * I.e. Has va, rid, ps... + * 7: This API is invoked by emulating ITC/ITR and tlb_miss. + * + */ + +void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx) +{ + if ( hcb->ht != THASH_TLB || entry->tc ) { + panic("wrong parameter\n"); + } + entry->vadr = PAGEALIGN(entry->vadr,entry->ps); + entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12); + rep_tr(hcb, entry, idx); + return ; +} + +thash_data_t *__alloc_chain(thash_cb_t *hcb,thash_data_t *entry) +{ + thash_data_t *cch; + + cch = cch_alloc(hcb); + if(cch == NULL){ + // recycle + if ( hcb->recycle_notifier ) { + hcb->recycle_notifier(hcb,(u64)entry); + } + thash_purge_all(hcb); + cch = cch_alloc(hcb); + } + return cch; +} + +/* + * Insert an entry into hash TLB or VHPT. + * NOTES: + * 1: When inserting VHPT to thash, "va" is a must covered + * address by the inserted machine VHPT entry. + * 2: The format of entry is always in TLB. + * 3: The caller need to make sure the new entry will not overlap + * with any existed entry. + */ +void vtlb_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) +{ + thash_data_t *hash_table, *cch; + int flag; + ia64_rr vrr; + u64 gppn; + u64 ppns, ppne; + + hash_table = (hcb->hash_func)(hcb->pta, + va, entry->rid, entry->ps); + if( INVALID_ENTRY(hcb, hash_table) ) { + *hash_table = *entry; + hash_table->next = 0; + } + else { + // TODO: Add collision chain length limitation. 
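Both vtlb_insert() and vhpt_insert() insert at the head of a bucket without moving the bucket slot itself: the current head (which lives inside the hash array) is copied into a newly allocated chain node, the new entry overwrites the slot, and the copy is linked behind it. A generic sketch of that idiom follows, with a simplified entry type and malloc standing in for the cch buffer (the recycling path is omitted).

    /* Head insertion into an open-hash bucket whose head entry is stored in-place. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct entry {
        int valid;
        unsigned long key, data;
        struct entry *next;
    } entry_t;

    static void bucket_insert(entry_t *bucket, const entry_t *in)
    {
        if (!bucket->valid) {                  /* empty slot: just take it */
            *bucket = *in;
            bucket->valid = 1;
            bucket->next = NULL;
            return;
        }
        /* copy the old head out of the array, then overwrite the slot */
        entry_t *cch = malloc(sizeof(*cch));   /* the real code pulls from cch_buf */
        *cch = *bucket;
        *bucket = *in;
        bucket->valid = 1;
        bucket->next = cch;
    }

    int main(void)
    {
        entry_t bucket;
        memset(&bucket, 0, sizeof(bucket));
        entry_t a = { .key = 1, .data = 10 }, b = { .key = 2, .data = 20 };
        bucket_insert(&bucket, &a);
        bucket_insert(&bucket, &b);
        printf("head key=%lu, chained key=%lu\n", bucket.key, bucket.next->key);
        return 0;
    }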
+ cch = __alloc_chain(hcb,entry); + + *cch = *hash_table; + *hash_table = *entry; + hash_table->next = cch; + } + if(hcb->vcpu->domain->domain_id==0){ + thash_insert(hcb->ts->vhpt, entry, va); + return; + } + flag = 1; + gppn = (POFFSET(va,entry->ps)|PAGEALIGN((entry->ppn<<12),entry->ps))>>PAGE_SHIFT; + ppns = PAGEALIGN((entry->ppn<<12),entry->ps); + ppne = ppns + PSIZE(entry->ps); + if(((ppns<=0xa0000)&&(ppne>0xa0000))||((ppne>0xc0000)&&(ppns<=0xc0000))) + flag = 0; + if((__gpfn_is_mem(hcb->vcpu->domain, gppn)&&flag)) + thash_insert(hcb->ts->vhpt, entry, va); + return ; +} + +static void vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) +{ + thash_data_t *hash_table, *cch; + ia64_rr vrr; + + hash_table = (hcb->hash_func)(hcb->pta, + va, entry->rid, entry->ps); + if( INVALID_ENTRY(hcb, hash_table) ) { + if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) { + panic("Can't convert to machine VHPT entry\n"); + } + hash_table->next = 0; + } + else { + // TODO: Add collision chain length limitation. + cch = __alloc_chain(hcb,entry); + + *cch = *hash_table; + if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) { + panic("Can't convert to machine VHPT entry\n"); + } + hash_table->next = cch; + if(hash_table->tag==hash_table->next->tag) + while(1); + } + return /*hash_table*/; +} + +void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) +{ + thash_data_t *hash_table; + ia64_rr vrr; + + vrr = (hcb->get_rr_fn)(hcb->vcpu,entry->vadr); + if ( entry->ps != vrr.ps && entry->tc ) { + panic("Not support for multiple page size now\n"); + } + entry->vadr = PAGEALIGN(entry->vadr,entry->ps); + entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12); + (hcb->ins_hash)(hcb, entry, va); + +} + +static void rem_thash(thash_cb_t *hcb, thash_data_t *entry) +{ + thash_data_t *hash_table, *p, *q; + thash_internal_t *priv = &hcb->priv; + int idx; + + hash_table = priv->hash_base; + if ( hash_table == entry ) { +// if ( PURGABLE_ENTRY(hcb, entry) ) { + __rem_hash_head (hcb, entry); +// } + return ; + } + // remove from collision chain + p = hash_table; + for ( q=p->next; q; q = p->next ) { + if ( q == entry ){ +// if ( PURGABLE_ENTRY(hcb,q ) ) { + p->next = q->next; + __rem_chain(hcb, entry); +// } + return ; + } + p = q; + } + panic("Entry not existed or bad sequence\n"); +} + +static void rem_vtlb(thash_cb_t *hcb, thash_data_t *entry) +{ + thash_data_t *hash_table, *p, *q; + thash_internal_t *priv = &hcb->priv; + int idx; + + if ( !entry->tc ) { + return rem_tr(hcb, entry->cl, entry->tr_idx); + } + rem_thash(hcb, entry); +} + +int cch_depth=0; +/* + * Purge the collision chain starting from cch. + * NOTE: + * For those UN-Purgable entries(FM), this function will return + * the head of left collision chain. + */ +static thash_data_t *thash_rem_cch(thash_cb_t *hcb, thash_data_t *cch) +{ + thash_data_t *next; + + if ( ++cch_depth > MAX_CCH_LENGTH ) { + printf ("cch length > MAX_CCH_LENGTH, exceed the expected length\n"); + while(1); + } + if ( cch -> next ) { + next = thash_rem_cch(hcb, cch->next); + } + else { + next = NULL; + } + if ( PURGABLE_ENTRY(hcb, cch) ) { + __rem_chain(hcb, cch); + return next; + } + else { + cch->next = next; + return cch; + } +} + +/* + * Purge one hash line (include the entry in hash table). + * Can only be called by thash_purge_all. 
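rem_thash() above distinguishes two cases: removing the in-array head (handled by __rem_hash_head(), which copies the successor into the head slot) and removing an interior node, which is an ordinary singly-linked-list unlink. The sketch below shows only the interior unlink, using the same p/q walking pattern with simplified types.

    /* Unlink 'victim' from the collision chain hanging off 'head' (head itself excluded). */
    #include <stdio.h>

    typedef struct node { int key; struct node *next; } node_t;

    static int chain_unlink(node_t *head, node_t *victim)
    {
        node_t *p = head, *q;

        for (q = p->next; q; q = p->next) {
            if (q == victim) {
                p->next = q->next;   /* bypass it; the real code then cch_free()s it */
                return 0;
            }
            p = q;
        }
        return -1;                   /* entry not present: the real code panics here */
    }

    int main(void)
    {
        node_t c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        int rc = chain_unlink(&a, &b);
        printf("unlink b: rc=%d, a->next->key=%d\n", rc, a.next->key);
        return 0;
    }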
+ * Input: + * hash: The head of collision chain (hash table) + * + */ +static void thash_rem_line(thash_cb_t *hcb, thash_data_t *hash) +{ + if ( INVALID_ENTRY(hcb, hash) ) return; + + if ( hash->next ) { + cch_depth = 0; + hash->next = thash_rem_cch(hcb, hash->next); + } + // Then hash table itself. + if ( PURGABLE_ENTRY(hcb, hash) ) { + __rem_hash_head(hcb, hash); + } +} + + +/* + * Find an overlap entry in hash table and its collision chain. + * Refer to SDM2 4.1.1.4 for overlap definition. + * PARAS: + * 1: in: TLB format entry, rid:ps must be same with vrr[]. + * va & ps identify the address space for overlap lookup + * 2: section can be combination of TR, TC and FM. (THASH_SECTION_XX) + * 3: cl means I side or D side. + * RETURNS: + * NULL to indicate the end of findings. + * NOTES: + * + */ +thash_data_t *thash_find_overlap(thash_cb_t *hcb, + thash_data_t *in, search_section_t s_sect) +{ + return (hcb->find_overlap)(hcb, in->vadr, + PSIZE(in->ps), in->rid, in->cl, s_sect); +} + +static thash_data_t *vtlb_find_overlap(thash_cb_t *hcb, + u64 va, u64 size, int rid, char cl, search_section_t s_sect) +{ + thash_data_t *hash_table; + thash_internal_t *priv = &hcb->priv; + u64 tag; + ia64_rr vrr; + + priv->_curva = va & ~(size-1); + priv->_eva = priv->_curva + size; + priv->rid = rid; + vrr = (hcb->get_rr_fn)(hcb->vcpu,va); + priv->ps = vrr.ps; + hash_table = (hcb->hash_func)(hcb->pta, + priv->_curva, rid, priv->ps); + + priv->s_sect = s_sect; + priv->cl = cl; + priv->_tr_idx = 0; + priv->hash_base = hash_table; + priv->cur_cch = hash_table; + return (hcb->next_overlap)(hcb); +} + +static thash_data_t *vhpt_find_overlap(thash_cb_t *hcb, + u64 va, u64 size, int rid, char cl, search_section_t s_sect) +{ + thash_data_t *hash_table; + thash_internal_t *priv = &hcb->priv; + u64 tag; + ia64_rr vrr; + + priv->_curva = va & ~(size-1); + priv->_eva = priv->_curva + size; + priv->rid = rid; + vrr = (hcb->get_rr_fn)(hcb->vcpu,va); + priv->ps = vrr.ps; + hash_table = (hcb->hash_func)( hcb->pta, + priv->_curva, rid, priv->ps); + tag = (hcb->vs->tag_func)( hcb->pta, + priv->_curva, rid, priv->ps); + + priv->tag = tag; + priv->hash_base = hash_table; + priv->cur_cch = hash_table; + return (hcb->next_overlap)(hcb); +} + + +static thash_data_t *vtr_find_next_overlap(thash_cb_t *hcb) +{ + thash_data_t *tr; + thash_internal_t *priv = &hcb->priv; + int num; + + if ( priv->cl == ISIDE_TLB ) { + num = NITRS; + tr = &ITR(hcb,0); + } + else { + num = NDTRS; + tr = &DTR(hcb,0); + } + for (; priv->_tr_idx < num; priv->_tr_idx ++ ) { + if ( __is_tlb_overlap(hcb, &tr[priv->_tr_idx], + priv->rid, priv->cl, + priv->_curva, priv->_eva) ) { + return &tr[priv->_tr_idx++]; + } + } + return NULL; +} + +/* + * Similar with vtlb_next_overlap but find next entry. + * NOTES: + * Intermediate position information is stored in hcb->priv. 
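thash_find_overlap() and the *_next_overlap() callbacks form a stateful iterator: the first call records the search window (current va, end va, rid, section mask) in hcb->priv and returns the first hit, and each later call resumes from cur_cch. Here is a compact sketch of the same find-first/find-next pattern over an ordinary array, with the cursor kept in a small private struct; the names are generic, not the thash ones.

    /* Stateful find-first / find-next iteration, with the cursor held in a priv struct. */
    #include <stdio.h>

    typedef struct {
        const int *items;
        int count;
        int cursor;      /* resume point, like hcb->priv.cur_cch */
        int wanted;      /* search key, like the rid/va window */
    } search_priv_t;

    static const int *find_next(search_priv_t *priv)
    {
        while (priv->cursor < priv->count) {
            const int *it = &priv->items[priv->cursor++];
            if (*it == priv->wanted)
                return it;
        }
        return NULL;     /* NULL terminates the scan */
    }

    static const int *find_first(search_priv_t *priv, const int *items, int count, int wanted)
    {
        priv->items  = items;
        priv->count  = count;
        priv->cursor = 0;
        priv->wanted = wanted;
        return find_next(priv);
    }

    int main(void)
    {
        int data[] = { 1, 7, 3, 7, 5 };
        search_priv_t priv;
        for (const int *hit = find_first(&priv, data, 5, 7); hit; hit = find_next(&priv))
            printf("hit at index %ld\n", (long)(hit - data));
        return 0;
    }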
+ */ +static thash_data_t *vtlb_next_overlap(thash_cb_t *hcb) +{ + thash_data_t *ovl; + thash_internal_t *priv = &hcb->priv; + u64 addr,rr_psize; + ia64_rr vrr; + + if ( priv->s_sect.tr ) { + ovl = vtr_find_next_overlap (hcb); + if ( ovl ) return ovl; + priv->s_sect.tr = 0; + } + if ( priv->s_sect.v == 0 ) return NULL; + vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva); + rr_psize = PSIZE(vrr.ps); + + while ( priv->_curva < priv->_eva ) { + if ( !INVALID_ENTRY(hcb, priv->hash_base) ) { + ovl = _vtlb_next_overlap_in_chain(hcb); + if ( ovl ) { + priv->cur_cch = ovl->next; + return ovl; + } + } + priv->_curva += rr_psize; + priv->hash_base = (hcb->hash_func)( hcb->pta, + priv->_curva, priv->rid, priv->ps); + priv->cur_cch = priv->hash_base; + } + return NULL; +} + +static thash_data_t *vhpt_next_overlap(thash_cb_t *hcb) +{ + thash_data_t *ovl; + thash_internal_t *priv = &hcb->priv; + u64 addr,rr_psize; + ia64_rr vrr; + + vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva); + rr_psize = PSIZE(vrr.ps); + + while ( priv->_curva < priv->_eva ) { + if ( !INVALID_ENTRY(hcb, priv->hash_base) ) { + ovl = _vhpt_next_overlap_in_chain(hcb); + if ( ovl ) { + priv->cur_cch = ovl->next; + return ovl; + } + } + priv->_curva += rr_psize; + priv->hash_base = (hcb->hash_func)( hcb->pta, + priv->_curva, priv->rid, priv->ps); + priv->tag = (hcb->vs->tag_func)( hcb->pta, + priv->_curva, priv->rid, priv->ps); + priv->cur_cch = priv->hash_base; + } + return NULL; +} + + +/* + * Find and purge overlap entries in hash table and its collision chain. + * PARAS: + * 1: in: TLB format entry, rid:ps must be same with vrr[]. + * rid, va & ps identify the address space for purge + * 2: section can be combination of TR, TC and FM. (thash_SECTION_XX) + * 3: cl means I side or D side. + * NOTES: + * + */ +void thash_purge_entries(thash_cb_t *hcb, + thash_data_t *in, search_section_t p_sect) +{ + return thash_purge_entries_ex(hcb, in->rid, in->vadr, + in->ps, p_sect, in->cl); +} + +void thash_purge_entries_ex(thash_cb_t *hcb, + u64 rid, u64 va, u64 ps, + search_section_t p_sect, + CACHE_LINE_TYPE cl) +{ + thash_data_t *ovl; + + ovl = (hcb->find_overlap)(hcb, va, PSIZE(ps), rid, cl, p_sect); + while ( ovl != NULL ) { + (hcb->rem_hash)(hcb, ovl); + ovl = (hcb->next_overlap)(hcb); + }; +} + +/* + * Purge overlap TCs and then insert the new entry to emulate itc ops. + * Notes: Only TC entry can purge and insert. + */ +void thash_purge_and_insert(thash_cb_t *hcb, thash_data_t *in) +{ + thash_data_t *ovl; + search_section_t sections; + +#ifdef XEN_DEBUGGER + vrr = (hcb->get_rr_fn)(hcb->vcpu,in->vadr); + if ( in->ps != vrr.ps || hcb->ht != THASH_TLB || !in->tc ) { + panic ("Oops, wrong call for purge_and_insert\n"); + return; + } +#endif + in->vadr = PAGEALIGN(in->vadr,in->ps); + in->ppn = PAGEALIGN(in->ppn, in->ps-12); + sections.tr = 0; + sections.tc = 1; + ovl = (hcb->find_overlap)(hcb, in->vadr, PSIZE(in->ps), + in->rid, in->cl, sections); + if(ovl) + (hcb->rem_hash)(hcb, ovl); +#ifdef XEN_DEBUGGER + ovl = (hcb->next_overlap)(hcb); + if ( ovl ) { + panic ("Oops, 2+ overlaps for purge_and_insert\n"); + return; + } +#endif + (hcb->ins_hash)(hcb, in, in->vadr); +} + +/* + * Purge all TCs or VHPT entries including those in Hash table. + * + */ + +// TODO: add sections. 
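thash_purge_entries_ex() is then just a drain loop over that iterator: find the first overlapping entry, remove it, ask for the next, until NULL comes back. The sketch below reproduces the shape of that loop driven through an ops table of function pointers, mirroring hcb->find_overlap / hcb->rem_hash / hcb->next_overlap; the table and its contents are hypothetical.

    /* Drain loop over a find/next iterator driven through an ops table. */
    #include <stdio.h>

    struct table;
    typedef struct {
        int  *(*find_first)(struct table *, int key);
        int  *(*find_next)(struct table *);
        void  (*remove)(struct table *, int *hit);
    } table_ops_t;

    typedef struct table {
        int slots[6];
        int cursor, key;
        table_ops_t ops;
    } table_t;

    static int *tbl_next(table_t *t)
    {
        while (t->cursor < 6) {
            int *s = &t->slots[t->cursor++];
            if (*s == t->key)
                return s;
        }
        return NULL;
    }
    static int *tbl_first(table_t *t, int key) { t->cursor = 0; t->key = key; return tbl_next(t); }
    static void tbl_remove(table_t *t, int *hit) { (void)t; *hit = 0; }  /* 0 marks an empty slot */

    static void purge_entries(table_t *t, int key)
    {
        int *ovl = t->ops.find_first(t, key);
        while (ovl != NULL) {
            t->ops.remove(t, ovl);
            ovl = t->ops.find_next(t);
        }
    }

    int main(void)
    {
        table_t t = { .slots = { 7, 2, 7, 4, 7, 6 },
                      .ops   = { tbl_first, tbl_next, tbl_remove } };
        purge_entries(&t, 7);
        for (int i = 0; i < 6; i++)
            printf("%d ", t.slots[i]);
        printf("\n");
        return 0;
    }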
+void thash_purge_all(thash_cb_t *hcb) +{ + thash_data_t *hash_table; + +#ifdef VTLB_DEBUG + extern u64 sanity_check; + static u64 statistics_before_purge_all=0; + if ( statistics_before_purge_all ) { + sanity_check = 1; + check_vtlb_sanity(hcb); + } +#endif + + hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz); + + for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) { + thash_rem_line(hcb, hash_table); + } +} + + +/* + * Lookup the hash table and its collision chain to find an entry + * covering this address rid:va or the entry. + * + * INPUT: + * in: TLB format for both VHPT & TLB. + */ +thash_data_t *vtlb_lookup(thash_cb_t *hcb, + thash_data_t *in) +{ + return vtlb_lookup_ex(hcb, in->rid, in->vadr, in->cl); +} + +thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb, + u64 rid, u64 va, + CACHE_LINE_TYPE cl) +{ + thash_data_t *hash_table, *cch; + u64 tag; + ia64_rr vrr; + + ASSERT ( hcb->ht == THASH_VTLB ); + + cch = __vtr_lookup(hcb, rid, va, cl);; + if ( cch ) return cch; + + vrr = (hcb->get_rr_fn)(hcb->vcpu,va); + hash_table = (hcb->hash_func)( hcb->pta,va, rid, vrr.ps); + + if ( INVALID_ENTRY(hcb, hash_table ) ) + return NULL; + + + for (cch=hash_table; cch; cch = cch->next) { + if ( __is_translated(cch, rid, va, cl) ) + return cch; + } + return NULL; +} + +/* + * Lock/Unlock TC if found. + * NOTES: Only the page in prefered size can be handled. + * return: + * 1: failure + * 0: success + */ +int thash_lock_tc(thash_cb_t *hcb, u64 va, u64 size, int rid, char cl, int lock) +{ + thash_data_t *ovl; + search_section_t sections; + + sections.tr = 1; + sections.tc = 1; + ovl = (hcb->find_overlap)(hcb, va, size, rid, cl, sections); + if ( ovl ) { + if ( !ovl->tc ) { +// panic("Oops, TR for lock\n"); + return 0; + } + else if ( lock ) { + if ( ovl->locked ) { + DPRINTK("Oops, already locked entry\n"); + } + ovl->locked = 1; + } + else if ( !lock ) { + if ( !ovl->locked ) { + DPRINTK("Oops, already unlocked entry\n"); + } + ovl->locked = 0; + } + return 0; + } + return 1; +} + +/* + * Notifier when TLB is deleted from hash table and its collision chain. + * NOTES: + * The typical situation is that TLB remove needs to inform + * VHPT to remove too. + * PARAS: + * 1: hcb is TLB object. + * 2: The format of entry is always in TLB. + * + */ +void tlb_remove_notifier(thash_cb_t *hcb, thash_data_t *entry) +{ + thash_cb_t *vhpt; + search_section_t s_sect; + + s_sect.v = 0; + thash_purge_entries(hcb->ts->vhpt, entry, s_sect); + machine_tlb_purge(entry->rid, entry->vadr, entry->ps); +} + +/* + * Initialize internal control data before service. 
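vtlb_lookup_ex() checks the fixed TR slots first and only then hashes into the table and walks the collision chain. Below is a simplified two-level lookup with placeholder types: a small array of pinned entries searched linearly, then a chained hash bucket.

    /* Two-level lookup: a fixed array of pinned entries, then a hashed collision chain. */
    #include <stdio.h>
    #include <stddef.h>

    #define NPINNED  4
    #define NBUCKETS 8

    typedef struct ent { unsigned long key; int valid; struct ent *next; } ent_t;

    static ent_t pinned[NPINNED];
    static ent_t buckets[NBUCKETS];

    static ent_t *lookup(unsigned long key)
    {
        int i;

        /* 1. fixed (TR-like) entries are searched linearly */
        for (i = 0; i < NPINNED; i++)
            if (pinned[i].valid && pinned[i].key == key)
                return &pinned[i];

        /* 2. otherwise hash and walk the collision chain */
        for (ent_t *e = &buckets[key % NBUCKETS]; e; e = e->next)
            if (e->valid && e->key == key)
                return e;

        return NULL;
    }

    int main(void)
    {
        pinned[0]             = (ent_t){ .key = 42, .valid = 1 };
        buckets[5 % NBUCKETS] = (ent_t){ .key = 5,  .valid = 1 };
        printf("%s %s %s\n",
               lookup(42) ? "hit" : "miss",
               lookup(5)  ? "hit" : "miss",
               lookup(9)  ? "hit" : "miss");
        return 0;
    }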
+ */ +void thash_init(thash_cb_t *hcb, u64 sz) +{ + thash_data_t *hash_table; + + cch_mem_init (hcb); + hcb->magic = THASH_CB_MAGIC; + hcb->pta.val = hcb->hash; + hcb->pta.vf = 1; + hcb->pta.ve = 1; + hcb->pta.size = sz; + hcb->get_rr_fn = vmmu_get_rr; + ASSERT ( hcb->hash_sz % sizeof(thash_data_t) == 0 ); + if ( hcb->ht == THASH_TLB ) { + hcb->remove_notifier = tlb_remove_notifier; + hcb->find_overlap = vtlb_find_overlap; + hcb->next_overlap = vtlb_next_overlap; + hcb->rem_hash = rem_vtlb; + hcb->ins_hash = vtlb_insert; + __init_tr(hcb); + } + else { + hcb->remove_notifier = NULL; + hcb->find_overlap = vhpt_find_overlap; + hcb->next_overlap = vhpt_next_overlap; + hcb->rem_hash = rem_thash; + hcb->ins_hash = vhpt_insert; + } + hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz); + + for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) { + INVALIDATE_HASH(hcb,hash_table); + } +} + +#ifdef VTLB_DEBUG +static u64 cch_length_statistics[MAX_CCH_LENGTH+1]; +u64 sanity_check=0; +u64 vtlb_chain_sanity(thash_cb_t *vtlb, thash_cb_t *vhpt, thash_data_t *hash) +{ + thash_data_t *cch; + thash_data_t *ovl; + search_section_t s_sect; + u64 num=0; + + s_sect.v = 0; + for (cch=hash; cch; cch=cch->next) { + ovl = thash_find_overlap(vhpt, cch, s_sect); + while ( ovl != NULL ) { + ovl->checked = 1; + ovl = (vhpt->next_overlap)(vhpt); + }; + num ++; + } + if ( num >= MAX_CCH_LENGTH ) { + cch_length_statistics[MAX_CCH_LENGTH] ++; + } + else { + cch_length_statistics[num] ++; + } + return num; +} + +void check_vtlb_sanity(thash_cb_t *vtlb) +{ +// struct pfn_info *page; + u64 hash_num, i, psr; + static u64 check_ok_num, check_fail_num,check_invalid; +// void *vb1, *vb2; + thash_data_t *hash, *cch; + thash_data_t *ovl; + search_section_t s_sect; + thash_cb_t *vhpt = vtlb->ts->vhpt; + u64 invalid_ratio; + + if ( sanity_check == 0 ) return; + sanity_check --; + s_sect.v = 0; +// page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0); +// if ( page == NULL ) { +// panic("No enough contiguous memory for init_domain_mm\n"); +// }; +// vb1 = page_to_virt(page); +// printf("Allocated page=%lp vbase=%lp\n", page, vb1); +// vb2 = vb1 + vtlb->hash_sz; + hash_num = vhpt->hash_sz / sizeof(thash_data_t); +// printf("vb2=%lp, size=%lx hash_num=%lx\n", vb2, vhpt->hash_sz, hash_num); + printf("vtlb=%lp, hash=%lp size=0x%lx; vhpt=%lp, hash=%lp size=0x%lx\n", + vtlb, vtlb->hash,vtlb->hash_sz, + vhpt, vhpt->hash, vhpt->hash_sz); + //memcpy(vb1, vtlb->hash, vtlb->hash_sz); + //memcpy(vb2, vhpt->hash, vhpt->hash_sz); + for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) { + cch_length_statistics[i] = 0; + } + + local_irq_save(psr); + + hash = vhpt->hash; + for (i=0; i < hash_num; i++) { + if ( !INVALID_ENTRY(vhpt, hash) ) { + for ( cch= hash; cch; cch=cch->next) { + cch->checked = 0; + } + } + hash ++; + } + printf("Done vhpt clear checked flag, hash_num=0x%lx\n", hash_num); + check_invalid = 0; + check_ok_num=0; + hash = vtlb->hash; + for ( i=0; i< hash_num; i++ ) { + if ( !INVALID_ENTRY(vtlb, hash) ) { + check_ok_num += vtlb_chain_sanity(vtlb, vhpt, hash); + } + else { + check_invalid++; + } + hash ++; + } + printf("Done vtlb entry check, hash=%lp\n", hash); + printf("check_ok_num = 0x%lx check_invalid=0x%lx\n", check_ok_num,check_invalid); + invalid_ratio = 1000*check_invalid / hash_num; + printf("%02ld.%01ld%% entries are invalid\n", + invalid_ratio/10, invalid_ratio % 10 ); + for (i=0; i<NDTRS; i++) { + ovl = thash_find_overlap(vhpt, &vtlb->ts->dtr[i], s_sect); + while ( ovl != 
NULL ) { + ovl->checked = 1; + ovl = (vhpt->next_overlap)(vhpt); + }; + } + printf("Done dTR\n"); + for (i=0; i<NITRS; i++) { + ovl = thash_find_overlap(vhpt, &vtlb->ts->itr[i], s_sect); + while ( ovl != NULL ) { + ovl->checked = 1; + ovl = (vhpt->next_overlap)(vhpt); + }; + } + printf("Done iTR\n"); + check_fail_num = 0; + check_invalid = 0; + check_ok_num=0; + hash = vhpt->hash; + for (i=0; i < hash_num; i++) { + if ( !INVALID_ENTRY(vhpt, hash) ) { + for ( cch= hash; cch; cch=cch->next) { + if ( !cch->checked ) { + printf ("!!!Hash=%lp cch=%lp not within vtlb\n", hash, cch); + check_fail_num ++; + } + else { + check_ok_num++; + } + } + } + else { + check_invalid ++; + } + hash ++; + } + local_irq_restore(psr); + printf("check_ok_num=0x%lx check_fail_num=0x%lx check_invalid=0x%lx\n", + check_ok_num, check_fail_num, check_invalid); + //memcpy(vtlb->hash, vb1, vtlb->hash_sz); + //memcpy(vhpt->hash, vb2, vhpt->hash_sz); + printf("The statistics of collision chain length is listed\n"); + for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) { + printf("CCH length=%02ld, chain number=%ld\n", i, cch_length_statistics[i]); + } +// free_domheap_pages(page, VCPU_TLB_ORDER); + printf("Done check_vtlb\n"); +} + +void dump_vtlb(thash_cb_t *vtlb) +{ + static u64 dump_vtlb=0; + thash_data_t *hash, *cch, *tr; + u64 hash_num,i; + + if ( dump_vtlb == 0 ) return; + dump_vtlb --; + hash_num = vtlb->hash_sz / sizeof(thash_data_t); + hash = vtlb->hash; + + printf("Dump vTC\n"); + for ( i = 0; i < hash_num; i++ ) { + if ( !INVALID_ENTRY(vtlb, hash) ) { + printf("VTLB at hash=%lp\n", hash); + for (cch=hash; cch; cch=cch->next) { + printf("Entry %lp va=%lx ps=%lx rid=%lx\n", + cch, cch->vadr, cch->ps, cch->rid); + } + } + hash ++; + } + printf("Dump vDTR\n"); + for (i=0; i<NDTRS; i++) { + tr = &DTR(vtlb,i); + printf("Entry %lp va=%lx ps=%lx rid=%lx\n", + tr, tr->vadr, tr->ps, tr->rid); + } + printf("Dump vITR\n"); + for (i=0; i<NITRS; i++) { + tr = &ITR(vtlb,i); + printf("Entry %lp va=%lx ps=%lx rid=%lx\n", + tr, tr->vadr, tr->ps, tr->rid); + } + printf("End of vTLB dump\n"); +} +#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/acpi.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/acpi.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,678 @@ +/* + * acpi.c - Architecture-Specific Low-Level ACPI Support + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@xxxxxxxxxxx> + * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * Copyright (C) 2000 Intel Corp. + * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@xxxxxxxxx> + * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx> + * Copyright (C) 2001 Jenna Hall <jenna.s.hall@xxxxxxxxx> + * Copyright (C) 2001 Takayoshi Kochi <t-kochi@xxxxxxxxxxxxx> + * Copyright (C) 2002 Erich Focht <efocht@xxxxxxxxxx> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/irq.h> +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/mmzone.h> +#include <asm/io.h> +//#include <asm/iosapic.h> +#include <asm/machvec.h> +#include <asm/page.h> +#include <asm/system.h> +#include <asm/numa.h> +#include <asm/sal.h> +//#include <asm/cyclone.h> + +#define BAD_MADT_ENTRY(entry, end) ( \ + (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ + ((acpi_table_entry_header *)entry)->length != sizeof(*entry)) + +#define PREFIX "ACPI: " + +void (*pm_idle) (void); +EXPORT_SYMBOL(pm_idle); +void (*pm_power_off) (void); + +unsigned char acpi_kbd_controller_present = 1; +unsigned char acpi_legacy_devices; + +const char * +acpi_get_sysname (void) +{ +/* #ifdef CONFIG_IA64_GENERIC */ + unsigned long rsdp_phys; + struct acpi20_table_rsdp *rsdp; + struct acpi_table_xsdt *xsdt; + struct acpi_table_header *hdr; + + rsdp_phys = acpi_find_rsdp(); + if (!rsdp_phys) { + printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n"); + return "dig"; + } + + rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys); + if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) { + printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); + return "dig"; + } + + xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address); + hdr = &xsdt->header; + if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) { + printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); + return "dig"; + } + + if (!strcmp(hdr->oem_id, "HP")) { + return "hpzx1"; + } + else if (!strcmp(hdr->oem_id, "SGI")) { + return "sn2"; + } + + return "dig"; +/* +#else +# if defined (CONFIG_IA64_HP_SIM) + return "hpsim"; +# elif defined (CONFIG_IA64_HP_ZX1) + return "hpzx1"; +# elif defined (CONFIG_IA64_SGI_SN2) + return "sn2"; +# elif defined (CONFIG_IA64_DIG) + return "dig"; +# else +# error Unknown platform. Fix acpi.c. +# endif +#endif +*/ +} + +#ifdef CONFIG_ACPI_BOOT + +#define ACPI_MAX_PLATFORM_INTERRUPTS 256 + +#if 0 +/* Array to record platform interrupt vectors for generic interrupt routing. */ +int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = { + [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1 +}; + +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; + +/* + * Interrupt routing API for device drivers. Provides interrupt vector for + * a generic platform event. Currently only CPEI is implemented. 
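The BAD_MADT_ENTRY() macro earlier in this file rejects a subtable if the pointer is NULL, if the entry would run past the end of the table, or if its self-declared length does not match the expected structure size. The same three checks appear below in a small standalone validator; the header layout is a simplified stand-in for acpi_table_entry_header, not the real ACPI definition.

    /* Subtable validation in the style of BAD_MADT_ENTRY(): NULL, overrun, wrong length. */
    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    typedef struct { uint8_t type; uint8_t length; } entry_header_t;
    typedef struct { entry_header_t header; uint16_t payload; } my_entry_t;

    static int bad_entry(const void *entry, uintptr_t end, size_t expected)
    {
        return !entry
            || (uintptr_t)entry + expected > end                     /* runs past the table */
            || ((const entry_header_t *)entry)->length != expected;  /* declared size wrong */
    }

    int main(void)
    {
        struct { my_entry_t e; uint8_t pad[12]; } table = { 0 };
        uintptr_t end = (uintptr_t)&table + sizeof(table);

        table.e.header.length = sizeof(my_entry_t);
        printf("ok entry:   %s\n", bad_entry(&table.e, end, sizeof(my_entry_t)) ? "bad" : "good");
        printf("truncated:  %s\n", bad_entry(&table.e, (uintptr_t)&table + 2,
                                             sizeof(my_entry_t)) ? "bad" : "good");
        table.e.header.length = 1;
        printf("wrong size: %s\n", bad_entry(&table.e, end, sizeof(my_entry_t)) ? "bad" : "good");
        return 0;
    }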
+ */ +int +acpi_request_vector (u32 int_type) +{ + int vector = -1; + + if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) { + /* corrected platform error interrupt */ + vector = platform_intr_list[int_type]; + } else + printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n"); + return vector; +} +#endif +char * +__acpi_map_table (unsigned long phys_addr, unsigned long size) +{ + return __va(phys_addr); +} + +/* -------------------------------------------------------------------------- + Boot-time Table Parsing + -------------------------------------------------------------------------- */ + +static int total_cpus __initdata; +static int available_cpus __initdata; +struct acpi_table_madt * acpi_madt __initdata; +static u8 has_8259; + +#if 0 +static int __init +acpi_parse_lapic_addr_ovr ( + acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_lapic_addr_ovr *lapic; + + lapic = (struct acpi_table_lapic_addr_ovr *) header; + + if (BAD_MADT_ENTRY(lapic, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + if (lapic->address) { + iounmap((void *) ipi_base_addr); + ipi_base_addr = (unsigned long) ioremap(lapic->address, 0); + } + return 0; +} + + +static int __init +acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_lsapic *lsapic; + + lsapic = (struct acpi_table_lsapic *) header; + + if (BAD_MADT_ENTRY(lsapic, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + printk(KERN_INFO "CPU %d (0x%04x)", total_cpus, (lsapic->id << 8) | lsapic->eid); + + if (!lsapic->flags.enabled) + printk(" disabled"); + else { + printk(" enabled"); +#ifdef CONFIG_SMP + smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid; + if (hard_smp_processor_id() + == (unsigned int) smp_boot_data.cpu_phys_id[available_cpus]) + printk(" (BSP)"); +#endif + ++available_cpus; + } + + printk("\n"); + + total_cpus++; + return 0; +} + + +static int __init +acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_lapic_nmi *lacpi_nmi; + + lacpi_nmi = (struct acpi_table_lapic_nmi*) header; + + if (BAD_MADT_ENTRY(lacpi_nmi, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + /* TBD: Support lapic_nmi entries */ + return 0; +} + + +static int __init +acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_iosapic *iosapic; + + iosapic = (struct acpi_table_iosapic *) header; + + if (BAD_MADT_ENTRY(iosapic, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + iosapic_init(iosapic->address, iosapic->global_irq_base); + + return 0; +} + + +static int __init +acpi_parse_plat_int_src ( + acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_plat_int_src *plintsrc; + int vector; + + plintsrc = (struct acpi_table_plat_int_src *) header; + + if (BAD_MADT_ENTRY(plintsrc, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + /* + * Get vector assignment for this interrupt, set attributes, + * and program the IOSAPIC routing table. + */ + vector = iosapic_register_platform_intr(plintsrc->type, + plintsrc->global_irq, + plintsrc->iosapic_vector, + plintsrc->eid, + plintsrc->id, + (plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + (plintsrc->flags.trigger == 1) ? 
IOSAPIC_EDGE : IOSAPIC_LEVEL); + + platform_intr_list[plintsrc->type] = vector; + return 0; +} + + +static int __init +acpi_parse_int_src_ovr ( + acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_int_src_ovr *p; + + p = (struct acpi_table_int_src_ovr *) header; + + if (BAD_MADT_ENTRY(p, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + iosapic_override_isa_irq(p->bus_irq, p->global_irq, + (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + (p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); + return 0; +} + + +static int __init +acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_nmi_src *nmi_src; + + nmi_src = (struct acpi_table_nmi_src*) header; + + if (BAD_MADT_ENTRY(nmi_src, end)) + return -EINVAL; + + acpi_table_print_madt_entry(header); + + /* TBD: Support nimsrc entries */ + return 0; +} +/* Hook from generic ACPI tables.c */ +void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && + (!strncmp(oem_table_id, "SERMOW", 6))){ + + /* Unfortunatly ITC_DRIFT is not yet part of the + * official SAL spec, so the ITC_DRIFT bit is not + * set by the BIOS on this hardware. + */ + sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; + + /*Start cyclone clock*/ + cyclone_setup(0); + } +} + +static int __init +acpi_parse_madt (unsigned long phys_addr, unsigned long size) +{ + if (!phys_addr || !size) + return -EINVAL; + + acpi_madt = (struct acpi_table_madt *) __va(phys_addr); + + /* remember the value for reference after free_initmem() */ +#ifdef CONFIG_ITANIUM + has_8259 = 1; /* Firmware on old Itanium systems is broken */ +#else + has_8259 = acpi_madt->flags.pcat_compat; +#endif + iosapic_system_init(has_8259); + + /* Get base address of IPI Message Block */ + + if (acpi_madt->lapic_address) + ipi_base_addr = (unsigned long) ioremap(acpi_madt->lapic_address, 0); + + printk(KERN_INFO PREFIX "Local APIC address 0x%lx\n", ipi_base_addr); + + acpi_madt_oem_check(acpi_madt->header.oem_id, + acpi_madt->header.oem_table_id); + + return 0; +} +#endif + +#ifdef CONFIG_ACPI_NUMA + +#undef SLIT_DEBUG + +#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32) + +static int __initdata srat_num_cpus; /* number of cpus */ +static u32 __initdata pxm_flag[PXM_FLAG_LEN]; +#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) +#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) +/* maps to convert between proximity domain and logical node ID */ +int __initdata pxm_to_nid_map[MAX_PXM_DOMAINS]; +int __initdata nid_to_pxm_map[MAX_NUMNODES]; +static struct acpi_table_slit __initdata *slit_table; + +/* + * ACPI 2.0 SLIT (System Locality Information Table) + * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf + */ +void __init +acpi_numa_slit_init (struct acpi_table_slit *slit) +{ + u32 len; + + len = sizeof(struct acpi_table_header) + 8 + + slit->localities * slit->localities; + if (slit->header.length != len) { + printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", + len, slit->header.length); + memset(numa_slit, 10, sizeof(numa_slit)); + return; + } + slit_table = slit; +} + +void __init +acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa) +{ + /* record this node in proximity bitmap */ + pxm_bit_set(pa->proximity_domain); + + node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid); + /* nid should be overridden as logical node id later */ + 
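acpi_numa_slit_init() validates the SLIT by recomputing its size as the table header plus 8 bytes (the locality count) plus localities-squared distance bytes, and the fixup code later reads the flattened N x N matrix with entry[i*localities + j]. That arithmetic in a self-contained form, with simplified field names (the 36-byte header size is the standard ACPI description header):

    /* SLIT size check and flattened NxN distance-matrix indexing. */
    #include <stdio.h>
    #include <stdint.h>

    #define HEADER_LEN 36                 /* standard ACPI table header */

    static int slit_len_ok(uint32_t declared_len, uint64_t localities)
    {
        uint64_t expected = HEADER_LEN + 8 + localities * localities;
        return declared_len == expected;
    }

    static uint8_t slit_distance(const uint8_t *entry, uint64_t localities,
                                 uint64_t from, uint64_t to)
    {
        return entry[from * localities + to];   /* row-major, one byte per node pair */
    }

    int main(void)
    {
        enum { N = 2 };
        uint8_t entry[N * N] = { 10, 20,        /* node0->node0, node0->node1 */
                                 20, 10 };      /* node1->node0, node1->node1 */

        printf("length ok: %d\n", slit_len_ok(HEADER_LEN + 8 + N * N, N));
        printf("distance(0,1) = %d\n", slit_distance(entry, N, 0, 1));
        return 0;
    }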
node_cpuid[srat_num_cpus].nid = pa->proximity_domain; + srat_num_cpus++; +} + +void __init +acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma) +{ + unsigned long paddr, size; + u8 pxm; + struct node_memblk_s *p, *q, *pend; + + pxm = ma->proximity_domain; + + /* fill node memory chunk structure */ + paddr = ma->base_addr_hi; + paddr = (paddr << 32) | ma->base_addr_lo; + size = ma->length_hi; + size = (size << 32) | ma->length_lo; + + /* Ignore disabled entries */ + if (!ma->flags.enabled) + return; + + /* record this node in proximity bitmap */ + pxm_bit_set(pxm); + + /* Insertion sort based on base address */ + pend = &node_memblk[num_node_memblks]; + for (p = &node_memblk[0]; p < pend; p++) { + if (paddr < p->start_paddr) + break; + } + if (p < pend) { + for (q = pend - 1; q >= p; q--) + *(q + 1) = *q; + } + p->start_paddr = paddr; + p->size = size; + p->nid = pxm; + num_node_memblks++; +} + +void __init +acpi_numa_arch_fixup (void) +{ + int i, j, node_from, node_to; + + /* If there's no SRAT, fix the phys_id */ + if (srat_num_cpus == 0) { + node_cpuid[0].phys_id = hard_smp_processor_id(); + return; + } + + /* calculate total number of nodes in system from PXM bitmap */ + numnodes = 0; /* init total nodes in system */ + + memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); + memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); + for (i = 0; i < MAX_PXM_DOMAINS; i++) { + if (pxm_bit_test(i)) { + pxm_to_nid_map[i] = numnodes; + node_set_online(numnodes); + nid_to_pxm_map[numnodes++] = i; + } + } + + /* set logical node id in memory chunk structure */ + for (i = 0; i < num_node_memblks; i++) + node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid]; + + /* assign memory bank numbers for each chunk on each node */ + for (i = 0; i < numnodes; i++) { + int bank; + + bank = 0; + for (j = 0; j < num_node_memblks; j++) + if (node_memblk[j].nid == i) + node_memblk[j].bank = bank++; + } + + /* set logical node id in cpu structure */ + for (i = 0; i < srat_num_cpus; i++) + node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid]; + + printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes); + printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks); + + if (!slit_table) return; + memset(numa_slit, -1, sizeof(numa_slit)); + for (i=0; i<slit_table->localities; i++) { + if (!pxm_bit_test(i)) + continue; + node_from = pxm_to_nid_map[i]; + for (j=0; j<slit_table->localities; j++) { + if (!pxm_bit_test(j)) + continue; + node_to = pxm_to_nid_map[j]; + node_distance(node_from, node_to) = + slit_table->entry[i*slit_table->localities + j]; + } + } + +#ifdef SLIT_DEBUG + printk("ACPI 2.0 SLIT locality table:\n"); + for (i = 0; i < numnodes; i++) { + for (j = 0; j < numnodes; j++) + printk("%03d ", node_distance(i,j)); + printk("\n"); + } +#endif +} +#endif /* CONFIG_ACPI_NUMA */ + +#if 0 +unsigned int +acpi_register_gsi (u32 gsi, int polarity, int trigger) +{ + return acpi_register_irq(gsi, polarity, trigger); +} +EXPORT_SYMBOL(acpi_register_gsi); +static int __init +acpi_parse_fadt (unsigned long phys_addr, unsigned long size) +{ + struct acpi_table_header *fadt_header; + struct fadt_descriptor_rev2 *fadt; + + if (!phys_addr || !size) + return -EINVAL; + + fadt_header = (struct acpi_table_header *) __va(phys_addr); + if (fadt_header->revision != 3) + return -ENODEV; /* Only deal with ACPI 2.0 FADT */ + + fadt = (struct fadt_descriptor_rev2 *) fadt_header; + + if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER)) + acpi_kbd_controller_present = 0; + + if 
(fadt->iapc_boot_arch & BAF_LEGACY_DEVICES) + acpi_legacy_devices = 1; + + acpi_register_gsi(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE); + return 0; +} +#endif + +unsigned long __init +acpi_find_rsdp (void) +{ + unsigned long rsdp_phys = 0; + + if (efi.acpi20) + rsdp_phys = __pa(efi.acpi20); + else if (efi.acpi) + printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n"); + return rsdp_phys; +} + +#if 0 +int __init +acpi_boot_init (void) +{ + + /* + * MADT + * ---- + * Parse the Multiple APIC Description Table (MADT), if exists. + * Note that this table provides platform SMP configuration + * information -- the successor to MPS tables. + */ + + if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) { + printk(KERN_ERR PREFIX "Can't find MADT\n"); + goto skip_madt; + } + + /* Local APIC */ + + if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1) + printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n"); + + if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); + + /* I/O APIC */ + + if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) + printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n"); + + /* System-Level Interrupt Routing */ + + if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0) + printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); + skip_madt: + + /* + * FADT says whether a legacy keyboard controller is present. + * The FADT also contains an SCI_INT line, by which the system + * gets interrupts such as power and sleep buttons. If it's not + * on a Legacy interrupt, it needs to be setup. + */ + if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1) + printk(KERN_ERR PREFIX "Can't find FADT\n"); + +#ifdef CONFIG_SMP + if (available_cpus == 0) { + printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); + printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id()); + smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id(); + available_cpus = 1; /* We've got at least one of these, no? 
*/ + } + smp_boot_data.cpu_count = available_cpus; + + smp_build_cpu_map(); +# ifdef CONFIG_ACPI_NUMA + if (srat_num_cpus == 0) { + int cpu, i = 1; + for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) + if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id()) + node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu]; + } + build_cpu_to_node_map(); +# endif +#endif + /* Make boot-up look pretty */ + printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); + return 0; +} +int +acpi_gsi_to_irq (u32 gsi, unsigned int *irq) +{ + int vector; + + if (has_8259 && gsi < 16) + *irq = isa_irq_to_vector(gsi); + else { + vector = gsi_to_vector(gsi); + if (vector == -1) + return -1; + + *irq = vector; + } + return 0; +} + +int +acpi_register_irq (u32 gsi, u32 polarity, u32 trigger) +{ + if (has_8259 && gsi < 16) + return isa_irq_to_vector(gsi); + + return iosapic_register_intr(gsi, + (polarity == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + (trigger == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); +} +EXPORT_SYMBOL(acpi_register_irq); +#endif +#endif /* CONFIG_ACPI_BOOT */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/dom0_ops.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/dom0_ops.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,237 @@ +/****************************************************************************** + * Arch-specific dom0_ops.c + * + * Process command requests from domain-0 guest OS. + * + * Copyright (c) 2002, K A Fraser + */ + +#include <xen/config.h> +#include <xen/types.h> +#include <xen/lib.h> +#include <xen/mm.h> +#include <public/dom0_ops.h> +#include <xen/sched.h> +#include <xen/event.h> +#include <asm/pdb.h> +#include <xen/trace.h> +#include <xen/console.h> +#include <public/sched_ctl.h> + +long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op) +{ + long ret = 0; + + if ( !IS_PRIV(current->domain) ) + return -EPERM; + + switch ( op->cmd ) + { + case DOM0_GETPAGEFRAMEINFO: + { + struct pfn_info *page; + unsigned long pfn = op->u.getpageframeinfo.pfn; + domid_t dom = op->u.getpageframeinfo.domain; + struct domain *d; + + ret = -EINVAL; + + if ( unlikely(pfn >= max_page) || + unlikely((d = find_domain_by_id(dom)) == NULL) ) + break; + + page = &frame_table[pfn]; + + if ( likely(get_page(page, d)) ) + { + ret = 0; + + op->u.getpageframeinfo.type = NOTAB; + + if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) + { + switch ( page->u.inuse.type_info & PGT_type_mask ) + { + default: + panic("No such page type\n"); + break; + } + } + + put_page(page); + } + + put_domain(d); + + copy_to_user(u_dom0_op, op, sizeof(*op)); + } + break; + + case DOM0_GETPAGEFRAMEINFO2: + { +#define GPF2_BATCH 128 + int n,j; + int num = op->u.getpageframeinfo2.num; + domid_t dom = op->u.getpageframeinfo2.domain; + unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array; + struct domain *d; + unsigned long *l_arr; + ret = -ESRCH; + + if ( unlikely((d = find_domain_by_id(dom)) == NULL) ) + break; + + if ( unlikely(num > 1024) ) + { + ret = -E2BIG; + break; + } + + l_arr = (unsigned long *)alloc_xenheap_page(); + + ret = 0; + for( n = 0; n < num; ) + { + int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n); + + if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) ) + { + ret = -EINVAL; + break; + } + + for( j = 0; j < k; j++ ) + { + struct pfn_info *page; + unsigned long mfn = l_arr[j]; + + if ( unlikely(mfn >= max_page) ) + goto e2_err; + + page = &frame_table[mfn]; + + if ( likely(get_page(page, d)) ) + { + unsigned 
long type = 0; + + switch( page->u.inuse.type_info & PGT_type_mask ) + { + default: + panic("No such page type\n"); + break; + } + + if ( page->u.inuse.type_info & PGT_pinned ) + type |= LPINTAB; + l_arr[j] |= type; + put_page(page); + } + else + { + e2_err: + l_arr[j] |= XTAB; + } + + } + + if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) ) + { + ret = -EINVAL; + break; + } + + n += j; + } + + free_xenheap_page((unsigned long)l_arr); + + put_domain(d); + } + break; +#ifndef CONFIG_VTI + /* + * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 - + * it actually allocates and maps pages. + */ + case DOM0_GETMEMLIST: + { + unsigned long i; + struct domain *d = find_domain_by_id(op->u.getmemlist.domain); + unsigned long start_page = op->u.getmemlist.max_pfns >> 32; + unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff; + unsigned long pfn; + unsigned long *buffer = op->u.getmemlist.buffer; + struct page *page; + + ret = -EINVAL; + if ( d != NULL ) + { + ret = 0; + + for ( i = start_page; i < (start_page + nr_pages); i++ ) + { + page = map_new_domain_page(d, i << PAGE_SHIFT); + if ( page == NULL ) + { + ret = -ENOMEM; + break; + } + pfn = page_to_pfn(page); + if ( put_user(pfn, buffer) ) + { + ret = -EFAULT; + break; + } + buffer++; + } + + op->u.getmemlist.num_pfns = i - start_page; + copy_to_user(u_dom0_op, op, sizeof(*op)); + + put_domain(d); + } + } + break; +#else + case DOM0_GETMEMLIST: + { + int i; + struct domain *d = find_domain_by_id(op->u.getmemlist.domain); + unsigned long max_pfns = op->u.getmemlist.max_pfns; + unsigned long pfn; + unsigned long *buffer = op->u.getmemlist.buffer; + struct list_head *list_ent; + + ret = -EINVAL; + if (!d) { + ret = 0; + + spin_lock(&d->page_alloc_lock); + list_ent = d->page_list.next; + for (i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++) { + pfn = list_entry(list_ent, struct pfn_info, list) - + frame_table; + if (put_user(pfn, buffer)) { + ret = -EFAULT; + break; + } + buffer++; + list_ent = frame_table[pfn].list.next; + } + spin_unlock(&d->page_alloc_lock); + + op->u.getmemlist.num_pfns = i; + copy_to_user(u_dom0_op, op, sizeof(*op)); + + put_domain(d); + } + } + break; +#endif // CONFIG_VTI + default: + ret = -ENOSYS; + + } + + return ret; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/dom_fw.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/dom_fw.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,688 @@ +/* + * Xen domain firmware emulation support + * Copyright (C) 2004 Hewlett-Packard Co. + * Dan Magenheimer (dan.magenheimer@xxxxxx) + * + */ + +#include <xen/config.h> +#include <asm/system.h> +#include <asm/pgalloc.h> + +#include <linux/efi.h> +#include <asm/io.h> +#include <asm/pal.h> +#include <asm/sal.h> +#include <xen/acpi.h> + +#include <asm/dom_fw.h> + +struct ia64_boot_param *dom_fw_init(struct domain *, char *,int,char *,int); +extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr); +extern struct domain *dom0; +extern unsigned long dom0_start; + +extern unsigned long running_on_sim; + + +unsigned long dom_fw_base_mpa = -1; +unsigned long imva_fw_base = -1; + +// return domain (meta)physical address for a given imva +// this function is a call-back from dom_fw_init +unsigned long dom_pa(unsigned long imva) +{ + if (dom_fw_base_mpa == -1 || imva_fw_base == -1) { + printf("dom_pa: uninitialized! (spinning...)\n"); + while(1); + } + if (imva - imva_fw_base > PAGE_SIZE) { + printf("dom_pa: bad offset! 
imva=%p, imva_fw_base=%p (spinning...)\n",imva,imva_fw_base); + while(1); + } + return dom_fw_base_mpa + (imva - imva_fw_base); +} + +// builds a hypercall bundle at domain physical address +void dom_efi_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall) +{ + unsigned long imva; + + if (d == dom0) paddr += dom0_start; + imva = domain_mpa_to_imva(d,paddr); + build_hypercall_bundle(imva,d->arch.breakimm,hypercall,1); +} + + +// builds a hypercall bundle at domain physical address +void dom_fw_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall,unsigned long ret) +{ + unsigned long imva; + + if (d == dom0) paddr += dom0_start; + imva = domain_mpa_to_imva(d,paddr); + build_hypercall_bundle(imva,d->arch.breakimm,hypercall,ret); +} + + +// FIXME: This is really a hack: Forcing the boot parameter block +// at domain mpaddr 0 page, then grabbing only the low bits of the +// Xen imva, which is the offset into the page +unsigned long dom_fw_setup(struct domain *d, char *args, int arglen) +{ + struct ia64_boot_param *bp; + + dom_fw_base_mpa = 0; + if (d == dom0) dom_fw_base_mpa += dom0_start; + imva_fw_base = domain_mpa_to_imva(d,dom_fw_base_mpa); + bp = dom_fw_init(d,args,arglen,imva_fw_base,PAGE_SIZE); + return dom_pa((unsigned long)bp); +} + + +/* the following heavily leveraged from linux/arch/ia64/hp/sim/fw-emu.c */ + +#define MB (1024*1024UL) + +#define NUM_EFI_SYS_TABLES 6 +#define PASS_THRU_IOPORT_SPACE +#ifdef PASS_THRU_IOPORT_SPACE +# define NUM_MEM_DESCS 4 +#else +# define NUM_MEM_DESCS 3 +#endif + + +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + +/* Compute the `struct tm' representation of *T, + offset OFFSET seconds east of UTC, + and store year, yday, mon, mday, wday, hour, min, sec into *TP. + Return nonzero if successful. */ +int +offtime (unsigned long t, efi_time_t *tp) +{ + const unsigned short int __mon_yday[2][13] = + { + /* Normal years. */ + { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + /* Leap years. */ + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } + }; + long int days, rem, y; + const unsigned short int *ip; + + days = t / SECS_PER_DAY; + rem = t % SECS_PER_DAY; + while (rem < 0) { + rem += SECS_PER_DAY; + --days; + } + while (rem >= SECS_PER_DAY) { + rem -= SECS_PER_DAY; + ++days; + } + tp->hour = rem / SECS_PER_HOUR; + rem %= SECS_PER_HOUR; + tp->minute = rem / 60; + tp->second = rem % 60; + /* January 1, 1970 was a Thursday. */ + y = 1970; + +# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) +# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) +# define __isleap(year) \ + ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) + + while (days < 0 || days >= (__isleap (y) ? 366 : 365)) { + /* Guess a corrected year, assuming 365 days per year. */ + long int yg = y + days / 365 - (days % 365 < 0); + + /* Adjust DAYS and Y to match the guessed year. */ + days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1) + - LEAPS_THRU_END_OF (y - 1)); + y = yg; + } + tp->year = y; + ip = __mon_yday[__isleap(y)]; + for (y = 11; days < (long int) ip[y]; --y) + continue; + days -= ip[y]; + tp->month = y + 1; + tp->day = days + 1; + return 1; +} + +extern struct ia64_pal_retval pal_emulator_static (unsigned long); + +/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. 
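offtime() converts a raw seconds count into an EFI date by splitting off the time of day, walking the year forward with a leap-year-corrected guess, and finally indexing a cumulative month-length table. A trimmed, standalone version of the same algorithm is below (plain int fields instead of efi_time_t), checked against a hand-picked timestamp.

    /* Seconds-since-1970 to calendar date, following the offtime() algorithm. */
    #include <stdio.h>

    #define SECS_PER_DAY  (24 * 60 * 60)
    #define ISLEAP(y)     ((y) % 4 == 0 && ((y) % 100 != 0 || (y) % 400 == 0))
    #define LEAPS_THRU(y) ((y) / 4 - (y) / 100 + (y) / 400)

    static const int mon_yday[2][13] = {
        { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },   /* normal */
        { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 },   /* leap   */
    };

    static void split_time(long t, int *year, int *month, int *day,
                           int *hour, int *minute, int *second)
    {
        long days = t / SECS_PER_DAY, rem = t % SECS_PER_DAY, y = 1970;

        *hour = rem / 3600;  *minute = (rem % 3600) / 60;  *second = rem % 60;

        while (days < 0 || days >= (ISLEAP(y) ? 366 : 365)) {
            long yg = y + days / 365 - (days % 365 < 0);        /* guessed year */
            days -= (yg - y) * 365 + LEAPS_THRU(yg - 1) - LEAPS_THRU(y - 1);
            y = yg;
        }
        *year = y;

        const int *ip = mon_yday[ISLEAP(y)];
        int m = 11;
        while (days < ip[m])
            m--;
        *month = m + 1;
        *day   = days - ip[m] + 1;
    }

    int main(void)
    {
        int Y, M, D, h, m, s;
        split_time(1125600388L, &Y, &M, &D, &h, &m, &s);   /* 2005-09-01 18:46:28 UTC */
        printf("%04d-%02d-%02d %02d:%02d:%02d\n", Y, M, D, h, m, s);
        return 0;
    }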
*/ + +#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3) + +#define REG_OFFSET(addr) (0x00000000000000FF & (addr)) +#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr)) +#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr)) + +#ifndef XEN +static efi_status_t +fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc) +{ +#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) + struct { + int tv_sec; /* must be 32bits to work */ + int tv_usec; + } tv32bits; + + ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD); + + memset(tm, 0, sizeof(*tm)); + offtime(tv32bits.tv_sec, tm); + + if (tc) + memset(tc, 0, sizeof(*tc)); +#else +# error Not implemented yet... +#endif + return EFI_SUCCESS; +} + +static void +efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data) +{ +#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) + ssc(status, 0, 0, 0, SSC_EXIT); +#else +# error Not implemented yet... +#endif +} + +static efi_status_t +efi_unimplemented (void) +{ + return EFI_UNSUPPORTED; +} +#endif /* !XEN */ + +struct sal_ret_values +sal_emulator (long index, unsigned long in1, unsigned long in2, + unsigned long in3, unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + long r9 = 0; + long r10 = 0; + long r11 = 0; + long status; + + /* + * Don't do a "switch" here since that gives us code that + * isn't self-relocatable. + */ + status = 0; + if (index == SAL_FREQ_BASE) { + if (!running_on_sim) + status = ia64_sal_freq_base(in1,&r9,&r10); + else switch (in1) { + case SAL_FREQ_BASE_PLATFORM: + r9 = 200000000; + break; + + case SAL_FREQ_BASE_INTERVAL_TIMER: + r9 = 700000000; + break; + + case SAL_FREQ_BASE_REALTIME_CLOCK: + r9 = 1; + break; + + default: + status = -1; + break; + } + } else if (index == SAL_PCI_CONFIG_READ) { + if (current->domain == dom0) { + u64 value; + // note that args 2&3 are swapped!! + status = ia64_sal_pci_config_read(in1,in3,in2,&value); + r9 = value; + } + else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_READ\n"); + } else if (index == SAL_PCI_CONFIG_WRITE) { + if (current->domain == dom0) { + if (((in1 & ~0xffffffffUL) && (in4 == 0)) || + (in4 > 1) || + (in2 > 8) || (in2 & (in2-1))) + printf("*** SAL_PCI_CONF_WRITE?!?(adr=%p,typ=%p,sz=%p,val=%p)\n",in1,in4,in2,in3); + // note that args are in a different order!! + status = ia64_sal_pci_config_write(in1,in4,in2,in3); + } + else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_WRITE\n"); + } else if (index == SAL_SET_VECTORS) { + printf("*** CALLED SAL_SET_VECTORS. IGNORED...\n"); + } else if (index == SAL_GET_STATE_INFO) { + printf("*** CALLED SAL_GET_STATE_INFO. IGNORED...\n"); + } else if (index == SAL_GET_STATE_INFO_SIZE) { + printf("*** CALLED SAL_GET_STATE_INFO_SIZE. IGNORED...\n"); + } else if (index == SAL_CLEAR_STATE_INFO) { + printf("*** CALLED SAL_CLEAR_STATE_INFO. IGNORED...\n"); + } else if (index == SAL_MC_RENDEZ) { + printf("*** CALLED SAL_MC_RENDEZ. IGNORED...\n"); + } else if (index == SAL_MC_SET_PARAMS) { + printf("*** CALLED SAL_MC_SET_PARAMS. IGNORED...\n"); + } else if (index == SAL_CACHE_FLUSH) { + printf("*** CALLED SAL_CACHE_FLUSH. IGNORED...\n"); + } else if (index == SAL_CACHE_INIT) { + printf("*** CALLED SAL_CACHE_INIT. IGNORED...\n"); + } else if (index == SAL_UPDATE_PAL) { + printf("*** CALLED SAL_UPDATE_PAL. IGNORED...\n"); + } else { + printf("*** CALLED SAL_ WITH UNKNOWN INDEX. 
IGNORED...\n"); + status = -1; + } + return ((struct sal_ret_values) {status, r9, r10, r11}); +} + +struct ia64_pal_retval +xen_pal_emulator(unsigned long index, unsigned long in1, + unsigned long in2, unsigned long in3) +{ + long r9 = 0; + long r10 = 0; + long r11 = 0; + long status = -1; + +#define USE_PAL_EMULATOR +#ifdef USE_PAL_EMULATOR + return pal_emulator_static(index); +#endif + if (running_on_sim) return pal_emulator_static(index); + if (index >= PAL_COPY_PAL) { + printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n", + index); + } + else switch (index) { + case PAL_MEM_ATTRIB: + status = ia64_pal_mem_attrib(&r9); + break; + case PAL_FREQ_BASE: + status = ia64_pal_freq_base(&r9); + break; + case PAL_PROC_GET_FEATURES: + status = ia64_pal_proc_get_features(&r9,&r10,&r11); + break; + case PAL_BUS_GET_FEATURES: + status = ia64_pal_bus_get_features(&r9,&r10,&r11); + break; + case PAL_FREQ_RATIOS: + status = ia64_pal_freq_ratios(&r9,&r10,&r11); + break; + case PAL_PTCE_INFO: + { + // return hard-coded xen-specific values because ptc.e + // is emulated on xen to always flush everything + // these values result in only one ptc.e instruction + status = 0; r9 = 0; r10 = (1L << 32) | 1L; r11 = 0; + } + break; + case PAL_VERSION: + status = ia64_pal_version(&r9,&r10); + break; + case PAL_VM_PAGE_SIZE: + status = ia64_pal_vm_page_size(&r9,&r10); + break; + case PAL_DEBUG_INFO: + status = ia64_pal_debug_info(&r9,&r10); + break; + case PAL_CACHE_SUMMARY: + status = ia64_pal_cache_summary(&r9,&r10); + break; + case PAL_VM_SUMMARY: + // FIXME: what should xen return for these, figure out later + // For now, linux does the right thing if pal call fails + // In particular, rid_size must be set properly! + //status = ia64_pal_vm_summary(&r9,&r10); + break; + case PAL_RSE_INFO: + status = ia64_pal_rse_info(&r9,&r10); + break; + case PAL_VM_INFO: + status = ia64_pal_vm_info(in1,in2,&r9,&r10); + break; + case PAL_REGISTER_INFO: + status = ia64_pal_register_info(in1,&r9,&r10); + break; + case PAL_CACHE_FLUSH: + /* FIXME */ + printk("PAL_CACHE_FLUSH NOT IMPLEMENTED!\n"); + BUG(); + break; + case PAL_PERF_MON_INFO: + { + unsigned long pm_buffer[16]; + int i; + status = ia64_pal_perf_mon_info(pm_buffer,&r9); + if (status != 0) { + while(1) + printk("PAL_PERF_MON_INFO fails ret=%d\n",status); + break; + } + if (copy_to_user((void __user *)in1,pm_buffer,128)) { + while(1) + printk("xen_pal_emulator: PAL_PERF_MON_INFO " + "can't copy to user!!!!\n"); + status = -1; + break; + } + } + break; + case PAL_CACHE_INFO: + { + pal_cache_config_info_t ci; + status = ia64_pal_cache_config_info(in1,in2,&ci); + if (status != 0) break; + r9 = ci.pcci_info_1.pcci1_data; + r10 = ci.pcci_info_2.pcci2_data; + } + break; + case PAL_VM_TR_READ: /* FIXME: vcpu_get_tr?? */ + printk("PAL_VM_TR_READ NOT IMPLEMENTED, IGNORED!\n"); + break; + case PAL_HALT_INFO: /* inappropriate info for guest? 
*/ + printk("PAL_HALT_INFO NOT IMPLEMENTED, IGNORED!\n"); + break; + default: + printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n", + index); + break; + } + return ((struct ia64_pal_retval) {status, r9, r10, r11}); +} + +#define NFUNCPTRS 20 + +void print_md(efi_memory_desc_t *md) +{ +#if 1 + printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", + md->type, md->attribute, md->phys_addr, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), + md->num_pages >> (20 - EFI_PAGE_SHIFT)); +#endif +} + +#define LSAPIC_NUM 16 // TEMP +static u32 lsapic_flag=1; + +/* Provide only one LP to guest */ +static int +acpi_update_lsapic (acpi_table_entry_header *header) +{ + struct acpi_table_lsapic *lsapic; + + lsapic = (struct acpi_table_lsapic *) header; + if (!lsapic) + return -EINVAL; + + if (lsapic->flags.enabled && lsapic_flag) { + printk("enable lsapic entry: 0x%lx\n", (u64)lsapic); + lsapic_flag = 0; /* disable all the following processros */ + } else if (lsapic->flags.enabled) { + printk("DISABLE lsapic entry: 0x%lx\n", (u64)lsapic); + lsapic->flags.enabled = 0; + } else + printk("lsapic entry is already disabled: 0x%lx\n", (u64)lsapic); + + return 0; +} + +static int +acpi_update_madt_checksum (unsigned long phys_addr, unsigned long size) +{ + u8 checksum=0; + u8* ptr; + int len; + struct acpi_table_madt* acpi_madt; + + if (!phys_addr || !size) + return -EINVAL; + + acpi_madt = (struct acpi_table_madt *) __va(phys_addr); + acpi_madt->header.checksum=0; + + /* re-calculate MADT checksum */ + ptr = (u8*)acpi_madt; + len = acpi_madt->header.length; + while (len>0){ + checksum = (u8)( checksum + (*ptr++) ); + len--; + } + acpi_madt->header.checksum = 0x0 - checksum; + + return 0; +} + +/* base is physical address of acpi table */ +void touch_acpi_table(void) +{ + u64 count = 0; + count = acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, NR_CPUS); + if ( count < 1) + printk("Error parsing MADT - no LAPIC entires\n"); + printk("Total %d lsapic entry\n", count); + acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum); + + return; +} + + +struct ia64_boot_param * +dom_fw_init (struct domain *d, char *args, int arglen, char *fw_mem, int fw_mem_size) +{ + efi_system_table_t *efi_systab; + efi_runtime_services_t *efi_runtime; + efi_config_table_t *efi_tables; + struct ia64_sal_systab *sal_systab; + efi_memory_desc_t *efi_memmap, *md; + unsigned long *pal_desc, *sal_desc; + struct ia64_sal_desc_entry_point *sal_ed; + struct ia64_boot_param *bp; + unsigned long *pfn; + unsigned char checksum = 0; + char *cp, *cmd_line, *fw_vendor; + int i = 0; + unsigned long maxmem = d->max_pages * PAGE_SIZE; + unsigned long start_mpaddr = ((d==dom0)?dom0_start:0); + +# define MAKE_MD(typ, attr, start, end, abs) \ + do { \ + md = efi_memmap + i++; \ + md->type = typ; \ + md->pad = 0; \ + md->phys_addr = abs ? start : start_mpaddr + start; \ + md->virt_addr = 0; \ + md->num_pages = (end - start) >> 12; \ + md->attribute = attr; \ + print_md(md); \ + } while (0) + +/* FIXME: should check size but for now we have a whole MB to play with. + And if stealing code from fw-emu.c, watch out for new fw_vendor on the end! 
+ if (fw_mem_size < sizeof(fw_mem_proto)) { + printf("sys_fw_init: insufficient space for fw_mem\n"); + return 0; + } +*/ + memset(fw_mem, 0, fw_mem_size); + +#ifdef XEN +#else + pal_desc = (unsigned long *) &pal_emulator_static; + sal_desc = (unsigned long *) &sal_emulator; +#endif + + cp = fw_mem; + efi_systab = (void *) cp; cp += sizeof(*efi_systab); + efi_runtime = (void *) cp; cp += sizeof(*efi_runtime); + efi_tables = (void *) cp; cp += NUM_EFI_SYS_TABLES * sizeof(*efi_tables); + sal_systab = (void *) cp; cp += sizeof(*sal_systab); + sal_ed = (void *) cp; cp += sizeof(*sal_ed); + efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap); + bp = (void *) cp; cp += sizeof(*bp); + pfn = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(pfn); + cmd_line = (void *) cp; + + if (args) { + if (arglen >= 1024) + arglen = 1023; + memcpy(cmd_line, args, arglen); + } else { + arglen = 0; + } + cmd_line[arglen] = '\0'; + + memset(efi_systab, 0, sizeof(efi_systab)); + efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; + efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; + efi_systab->hdr.headersize = sizeof(efi_systab->hdr); + cp = fw_vendor = &cmd_line[arglen] + (2-(arglen&1)); // round to 16-bit boundary +#define FW_VENDOR "X\0e\0n\0/\0i\0a\0\066\0\064\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + cp += sizeof(FW_VENDOR) + (8-((unsigned long)cp & 7)); // round to 64-bit boundary + + memcpy(fw_vendor,FW_VENDOR,sizeof(FW_VENDOR)); + efi_systab->fw_vendor = dom_pa(fw_vendor); + + efi_systab->fw_revision = 1; + efi_systab->runtime = (void *) dom_pa(efi_runtime); + efi_systab->nr_tables = NUM_EFI_SYS_TABLES; + efi_systab->tables = dom_pa(efi_tables); + + efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; + efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; + efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); +#define EFI_HYPERCALL_PATCH(tgt,call) do { \ + dom_efi_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call); \ + tgt = dom_pa(pfn); \ + *pfn++ = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \ + *pfn++ = 0; \ + } while (0) + + EFI_HYPERCALL_PATCH(efi_runtime->get_time,EFI_GET_TIME); + EFI_HYPERCALL_PATCH(efi_runtime->set_time,EFI_SET_TIME); + EFI_HYPERCALL_PATCH(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME); + EFI_HYPERCALL_PATCH(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME); + EFI_HYPERCALL_PATCH(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP); + EFI_HYPERCALL_PATCH(efi_runtime->get_variable,EFI_GET_VARIABLE); + EFI_HYPERCALL_PATCH(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE); + EFI_HYPERCALL_PATCH(efi_runtime->set_variable,EFI_SET_VARIABLE); + EFI_HYPERCALL_PATCH(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT); + EFI_HYPERCALL_PATCH(efi_runtime->reset_system,EFI_RESET_SYSTEM); + + efi_tables[0].guid = SAL_SYSTEM_TABLE_GUID; + efi_tables[0].table = dom_pa(sal_systab); + for (i = 1; i < NUM_EFI_SYS_TABLES; i++) { + efi_tables[i].guid = NULL_GUID; + efi_tables[i].table = 0; + } + if (d == dom0) { + printf("Domain0 EFI passthrough:"); + i = 1; + if (efi.mps) { + efi_tables[i].guid = MPS_TABLE_GUID; + efi_tables[i].table = __pa(efi.mps); + printf(" MPS=%0xlx",efi_tables[i].table); + i++; + } + + touch_acpi_table(); + + if (efi.acpi20) { + efi_tables[i].guid = ACPI_20_TABLE_GUID; + efi_tables[i].table = __pa(efi.acpi20); + printf(" ACPI 2.0=%0xlx",efi_tables[i].table); + i++; + } + if (efi.acpi) { + efi_tables[i].guid = ACPI_TABLE_GUID; + efi_tables[i].table = __pa(efi.acpi); + printf(" 
ACPI=%0xlx",efi_tables[i].table); + i++; + } + if (efi.smbios) { + efi_tables[i].guid = SMBIOS_TABLE_GUID; + efi_tables[i].table = __pa(efi.smbios); + printf(" SMBIOS=%0xlx",efi_tables[i].table); + i++; + } + if (efi.hcdp) { + efi_tables[i].guid = HCDP_TABLE_GUID; + efi_tables[i].table = __pa(efi.hcdp); + printf(" HCDP=%0xlx",efi_tables[i].table); + i++; + } + printf("\n"); + } + + /* fill in the SAL system table: */ + memcpy(sal_systab->signature, "SST_", 4); + sal_systab->size = sizeof(*sal_systab); + sal_systab->sal_rev_minor = 1; + sal_systab->sal_rev_major = 0; + sal_systab->entry_count = 1; + + strcpy(sal_systab->oem_id, "Xen/ia64"); + strcpy(sal_systab->product_id, "Xen/ia64"); + + /* fill in an entry point: */ + sal_ed->type = SAL_DESC_ENTRY_POINT; +#define FW_HYPERCALL_PATCH(tgt,call,ret) do { \ + dom_fw_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call,ret); \ + tgt = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \ + } while (0) + FW_HYPERCALL_PATCH(sal_ed->pal_proc,PAL_CALL,0); + FW_HYPERCALL_PATCH(sal_ed->sal_proc,SAL_CALL,1); + sal_ed->gp = 0; // will be ignored + + for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp) + checksum += *cp; + + sal_systab->checksum = -checksum; + + /* simulate 1MB free memory at physical address zero */ + i = 0; + MAKE_MD(EFI_BOOT_SERVICES_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0); + /* hypercall patches live here, masquerade as reserved PAL memory */ + MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0); + MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 0); +#ifdef PASS_THRU_IOPORT_SPACE + if (d == dom0 && !running_on_sim) { + /* pass through the I/O port space */ + efi_memory_desc_t *efi_get_io_md(void); + efi_memory_desc_t *ia64_efi_io_md = efi_get_io_md(); + u32 type; + u64 iostart, ioend, ioattr; + + type = ia64_efi_io_md->type; + iostart = ia64_efi_io_md->phys_addr; + ioend = ia64_efi_io_md->phys_addr + + (ia64_efi_io_md->num_pages << 12); + ioattr = ia64_efi_io_md->attribute; + MAKE_MD(type,ioattr,iostart,ioend, 1); + } + else + MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); +#endif + + bp->efi_systab = dom_pa(fw_mem); + bp->efi_memmap = dom_pa(efi_memmap); + bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t); + bp->efi_memdesc_size = sizeof(efi_memory_desc_t); + bp->efi_memdesc_version = 1; + bp->command_line = dom_pa(cmd_line); + bp->console_info.num_cols = 80; + bp->console_info.num_rows = 25; + bp->console_info.orig_x = 0; + bp->console_info.orig_y = 24; + bp->fpswa = 0; + + return bp; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/domain.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/domain.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1103 @@ +/* + * Copyright (C) 1995 Linus Torvalds + * + * Pentium III FXSR, SSE support + * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 + * + * Copyright (C) 2005 Intel Co + * Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx> + * + * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx> Add CONFIG_VTI domain support + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/errno.h> +#include <xen/sched.h> +#include <xen/smp.h> +#include <xen/delay.h> +#include <xen/softirq.h> +#include <xen/mm.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/desc.h> +//#include <asm/mpspec.h> +#include <xen/irq.h> +#include <xen/event.h> +//#include <xen/shadow.h> +#include <xen/console.h> + +#include <xen/elf.h> +//#include <asm/page.h> +#include <asm/pgalloc.h> 
+#include <asm/dma.h> /* for MAX_DMA_ADDRESS */ + +#include <asm/asm-offsets.h> /* for IA64_THREAD_INFO_SIZE */ + +#include <asm/vcpu.h> /* for function declarations */ +#include <public/arch-ia64.h> +#include <asm/vmx.h> +#include <asm/vmx_vcpu.h> +#include <asm/vmx_vpd.h> +#include <asm/pal.h> +#include <public/io/ioreq.h> + +#define CONFIG_DOMAIN0_CONTIGUOUS +unsigned long dom0_start = -1L; +unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable +//FIXME: alignment should be 256MB, lest Linux use a 256MB page size +unsigned long dom0_align = 256*1024*1024; +#ifdef DOMU_BUILD_STAGING +unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable +unsigned long domU_staging_start; +unsigned long domU_staging_align = 64*1024; +unsigned long *domU_staging_area; +#endif + +// initialized by arch/ia64/setup.c:find_initrd() +unsigned long initrd_start = 0, initrd_end = 0; + +#define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend)) + +//extern int loadelfimage(char *); +extern int readelfimage_base_and_size(char *, unsigned long, + unsigned long *, unsigned long *, unsigned long *); + +unsigned long map_domain_page0(struct domain *); +extern unsigned long dom_fw_setup(struct domain *, char *, int); + +/* this belongs in include/asm, but there doesn't seem to be a suitable place */ +void free_perdomain_pt(struct domain *d) +{ + printf("free_perdomain_pt: not implemented\n"); + //free_page((unsigned long)d->mm.perdomain_pt); +} + +int hlt_counter; + +void disable_hlt(void) +{ + hlt_counter++; +} + +void enable_hlt(void) +{ + hlt_counter--; +} + +static void default_idle(void) +{ + if ( hlt_counter == 0 ) + { + local_irq_disable(); + if ( !softirq_pending(smp_processor_id()) ) + safe_halt(); + //else + local_irq_enable(); + } +} + +void continue_cpu_idle_loop(void) +{ + int cpu = smp_processor_id(); + for ( ; ; ) + { +#ifdef IA64 +// __IRQ_STAT(cpu, idle_timestamp) = jiffies +#else + irq_stat[cpu].idle_timestamp = jiffies; +#endif + while ( !softirq_pending(cpu) ) + default_idle(); + raise_softirq(SCHEDULE_SOFTIRQ); + do_softirq(); + } +} + +void startup_cpu_idle_loop(void) +{ + /* Just some sanity to ensure that the scheduler is set up okay. */ + ASSERT(current->domain == IDLE_DOMAIN_ID); + raise_softirq(SCHEDULE_SOFTIRQ); + do_softirq(); + + /* + * Declares CPU setup done to the boot processor. + * Therefore memory barrier to ensure state is visible. + */ + smp_mb(); +#if 0 +//do we have to ensure the idle task has a shared page so that, for example, +//region registers can be loaded from it. Apparently not... + idle0_task.shared_info = (void *)alloc_xenheap_page(); + memset(idle0_task.shared_info, 0, PAGE_SIZE); + /* pin mapping */ + // FIXME: Does this belong here? Or do only at domain switch time? + { + /* WARNING: following must be inlined to avoid nested fault */ + unsigned long psr = ia64_clear_ic(); + ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR, + pte_val(pfn_pte(ia64_tpa(idle0_task.shared_info) >> PAGE_SHIFT, PAGE_KERNEL)), + PAGE_SHIFT); + ia64_set_psr(psr); + ia64_srlz_i(); + } +#endif + + continue_cpu_idle_loop(); +} + +struct vcpu *arch_alloc_vcpu_struct(void) +{ + /* Per-vp stack is used here. 
So we need keep vcpu + * same page as per-vp stack */ + return alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER); +} + +void arch_free_vcpu_struct(struct vcpu *v) +{ + free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER); +} + +static void init_switch_stack(struct vcpu *v) +{ + struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; + struct switch_stack *sw = (struct switch_stack *) regs - 1; + extern void ia64_ret_from_clone; + + memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs)); + sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET; + sw->b0 = (unsigned long) &ia64_ret_from_clone; + sw->ar_fpsr = FPSR_DEFAULT; + v->arch._thread.ksp = (unsigned long) sw - 16; + // stay on kernel stack because may get interrupts! + // ia64_ret_from_clone (which b0 gets in new_thread) switches + // to user stack + v->arch._thread.on_ustack = 0; + memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96); +} + +void arch_do_createdomain(struct vcpu *v) +{ + struct domain *d = v->domain; + struct thread_info *ti = alloc_thread_info(v); + + /* Clear thread_info to clear some important fields, like preempt_count */ + memset(ti, 0, sizeof(struct thread_info)); + init_switch_stack(v); + + d->shared_info = (void *)alloc_xenheap_page(); + if (!d->shared_info) { + printk("ERROR/HALTING: CAN'T ALLOC PAGE\n"); + while (1); + } + memset(d->shared_info, 0, PAGE_SIZE); + d->shared_info->vcpu_data[0].arch.privregs = + alloc_xenheap_pages(get_order(sizeof(mapped_regs_t))); + printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[0].arch.privregs); + memset(d->shared_info->vcpu_data[0].arch.privregs, 0, PAGE_SIZE); + v->vcpu_info = &(d->shared_info->vcpu_data[0]); + + d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME + +#ifdef CONFIG_VTI + /* Per-domain vTLB and vhpt implementation. Now vmx domain will stick + * to this solution. Maybe it can be deferred until we know created + * one as vmx domain */ + v->arch.vtlb = init_domain_tlb(v); +#endif + + /* We may also need emulation rid for region4, though it's unlikely + * to see guest issue uncacheable access in metaphysical mode. But + * keep such info here may be more sane. 
+ */ + if (((d->arch.metaphysical_rr0 = allocate_metaphysical_rr()) == -1UL) + || ((d->arch.metaphysical_rr4 = allocate_metaphysical_rr()) == -1UL)) + BUG(); + VCPU(v, metaphysical_mode) = 1; + v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0; + v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4; + v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0; + v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4; +#define DOMAIN_RID_BITS_DEFAULT 18 + if (!allocate_rid_range(d,DOMAIN_RID_BITS_DEFAULT)) // FIXME + BUG(); + v->arch.starting_rid = d->arch.starting_rid; + v->arch.ending_rid = d->arch.ending_rid; + // the following will eventually need to be negotiated dynamically + d->xen_vastart = XEN_START_ADDR; + d->xen_vaend = XEN_END_ADDR; + d->shared_info_va = SHAREDINFO_ADDR; + d->arch.breakimm = 0x1000; + v->arch.breakimm = d->arch.breakimm; + + d->arch.mm = xmalloc(struct mm_struct); + if (unlikely(!d->arch.mm)) { + printk("Can't allocate mm_struct for domain %d\n",d->domain_id); + return -ENOMEM; + } + memset(d->arch.mm, 0, sizeof(*d->arch.mm)); + d->arch.mm->pgd = pgd_alloc(d->arch.mm); + if (unlikely(!d->arch.mm->pgd)) { + printk("Can't allocate pgd for domain %d\n",d->domain_id); + return -ENOMEM; + } +} + +void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) +{ + struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; + + printf("arch_getdomaininfo_ctxt\n"); + c->regs = *regs; + c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector; +#if 0 + if (c->vcpu.privregs && copy_to_user(c->vcpu.privregs, + v->vcpu_info->arch.privregs, sizeof(mapped_regs_t))) { + printk("Bad ctxt address: 0x%lx\n", c->vcpu.privregs); + return -EFAULT; + } +#endif + + c->shared = v->domain->shared_info->arch; +} + +int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c) +{ + struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; + struct domain *d = v->domain; + int i, rc, ret; + unsigned long progress = 0; + + printf("arch_set_info_guest\n"); + if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) + return 0; + + if (c->flags & VGCF_VMX_GUEST) { + if (!vmx_enabled) { + printk("No VMX hardware feature for vmx domain.\n"); + return -EINVAL; + } + + vmx_setup_platform(v, c); + } + + *regs = c->regs; + new_thread(v, regs->cr_iip, 0, 0); + + v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector; + if ( c->vcpu.privregs && copy_from_user(v->vcpu_info->arch.privregs, + c->vcpu.privregs, sizeof(mapped_regs_t))) { + printk("Bad ctxt address in arch_set_info_guest: 0x%lx\n", c->vcpu.privregs); + return -EFAULT; + } + + v->arch.domain_itm_last = -1L; + d->shared_info->arch = c->shared; + + /* Don't redo final setup */ + set_bit(_VCPUF_initialised, &v->vcpu_flags); + return 0; +} + +void arch_do_boot_vcpu(struct vcpu *v) +{ + struct domain *d = v->domain; + printf("arch_do_boot_vcpu: not implemented\n"); + + d->shared_info->vcpu_data[v->vcpu_id].arch.privregs = + alloc_xenheap_pages(get_order(sizeof(mapped_regs_t))); + printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[v->vcpu_id].arch.privregs); + memset(d->shared_info->vcpu_data[v->vcpu_id].arch.privregs, 0, PAGE_SIZE); + return; +} + +void domain_relinquish_resources(struct domain *d) +{ + /* FIXME */ + printf("domain_relinquish_resources: not implemented\n"); +} + +// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread() +// and linux/arch/ia64/kernel/process.c:kernel_thread() +void new_thread(struct vcpu *v, + unsigned long start_pc, + 
unsigned long start_stack, + unsigned long start_info) +{ + struct domain *d = v->domain; + struct pt_regs *regs; + struct ia64_boot_param *bp; + extern char saved_command_line[]; + + +#ifdef CONFIG_DOMAIN0_CONTIGUOUS + if (d == dom0) start_pc += dom0_start; +#endif + + regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; + if (VMX_DOMAIN(v)) { + /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */ + regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */ + } else { + regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) + | IA64_PSR_BITS_TO_SET | IA64_PSR_BN + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS); + regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2 + } + regs->cr_iip = start_pc; + regs->cr_ifs = 1UL << 63; /* or clear? */ + regs->ar_fpsr = FPSR_DEFAULT; + + if (VMX_DOMAIN(v)) { +#ifdef CONFIG_VTI + vmx_init_all_rr(v); + if (d == dom0) + VMX_VPD(v,vgr[12]) = dom_fw_setup(d,saved_command_line,256L); + /* Virtual processor context setup */ + VMX_VPD(v, vpsr) = IA64_PSR_BN; + VPD_CR(v, dcr) = 0; +#endif + } else { + init_all_rr(v); + if (d == dom0) + regs->r28 = dom_fw_setup(d,saved_command_line,256L); + else { + regs->ar_rsc |= (2 << 2); /* force PL2/3 */ + regs->r28 = dom_fw_setup(d,"nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1",256L); //FIXME + } + VCPU(v, banknum) = 1; + VCPU(v, metaphysical_mode) = 1; + d->shared_info->arch.flags = (d == dom0) ? (SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN) : 0; + } +} + +static struct page * map_new_domain0_page(unsigned long mpaddr) +{ + if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { + printk("map_new_domain0_page: bad domain0 mpaddr %p!\n",mpaddr); +printk("map_new_domain0_page: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size); + while(1); + } + return pfn_to_page((mpaddr >> PAGE_SHIFT)); +} + +/* allocate new page for domain and map it to the specified metaphysical addr */ +struct page * map_new_domain_page(struct domain *d, unsigned long mpaddr) +{ + struct mm_struct *mm = d->arch.mm; + struct page *p = (struct page *)0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; +extern unsigned long vhpt_paddr, vhpt_pend; + + if (!mm->pgd) { + printk("map_new_domain_page: domain pgd must exist!\n"); + return(p); + } + pgd = pgd_offset(mm,mpaddr); + if (pgd_none(*pgd)) + pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr)); + + pud = pud_offset(pgd, mpaddr); + if (pud_none(*pud)) + pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr)); + + pmd = pmd_offset(pud, mpaddr); + if (pmd_none(*pmd)) + pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr)); +// pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr)); + + pte = pte_offset_map(pmd, mpaddr); + if (pte_none(*pte)) { +#ifdef CONFIG_DOMAIN0_CONTIGUOUS + if (d == dom0) p = map_new_domain0_page(mpaddr); + else +#endif + { + p = alloc_domheap_page(d); + // zero out pages for security reasons + memset(__va(page_to_phys(p)),0,PAGE_SIZE); + } + if (unlikely(!p)) { +printf("map_new_domain_page: Can't alloc!!!! 
Aaaargh!\n"); + return(p); + } +if (unlikely(page_to_phys(p) > vhpt_paddr && page_to_phys(p) < vhpt_pend)) { + printf("map_new_domain_page: reassigned vhpt page %p!!\n",page_to_phys(p)); +} + set_pte(pte, pfn_pte(page_to_phys(p) >> PAGE_SHIFT, + __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX))); + } + else printk("map_new_domain_page: mpaddr %lx already mapped!\n",mpaddr); + return p; +} + +/* map a physical address to the specified metaphysical addr */ +void map_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr) +{ + struct mm_struct *mm = d->arch.mm; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (!mm->pgd) { + printk("map_domain_page: domain pgd must exist!\n"); + return; + } + pgd = pgd_offset(mm,mpaddr); + if (pgd_none(*pgd)) + pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr)); + + pud = pud_offset(pgd, mpaddr); + if (pud_none(*pud)) + pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr)); + + pmd = pmd_offset(pud, mpaddr); + if (pmd_none(*pmd)) + pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr)); +// pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr)); + + pte = pte_offset_map(pmd, mpaddr); + if (pte_none(*pte)) { + set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT, + __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX))); + } + else printk("map_domain_page: mpaddr %lx already mapped!\n",mpaddr); +} + +void mpafoo(unsigned long mpaddr) +{ + extern unsigned long privop_trace; + if (mpaddr == 0x3800) + privop_trace = 1; +} + +unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr) +{ + struct mm_struct *mm = d->arch.mm; + pgd_t *pgd = pgd_offset(mm, mpaddr); + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + +#ifdef CONFIG_DOMAIN0_CONTIGUOUS + if (d == dom0) { + if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { + //printk("lookup_domain_mpa: bad dom0 mpaddr %p!\n",mpaddr); +//printk("lookup_domain_mpa: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size); + mpafoo(mpaddr); + } + pte_t pteval = pfn_pte(mpaddr >> PAGE_SHIFT, + __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)); + pte = &pteval; + return *(unsigned long *)pte; + } +#endif +tryagain: + if (pgd_present(*pgd)) { + pud = pud_offset(pgd,mpaddr); + if (pud_present(*pud)) { + pmd = pmd_offset(pud,mpaddr); + if (pmd_present(*pmd)) { + pte = pte_offset_map(pmd,mpaddr); + if (pte_present(*pte)) { +//printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte)); + return *(unsigned long *)pte; + } + } + } + } + /* if lookup fails and mpaddr is "legal", "create" the page */ + if ((mpaddr >> PAGE_SHIFT) < d->max_pages) { + if (map_new_domain_page(d,mpaddr)) goto tryagain; + } + printk("lookup_domain_mpa: bad mpa %p (> %p\n", + mpaddr,d->max_pages<<PAGE_SHIFT); + mpafoo(mpaddr); + return 0; +} + +// FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE +#ifndef CONFIG_VTI +unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr) +{ + unsigned long pte = lookup_domain_mpa(d,mpaddr); + unsigned long imva; + + pte &= _PAGE_PPN_MASK; + imva = __va(pte); + imva |= mpaddr & ~PAGE_MASK; + return(imva); +} +#else // CONFIG_VTI +unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr) +{ + unsigned long imva = __gpa_to_mpa(d, mpaddr); + + return __va(imva); +} +#endif // CONFIG_VTI + +// remove following line if not privifying in memory +//#define HAVE_PRIVIFY_MEMORY +#ifndef HAVE_PRIVIFY_MEMORY +#define privify_memory(x,y) do {} while(0) +#endif + +// see arch/x86/xxx/domain_build.c +int elf_sanity_check(Elf_Ehdr *ehdr) +{ + return 
(IS_ELF(*ehdr)); +} + +static void copy_memory(void *dst, void *src, int size) +{ + int remain; + + if (IS_XEN_ADDRESS(dom0,src)) { + memcpy(dst,src,size); + } + else { + printf("About to call __copy_from_user(%p,%p,%d)\n", + dst,src,size); + while (remain = __copy_from_user(dst,src,size)) { + printf("incomplete user copy, %d remain of %d\n", + remain,size); + dst += size - remain; src += size - remain; + size -= remain; + } + } +} + +void loaddomainelfimage(struct domain *d, unsigned long image_start) +{ + char *elfbase = image_start; + //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start; + Elf_Ehdr ehdr; + Elf_Phdr phdr; + int h, filesz, memsz, paddr; + unsigned long elfaddr, dom_mpaddr, dom_imva; + struct page *p; + unsigned long pteval; + + copy_memory(&ehdr,image_start,sizeof(Elf_Ehdr)); + for ( h = 0; h < ehdr.e_phnum; h++ ) { + copy_memory(&phdr,elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize), + sizeof(Elf_Phdr)); + //if ( !is_loadable_phdr(phdr) ) + if ((phdr.p_type != PT_LOAD)) { + continue; + } + filesz = phdr.p_filesz; memsz = phdr.p_memsz; + elfaddr = elfbase + phdr.p_offset; + dom_mpaddr = phdr.p_paddr; +//printf("p_offset: %x, size=%x\n",elfaddr,filesz); +#ifdef CONFIG_DOMAIN0_CONTIGUOUS + if (d == dom0) { + if (dom_mpaddr+memsz>dom0_size || dom_mpaddr+filesz>dom0_size) { + printf("Domain0 doesn't fit in allocated space!\n"); + while(1); + } + dom_imva = __va(dom_mpaddr + dom0_start); + copy_memory(dom_imva,elfaddr,filesz); + if (memsz > filesz) memset(dom_imva+filesz,0,memsz-filesz); +//FIXME: This test for code seems to find a lot more than objdump -x does + if (phdr.p_flags & PF_X) privify_memory(dom_imva,filesz); + } + else +#endif + while (memsz > 0) { +#ifdef DOMU_AUTO_RESTART + pteval = lookup_domain_mpa(d,dom_mpaddr); + if (pteval) dom_imva = __va(pteval & _PFN_MASK); + else { printf("loaddomainelfimage: BAD!\n"); while(1); } +#else + p = map_new_domain_page(d,dom_mpaddr); + if (unlikely(!p)) BUG(); + dom_imva = __va(page_to_phys(p)); +#endif + if (filesz > 0) { + if (filesz >= PAGE_SIZE) + copy_memory(dom_imva,elfaddr,PAGE_SIZE); + else { // copy partial page, zero the rest of page + copy_memory(dom_imva,elfaddr,filesz); + memset(dom_imva+filesz,0,PAGE_SIZE-filesz); + } +//FIXME: This test for code seems to find a lot more than objdump -x does + if (phdr.p_flags & PF_X) + privify_memory(dom_imva,PAGE_SIZE); + } + else if (memsz > 0) // always zero out entire page + memset(dom_imva,0,PAGE_SIZE); + memsz -= PAGE_SIZE; filesz -= PAGE_SIZE; + elfaddr += PAGE_SIZE; dom_mpaddr += PAGE_SIZE; + } + } +} + +int +parsedomainelfimage(char *elfbase, unsigned long elfsize, unsigned long *entry) +{ + Elf_Ehdr ehdr; + + copy_memory(&ehdr,elfbase,sizeof(Elf_Ehdr)); + + if ( !elf_sanity_check(&ehdr) ) { + printk("ELF sanity check failed.\n"); + return -EINVAL; + } + + if ( (ehdr.e_phoff + (ehdr.e_phnum * ehdr.e_phentsize)) > elfsize ) + { + printk("ELF program headers extend beyond end of image.\n"); + return -EINVAL; + } + + if ( (ehdr.e_shoff + (ehdr.e_shnum * ehdr.e_shentsize)) > elfsize ) + { + printk("ELF section headers extend beyond end of image.\n"); + return -EINVAL; + } + +#if 0 + /* Find the section-header strings table. 
*/ + if ( ehdr.e_shstrndx == SHN_UNDEF ) + { + printk("ELF image has no section-header strings table (shstrtab).\n"); + return -EINVAL; + } +#endif + + *entry = ehdr.e_entry; +printf("parsedomainelfimage: entry point = %p\n",*entry); + + return 0; +} + + +void alloc_dom0(void) +{ +#ifdef CONFIG_DOMAIN0_CONTIGUOUS + if (platform_is_hp_ski()) { + dom0_size = 128*1024*1024; //FIXME: Should be configurable + } + printf("alloc_dom0: starting (initializing %d MB...)\n",dom0_size/(1024*1024)); + + /* FIXME: The first trunk (say 256M) should always be assigned to + * Dom0, since Dom0's physical == machine address for DMA purpose. + * Some old version linux, like 2.4, assumes physical memory existing + * in 2nd 64M space. + */ + dom0_start = alloc_boot_pages( + dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT); + dom0_start <<= PAGE_SHIFT; + if (!dom0_start) { + printf("construct_dom0: can't allocate contiguous memory size=%p\n", + dom0_size); + while(1); + } + printf("alloc_dom0: dom0_start=%p\n",dom0_start); +#else + dom0_start = 0; +#endif + +} + +#ifdef DOMU_BUILD_STAGING +void alloc_domU_staging(void) +{ + domU_staging_size = 32*1024*1024; //FIXME: Should be configurable + printf("alloc_domU_staging: starting (initializing %d MB...)\n",domU_staging_size/(1024*1024)); + domU_staging_start = alloc_boot_pages( + domU_staging_size >> PAGE_SHIFT, domU_staging_align >> PAGE_SHIFT); + domU_staging_start <<= PAGE_SHIFT; + if (!domU_staging_size) { + printf("alloc_domU_staging: can't allocate, spinning...\n"); + while(1); + } + else domU_staging_area = (unsigned long *)__va(domU_staging_start); + printf("alloc_domU_staging: domU_staging_area=%p\n",domU_staging_area); + +} + +unsigned long +domU_staging_read_8(unsigned long at) +{ + // no way to return errors so just do it + return domU_staging_area[at>>3]; + +} + +unsigned long +domU_staging_write_32(unsigned long at, unsigned long a, unsigned long b, + unsigned long c, unsigned long d) +{ + if (at + 32 > domU_staging_size) return -1; + if (at & 0x1f) return -1; + at >>= 3; + domU_staging_area[at++] = a; + domU_staging_area[at++] = b; + domU_staging_area[at++] = c; + domU_staging_area[at] = d; + return 0; + +} +#endif + +/* + * Domain 0 has direct access to all devices absolutely. However + * the major point of this stub here, is to allow alloc_dom_mem + * handled with order > 0 request. Dom0 requires that bit set to + * allocate memory for other domains. + */ +void physdev_init_dom0(struct domain *d) +{ + set_bit(_DOMF_physdev_access, &d->domain_flags); +} + +extern unsigned long running_on_sim; +unsigned int vmx_dom0 = 0; +int construct_dom0(struct domain *d, + unsigned long image_start, unsigned long image_len, + unsigned long initrd_start, unsigned long initrd_len, + char *cmdline) +{ + char *dst; + int i, rc; + unsigned long pfn, mfn; + unsigned long nr_pt_pages; + unsigned long count; + unsigned long alloc_start, alloc_end; + struct pfn_info *page = NULL; + start_info_t *si; + struct vcpu *v = d->vcpu[0]; + + struct domain_setup_info dsi; + unsigned long p_start; + unsigned long pkern_start; + unsigned long pkern_entry; + unsigned long pkern_end; + unsigned long ret, progress = 0; + +//printf("construct_dom0: starting\n"); + /* Sanity! 
*/ +#ifndef CLONE_DOMAIN0 + if ( d != dom0 ) + BUG(); + if ( test_bit(_DOMF_constructed, &d->domain_flags) ) + BUG(); +#endif + + memset(&dsi, 0, sizeof(struct domain_setup_info)); + + printk("*** LOADING DOMAIN 0 ***\n"); + + alloc_start = dom0_start; + alloc_end = dom0_start + dom0_size; + d->tot_pages = d->max_pages = dom0_size/PAGE_SIZE; + image_start = __va(ia64_boot_param->initrd_start); + image_len = ia64_boot_param->initrd_size; +//printk("image_start=%lx, image_len=%lx\n",image_start,image_len); +//printk("First word of image: %lx\n",*(unsigned long *)image_start); + +//printf("construct_dom0: about to call parseelfimage\n"); + dsi.image_addr = (unsigned long)image_start; + dsi.image_len = image_len; + rc = parseelfimage(&dsi); + if ( rc != 0 ) + return rc; + +#ifdef CONFIG_VTI + /* Temp workaround */ + if (running_on_sim) + dsi.xen_section_string = (char *)1; + + /* Check whether dom0 is vti domain */ + if ((!vmx_enabled) && !dsi.xen_section_string) { + printk("Lack of hardware support for unmodified vmx dom0\n"); + panic(""); + } + + if (vmx_enabled && !dsi.xen_section_string) { + printk("Dom0 is vmx domain!\n"); + vmx_dom0 = 1; + } +#endif + + p_start = dsi.v_start; + pkern_start = dsi.v_kernstart; + pkern_end = dsi.v_kernend; + pkern_entry = dsi.v_kernentry; + +//printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry); + + if ( (p_start & (PAGE_SIZE-1)) != 0 ) + { + printk("Initial guest OS must load to a page boundary.\n"); + return -EINVAL; + } + + printk("METAPHYSICAL MEMORY ARRANGEMENT:\n" + " Kernel image: %lx->%lx\n" + " Entry address: %lx\n" + " Init. ramdisk: (NOT IMPLEMENTED YET)\n", + pkern_start, pkern_end, pkern_entry); + + if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) ) + { + printk("Initial guest OS requires too much space\n" + "(%luMB is greater than %luMB limit)\n", + (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20); + return -ENOMEM; + } + + // if high 3 bits of pkern start are non-zero, error + + // if pkern end is after end of metaphysical memory, error + // (we should be able to deal with this... later) + + + // + +#if 0 + strcpy(d->name,"Domain0"); +#endif + + /* Mask all upcalls... */ + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; + +#ifdef CONFIG_VTI + /* Construct a frame-allocation list for the initial domain, since these + * pages are allocated by boot allocator and pfns are not set properly + */ + for ( mfn = (alloc_start>>PAGE_SHIFT); + mfn < (alloc_end>>PAGE_SHIFT); + mfn++ ) + { + page = &frame_table[mfn]; + page_set_owner(page, d); + page->u.inuse.type_info = 0; + page->count_info = PGC_allocated | 1; + list_add_tail(&page->list, &d->page_list); + + /* Construct 1:1 mapping */ + machine_to_phys_mapping[mfn] = mfn; + } + + /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt + * for dom0 + */ + d->arch.pmt = NULL; +#endif + + /* Copy the OS image. */ + loaddomainelfimage(d,image_start); + + /* Copy the initial ramdisk. */ + //if ( initrd_len != 0 ) + // memcpy((void *)vinitrd_start, initrd_start, initrd_len); + + /* Sync d/i cache conservatively */ + ret = ia64_pal_cache_flush(4, 0, &progress, NULL); + if (ret != PAL_STATUS_SUCCESS) + panic("PAL CACHE FLUSH failed for dom0.\n"); + printk("Sync i/d cache for dom0 image SUCC\n"); + +#if 0 + /* Set up start info area. 
*/ + //si = (start_info_t *)vstartinfo_start; + memset(si, 0, PAGE_SIZE); + si->nr_pages = d->tot_pages; + si->shared_info = virt_to_phys(d->shared_info); + si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; + //si->pt_base = vpt_start; + //si->nr_pt_frames = nr_pt_pages; + //si->mfn_list = vphysmap_start; + + if ( initrd_len != 0 ) + { + //si->mod_start = vinitrd_start; + si->mod_len = initrd_len; + printk("Initrd len 0x%lx, start at 0x%08lx\n", + si->mod_len, si->mod_start); + } + + dst = si->cmd_line; + if ( cmdline != NULL ) + { + for ( i = 0; i < 255; i++ ) + { + if ( cmdline[i] == '\0' ) + break; + *dst++ = cmdline[i]; + } + } + *dst = '\0'; + + zap_low_mappings(); /* Do the same for the idle page tables. */ +#endif + + /* Give up the VGA console if DOM0 is configured to grab it. */ + if (cmdline != NULL) + console_endboot(strstr(cmdline, "tty0") != NULL); + + /* VMX specific construction for Dom0, if hardware supports VMX + * and Dom0 is unmodified image + */ + printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d); + if (vmx_dom0) + vmx_final_setup_domain(dom0); + + set_bit(_DOMF_constructed, &d->domain_flags); + + new_thread(v, pkern_entry, 0, 0); + physdev_init_dom0(d); + + // FIXME: Hack for keyboard input +#ifdef CLONE_DOMAIN0 +if (d == dom0) +#endif + serial_input_init(); + if (d == dom0) { + VCPU(v, delivery_mask[0]) = -1L; + VCPU(v, delivery_mask[1]) = -1L; + VCPU(v, delivery_mask[2]) = -1L; + VCPU(v, delivery_mask[3]) = -1L; + } + else __set_bit(0x30, VCPU(v, delivery_mask)); + + return 0; +} + +// FIXME: When dom0 can construct domains, this goes away (or is rewritten) +int construct_domU(struct domain *d, + unsigned long image_start, unsigned long image_len, + unsigned long initrd_start, unsigned long initrd_len, + char *cmdline) +{ + int i, rc; + struct vcpu *v = d->vcpu[0]; + unsigned long pkern_entry; + +#ifndef DOMU_AUTO_RESTART + if ( test_bit(_DOMF_constructed, &d->domain_flags) ) BUG(); +#endif + + printk("*** LOADING DOMAIN %d ***\n",d->domain_id); + + d->max_pages = dom0_size/PAGE_SIZE; // FIXME: use dom0 size + // FIXME: use domain0 command line + rc = parsedomainelfimage(image_start, image_len, &pkern_entry); + printk("parsedomainelfimage returns %d\n",rc); + if ( rc != 0 ) return rc; + + /* Mask all upcalls... */ + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; + + /* Copy the OS image. 
*/ + printk("calling loaddomainelfimage(%p,%p)\n",d,image_start); + loaddomainelfimage(d,image_start); + printk("loaddomainelfimage returns\n"); + + set_bit(_DOMF_constructed, &d->domain_flags); + + printk("calling new_thread, entry=%p\n",pkern_entry); +#ifdef DOMU_AUTO_RESTART + v->domain->arch.image_start = image_start; + v->domain->arch.image_len = image_len; + v->domain->arch.entry = pkern_entry; +#endif + new_thread(v, pkern_entry, 0, 0); + printk("new_thread returns\n"); + __set_bit(0x30, VCPU(v, delivery_mask)); + + return 0; +} + +#ifdef DOMU_AUTO_RESTART +void reconstruct_domU(struct vcpu *v) +{ + /* re-copy the OS image to reset data values to original */ + printk("reconstruct_domU: restarting domain %d...\n", + v->domain->domain_id); + loaddomainelfimage(v->domain,v->domain->arch.image_start); + new_thread(v, v->domain->arch.entry, 0, 0); +} +#endif + +// FIXME: When dom0 can construct domains, this goes away (or is rewritten) +int launch_domainU(unsigned long size) +{ +#ifdef CLONE_DOMAIN0 + static int next = CLONE_DOMAIN0+1; +#else + static int next = 1; +#endif + + struct domain *d = do_createdomain(next,0); + if (!d) { + printf("launch_domainU: couldn't create\n"); + return 1; + } + else next++; + if (construct_domU(d, (unsigned long)domU_staging_area, size,0,0,0)) { + printf("launch_domainU: couldn't construct(id=%d,%lx,%lx)\n", + d->domain_id,domU_staging_area,size); + return 2; + } + domain_unpause_by_systemcontroller(d); +} + +void machine_restart(char * __unused) +{ + if (platform_is_hp_ski()) dummy(); + printf("machine_restart called: spinning....\n"); + while(1); +} + +void machine_halt(void) +{ + if (platform_is_hp_ski()) dummy(); + printf("machine_halt called: spinning....\n"); + while(1); +} + +void dummy_called(char *function) +{ + if (platform_is_hp_ski()) asm("break 0;;"); + printf("dummy called in %s: spinning....\n", function); + while(1); +} + + +#if 0 +void switch_to(struct vcpu *prev, struct vcpu *next) +{ + struct vcpu *last; + + __switch_to(prev,next,last); + //set_current(next); +} +#endif + +void domain_pend_keyboard_interrupt(int irq) +{ + vcpu_pend_interrupt(dom0->vcpu[0],irq); +} + +void vcpu_migrate_cpu(struct vcpu *v, int newcpu) +{ + if ( v->processor == newcpu ) + return; + + set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags); + v->processor = newcpu; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/grant_table.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/grant_table.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1288 @@ +#ifndef CONFIG_VTI +// temporarily in arch/ia64 until can merge into common/grant_table.c +/****************************************************************************** + * common/grant_table.c + * + * Mechanism for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Copyright (c) 2005 Christopher Clark + * Copyright (c) 2004 K A Fraser + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define GRANT_DEBUG 0 +#define GRANT_DEBUG_VERBOSE 0 + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/sched.h> +#include <xen/shadow.h> +#include <xen/mm.h> +#ifdef __ia64__ +#define __addr_ok(a) 1 // FIXME-ia64: a variant of access_ok?? +// FIXME-ia64: need to implement real cmpxchg_user on ia64 +//#define cmpxchg_user(_p,_o,_n) ((*_p == _o) ? ((*_p = _n), 0) : ((_o = *_p), 0)) +// FIXME-ia64: these belong in an asm/grant_table.h... PAGE_SIZE different +#undef ORDER_GRANT_FRAMES +//#undef NUM_GRANT_FRAMES +#define ORDER_GRANT_FRAMES 0 +//#define NUM_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES) +#endif + +#define PIN_FAIL(_lbl, _rc, _f, _a...) \ + do { \ + DPRINTK( _f, ## _a ); \ + rc = (_rc); \ + goto _lbl; \ + } while ( 0 ) + +static inline int +get_maptrack_handle( + grant_table_t *t) +{ + unsigned int h; + if ( unlikely((h = t->maptrack_head) == t->maptrack_limit) ) + return -1; + t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT; + t->map_count++; + return h; +} + +static inline void +put_maptrack_handle( + grant_table_t *t, int handle) +{ + t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT; + t->maptrack_head = handle; + t->map_count--; +} + +static int +__gnttab_activate_grant_ref( + struct domain *mapping_d, /* IN */ + struct vcpu *mapping_ed, + struct domain *granting_d, + grant_ref_t ref, + u16 dev_hst_ro_flags, + unsigned long host_virt_addr, + unsigned long *pframe ) /* OUT */ +{ + domid_t sdom; + u16 sflags; + active_grant_entry_t *act; + grant_entry_t *sha; + s16 rc = 1; + unsigned long frame = 0; + int retries = 0; + + /* + * Objectives of this function: + * . Make the record ( granting_d, ref ) active, if not already. + * . Update shared grant entry of owner, indicating frame is mapped. + * . Increment the owner act->pin reference counts. + * . get_page on shared frame if new mapping. + * . get_page_type if this is first RW mapping of frame. + * . Add PTE to virtual address space of mapping_d, if necessary. + * Returns: + * . -ve: error + * . 1: ok + * . 0: ok and TLB invalidate of host_virt_addr needed. + * + * On success, *pframe contains mfn. + */ + + /* + * We bound the number of times we retry CMPXCHG on memory locations that + * we share with a guest OS. The reason is that the guest can modify that + * location at a higher rate than we can read-modify-CMPXCHG, so the guest + * could cause us to livelock. There are a few cases where it is valid for + * the guest to race our updates (e.g., to change the GTF_readonly flag), + * so we allow a few retries before failing. + */ + + act = &granting_d->grant_table->active[ref]; + sha = &granting_d->grant_table->shared[ref]; + + spin_lock(&granting_d->grant_table->lock); + + if ( act->pin == 0 ) + { + /* CASE 1: Activating a previously inactive entry. */ + + sflags = sha->flags; + sdom = sha->domid; + + for ( ; ; ) + { + u32 scombo, prev_scombo, new_scombo; + + if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) || + unlikely(sdom != mapping_d->domain_id) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", + sflags, sdom, mapping_d->domain_id); + + /* Merge two 16-bit values into a 32-bit combined update. */ + /* NB. Endianness! 
*/ + prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; + + new_scombo = scombo | GTF_reading; + if ( !(dev_hst_ro_flags & GNTMAP_readonly) ) + { + new_scombo |= GTF_writing; + if ( unlikely(sflags & GTF_readonly) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Attempt to write-pin a r/o grant entry.\n"); + } + + /* NB. prev_scombo is updated in place to seen value. */ + if ( unlikely(cmpxchg_user((u32 *)&sha->flags, + prev_scombo, + new_scombo)) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Fault while modifying shared flags and domid.\n"); + + /* Did the combined update work (did we see what we expected?). */ + if ( likely(prev_scombo == scombo) ) + break; + + if ( retries++ == 4 ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Shared grant entry is unstable.\n"); + + /* Didn't see what we expected. Split out the seen flags & dom. */ + /* NB. Endianness! */ + sflags = (u16)prev_scombo; + sdom = (u16)(prev_scombo >> 16); + } + + /* rmb(); */ /* not on x86 */ + + frame = __gpfn_to_mfn_foreign(granting_d, sha->frame); + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? +#else + if ( unlikely(!pfn_valid(frame)) || + unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ? + get_page(&frame_table[frame], granting_d) : + get_page_and_type(&frame_table[frame], granting_d, + PGT_writable_page))) ) + { + clear_bit(_GTF_writing, &sha->flags); + clear_bit(_GTF_reading, &sha->flags); + PIN_FAIL(unlock_out, GNTST_general_error, + "Could not pin the granted frame (%lx)!\n", frame); + } +#endif + + if ( dev_hst_ro_flags & GNTMAP_device_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? + GNTPIN_devr_inc : GNTPIN_devw_inc; + if ( dev_hst_ro_flags & GNTMAP_host_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? + GNTPIN_hstr_inc : GNTPIN_hstw_inc; + act->domid = sdom; + act->frame = frame; + } + else + { + /* CASE 2: Active modications to an already active entry. */ + + /* + * A cheesy check for possible pin-count overflow. + * A more accurate check cannot be done with a single comparison. + */ + if ( (act->pin & 0x80808080U) != 0 ) + PIN_FAIL(unlock_out, ENOSPC, + "Risk of counter overflow %08x\n", act->pin); + + frame = act->frame; + + if ( !(dev_hst_ro_flags & GNTMAP_readonly) && + !((sflags = sha->flags) & GTF_writing) ) + { + for ( ; ; ) + { + u16 prev_sflags; + + if ( unlikely(sflags & GTF_readonly) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Attempt to write-pin a r/o grant entry.\n"); + + prev_sflags = sflags; + + /* NB. prev_sflags is updated in place to seen value. */ + if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, + prev_sflags | GTF_writing)) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Fault while modifying shared flags.\n"); + + if ( likely(prev_sflags == sflags) ) + break; + + if ( retries++ == 4 ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Shared grant entry is unstable.\n"); + + sflags = prev_sflags; + } + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? +#else + if ( unlikely(!get_page_type(&frame_table[frame], + PGT_writable_page)) ) + { + clear_bit(_GTF_writing, &sha->flags); + PIN_FAIL(unlock_out, GNTST_general_error, + "Attempt to write-pin a unwritable page.\n"); + } +#endif + } + + if ( dev_hst_ro_flags & GNTMAP_device_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? + GNTPIN_devr_inc : GNTPIN_devw_inc; + + if ( dev_hst_ro_flags & GNTMAP_host_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? 
+ GNTPIN_hstr_inc : GNTPIN_hstw_inc; + } + + /* + * At this point: + * act->pin updated to reflect mapping. + * sha->flags updated to indicate to granting domain mapping done. + * frame contains the mfn. + */ + + spin_unlock(&granting_d->grant_table->lock); + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? +#else + if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) ) + { + /* Write update into the pagetable. */ + l1_pgentry_t pte; + pte = l1e_from_pfn(frame, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY); + if ( !(dev_hst_ro_flags & GNTMAP_readonly) ) + l1e_add_flags(pte,_PAGE_RW); + rc = update_grant_va_mapping( host_virt_addr, pte, + mapping_d, mapping_ed ); + + /* + * IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB. + * This is done in the outer gnttab_map_grant_ref. + */ + + if ( rc < 0 ) + { + /* Failure: undo and abort. */ + + spin_lock(&granting_d->grant_table->lock); + + if ( dev_hst_ro_flags & GNTMAP_readonly ) + { + act->pin -= GNTPIN_hstr_inc; + } + else + { + act->pin -= GNTPIN_hstw_inc; + if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) + { + clear_bit(_GTF_writing, &sha->flags); + put_page_type(&frame_table[frame]); + } + } + + if ( act->pin == 0 ) + { + clear_bit(_GTF_reading, &sha->flags); + put_page(&frame_table[frame]); + } + + spin_unlock(&granting_d->grant_table->lock); + } + + } +#endif + + *pframe = frame; + return rc; + + unlock_out: + spin_unlock(&granting_d->grant_table->lock); + return rc; +} + +/* + * Returns 0 if TLB flush / invalidate required by caller. + * va will indicate the address to be invalidated. + */ +static int +__gnttab_map_grant_ref( + gnttab_map_grant_ref_t *uop, + unsigned long *va) +{ + domid_t dom; + grant_ref_t ref; + struct domain *ld, *rd; + struct vcpu *led; + u16 dev_hst_ro_flags; + int handle; + unsigned long frame = 0, host_virt_addr; + int rc; + + led = current; + ld = led->domain; + + /* Bitwise-OR avoids short-circuiting which screws control flow. */ + if ( unlikely(__get_user(dom, &uop->dom) | + __get_user(ref, &uop->ref) | + __get_user(host_virt_addr, &uop->host_addr) | + __get_user(dev_hst_ro_flags, &uop->flags)) ) + { + DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n"); + return -EFAULT; /* don't set status */ + } + + + if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map)) && + unlikely(!__addr_ok(host_virt_addr))) + { + DPRINTK("Bad virtual address (%lx) or flags (%x).\n", + host_virt_addr, dev_hst_ro_flags); + (void)__put_user(GNTST_bad_virt_addr, &uop->handle); + return GNTST_bad_gntref; + } + + if ( unlikely(ref >= NR_GRANT_ENTRIES) || + unlikely((dev_hst_ro_flags & + (GNTMAP_device_map|GNTMAP_host_map)) == 0) ) + { + DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags); + (void)__put_user(GNTST_bad_gntref, &uop->handle); + return GNTST_bad_gntref; + } + + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || + unlikely(ld == rd) ) + { + if ( rd != NULL ) + put_domain(rd); + DPRINTK("Could not find domain %d\n", dom); + (void)__put_user(GNTST_bad_domain, &uop->handle); + return GNTST_bad_domain; + } + + /* Get a maptrack handle. */ + if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) ) + { + int i; + grant_mapping_t *new_mt; + grant_table_t *lgt = ld->grant_table; + + /* Grow the maptrack table. 
*/ + new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1); + if ( new_mt == NULL ) + { + put_domain(rd); + DPRINTK("No more map handles available\n"); + (void)__put_user(GNTST_no_device_space, &uop->handle); + return GNTST_no_device_space; + } + + memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order); + for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ ) + new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; + + free_xenheap_pages(lgt->maptrack, lgt->maptrack_order); + lgt->maptrack = new_mt; + lgt->maptrack_order += 1; + lgt->maptrack_limit <<= 1; + + printk("Doubled maptrack size\n"); + handle = get_maptrack_handle(ld->grant_table); + } + +#if GRANT_DEBUG_VERBOSE + DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n", + ref, dom, dev_hst_ro_flags); +#endif + + if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref, + dev_hst_ro_flags, + host_virt_addr, &frame))) + { + /* + * Only make the maptrack live _after_ writing the pte, in case we + * overwrite the same frame number, causing a maptrack walk to find it + */ + ld->grant_table->maptrack[handle].domid = dom; + + ld->grant_table->maptrack[handle].ref_and_flags + = (ref << MAPTRACK_REF_SHIFT) | + (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK); + + (void)__put_user(frame, &uop->dev_bus_addr); + + if ( dev_hst_ro_flags & GNTMAP_host_map ) + *va = host_virt_addr; + + (void)__put_user(handle, &uop->handle); + } + else + { + (void)__put_user(rc, &uop->handle); + put_maptrack_handle(ld->grant_table, handle); + } + + put_domain(rd); + return rc; +} + +static long +gnttab_map_grant_ref( + gnttab_map_grant_ref_t *uop, unsigned int count) +{ + int i, flush = 0; + unsigned long va = 0; + + for ( i = 0; i < count; i++ ) + if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 ) + flush++; + +#ifdef __ia64__ +// FIXME-ia64: probably need to do something here to avoid stale mappings? +#else + if ( flush == 1 ) + flush_tlb_one_mask(current->domain->cpumask, va); + else if ( flush != 0 ) + flush_tlb_mask(current->domain->cpumask); +#endif + + return 0; +} + +static int +__gnttab_unmap_grant_ref( + gnttab_unmap_grant_ref_t *uop, + unsigned long *va) +{ + domid_t dom; + grant_ref_t ref; + u16 handle; + struct domain *ld, *rd; + + active_grant_entry_t *act; + grant_entry_t *sha; + grant_mapping_t *map; + u16 flags; + s16 rc = 1; + unsigned long frame, virt; + + ld = current->domain; + + /* Bitwise-OR avoids short-circuiting which screws control flow. 
*/ + if ( unlikely(__get_user(virt, &uop->host_addr) | + __get_user(frame, &uop->dev_bus_addr) | + __get_user(handle, &uop->handle)) ) + { + DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n"); + return -EFAULT; /* don't set status */ + } + + map = &ld->grant_table->maptrack[handle]; + + if ( unlikely(handle >= ld->grant_table->maptrack_limit) || + unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) ) + { + DPRINTK("Bad handle (%d).\n", handle); + (void)__put_user(GNTST_bad_handle, &uop->status); + return GNTST_bad_handle; + } + + dom = map->domid; + ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK; + + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || + unlikely(ld == rd) ) + { + if ( rd != NULL ) + put_domain(rd); + DPRINTK("Could not find domain %d\n", dom); + (void)__put_user(GNTST_bad_domain, &uop->status); + return GNTST_bad_domain; + } + +#if GRANT_DEBUG_VERBOSE + DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n", + ref, dom, handle); +#endif + + act = &rd->grant_table->active[ref]; + sha = &rd->grant_table->shared[ref]; + + spin_lock(&rd->grant_table->lock); + + if ( frame == 0 ) + { + frame = act->frame; + } + else + { + if ( unlikely(frame != act->frame) ) + PIN_FAIL(unmap_out, GNTST_general_error, + "Bad frame number doesn't match gntref.\n"); + if ( flags & GNTMAP_device_map ) + act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc + : GNTPIN_devw_inc; + + map->ref_and_flags &= ~GNTMAP_device_map; + (void)__put_user(0, &uop->dev_bus_addr); + + /* Frame is now unmapped for device access. */ + } + + if ( (virt != 0) && + (flags & GNTMAP_host_map) && + ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0)) + { +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? +#else + l1_pgentry_t *pl1e; + unsigned long _ol1e; + + pl1e = &linear_pg_table[l1_linear_offset(virt)]; + + if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) ) + { + DPRINTK("Could not find PTE entry for address %lx\n", virt); + rc = -EINVAL; + goto unmap_out; + } + + /* + * Check that the virtual address supplied is actually mapped to + * act->frame. + */ + if ( unlikely((_ol1e >> PAGE_SHIFT) != frame )) + { + DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n", + _ol1e, virt, frame); + rc = -EINVAL; + goto unmap_out; + } + + /* Delete pagetable entry. */ + if ( unlikely(__put_user(0, (unsigned long *)pl1e))) + { + DPRINTK("Cannot delete PTE entry at %p for virtual address %lx\n", + pl1e, virt); + rc = -EINVAL; + goto unmap_out; + } +#endif + + map->ref_and_flags &= ~GNTMAP_host_map; + + act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc + : GNTPIN_hstw_inc; + + rc = 0; + *va = virt; + } + + if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) + { + map->ref_and_flags = 0; + put_maptrack_handle(ld->grant_table, handle); + } + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? 
I think not and then +// this can probably be macro-ized into nothingness +#else + /* If just unmapped a writable mapping, mark as dirtied */ + if ( unlikely(shadow_mode_log_dirty(rd)) && + !( flags & GNTMAP_readonly ) ) + mark_dirty(rd, frame); +#endif + + /* If the last writable mapping has been removed, put_page_type */ + if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) && + ( !( flags & GNTMAP_readonly ) ) ) + { + clear_bit(_GTF_writing, &sha->flags); + put_page_type(&frame_table[frame]); + } + + if ( act->pin == 0 ) + { + clear_bit(_GTF_reading, &sha->flags); + put_page(&frame_table[frame]); + } + + unmap_out: + (void)__put_user(rc, &uop->status); + spin_unlock(&rd->grant_table->lock); + put_domain(rd); + return rc; +} + +static long +gnttab_unmap_grant_ref( + gnttab_unmap_grant_ref_t *uop, unsigned int count) +{ + int i, flush = 0; + unsigned long va = 0; + + for ( i = 0; i < count; i++ ) + if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 ) + flush++; + +#ifdef __ia64__ +// FIXME-ia64: probably need to do something here to avoid stale mappings? +#else + if ( flush == 1 ) + flush_tlb_one_mask(current->domain->cpumask, va); + else if ( flush != 0 ) + flush_tlb_mask(current->domain->cpumask); +#endif + + return 0; +} + +static long +gnttab_setup_table( + gnttab_setup_table_t *uop, unsigned int count) +{ + gnttab_setup_table_t op; + struct domain *d; + int i; + unsigned long addr; + + if ( count != 1 ) + return -EINVAL; + + if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) + { + DPRINTK("Fault while reading gnttab_setup_table_t.\n"); + return -EFAULT; + } + + if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) ) + { + DPRINTK("Xen only supports up to %d grant-table frames per domain.\n", + NR_GRANT_FRAMES); + (void)put_user(GNTST_general_error, &uop->status); + return 0; + } + + if ( op.dom == DOMID_SELF ) + { + op.dom = current->domain->domain_id; + } + else if ( unlikely(!IS_PRIV(current->domain)) ) + { + (void)put_user(GNTST_permission_denied, &uop->status); + return 0; + } + + if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) ) + { + DPRINTK("Bad domid %d.\n", op.dom); + (void)put_user(GNTST_bad_domain, &uop->status); + return 0; + } + + if ( op.nr_frames <= NR_GRANT_FRAMES ) + { + ASSERT(d->grant_table != NULL); + (void)put_user(GNTST_okay, &uop->status); +#ifdef __ia64__ + if (d == dom0) { + for ( i = 0; i < op.nr_frames; i++ ) + (void)put_user( + (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i, + &uop->frame_list[i]); + } else { + /* IA64 hack - need to map it somewhere */ + addr = (1UL << 40); + map_domain_page(d, addr, virt_to_phys(d->grant_table->shared)); + (void)put_user(addr >> PAGE_SHIFT, &uop->frame_list[0]); + } +#else + for ( i = 0; i < op.nr_frames; i++ ) + (void)put_user( + (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i, + &uop->frame_list[i]); +#endif + } + + put_domain(d); + return 0; +} + +#if GRANT_DEBUG +static int +gnttab_dump_table(gnttab_dump_table_t *uop) +{ + grant_table_t *gt; + gnttab_dump_table_t op; + struct domain *d; + u32 shared_mfn; + active_grant_entry_t *act; + grant_entry_t sha_copy; + grant_mapping_t *maptrack; + int i; + + + if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) + { + DPRINTK("Fault while reading gnttab_dump_table_t.\n"); + return -EFAULT; + } + + if ( op.dom == DOMID_SELF ) + { + op.dom = current->domain->domain_id; + } + + if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) ) + { + DPRINTK("Bad domid %d.\n", op.dom); + (void)put_user(GNTST_bad_domain, &uop->status); + return 0; + 
} + + ASSERT(d->grant_table != NULL); + gt = d->grant_table; + (void)put_user(GNTST_okay, &uop->status); + + shared_mfn = virt_to_phys(d->grant_table->shared); + + DPRINTK("Grant table for dom (%hu) MFN (%x)\n", + op.dom, shared_mfn); + + ASSERT(d->grant_table->active != NULL); + ASSERT(d->grant_table->shared != NULL); + ASSERT(d->grant_table->maptrack != NULL); + + for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) + { + sha_copy = gt->shared[i]; + + if ( sha_copy.flags ) + { + DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) " + "dom:(%hu) frame:(%lx)\n", + op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame); + } + } + + spin_lock(>->lock); + + for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) + { + act = >->active[i]; + + if ( act->pin ) + { + DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) " + "dom:(%hu) frame:(%lx)\n", + op.dom, i, act->pin, act->domid, act->frame); + } + } + + for ( i = 0; i < gt->maptrack_limit; i++ ) + { + maptrack = >->maptrack[i]; + + if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK ) + { + DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) " + "dom:(%hu)\n", + op.dom, i, + maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT, + maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK, + maptrack->domid); + } + } + + spin_unlock(>->lock); + + put_domain(d); + return 0; +} +#endif + +long +do_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + long rc; + + if ( count > 512 ) + return -EINVAL; + + LOCK_BIGLOCK(current->domain); + + rc = -EFAULT; + switch ( cmd ) + { + case GNTTABOP_map_grant_ref: + if ( unlikely(!array_access_ok( + uop, count, sizeof(gnttab_map_grant_ref_t))) ) + goto out; + rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count); + break; + case GNTTABOP_unmap_grant_ref: + if ( unlikely(!array_access_ok( + uop, count, sizeof(gnttab_unmap_grant_ref_t))) ) + goto out; + rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count); + break; + case GNTTABOP_setup_table: + rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count); + break; +#if GRANT_DEBUG + case GNTTABOP_dump_table: + rc = gnttab_dump_table((gnttab_dump_table_t *)uop); + break; +#endif + default: + rc = -ENOSYS; + break; + } + +out: + UNLOCK_BIGLOCK(current->domain); + + return rc; +} + +int +gnttab_check_unmap( + struct domain *rd, struct domain *ld, unsigned long frame, int readonly) +{ + /* Called when put_page is invoked on a page belonging to a foreign domain. + * Instead of decrementing the frame table ref count, locate the grant + * table entry, if any, and if found, decrement that count. + * Called a _lot_ at domain creation because pages mapped by priv domains + * also traverse this. + */ + + /* Note: If the same frame is mapped multiple times, and then one of + * the ptes is overwritten, which maptrack handle gets invalidated? + * Advice: Don't do it. Explicitly unmap. 
+ */ + + unsigned int handle, ref, refcount; + grant_table_t *lgt, *rgt; + active_grant_entry_t *act; + grant_mapping_t *map; + int found = 0; + + lgt = ld->grant_table; + +#if GRANT_DEBUG_VERBOSE + if ( ld->domain_id != 0 ) + { + DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n", + rd->domain_id, ld->domain_id, frame, readonly); + } +#endif + + /* Fast exit if we're not mapping anything using grant tables */ + if ( lgt->map_count == 0 ) + return 0; + + if ( get_domain(rd) == 0 ) + { + DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n", + rd->domain_id); + return 0; + } + + rgt = rd->grant_table; + + for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) + { + map = &lgt->maptrack[handle]; + + if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) && + ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly)))) + { + ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT); + act = &rgt->active[ref]; + + spin_lock(&rgt->lock); + + if ( act->frame != frame ) + { + spin_unlock(&rgt->lock); + continue; + } + + refcount = act->pin & ( readonly ? GNTPIN_hstr_mask + : GNTPIN_hstw_mask ); + if ( refcount == 0 ) + { + spin_unlock(&rgt->lock); + continue; + } + + /* gotcha */ + DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n", + rd->domain_id, ld->domain_id, frame, readonly); + + if ( readonly ) + act->pin -= GNTPIN_hstr_inc; + else + { + act->pin -= GNTPIN_hstw_inc; + + /* any more granted writable mappings? */ + if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) + { + clear_bit(_GTF_writing, &rgt->shared[ref].flags); + put_page_type(&frame_table[frame]); + } + } + + if ( act->pin == 0 ) + { + clear_bit(_GTF_reading, &rgt->shared[ref].flags); + put_page(&frame_table[frame]); + } + spin_unlock(&rgt->lock); + + clear_bit(GNTMAP_host_map, &map->ref_and_flags); + + if ( !(map->ref_and_flags & GNTMAP_device_map) ) + put_maptrack_handle(lgt, handle); + + found = 1; + break; + } + } + put_domain(rd); + + return found; +} + +int +gnttab_prepare_for_transfer( + struct domain *rd, struct domain *ld, grant_ref_t ref) +{ + grant_table_t *rgt; + grant_entry_t *sha; + domid_t sdom; + u16 sflags; + u32 scombo, prev_scombo; + int retries = 0; + unsigned long target_pfn; + + DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n", + rd->domain_id, ld->domain_id, ref); + + if ( unlikely((rgt = rd->grant_table) == NULL) || + unlikely(ref >= NR_GRANT_ENTRIES) ) + { + DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", + rd->domain_id, ref); + return 0; + } + + spin_lock(&rgt->lock); + + sha = &rgt->shared[ref]; + + sflags = sha->flags; + sdom = sha->domid; + + for ( ; ; ) + { + target_pfn = sha->frame; + + if ( unlikely(target_pfn >= max_page ) ) + { + DPRINTK("Bad pfn (%lx)\n", target_pfn); + goto fail; + } + + if ( unlikely(sflags != GTF_accept_transfer) || + unlikely(sdom != ld->domain_id) ) + { + DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", + sflags, sdom, ld->domain_id); + goto fail; + } + + /* Merge two 16-bit values into a 32-bit combined update. */ + /* NB. Endianness! */ + prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; + + /* NB. prev_scombo is updated in place to seen value. */ + if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, + prev_scombo | GTF_transfer_committed)) ) + { + DPRINTK("Fault while modifying shared flags and domid.\n"); + goto fail; + } + + /* Did the combined update work (did we see what we expected?). 
*/ + if ( likely(prev_scombo == scombo) ) + break; + + if ( retries++ == 4 ) + { + DPRINTK("Shared grant entry is unstable.\n"); + goto fail; + } + + /* Didn't see what we expected. Split out the seen flags & dom. */ + /* NB. Endianness! */ + sflags = (u16)prev_scombo; + sdom = (u16)(prev_scombo >> 16); + } + + spin_unlock(&rgt->lock); + return 1; + + fail: + spin_unlock(&rgt->lock); + return 0; +} + +void +gnttab_notify_transfer( + struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame) +{ + grant_entry_t *sha; + unsigned long pfn; + + DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n", + rd->domain_id, ld->domain_id, ref); + + sha = &rd->grant_table->shared[ref]; + + spin_lock(&rd->grant_table->lock); + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? +#else + pfn = sha->frame; + + if ( unlikely(pfn >= max_page ) ) + DPRINTK("Bad pfn (%lx)\n", pfn); + else + { + machine_to_phys_mapping[frame] = pfn; + + if ( unlikely(shadow_mode_log_dirty(ld))) + mark_dirty(ld, frame); + + if (shadow_mode_translate(ld)) + __phys_to_machine_mapping[pfn] = frame; + } +#endif + sha->frame = __mfn_to_gpfn(rd, frame); + sha->domid = rd->domain_id; + wmb(); + sha->flags = ( GTF_accept_transfer | GTF_transfer_completed ); + + spin_unlock(&rd->grant_table->lock); + + return; +} + +int +grant_table_create( + struct domain *d) +{ + grant_table_t *t; + int i; + + if ( (t = xmalloc(grant_table_t)) == NULL ) + goto no_mem; + + /* Simple stuff. */ + memset(t, 0, sizeof(*t)); + spin_lock_init(&t->lock); + + /* Active grant table. */ + if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES)) + == NULL ) + goto no_mem; + memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES); + + /* Tracking of mapped foreign frames table */ + if ( (t->maptrack = alloc_xenheap_page()) == NULL ) + goto no_mem; + t->maptrack_order = 0; + t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t); + memset(t->maptrack, 0, PAGE_SIZE); + for ( i = 0; i < t->maptrack_limit; i++ ) + t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; + + /* Shared grant table. */ + t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES); + if ( t->shared == NULL ) + goto no_mem; + memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE); + +#ifdef __ia64__ +// I don't think there's anything to do here on ia64?... +#else + for ( i = 0; i < NR_GRANT_FRAMES; i++ ) + { + SHARE_PFN_WITH_DOMAIN( + virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d); + machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] = + INVALID_M2P_ENTRY; + } +#endif + + /* Okay, install the structure. 
*/ + wmb(); /* avoid races with lock-free access to d->grant_table */ + d->grant_table = t; + return 0; + + no_mem: + if ( t != NULL ) + { + xfree(t->active); + if ( t->maptrack != NULL ) + free_xenheap_page(t->maptrack); + xfree(t); + } + return -ENOMEM; +} + +void +gnttab_release_dev_mappings(grant_table_t *gt) +{ + grant_mapping_t *map; + domid_t dom; + grant_ref_t ref; + u16 handle; + struct domain *ld, *rd; + unsigned long frame; + active_grant_entry_t *act; + grant_entry_t *sha; + + ld = current->domain; + + for ( handle = 0; handle < gt->maptrack_limit; handle++ ) + { + map = >->maptrack[handle]; + + if ( map->ref_and_flags & GNTMAP_device_map ) + { + dom = map->domid; + ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + + DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n", + handle, ref, + map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom); + + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || + unlikely(ld == rd) ) + { + if ( rd != NULL ) + put_domain(rd); + + printk(KERN_WARNING "Grant release: No dom%d\n", dom); + continue; + } + + act = &rd->grant_table->active[ref]; + sha = &rd->grant_table->shared[ref]; + + spin_lock(&rd->grant_table->lock); + + if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) ) + { + frame = act->frame; + + if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) && + ( (act->pin & GNTPIN_devw_mask) > 0 ) ) + { + clear_bit(_GTF_writing, &sha->flags); + put_page_type(&frame_table[frame]); + } + + act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask); + + if ( act->pin == 0 ) + { + clear_bit(_GTF_reading, &sha->flags); + map->ref_and_flags = 0; + put_page(&frame_table[frame]); + } + else + map->ref_and_flags &= ~GNTMAP_device_map; + } + + spin_unlock(&rd->grant_table->lock); + + put_domain(rd); + } + } +} + + +void +grant_table_destroy( + struct domain *d) +{ + grant_table_t *t; + + if ( (t = d->grant_table) != NULL ) + { + /* Free memory relating to this grant table. */ + d->grant_table = NULL; + free_xenheap_pages(t->shared, ORDER_GRANT_FRAMES); + free_xenheap_page(t->maptrack); + xfree(t->active); + xfree(t); + } +} + +void +grant_table_init( + void) +{ + /* Nothing. */ +} +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/hpsimserial.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/hpsimserial.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,23 @@ +/* + * HP Ski simulator serial I/O + * + * Copyright (C) 2004 Hewlett-Packard Co + * Dan Magenheimer <dan.magenheimer@xxxxxx> + */ + +#include <linux/config.h> +#include <xen/sched.h> +#include <xen/serial.h> +#include "hpsim_ssc.h" + +static void hp_ski_putc(struct serial_port *port, char c) +{ + ia64_ssc(c,0,0,0,SSC_PUTCHAR); +} + +static struct uart_driver hp_ski = { .putc = hp_ski_putc }; + +void hpsim_serial_init(void) +{ + serial_register_uart(0, &hp_ski, 0); +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/hypercall.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/hypercall.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,182 @@ +/* + * Hypercall implementations + * + * Copyright (C) 2005 Hewlett-Packard Co. 
+ * Dan Magenheimer (dan.magenheimer@xxxxxx) + * + */ + +#include <xen/config.h> +#include <xen/sched.h> + +#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */ +#include <asm/sal.h> /* FOR struct ia64_sal_retval */ + +#include <asm/vcpu.h> +#include <asm/dom_fw.h> + +extern unsigned long translate_domain_mpaddr(unsigned long); +extern struct ia64_pal_retval xen_pal_emulator(UINT64,UINT64,UINT64,UINT64); +extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64); + +unsigned long idle_when_pending = 0; +unsigned long pal_halt_light_count = 0; + +int +ia64_hypercall (struct pt_regs *regs) +{ + struct vcpu *v = (struct domain *) current; + struct ia64_sal_retval x; + struct ia64_pal_retval y; + unsigned long *tv, *tc; + int pi; + + switch (regs->r2) { + case FW_HYPERCALL_PAL_CALL: + //printf("*** PAL hypercall: index=%d\n",regs->r28); + //FIXME: This should call a C routine +#if 0 + // This is very conservative, but avoids a possible + // (and deadly) freeze in paravirtualized domains due + // to a yet-to-be-found bug where pending_interruption + // is zero when it shouldn't be. Since PAL is called + // in the idle loop, this should resolve it + VCPU(v,pending_interruption) = 1; +#endif + if (regs->r28 == PAL_HALT_LIGHT) { +#define SPURIOUS_VECTOR 15 + pi = vcpu_check_pending_interrupts(v); + if (pi != SPURIOUS_VECTOR) { + if (!VCPU(v,pending_interruption)) + idle_when_pending++; + vcpu_pend_unspecified_interrupt(v); +//printf("idle w/int#%d pending!\n",pi); +//this shouldn't happen, but it apparently does quite a bit! so don't +//allow it to happen... i.e. if a domain has an interrupt pending and +//it tries to halt itself because it thinks it is idle, just return here +//as deliver_pending_interrupt is called on the way out and will deliver it + } + else { + pal_halt_light_count++; + do_sched_op(SCHEDOP_yield); + } + //break; + } + else if (regs->r28 >= PAL_COPY_PAL) { /* FIXME */ + printf("stacked PAL hypercalls not supported\n"); + regs->r8 = -1; + break; + } + else y = xen_pal_emulator(regs->r28,regs->r29, + regs->r30,regs->r31); + regs->r8 = y.status; regs->r9 = y.v0; + regs->r10 = y.v1; regs->r11 = y.v2; + break; + case FW_HYPERCALL_SAL_CALL: + x = sal_emulator(vcpu_get_gr(v,32),vcpu_get_gr(v,33), + vcpu_get_gr(v,34),vcpu_get_gr(v,35), + vcpu_get_gr(v,36),vcpu_get_gr(v,37), + vcpu_get_gr(v,38),vcpu_get_gr(v,39)); + regs->r8 = x.status; regs->r9 = x.v0; + regs->r10 = x.v1; regs->r11 = x.v2; + break; + case FW_HYPERCALL_EFI_RESET_SYSTEM: + printf("efi.reset_system called "); + if (current->domain == dom0) { + printf("(by dom0)\n "); + (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL); + } +#ifdef DOMU_AUTO_RESTART + else { + reconstruct_domU(current); + return 0; // don't increment ip! + } +#else + printf("(not supported for non-0 domain)\n"); + regs->r8 = EFI_UNSUPPORTED; +#endif + break; + case FW_HYPERCALL_EFI_GET_TIME: + tv = vcpu_get_gr(v,32); + tc = vcpu_get_gr(v,33); + //printf("efi_get_time(%p,%p) called...",tv,tc); + tv = __va(translate_domain_mpaddr(tv)); + if (tc) tc = __va(translate_domain_mpaddr(tc)); + regs->r8 = (*efi.get_time)(tv,tc); + //printf("and returns %lx\n",regs->r8); + break; + case FW_HYPERCALL_EFI_SET_TIME: + case FW_HYPERCALL_EFI_GET_WAKEUP_TIME: + case FW_HYPERCALL_EFI_SET_WAKEUP_TIME: + // FIXME: need fixes in efi.h from 2.6.9 + case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP: + // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED + // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS + // POINTER ARGUMENTS WILL BE VIRTUAL!! 
+ case FW_HYPERCALL_EFI_GET_VARIABLE: + // FIXME: need fixes in efi.h from 2.6.9 + case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE: + case FW_HYPERCALL_EFI_SET_VARIABLE: + case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT: + // FIXME: need fixes in efi.h from 2.6.9 + regs->r8 = EFI_UNSUPPORTED; + break; + case 0xffff: // test dummy hypercall + regs->r8 = dump_privop_counts_to_user( + vcpu_get_gr(v,32), + vcpu_get_gr(v,33)); + break; + case 0xfffe: // test dummy hypercall + regs->r8 = zero_privop_counts_to_user( + vcpu_get_gr(v,32), + vcpu_get_gr(v,33)); + break; + case 0xfffd: // test dummy hypercall + regs->r8 = launch_domainU( + vcpu_get_gr(v,32)); + break; + case 0xfffc: // test dummy hypercall + regs->r8 = domU_staging_write_32( + vcpu_get_gr(v,32), + vcpu_get_gr(v,33), + vcpu_get_gr(v,34), + vcpu_get_gr(v,35), + vcpu_get_gr(v,36)); + break; + case 0xfffb: // test dummy hypercall + regs->r8 = domU_staging_read_8(vcpu_get_gr(v,32)); + break; + + case __HYPERVISOR_dom0_op: + regs->r8 = do_dom0_op(regs->r14); + break; + + case __HYPERVISOR_dom_mem_op: +#ifdef CONFIG_VTI + regs->r8 = do_dom_mem_op(regs->r14, regs->r15, regs->r16, regs->r17, regs->r18); +#else + /* we don't handle reservations; just return success */ + regs->r8 = regs->r16; +#endif + break; + + case __HYPERVISOR_event_channel_op: + regs->r8 = do_event_channel_op(regs->r14); + break; + +#ifndef CONFIG_VTI + case __HYPERVISOR_grant_table_op: + regs->r8 = do_grant_table_op(regs->r14, regs->r15, regs->r16); + break; +#endif + + case __HYPERVISOR_console_io: + regs->r8 = do_console_io(regs->r14, regs->r15, regs->r16); + break; + + default: + printf("unknown hypercall %x\n", regs->r2); + regs->r8 = (unsigned long)-1; + } + return 1; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/hyperprivop.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/hyperprivop.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1592 @@ +/* + * arch/ia64/kernel/hyperprivop.S + * + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer <dan.magenheimer@xxxxxx> + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <public/arch-ia64.h> + +#if 1 // change to 0 to turn off all fast paths +#define FAST_HYPERPRIVOPS +#define FAST_HYPERPRIVOP_CNT +#define FAST_REFLECT_CNT +//#define FAST_TICK +#define FAST_BREAK +#define FAST_ACCESS_REFLECT +#define FAST_RFI +#define FAST_SSM_I +#define FAST_PTC_GA +#undef RFI_TO_INTERRUPT // not working yet +#endif + +#ifdef CONFIG_SMP +#warning "FIXME: ptc.ga instruction requires spinlock for SMP" +#undef FAST_PTC_GA +#endif + +// FIXME: turn off for now... but NaTs may crash Xen so re-enable soon! +//#define HANDLE_AR_UNAT + +// FIXME: This is defined in include/asm-ia64/hw_irq.h but this +// doesn't appear to be include'able from assembly? +#define IA64_TIMER_VECTOR 0xef + +// Should be included from common header file (also in process.c) +// NO PSR_CLR IS DIFFERENT! (CPL) +#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT) +#define IA64_PSR_CPL0 (__IA64_UL(1) << IA64_PSR_CPL0_BIT) +// note IA64_PSR_PK removed from following, why is this necessary? 
+#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \ + IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \ + IA64_PSR_IT | IA64_PSR_BN) + +#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \ + IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \ + IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \ + IA64_PSR_MC | IA64_PSR_IS | \ + IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ + IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA) + +// Note: not hand-scheduled for now +// Registers at entry +// r16 == cr.isr +// r17 == cr.iim +// r18 == XSI_PSR_IC_OFS +// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits) +// r31 == pr +GLOBAL_ENTRY(fast_hyperprivop) +#ifndef FAST_HYPERPRIVOPS // see beginning of file + br.sptk.many dispatch_break_fault ;; +#endif + // HYPERPRIVOP_SSM_I? + // assumes domain interrupts pending, so just do it + cmp.eq p7,p6=XEN_HYPER_SSM_I,r17 +(p7) br.sptk.many hyper_ssm_i;; + + // FIXME. This algorithm gives up (goes to the slow path) if there + // are ANY interrupts pending, even if they are currently + // undeliverable. This should be improved later... + adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;; + ld4 r20=[r20] ;; + cmp.eq p7,p0=r0,r20 +(p7) br.cond.sptk.many 1f + movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r20=[r20];; + adds r21=IA64_VCPU_IRR0_OFFSET,r20; + adds r22=IA64_VCPU_IRR0_OFFSET+8,r20;; + ld8 r23=[r21],16; ld8 r24=[r22],16;; + ld8 r21=[r21]; ld8 r22=[r22];; + or r23=r23,r24; or r21=r21,r22;; + or r20=r23,r21;; +1: // when we get to here r20=~=interrupts pending + + // HYPERPRIVOP_RFI? + cmp.eq p7,p6=XEN_HYPER_RFI,r17 +(p7) br.sptk.many hyper_rfi;; + + // HYPERPRIVOP_GET_IVR? + cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17 +(p7) br.sptk.many hyper_get_ivr;; + + cmp.ne p7,p0=r20,r0 +(p7) br.spnt.many dispatch_break_fault ;; + + // HYPERPRIVOP_COVER? + cmp.eq p7,p6=XEN_HYPER_COVER,r17 +(p7) br.sptk.many hyper_cover;; + + // HYPERPRIVOP_SSM_DT? + cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17 +(p7) br.sptk.many hyper_ssm_dt;; + + // HYPERPRIVOP_RSM_DT? + cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17 +(p7) br.sptk.many hyper_rsm_dt;; + + // HYPERPRIVOP_GET_TPR? + cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17 +(p7) br.sptk.many hyper_get_tpr;; + + // HYPERPRIVOP_SET_TPR? + cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17 +(p7) br.sptk.many hyper_set_tpr;; + + // HYPERPRIVOP_EOI? + cmp.eq p7,p6=XEN_HYPER_EOI,r17 +(p7) br.sptk.many hyper_eoi;; + + // HYPERPRIVOP_SET_ITM? + cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17 +(p7) br.sptk.many hyper_set_itm;; + + // HYPERPRIVOP_SET_RR? + cmp.eq p7,p6=XEN_HYPER_SET_RR,r17 +(p7) br.sptk.many hyper_set_rr;; + + // HYPERPRIVOP_GET_RR? + cmp.eq p7,p6=XEN_HYPER_GET_RR,r17 +(p7) br.sptk.many hyper_get_rr;; + + // HYPERPRIVOP_PTC_GA? + cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17 +(p7) br.sptk.many hyper_ptc_ga;; + + // HYPERPRIVOP_ITC_D? + cmp.eq p7,p6=XEN_HYPER_ITC_D,r17 +(p7) br.sptk.many hyper_itc_d;; + + // HYPERPRIVOP_ITC_I? + cmp.eq p7,p6=XEN_HYPER_ITC_I,r17 +(p7) br.sptk.many hyper_itc_i;; + + // HYPERPRIVOP_THASH? 
+ cmp.eq p7,p6=XEN_HYPER_THASH,r17 +(p7) br.sptk.many hyper_thash;; + + // if not one of the above, give up for now and do it the slow way + br.sptk.many dispatch_break_fault ;; + + +// give up for now if: ipsr.be==1, ipsr.pp==1 +// from reflect_interruption, don't need to: +// - printf first extint (debug only) +// - check for interrupt collection enabled (routine will force on) +// - set ifa (not valid for extint) +// - set iha (not valid for extint) +// - set itir (not valid for extint) +// DO need to +// - increment the HYPER_SSM_I fast_hyperprivop counter +// - set shared_mem iip to instruction after HYPER_SSM_I +// - set cr.iip to guest iva+0x3000 +// - set shared_mem ipsr to [vcpu_get_ipsr_int_state] +// be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0; +// i = shared_mem interrupt_delivery_enabled +// ic = shared_mem interrupt_collection_enabled +// ri = instruction after HYPER_SSM_I +// all other bits unchanged from real cr.ipsr +// - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!) +// - set shared_mem isr: isr.ei to instr following HYPER_SSM_I +// and isr.ri to cr.isr.ri (all other bits zero) +// - cover and set shared_mem precover_ifs to cr.ifs +// ^^^ MISSED THIS FOR fast_break?? +// - set shared_mem ifs and incomplete_regframe to 0 +// - set shared_mem interrupt_delivery_enabled to 0 +// - set shared_mem interrupt_collection_enabled to 0 +// - set r31 to SHAREDINFO_ADDR +// - virtual bank switch 0 +// maybe implement later +// - verify that there really IS a deliverable interrupt pending +// - set shared_mem iva +// needs to be done but not implemented (in reflect_interruption) +// - set shared_mem iipa +// don't know for sure +// - set shared_mem unat +// r16 == cr.isr +// r17 == cr.iim +// r18 == XSI_PSR_IC +// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits) +// r31 == pr +ENTRY(hyper_ssm_i) +#ifndef FAST_SSM_I + br.spnt.few dispatch_break_fault ;; +#endif + // give up for now if: ipsr.be==1, ipsr.pp==1 + mov r30=cr.ipsr;; + mov r29=cr.iip;; + extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; + cmp.ne p7,p0=r21,r0 +(p7) br.sptk.many dispatch_break_fault ;; + extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; + cmp.ne p7,p0=r21,r0 +(p7) br.sptk.many dispatch_break_fault ;; +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + // set shared_mem iip to instruction after HYPER_SSM_I + extr.u r20=r30,41,2 ;; + cmp.eq p6,p7=2,r20 ;; +(p6) mov r20=0 +(p6) adds r29=16,r29 +(p7) adds r20=1,r20 ;; + dep r30=r20,r30,41,2;; // adjust cr.ipsr.ri but don't save yet + adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r29 ;; + // set shared_mem isr + extr.u r16=r16,38,1;; // grab cr.isr.ir bit + dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero) + dep r16=r20,r16,41,2 ;; // deposit cr.isr.ri + adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r16 ;; + // set cr.ipsr + mov r29=r30 ;; + movl r28=DELIVER_PSR_SET;; + movl r27=~DELIVER_PSR_CLR;; + or r29=r29,r28;; + and r29=r29,r27;; + mov cr.ipsr=r29;; + // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set) + extr.u r29=r30,IA64_PSR_CPL0_BIT,2;; + cmp.eq p6,p7=3,r29;; +(p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2 +(p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2 + ;; + // FOR SSM_I ONLY, also turn on psr.i and psr.ic + movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC);; + movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);; + or r30=r30,r28;; + and r30=r30,r27;; + adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r30 ;; + // 
set shared_mem interrupt_delivery_enabled to 0 + // set shared_mem interrupt_collection_enabled to 0 + st8 [r18]=r0;; + // cover and set shared_mem precover_ifs to cr.ifs + // set shared_mem ifs and incomplete_regframe to 0 + cover ;; + mov r20=cr.ifs;; + adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r21]=r0 ;; + adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r0 ;; + adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r20 ;; + // leave cr.ifs alone for later rfi + // set iip to go to domain IVA break instruction vector + movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r22=[r22];; + adds r22=IA64_VCPU_IVA_OFFSET,r22;; + ld8 r23=[r22];; + movl r24=0x3000;; + add r24=r24,r23;; + mov cr.iip=r24;; + // OK, now all set to go except for switch to virtual bank0 + mov r30=r2; mov r29=r3;; + adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; + adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; + bsw.1;; + // FIXME?: ar.unat is not really handled correctly, + // but may not matter if the OS is NaT-clean + .mem.offset 0,0; st8.spill [r2]=r16,16; + .mem.offset 8,0; st8.spill [r3]=r17,16 ;; + .mem.offset 0,0; st8.spill [r2]=r18,16; + .mem.offset 8,0; st8.spill [r3]=r19,16 ;; + .mem.offset 0,0; st8.spill [r2]=r20,16; + .mem.offset 8,0; st8.spill [r3]=r21,16 ;; + .mem.offset 0,0; st8.spill [r2]=r22,16; + .mem.offset 8,0; st8.spill [r3]=r23,16 ;; + .mem.offset 0,0; st8.spill [r2]=r24,16; + .mem.offset 8,0; st8.spill [r3]=r25,16 ;; + .mem.offset 0,0; st8.spill [r2]=r26,16; + .mem.offset 8,0; st8.spill [r3]=r27,16 ;; + .mem.offset 0,0; st8.spill [r2]=r28,16; + .mem.offset 8,0; st8.spill [r3]=r29,16 ;; + .mem.offset 0,0; st8.spill [r2]=r30,16; + .mem.offset 8,0; st8.spill [r3]=r31,16 ;; + movl r31=XSI_IPSR;; + bsw.0 ;; + mov r2=r30; mov r3=r29;; + adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r20]=r0 ;; + mov pr=r31,-1 ;; + rfi + ;; + +// reflect domain clock interrupt +// r31 == pr +// r30 == cr.ivr +// r29 == rp +GLOBAL_ENTRY(fast_tick_reflect) +#ifndef FAST_TICK // see beginning of file + br.cond.sptk.many rp;; +#endif + mov r28=IA64_TIMER_VECTOR;; + cmp.ne p6,p0=r28,r30 +(p6) br.cond.spnt.few rp;; + movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;; + ld8 r26=[r20];; + mov r27=ar.itc;; + adds r27=200,r27;; // safety margin + cmp.ltu p6,p0=r26,r27 +(p6) br.cond.spnt.few rp;; + mov r17=cr.ipsr;; + // slow path if: ipsr.be==1, ipsr.pp==1 + extr.u r21=r17,IA64_PSR_BE_BIT,1 ;; + cmp.ne p6,p0=r21,r0 +(p6) br.cond.spnt.few rp;; + extr.u r21=r17,IA64_PSR_PP_BIT,1 ;; + cmp.ne p6,p0=r21,r0 +(p6) br.cond.spnt.few rp;; + // definitely have a domain tick + mov cr.eoi=r0;; + mov rp=r29;; + mov cr.itm=r26;; // ensure next tick +#ifdef FAST_REFLECT_CNT + movl r20=fast_reflect_count+((0x3000>>8)*8);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + // vcpu_pend_timer(current) + movl r18=XSI_PSR_IC;; + adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r20=[r20];; + cmp.eq p6,p0=r20,r0 // if cr.itv==0 done +(p6) br.cond.spnt.few fast_tick_reflect_done;; + tbit.nz p6,p0=r20,16;; // check itv.m (discard) bit +(p6) br.cond.spnt.few fast_tick_reflect_done;; + extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector + extr.u r26=r20,6,2;; // r26 has irr index of itv.vector + movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r19=[r19];; + adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19 + adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;; + ld8 r24=[r22];; + ld8 r23=[r23];; + cmp.eq p6,p0=r23,r24 // skip if this tick already delivered +(p6) br.cond.spnt.few 
fast_tick_reflect_done;; + // set irr bit + adds r21=IA64_VCPU_IRR0_OFFSET,r19; + shl r26=r26,3;; + add r21=r21,r26;; + mov r25=1;; + shl r22=r25,r27;; + ld8 r23=[r21];; + or r22=r22,r23;; + st8 [r21]=r22;; + // set PSCB(pending_interruption)! + adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r20]=r25;; + + // if interrupted at pl0, we're done + extr.u r16=r17,IA64_PSR_CPL0_BIT,2;; + cmp.eq p6,p0=r16,r0;; +(p6) br.cond.spnt.few fast_tick_reflect_done;; + // if guest vpsr.i is off, we're done + adds r21=XSI_PSR_I_OFS-XSI_PSR_IC_OFS,r18 ;; + ld4 r21=[r21];; + cmp.eq p6,p0=r21,r0 +(p6) br.cond.spnt.few fast_tick_reflect_done;; + + // OK, we have a clock tick to deliver to the active domain! + // so deliver to iva+0x3000 + // r17 == cr.ipsr + // r18 == XSI_PSR_IC + // r19 == IA64_KR(CURRENT) + // r31 == pr + mov r16=cr.isr;; + mov r29=cr.iip;; + adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r29 ;; + // set shared_mem isr + extr.u r16=r16,38,1;; // grab cr.isr.ir bit + dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero) + extr.u r20=r17,41,2 ;; // get ipsr.ri + dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei + adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r16 ;; + // set cr.ipsr (make sure cpl==2!) + mov r29=r17 ;; + movl r28=DELIVER_PSR_SET;; + movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);; + or r29=r29,r28;; + and r29=r29,r27;; + mov cr.ipsr=r29;; + // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set) + extr.u r29=r17,IA64_PSR_CPL0_BIT,2;; + cmp.eq p6,p7=3,r29;; +(p6) dep r17=-1,r17,IA64_PSR_CPL0_BIT,2 +(p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2 + ;; + movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);; + movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);; + dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;; + or r17=r17,r28;; + and r17=r17,r27;; + ld4 r16=[r18],4;; + cmp.ne p6,p0=r16,r0;; +(p6) dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;; + ld4 r16=[r18],-4;; + cmp.ne p6,p0=r16,r0;; +(p6) dep r17=-1,r17,IA64_PSR_I_BIT,1 ;; + adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r17 ;; + // set shared_mem interrupt_delivery_enabled to 0 + // set shared_mem interrupt_collection_enabled to 0 + st8 [r18]=r0;; + // cover and set shared_mem precover_ifs to cr.ifs + // set shared_mem ifs and incomplete_regframe to 0 + cover ;; + mov r20=cr.ifs;; + adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r21]=r0 ;; + adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r0 ;; + adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r20 ;; + // leave cr.ifs alone for later rfi + // set iip to go to domain IVA break instruction vector + adds r22=IA64_VCPU_IVA_OFFSET,r19;; + ld8 r23=[r22];; + movl r24=0x3000;; + add r24=r24,r23;; + mov cr.iip=r24;; + // OK, now all set to go except for switch to virtual bank0 + mov r30=r2; mov r29=r3;; +#ifdef HANDLE_AR_UNAT + mov r28=ar.unat; +#endif + adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; + adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; + bsw.1;; + .mem.offset 0,0; st8.spill [r2]=r16,16; + .mem.offset 8,0; st8.spill [r3]=r17,16 ;; + .mem.offset 0,0; st8.spill [r2]=r18,16; + .mem.offset 8,0; st8.spill [r3]=r19,16 ;; + .mem.offset 0,0; st8.spill [r2]=r20,16; + .mem.offset 8,0; st8.spill [r3]=r21,16 ;; + .mem.offset 0,0; st8.spill [r2]=r22,16; + .mem.offset 8,0; st8.spill [r3]=r23,16 ;; + .mem.offset 0,0; st8.spill [r2]=r24,16; + .mem.offset 8,0; st8.spill [r3]=r25,16 ;; + .mem.offset 0,0; st8.spill [r2]=r26,16; + .mem.offset 8,0; st8.spill [r3]=r27,16 ;; + .mem.offset 0,0; st8.spill [r2]=r28,16; + .mem.offset 8,0; 
st8.spill [r3]=r29,16 ;; + .mem.offset 0,0; st8.spill [r2]=r30,16; + .mem.offset 8,0; st8.spill [r3]=r31,16 ;; +#ifdef HANDLE_AR_UNAT + // bank0 regs have no NaT bit, so ensure they are NaT clean + mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0; + mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0; + mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0; + mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;; +#endif + bsw.0 ;; + mov r2=r30; mov r3=r29;; +#ifdef HANDLE_AR_UNAT + mov ar.unat=r28; +#endif + adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r20]=r0 ;; +fast_tick_reflect_done: + mov pr=r31,-1 ;; + rfi +END(fast_tick_reflect) + +// reflect domain breaks directly to domain +// r16 == cr.isr +// r17 == cr.iim +// r18 == XSI_PSR_IC +// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits) +// r31 == pr +GLOBAL_ENTRY(fast_break_reflect) +#ifndef FAST_BREAK // see beginning of file + br.sptk.many dispatch_break_fault ;; +#endif + mov r30=cr.ipsr;; + mov r29=cr.iip;; + extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; + cmp.ne p7,p0=r21,r0 ;; +(p7) br.spnt.few dispatch_break_fault ;; + extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; + cmp.ne p7,p0=r21,r0 ;; +(p7) br.spnt.few dispatch_break_fault ;; +#if 1 /* special handling in case running on simulator */ + movl r20=first_break;; + ld4 r23=[r20];; + movl r21=0x80001; + movl r22=0x80002;; + cmp.ne p7,p0=r23,r0;; +(p7) br.spnt.few dispatch_break_fault ;; + cmp.eq p7,p0=r21,r17; +(p7) br.spnt.few dispatch_break_fault ;; + cmp.eq p7,p0=r22,r17; +(p7) br.spnt.few dispatch_break_fault ;; +#endif + movl r20=0x2c00; + // save iim in shared_info + adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r17;; + // fall through + + +// reflect to domain ivt+r20 +// sets up isr,iip,ipsr,ifs (FIXME: do iipa too) +// r16 == cr.isr +// r18 == XSI_PSR_IC +// r20 == offset into ivt +// r29 == iip +// r30 == ipsr +// r31 == pr +ENTRY(fast_reflect) +#ifdef FAST_REFLECT_CNT + movl r22=fast_reflect_count; + shr r23=r20,5;; + add r22=r22,r23;; + ld8 r21=[r22];; + adds r21=1,r21;; + st8 [r22]=r21;; +#endif + // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!) 
+ adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r29;; + // set shared_mem isr + adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r16 ;; + // set cr.ipsr + mov r29=r30 ;; + movl r28=DELIVER_PSR_SET;; + movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);; + or r29=r29,r28;; + and r29=r29,r27;; + mov cr.ipsr=r29;; + // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set) + extr.u r29=r30,IA64_PSR_CPL0_BIT,2;; + cmp.eq p6,p7=3,r29;; +(p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2 +(p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2 + ;; + movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);; + movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);; + or r30=r30,r28;; + and r30=r30,r27;; + // also set shared_mem ipsr.i and ipsr.ic appropriately + ld8 r24=[r18];; + extr.u r22=r24,32,32 + cmp4.eq p6,p7=r24,r0;; +(p6) dep r30=0,r30,IA64_PSR_IC_BIT,1 +(p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;; + cmp4.eq p6,p7=r22,r0;; +(p6) dep r30=0,r30,IA64_PSR_I_BIT,1 +(p7) dep r30=-1,r30,IA64_PSR_I_BIT,1 ;; + adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r30 ;; + // set shared_mem interrupt_delivery_enabled to 0 + // set shared_mem interrupt_collection_enabled to 0 + st8 [r18]=r0;; + // cover and set shared_mem precover_ifs to cr.ifs + // set shared_mem ifs and incomplete_regframe to 0 + cover ;; + mov r24=cr.ifs;; + adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r21]=r0 ;; + adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r0 ;; + adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r24 ;; + // vpsr.i = vpsr.ic = 0 on delivery of interruption + st8 [r18]=r0;; + // FIXME: need to save iipa and isr to be arch-compliant + // set iip to go to domain IVA break instruction vector + movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r22=[r22];; + adds r22=IA64_VCPU_IVA_OFFSET,r22;; + ld8 r23=[r22];; + add r20=r20,r23;; + mov cr.iip=r20;; + // OK, now all set to go except for switch to virtual bank0 + mov r30=r2; mov r29=r3;; +#ifdef HANDLE_AR_UNAT + mov r28=ar.unat; +#endif + adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; + adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; + bsw.1;; + .mem.offset 0,0; st8.spill [r2]=r16,16; + .mem.offset 8,0; st8.spill [r3]=r17,16 ;; + .mem.offset 0,0; st8.spill [r2]=r18,16; + .mem.offset 8,0; st8.spill [r3]=r19,16 ;; + .mem.offset 0,0; st8.spill [r2]=r20,16; + .mem.offset 8,0; st8.spill [r3]=r21,16 ;; + .mem.offset 0,0; st8.spill [r2]=r22,16; + .mem.offset 8,0; st8.spill [r3]=r23,16 ;; + .mem.offset 0,0; st8.spill [r2]=r24,16; + .mem.offset 8,0; st8.spill [r3]=r25,16 ;; + .mem.offset 0,0; st8.spill [r2]=r26,16; + .mem.offset 8,0; st8.spill [r3]=r27,16 ;; + .mem.offset 0,0; st8.spill [r2]=r28,16; + .mem.offset 8,0; st8.spill [r3]=r29,16 ;; + .mem.offset 0,0; st8.spill [r2]=r30,16; + .mem.offset 8,0; st8.spill [r3]=r31,16 ;; +#ifdef HANDLE_AR_UNAT + // bank0 regs have no NaT bit, so ensure they are NaT clean + mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0; + mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0; + mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0; + mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;; +#endif + movl r31=XSI_IPSR;; + bsw.0 ;; + mov r2=r30; mov r3=r29;; +#ifdef HANDLE_AR_UNAT + mov ar.unat=r28; +#endif + adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r20]=r0 ;; + mov pr=r31,-1 ;; + rfi + ;; + +// reflect access faults (0x2400,0x2800,0x5300) directly to domain +// r16 == isr +// r17 == ifa +// r19 == reflect number (only pass-thru to dispatch_reflection) +// r20 == offset into ivt +// r31 == pr +GLOBAL_ENTRY(fast_access_reflect) 
+#ifndef FAST_ACCESS_REFLECT // see beginning of file + br.spnt.few dispatch_reflection ;; +#endif + mov r30=cr.ipsr;; + mov r29=cr.iip;; + extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; + cmp.ne p7,p0=r21,r0 +(p7) br.spnt.few dispatch_reflection ;; + extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; + cmp.ne p7,p0=r21,r0 +(p7) br.spnt.few dispatch_reflection ;; + extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;; + cmp.eq p7,p0=r21,r0 +(p7) br.spnt.few dispatch_reflection ;; + movl r18=XSI_PSR_IC;; + ld8 r21=[r18];; + cmp.eq p7,p0=r0,r21 +(p7) br.spnt.few dispatch_reflection ;; + // set shared_mem ifa, FIXME: should we validate it? + mov r17=cr.ifa;; + adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r21]=r17 ;; + // get rr[ifa] and save to itir in shared memory (extra bits ignored) + shr.u r22=r17,61 + adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 + adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; + shladd r22=r22,3,r21;; + ld8 r22=[r22];; + st8 [r23]=r22;; + br.cond.sptk.many fast_reflect;; + + +// ensure that, if giving up, registers at entry to fast_hyperprivop unchanged +ENTRY(hyper_rfi) +#ifndef FAST_RFI + br.spnt.few dispatch_break_fault ;; +#endif + // if no interrupts pending, proceed + mov r30=r0 + cmp.eq p7,p0=r20,r0 +(p7) br.sptk.many 1f + ;; + adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r21=[r20];; // r21 = vcr.ipsr + extr.u r22=r21,IA64_PSR_I_BIT,1 ;; + mov r30=r22 + // r30 determines whether we might deliver an immediate extint +1: + adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r21=[r20];; // r21 = vcr.ipsr + extr.u r22=r21,IA64_PSR_BE_BIT,1 ;; + // if turning on psr.be, give up for now and do it the slow way + cmp.ne p7,p0=r22,r0 +(p7) br.spnt.few dispatch_break_fault ;; + // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way + movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);; + and r22=r20,r21 + ;; + cmp.ne p7,p0=r22,r20 +(p7) br.spnt.few dispatch_break_fault ;; + // if was in metaphys mode, do it the slow way (FIXME later?) + adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;; + ld4 r20=[r20];; + cmp.ne p7,p0=r20,r0 +(p7) br.spnt.few dispatch_break_fault ;; + // if domain hasn't already done virtual bank switch + // do it the slow way (FIXME later?) +#if 0 + adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; + ld4 r20=[r20];; + cmp.eq p7,p0=r20,r0 +(p7) br.spnt.few dispatch_break_fault ;; +#endif + // validate vcr.iip, if in Xen range, do it the slow way + adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r22=[r20];; + movl r23=XEN_VIRT_SPACE_LOW + movl r24=XEN_VIRT_SPACE_HIGH ;; + cmp.ltu p0,p7=r22,r23 ;; // if !(iip<low) && +(p7) cmp.geu p0,p7=r22,r24 ;; // !(iip>=high) +(p7) br.spnt.few dispatch_break_fault ;; +#ifndef RFI_TO_INTERRUPT // see beginning of file + cmp.ne p6,p0=r30,r0 +(p6) br.cond.spnt.few dispatch_break_fault ;; +#endif + +1: // OK now, let's do an rfi. +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);; + ld8 r23=[r20];; + adds r23=1,r23;; + st8 [r20]=r23;; +#endif +#ifdef RFI_TO_INTERRUPT + // maybe do an immediate interrupt delivery? 
+ cmp.ne p6,p0=r30,r0 +(p6) br.cond.spnt.few rfi_check_extint;; +#endif + +just_do_rfi: + // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip + mov cr.iip=r22;; + adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r20]=r0 ;; + adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r20=[r20];; + dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set + mov cr.ifs=r20 ;; + // ipsr.cpl == (vcr.ipsr.cpl == 0) 2 : 3; + dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;; + // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic + mov r19=r0 ;; + extr.u r23=r21,IA64_PSR_I_BIT,1 ;; + cmp.ne p7,p6=r23,r0 ;; + // not done yet +(p7) dep r19=-1,r19,32,1 + extr.u r23=r21,IA64_PSR_IC_BIT,1 ;; + cmp.ne p7,p6=r23,r0 ;; +(p7) dep r19=-1,r19,0,1 ;; + st8 [r18]=r19 ;; + // force on psr.ic, i, dt, rt, it, bn + movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT|IA64_PSR_BN) + ;; + or r21=r21,r20 + ;; + mov cr.ipsr=r21 + adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; + ld4 r21=[r20];; + cmp.ne p7,p0=r21,r0 // domain already did "bank 1 switch?" +(p7) br.cond.spnt.few 1f; + // OK, now all set to go except for switch to virtual bank1 + mov r22=1;; st4 [r20]=r22; + mov r30=r2; mov r29=r3;; + adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; + adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; + bsw.1;; + // FIXME?: ar.unat is not really handled correctly, + // but may not matter if the OS is NaT-clean + .mem.offset 0,0; ld8.fill r16=[r2],16 ; + .mem.offset 8,0; ld8.fill r17=[r3],16 ;; + .mem.offset 0,0; ld8.fill r18=[r2],16 ; + .mem.offset 0,0; ld8.fill r19=[r3],16 ;; + .mem.offset 8,0; ld8.fill r20=[r2],16 ; + .mem.offset 8,0; ld8.fill r21=[r3],16 ;; + .mem.offset 8,0; ld8.fill r22=[r2],16 ; + .mem.offset 8,0; ld8.fill r23=[r3],16 ;; + .mem.offset 8,0; ld8.fill r24=[r2],16 ; + .mem.offset 8,0; ld8.fill r25=[r3],16 ;; + .mem.offset 8,0; ld8.fill r26=[r2],16 ; + .mem.offset 8,0; ld8.fill r27=[r3],16 ;; + .mem.offset 8,0; ld8.fill r28=[r2],16 ; + .mem.offset 8,0; ld8.fill r29=[r3],16 ;; + .mem.offset 8,0; ld8.fill r30=[r2],16 ; + .mem.offset 8,0; ld8.fill r31=[r3],16 ;; + bsw.0 ;; + mov r2=r30; mov r3=r29;; +1: mov pr=r31,-1 + ;; + rfi + ;; + +#ifdef RFI_TO_INTERRUPT +GLOBAL_ENTRY(rfi_check_extint) + //br.sptk.many dispatch_break_fault ;; + + // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip + // make sure none of these get trashed in case going to just_do_rfi + movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r30=[r30];; + adds r24=IA64_VCPU_INSVC3_OFFSET,r30;; + mov r25=192 + adds r16=IA64_VCPU_IRR3_OFFSET,r30;; + ld8 r23=[r16];; + cmp.eq p6,p0=r23,r0;; +(p6) adds r16=-8,r16;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r16];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r16=-8,r16;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r16];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r16=-8,r16;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r16];; +(p6) cmp.eq p6,p0=r23,r0;; + cmp.eq p6,p0=r23,r0 +(p6) br.cond.spnt.few just_do_rfi; // this is actually an error + // r16 points to non-zero element of irr, r23 has value + // r24 points to corr element of insvc, r25 has elt*64 + ld8 r26=[r24];; + cmp.geu p6,p0=r26,r23 +(p6) br.cond.spnt.many just_do_rfi; + + // not masked by insvc, get vector number + shr.u r26=r23,1;; + or r26=r23,r26;; + shr.u r27=r26,2;; + or r26=r26,r27;; + shr.u r27=r26,4;; + or r26=r26,r27;; + shr.u r27=r26,8;; + or r26=r26,r27;; + shr.u r27=r26,16;; + or r26=r26,r27;; + shr.u r27=r26,32;; + or r26=r26,r27;; + andcm r26=0xffffffffffffffff,r26;; + popcnt r26=r26;; + 
sub r26=63,r26;; + // r26 now contains the bit index (mod 64) + mov r27=1;; + shl r27=r27,r26;; + // r27 now contains the (within the proper word) bit mask + add r26=r25,r26 + // r26 now contains the vector [0..255] + adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r20=[r20] ;; + extr.u r28=r20,16,1 + extr.u r29=r20,4,4 ;; + cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, just rfi +(p6) br.cond.spnt.few just_do_rfi;; + shl r29=r29,4;; + adds r29=15,r29;; + cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi +(p6) br.cond.spnt.few just_do_rfi;; + +// this doesn't work yet (dies early after getting to user mode) +// but happens relatively infrequently, so fix it later. +// NOTE that these will be counted incorrectly for now (for privcnt output) +GLOBAL_ENTRY(rfi_with_interrupt) +#if 1 + br.sptk.many dispatch_break_fault ;; +#endif + + // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000 + // r18 == XSI_PSR_IC + // r21 == vipsr (ipsr in shared_mem) + // r30 == IA64_KR(CURRENT) + // r31 == pr + mov r17=cr.ipsr;; + mov r16=cr.isr;; + // set shared_mem isr + extr.u r16=r16,38,1;; // grab cr.isr.ir bit + dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero) + extr.u r20=r21,41,2 ;; // get v(!)psr.ri + dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei + adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r22]=r16 ;; + // set cr.ipsr (make sure cpl==2!) + mov r29=r17 ;; + movl r28=DELIVER_PSR_SET;; + movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);; + or r29=r29,r28;; + and r29=r29,r27;; + mov cr.ipsr=r29;; + // v.ipsr and v.iip are already set (and v.iip validated) as rfi target + // set shared_mem interrupt_delivery_enabled to 0 + // set shared_mem interrupt_collection_enabled to 0 + st8 [r18]=r0;; + // cover and set shared_mem precover_ifs to cr.ifs + // set shared_mem ifs and incomplete_regframe to 0 +#if 0 + cover ;; + mov r20=cr.ifs;; + adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r22]=r0 ;; + adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r22]=r0 ;; + adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r22]=r20 ;; + // leave cr.ifs alone for later rfi +#else + adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r22]=r0 ;; + adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r20=[r22];; + st8 [r22]=r0 ;; + adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r22]=r20 ;; +#endif + // set iip to go to domain IVA break instruction vector + adds r22=IA64_VCPU_IVA_OFFSET,r30;; + ld8 r23=[r22];; + movl r24=0x3000;; + add r24=r24,r23;; + mov cr.iip=r24;; +#if 0 + // OK, now all set to go except for switch to virtual bank0 + mov r30=r2; mov r29=r3;; + adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; + adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; + bsw.1;; + // FIXME: need to handle ar.unat! 
+ .mem.offset 0,0; st8.spill [r2]=r16,16; + .mem.offset 8,0; st8.spill [r3]=r17,16 ;; + .mem.offset 0,0; st8.spill [r2]=r18,16; + .mem.offset 8,0; st8.spill [r3]=r19,16 ;; + .mem.offset 0,0; st8.spill [r2]=r20,16; + .mem.offset 8,0; st8.spill [r3]=r21,16 ;; + .mem.offset 0,0; st8.spill [r2]=r22,16; + .mem.offset 8,0; st8.spill [r3]=r23,16 ;; + .mem.offset 0,0; st8.spill [r2]=r24,16; + .mem.offset 8,0; st8.spill [r3]=r25,16 ;; + .mem.offset 0,0; st8.spill [r2]=r26,16; + .mem.offset 8,0; st8.spill [r3]=r27,16 ;; + .mem.offset 0,0; st8.spill [r2]=r28,16; + .mem.offset 8,0; st8.spill [r3]=r29,16 ;; + .mem.offset 0,0; st8.spill [r2]=r30,16; + .mem.offset 8,0; st8.spill [r3]=r31,16 ;; + movl r31=XSI_IPSR;; + bsw.0 ;; + mov r2=r30; mov r3=r29;; +#else + bsw.1;; + movl r31=XSI_IPSR;; + bsw.0 ;; +#endif + adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; + st4 [r20]=r0 ;; + mov pr=r31,-1 ;; + rfi +#endif // RFI_TO_INTERRUPT + +ENTRY(hyper_cover) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + mov r24=cr.ipsr + mov r25=cr.iip;; + // skip test for vpsr.ic.. it's a prerequisite for hyperprivops + cover ;; + adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; + mov r30=cr.ifs;; + adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 + ld4 r21=[r20] ;; + cmp.eq p6,p7=r21,r0 ;; +(p6) st8 [r22]=r30;; +(p7) st4 [r20]=r0;; + mov cr.ifs=r0;; + // adjust return address to skip over break instruction + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; + +// return from metaphysical mode (meta=1) to virtual mode (meta=0) +ENTRY(hyper_ssm_dt) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + mov r24=cr.ipsr + mov r25=cr.iip;; + adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;; + ld4 r21=[r20];; + cmp.eq p7,p0=r21,r0 // meta==0? +(p7) br.spnt.many 1f ;; // already in virtual mode + movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r22=[r22];; + adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;; + ld4 r23=[r22];; + mov rr[r0]=r23;; + srlz.i;; + st4 [r20]=r0 ;; + // adjust return address to skip over break instruction +1: extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; + +// go to metaphysical mode (meta=1) from virtual mode (meta=0) +ENTRY(hyper_rsm_dt) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + mov r24=cr.ipsr + mov r25=cr.iip;; + adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;; + ld4 r21=[r20];; + cmp.ne p7,p0=r21,r0 // meta==0? 
+(p7) br.spnt.many 1f ;; // already in metaphysical mode + movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r22=[r22];; + adds r22=IA64_VCPU_META_RR0_OFFSET,r22;; + ld4 r23=[r22];; + mov rr[r0]=r23;; + srlz.i;; + adds r21=1,r0 ;; + st4 [r20]=r21 ;; + // adjust return address to skip over break instruction +1: extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; + +ENTRY(hyper_get_tpr) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + mov r24=cr.ipsr + mov r25=cr.iip;; + adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r8=[r20];; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_get_tpr) + +// if we get to here, there are no interrupts pending so we +// can change virtual tpr to any value without fear of provoking +// (or accidentally missing) delivering an interrupt +ENTRY(hyper_set_tpr) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + mov r24=cr.ipsr + mov r25=cr.iip;; + movl r27=0xff00;; + adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; + andcm r8=r8,r27;; + st8 [r20]=r8;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_set_tpr) + +ENTRY(hyper_get_ivr) +#ifdef FAST_HYPERPRIVOP_CNT + movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);; + ld8 r21=[r22];; + adds r21=1,r21;; + st8 [r22]=r21;; +#endif + mov r8=15;; + // when we get to here r20=~=interrupts pending + cmp.eq p7,p0=r20,r0;; +(p7) adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;; +(p7) st4 [r20]=r0;; +(p7) br.spnt.many 1f ;; + movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r30=[r30];; + adds r24=IA64_VCPU_INSVC3_OFFSET,r30;; + mov r25=192 + adds r22=IA64_VCPU_IRR3_OFFSET,r30;; + ld8 r23=[r22];; + cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; + cmp.eq p6,p0=r23,r0 +(p6) br.cond.spnt.few 1f; // this is actually an error + // r22 points to non-zero element of irr, r23 has value + // r24 points to corr element of insvc, r25 has elt*64 + ld8 r26=[r24];; + cmp.geu p6,p0=r26,r23 +(p6) br.cond.spnt.many 1f; + // not masked by insvc, get vector number + shr.u r26=r23,1;; + or r26=r23,r26;; + shr.u r27=r26,2;; + or r26=r26,r27;; + shr.u r27=r26,4;; + or r26=r26,r27;; + shr.u r27=r26,8;; + or r26=r26,r27;; + shr.u r27=r26,16;; + or r26=r26,r27;; + shr.u r27=r26,32;; + or r26=r26,r27;; + andcm r26=0xffffffffffffffff,r26;; + popcnt r26=r26;; + sub r26=63,r26;; + // r26 now contains the bit index (mod 64) + mov r27=1;; + shl r27=r27,r26;; + // r27 now contains the (within the proper word) bit mask + add r26=r25,r26 + // r26 now contains the vector [0..255] + adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r20=[r20] ;; + extr.u 
r28=r20,16,1 + extr.u r29=r20,4,4 ;; + cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, return SPURIOUS +(p6) br.cond.spnt.few 1f; + shl r29=r29,4;; + adds r29=15,r29;; + cmp.ge p6,p0=r29,r26 +(p6) br.cond.spnt.few 1f; + // OK, have an unmasked vector to process/return + ld8 r25=[r24];; + or r25=r25,r27;; + st8 [r24]=r25;; + ld8 r25=[r22];; + andcm r25=r25,r27;; + st8 [r22]=r25;; + mov r8=r26;; + // if its a clock tick, remember itm to avoid delivering it twice + adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r20=[r20];; + extr.u r20=r20,0,8;; + cmp.eq p6,p0=r20,r8 + adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r30 + adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r30;; + ld8 r23=[r23];; +(p6) st8 [r22]=r23;; + // all done +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_get_ivr) + +ENTRY(hyper_eoi) + // when we get to here r20=~=interrupts pending + cmp.ne p7,p0=r20,r0 +(p7) br.spnt.many dispatch_break_fault ;; +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r22=[r22];; + adds r22=IA64_VCPU_INSVC3_OFFSET,r22;; + ld8 r23=[r22];; + cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; + cmp.eq p6,p0=r23,r0 +(p6) br.cond.spnt.few 1f; // this is actually an error + // r22 points to non-zero element of insvc, r23 has value + shr.u r24=r23,1;; + or r24=r23,r24;; + shr.u r25=r24,2;; + or r24=r24,r25;; + shr.u r25=r24,4;; + or r24=r24,r25;; + shr.u r25=r24,8;; + or r24=r24,r25;; + shr.u r25=r24,16;; + or r24=r24,r25;; + shr.u r25=r24,32;; + or r24=r24,r25;; + andcm r24=0xffffffffffffffff,r24;; + popcnt r24=r24;; + sub r24=63,r24;; + // r24 now contains the bit index + mov r25=1;; + shl r25=r25,r24;; + andcm r23=r23,r25;; + st8 [r22]=r23;; +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_eoi) + +ENTRY(hyper_set_itm) + // when we get to here r20=~=interrupts pending + cmp.ne p7,p0=r20,r0 +(p7) br.spnt.many dispatch_break_fault ;; +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;; + ld8 r21=[r20];; + movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r20=[r20];; + adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;; + st8 [r20]=r8;; + cmp.geu p6,p0=r21,r8;; +(p6) mov r21=r8;; + // now "safe set" cr.itm=r21 + mov r23=100;; +2: mov cr.itm=r21;; + srlz.d;; + mov r22=ar.itc ;; + cmp.leu p6,p0=r21,r22;; + add r21=r21,r23;; + shl r23=r23,1;; +(p6) br.cond.spnt.few 2b;; +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_set_itm) + +ENTRY(hyper_get_rr) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);; 
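
hyper_get_ivr and hyper_eoi above locate the highest set bit of a 64-bit irr/insvc word with the same shift-and-or smear followed by popcnt, since the fast path avoids a byte-by-byte scan. A hypothetical C equivalent, using the GCC builtin for the population count:

    static unsigned int highest_set_bit(unsigned long x)   /* x must be nonzero */
    {
        x |= x >> 1;                    /* smear the most significant set bit ... */
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
        x |= x >> 32;                   /* ... down through every lower position  */
        return 63 - __builtin_popcountl(~x);    /* count the zeros above the MSB  */
    }

hyper_get_ivr scans irr3 down to irr0, so the delivered vector is the word's base (192, 128, 64 or 0) plus highest_set_bit(word); hyper_eoi runs the same scan over insvc to find the in-service bit to clear.
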
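
The "safe set" loop in hyper_set_itm above guards against arming cr.itm with a value the interval timer counter has already passed, which would lose the tick. A hypothetical C sketch, assuming the usual ia64_set_itm()/ia64_get_itc() helpers:

    static void safe_set_itm(unsigned long target)
    {
        unsigned long delta = 100;

        for (;;) {
            ia64_set_itm(target);               /* arm the match register          */
            if (ia64_get_itc() < target)        /* still ahead of the counter?     */
                break;
            target += delta;                    /* already passed: push it out ... */
            delta <<= 1;                        /* ... with a doubling margin      */
        }
    }
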
+ ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + extr.u r25=r8,61,3;; + adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; + shl r25=r25,3;; + add r20=r20,r25;; + ld8 r8=[r20];; +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_get_rr) + +ENTRY(hyper_set_rr) + extr.u r25=r8,61,3;; + cmp.leu p7,p0=7,r25 // punt on setting rr7 +(p7) br.spnt.many dispatch_break_fault ;; +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + extr.u r26=r9,8,24 // r26 = r9.rid + movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r20=[r20];; + adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;; + ld4 r22=[r21];; + adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;; + ld4 r23=[r21];; + adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;; + add r22=r26,r22;; + cmp.geu p6,p0=r22,r23 // if r9.rid + starting_rid >= ending_rid +(p6) br.cond.spnt.few 1f; // this is an error, but just ignore/return + // r21=starting_rid + adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; + shl r25=r25,3;; + add r20=r20,r25;; + st8 [r20]=r9;; // store away exactly what was passed + // but adjust value actually placed in rr[r8] + // r22 contains adjusted rid, "mangle" it (see regionreg.c) + // and set ps to PAGE_SHIFT and ve to 1 + extr.u r27=r22,0,8 + extr.u r28=r22,8,8 + extr.u r29=r22,16,8;; + dep.z r23=PAGE_SHIFT,2,6;; + dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3 + dep r23=r27,r23,24,8;; + dep r23=r28,r23,16,8;; + dep r23=r29,r23,8,8 + cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical +(p6) st4 [r24]=r23 + mov rr[r8]=r23;; + // done, mosey on back +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_set_rr) + +// this routine was derived from optimized assembly output from +// vcpu_thash so it is dense and difficult to read but it works +// On entry: +// r18 == XSI_PSR_IC +// r31 == pr +GLOBAL_ENTRY(hyper_thash) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + shr.u r20 = r8, 61 + addl r25 = 1, r0 + movl r17 = 0xe000000000000000 + ;; + and r21 = r17, r8 // VHPT_Addr1 + ;; + shladd r28 = r20, 3, r18 + adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18 + ;; + adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28 + addl r28 = 32767, r0 + ld8 r24 = [r19] // pta + ;; + ld8 r23 = [r27] // rrs[vadr>>61] + extr.u r26 = r24, 2, 6 + ;; + extr.u r22 = r23, 2, 6 + shl r30 = r25, r26 + ;; + shr.u r19 = r8, r22 + shr.u r29 = r24, 15 + ;; + adds r17 = -1, r30 + ;; + shladd r27 = r19, 3, r0 + extr.u r26 = r17, 15, 46 + ;; + andcm r24 = r29, r26 + and r19 = r28, r27 + shr.u r25 = r27, 15 + ;; + and r23 = r26, r25 + ;; + or r22 = r24, r23 + ;; + dep.z r20 = r22, 15, 46 + ;; + or r16 = r20, r21 + ;; + or r8 = r19, r16 + // done, update iip/ipsr to next instruction + mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_thash) + +ENTRY(hyper_ptc_ga) +#ifndef FAST_PTC_GA + br.spnt.few 
dispatch_break_fault ;; +#endif + // FIXME: validate not flushing Xen addresses +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + mov r28=r8 + extr.u r19=r9,2,6 // addr_range=1<<((r9&0xfc)>>2) + mov r20=1 + shr.u r24=r8,61 + addl r27=56,r0 // PAGE_SHIFT<<2 (for ptc.ga) + movl r26=0x8000000000000000 // INVALID_TI_TAG + mov r30=ar.lc + ;; + shl r19=r20,r19 + cmp.eq p7,p0=7,r24 +(p7) br.spnt.many dispatch_break_fault ;; // slow way for rr7 + ;; + cmp.le p7,p0=r19,r0 // skip flush if size<=0 +(p7) br.cond.dpnt 2f ;; + extr.u r24=r19,0,PAGE_SHIFT + shr.u r23=r19,PAGE_SHIFT ;; // repeat loop for n pages + cmp.ne p7,p0=r24,r0 ;; +(p7) adds r23=1,r23 ;; // n_pages<size<n_pages+1? extra iter + mov ar.lc=r23 + movl r29=PAGE_SIZE;; +1: + thash r25=r28 ;; + adds r25=16,r25 ;; + ld8 r24=[r25] ;; + // FIXME: should check if tag matches, not just blow it away + or r24=r26,r24 ;; // vhpt_entry->ti_tag = 1 + st8 [r25]=r24 + ptc.ga r28,r27 ;; + srlz.i ;; + add r28=r29,r28 + br.cloop.sptk.few 1b + ;; +2: + mov ar.lc=r30 ;; + mov r29=cr.ipsr + mov r30=cr.iip;; + movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r27=[r27];; + adds r25=IA64_VCPU_DTLB_OFFSET,r27 + adds r26=IA64_VCPU_ITLB_OFFSET,r27;; + ld8 r24=[r25] + ld8 r27=[r26] ;; + and r24=-2,r24 + and r27=-2,r27 ;; + st8 [r25]=r24 // set 1-entry i/dtlb as not present + st8 [r26]=r27 ;; + // increment to point to next instruction + extr.u r26=r29,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r30=16,r30 +(p7) adds r26=1,r26 + ;; + dep r29=r26,r29,41,2 + ;; + mov cr.ipsr=r29 + mov cr.iip=r30 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_ptc_ga) + +ENTRY(hyper_itc_d) + br.spnt.many dispatch_break_fault ;; +END(hyper_itc_d) + +ENTRY(hyper_itc_i) + br.spnt.many dispatch_break_fault ;; +END(hyper_itc_i) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/idle0_task.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/idle0_task.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,58 @@ +#include <xen/config.h> +#include <xen/sched.h> +#include <asm/desc.h> + +#define INIT_MM(name) \ +{ \ + .pgd = swapper_pg_dir, \ + .mm_users = ATOMIC_INIT(2), \ + .mm_count = ATOMIC_INIT(1), \ + .page_table_lock = SPIN_LOCK_UNLOCKED, \ + .mmlist = LIST_HEAD_INIT(name.mmlist), \ +} + +#define IDLE0_EXEC_DOMAIN(_ed,_d) \ +{ \ + processor: 0, \ + mm: 0, \ + thread: INIT_THREAD, \ + domain: (_d) \ +} + +#define IDLE0_DOMAIN(_t) \ +{ \ + domain_id: IDLE_DOMAIN_ID, \ + domain_flags:DOMF_idle_domain, \ + refcnt: ATOMIC_INIT(1) \ +} + +struct mm_struct init_mm = INIT_MM(init_mm); +EXPORT_SYMBOL(init_mm); + +struct domain idle0_domain = IDLE0_DOMAIN(idle0_domain); +#if 0 +struct vcpu idle0_vcpu = IDLE0_EXEC_DOMAIN(idle0_vcpu, + &idle0_domain); +#endif + + +/* + * Initial task structure. + * + * We need to make sure that this is properly aligned due to the way process stacks are + * handled. This is done by having a special ".data.init_task" section... 
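
The value hyper_set_rr above actually loads into rr[r8] is not the guest's: the rid is rebased onto the domain's allocated range, its bytes are swapped per the "mangling" convention noted in regionreg.c, and ps/ve are forced. A hypothetical C sketch, with PAGE_SHIFT taken from the surrounding headers:

    static unsigned long build_region_reg(unsigned long guest_rr,
                                          unsigned long starting_rid)
    {
        unsigned long rid = ((guest_rr >> 8) & 0xffffff) + starting_rid;
        unsigned long b0 = rid & 0xff;
        unsigned long b1 = (rid >> 8) & 0xff;
        unsigned long b2 = (rid >> 16) & 0xff;
        unsigned long mangled = (b0 << 16) | (b1 << 8) | b2;  /* swap bytes 1 and 3 */

        return (mangled << 8)           /* rid field, bits 8..31              */
             | (PAGE_SHIFT << 2)        /* preferred page size = PAGE_SHIFT   */
             | 1;                       /* ve = 1                             */
    }
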
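
hyper_thash above is, as its own comment admits, dense. The long-format VHPT hash it computes can be written out as a hypothetical C sketch, assuming pta.size in bits 2-7 with the base above bit 15 and rr.ps in bits 2-7:

    static unsigned long vhpt_hash(unsigned long vadr, unsigned long pta,
                                   unsigned long rr)
    {
        unsigned long ps   = (rr  >> 2) & 0x3f;         /* region page size         */
        unsigned long size = (pta >> 2) & 0x3f;         /* VHPT spans 2^size bytes  */
        unsigned long voff = (vadr >> ps) << 3;         /* 8-byte entry offset      */
        unsigned long mask = ((1UL << size) - 1) >> 15; /* hash window above bit 15 */
        unsigned long hi   = ((pta >> 15) & ~mask) | ((voff >> 15) & mask);

        return (vadr & 0xe000000000000000UL)            /* keep region bits 61-63   */
             | ((hi & ((1UL << 46) - 1)) << 15)
             | (voff & 0x7fff);
    }
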
+ */ +union { + struct { + struct domain task; + } s; + unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; +} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))); +// = {{ + ; +//.task = IDLE0_EXEC_DOMAIN(init_task_mem.s.task,&idle0_domain), +//}; +//}; + +EXPORT_SYMBOL(init_task); + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/irq.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/irq.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1503 @@ +/* + * linux/arch/ia64/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + * + * Copyright (C) Ashok Raj<ashok.raj@xxxxxxxxx>, Intel Corporation 2004 + * + * 4/14/2004: Added code to handle cpu migration and do safe irq + * migration without lossing interrupts for iosapic + * architecture. + */ + +/* + * (mostly architecture independent, will move to kernel/irq.c in 2.5.) + * + * IRQs are in fact implemented a bit like signal handlers for the kernel. + * Naturally it's not a 1:1 relation, but there are similarities. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/module.h> +#ifndef XEN +#include <linux/signal.h> +#endif +#include <linux/sched.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/timex.h> +#include <linux/slab.h> +#ifndef XEN +#include <linux/random.h> +#include <linux/cpu.h> +#endif +#include <linux/ctype.h> +#ifndef XEN +#include <linux/smp_lock.h> +#endif +#include <linux/init.h> +#ifndef XEN +#include <linux/kernel_stat.h> +#endif +#include <linux/irq.h> +#ifndef XEN +#include <linux/proc_fs.h> +#endif +#include <linux/seq_file.h> +#ifndef XEN +#include <linux/kallsyms.h> +#include <linux/notifier.h> +#endif + +#include <asm/atomic.h> +#ifndef XEN +#include <asm/cpu.h> +#endif +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/system.h> +#include <asm/bitops.h> +#include <asm/uaccess.h> +#include <asm/pgalloc.h> +#ifndef XEN +#include <asm/tlbflush.h> +#endif +#include <asm/delay.h> +#include <asm/irq.h> + +#ifdef XEN +#include <xen/event.h> +#define _irq_desc irq_desc +#define irq_descp(irq) &irq_desc[irq] +#define apicid_to_phys_cpu_present(x) 1 +#endif + + +/* + * Linux has a controller-independent x86 interrupt architecture. + * every controller has a 'controller-template', that is used + * by the main code to do the right thing. Each driver-visible + * interrupt source is transparently wired to the appropriate + * controller. Thus drivers need not be aware of the + * interrupt-controller. + * + * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, + * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. + * (IO-APICs assumed to be messaging to Pentium local-APICs) + * + * the code is designed to be easily extended with new/different + * interrupt controllers, without having to do assembly magic. + */ + +/* + * Controller mappings for all interrupt sources: + */ +irq_desc_t _irq_desc[NR_IRQS] __cacheline_aligned = { + [0 ... 
NR_IRQS-1] = { + .status = IRQ_DISABLED, + .handler = &no_irq_type, + .lock = SPIN_LOCK_UNLOCKED + } +}; + +/* + * This is updated when the user sets irq affinity via /proc + */ +cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; + +#ifdef CONFIG_IA64_GENERIC +irq_desc_t * __ia64_irq_desc (unsigned int irq) +{ + return _irq_desc + irq; +} + +ia64_vector __ia64_irq_to_vector (unsigned int irq) +{ + return (ia64_vector) irq; +} + +unsigned int __ia64_local_vector_to_irq (ia64_vector vec) +{ + return (unsigned int) vec; +} +#endif + +static void register_irq_proc (unsigned int irq); + +/* + * Special irq handlers. + */ + +#ifdef XEN +void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } +#else +irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) +{ return IRQ_NONE; } +#endif + +/* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves, it doesn't deserve + * a generic callback i think. + */ +#ifdef CONFIG_X86 + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + */ + ack_APIC_irq(); +#endif +#endif +#ifdef CONFIG_IA64 + printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); +#endif +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +atomic_t irq_err_count; +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG +atomic_t irq_mis_count; +#endif +#endif + +/* + * Generic, controller-independent functions: + */ + +#ifndef XEN +int show_interrupts(struct seq_file *p, void *v) +{ + int j, i = *(loff_t *) v; + struct irqaction * action; + irq_desc_t *idesc; + unsigned long flags; + + if (i == 0) { + seq_puts(p, " "); + for (j=0; j<NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "CPU%d ",j); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + idesc = irq_descp(i); + spin_lock_irqsave(&idesc->lock, flags); + action = idesc->action; + if (!action) + goto skip; + seq_printf(p, "%3d: ",i); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +#endif + seq_printf(p, " %14s", idesc->handler->typename); + seq_printf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); +skip: + spin_unlock_irqrestore(&idesc->lock, flags); + } else if (i == NR_IRQS) { + seq_puts(p, "NMI: "); + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", nmi_count(j)); + seq_putc(p, '\n'); +#ifdef CONFIG_X86_LOCAL_APIC + seq_puts(p, "LOC: "); + for (j = 0; j < NR_CPUS; j++) + if (cpu_online(j)) + seq_printf(p, "%10u ", 
irq_stat[j].apic_timer_irqs); + seq_putc(p, '\n'); +#endif + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#ifdef CONFIG_X86_IO_APIC +#ifdef APIC_MISMATCH_DEBUG + seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif +#endif + } + return 0; +} +#endif + +#ifdef CONFIG_SMP +inline void synchronize_irq(unsigned int irq) +{ +#ifndef XEN + struct irq_desc *desc = irq_desc + irq; + + while (desc->status & IRQ_INPROGRESS) + cpu_relax(); +#endif +} +EXPORT_SYMBOL(synchronize_irq); +#endif + +/* + * This should really return information about whether + * we should do bottom half handling etc. Right now we + * end up _always_ checking the bottom half, which is a + * waste of time and is not what some drivers would + * prefer. + */ +int handle_IRQ_event(unsigned int irq, + struct pt_regs *regs, struct irqaction *action) +{ + int status = 1; /* Force the "do bottom halves" bit */ + int retval = 0; + +#ifndef XEN + if (!(action->flags & SA_INTERRUPT)) +#endif + local_irq_enable(); + +#ifdef XEN + action->handler(irq, action->dev_id, regs); +#else + do { + status |= action->flags; + retval |= action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); +#endif + local_irq_disable(); + return retval; +} + +#ifndef XEN +static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret) +{ + struct irqaction *action; + + if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) { + printk(KERN_ERR "irq event %d: bogus return value %x\n", + irq, action_ret); + } else { + printk(KERN_ERR "irq %d: nobody cared!\n", irq); + } + dump_stack(); + printk(KERN_ERR "handlers:\n"); + action = desc->action; + do { + printk(KERN_ERR "[<%p>]", action->handler); + print_symbol(" (%s)", + (unsigned long)action->handler); + printk("\n"); + action = action->next; + } while (action); +} + +static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret) +{ + static int count = 100; + + if (count) { + count--; + __report_bad_irq(irq, desc, action_ret); + } +} +#endif + +static int noirqdebug; + +static int __init noirqdebug_setup(char *str) +{ + noirqdebug = 1; + printk("IRQ lockup detection disabled\n"); + return 1; +} + +__setup("noirqdebug", noirqdebug_setup); + +/* + * If 99,900 of the previous 100,000 interrupts have not been handled then + * assume that the IRQ is stuck in some manner. Drop a diagnostic and try to + * turn the IRQ off. + * + * (The other 100-of-100,000 interrupts may have been a correctly-functioning + * device sharing an IRQ with the failing one) + * + * Called under desc->lock + */ +#ifndef XEN +static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret) +{ + if (action_ret != IRQ_HANDLED) { + desc->irqs_unhandled++; + if (action_ret != IRQ_NONE) + report_bad_irq(irq, desc, action_ret); + } + + desc->irq_count++; + if (desc->irq_count < 100000) + return; + + desc->irq_count = 0; + if (desc->irqs_unhandled > 99900) { + /* + * The interrupt is stuck + */ + __report_bad_irq(irq, desc, action_ret); + /* + * Now kill the IRQ + */ + printk(KERN_EMERG "Disabling IRQ #%d\n", irq); + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); + } + desc->irqs_unhandled = 0; +} +#endif + +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. 
+ */ + +/** + * disable_irq_nosync - disable an irq without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and Enables are + * nested. + * Unlike disable_irq(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + * + * This function may be called from IRQ context. + */ + +inline void disable_irq_nosync(unsigned int irq) +{ + irq_desc_t *desc = irq_descp(irq); + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); + } + spin_unlock_irqrestore(&desc->lock, flags); +} +EXPORT_SYMBOL(disable_irq_nosync); + +/** + * disable_irq - disable an irq and wait for completion + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Enables and Disables are + * nested. + * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * This function may be called - with care - from IRQ context. + */ + +void disable_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_descp(irq); + + disable_irq_nosync(irq); + if (desc->action) + synchronize_irq(irq); +} +EXPORT_SYMBOL(disable_irq); + +/** + * enable_irq - enable handling of an irq + * @irq: Interrupt to enable + * + * Undoes the effect of one call to disable_irq(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + * + * This function may be called from IRQ context. + */ + +void enable_irq(unsigned int irq) +{ + irq_desc_t *desc = irq_descp(irq); + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { + case 1: { + unsigned int status = desc->status & ~IRQ_DISABLED; + desc->status = status; +#ifndef XEN + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + desc->status = status | IRQ_REPLAY; + hw_resend_irq(desc->handler,irq); + } +#endif + desc->handler->enable(irq); + /* fall-through */ + } + default: + desc->depth--; + break; + case 0: + printk(KERN_ERR "enable_irq(%u) unbalanced from %p\n", + irq, (void *) __builtin_return_address(0)); + } + spin_unlock_irqrestore(&desc->lock, flags); +} +EXPORT_SYMBOL(enable_irq); + +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ +fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs) +{ + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; + +#ifndef XEN + kstat_this_cpu.irqs[irq]++; +#endif + if (desc->status & IRQ_PER_CPU) { + irqreturn_t action_ret; + + /* + * No locking required for CPU-local interrupts: + */ + desc->handler->ack(irq); + action_ret = handle_IRQ_event(irq, regs, desc->action); +#ifndef XEN + if (!noirqdebug) + note_interrupt(irq, desc, action_ret); +#endif + desc->handler->end(irq); + return 1; + } + + spin_lock(&desc->lock); + desc->handler->ack(irq); + /* + * REPLAY is when Linux resends an IRQ that was dropped earlier + * WAITING is used by probe to mark irqs that are being tested + */ +#ifdef XEN + status = desc->status & ~IRQ_REPLAY; +#else + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); +#endif + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* + * If the IRQ is disabled for whatever reason, we cannot + * use the action we have. 
+ */ + action = NULL; + if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { + action = desc->action; + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ + } + desc->status = status; + + /* + * If there is no IRQ handler or it was disabled, exit early. + * Since we set PENDING, if another processor is handling + * a different instance of this same irq, the other processor + * will take care of it. + */ + if (unlikely(!action)) + goto out; + + /* + * Edge triggered interrupts need to remember + * pending events. + * This applies to any hw interrupts that allow a second + * instance of the same irq to arrive while we are in do_IRQ + * or in the handler. But the code here only handles the _second_ + * instance of the irq, not the third or fourth. So it is mostly + * useful for irq hardware that does not mask cleanly in an + * SMP environment. + */ + for (;;) { + irqreturn_t action_ret; + + spin_unlock(&desc->lock); + + action_ret = handle_IRQ_event(irq, regs, action); + + spin_lock(&desc->lock); +#ifndef XEN + if (!noirqdebug) + note_interrupt(irq, desc, action_ret); +#endif + if (likely(!(desc->status & IRQ_PENDING))) + break; + desc->status &= ~IRQ_PENDING; + } + desc->status &= ~IRQ_INPROGRESS; + +out: + /* + * The ->end() handler has to deal with interrupts which got + * disabled while the handler was running. + */ + desc->handler->end(irq); + spin_unlock(&desc->lock); + + return 1; +} + +/** + * request_irq - allocate an interrupt line + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs + * @irqflags: Interrupt type flags + * @devname: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. From the point this + * call is made your handler function may be invoked. Since + * your handler function must clear any interrupt the board + * raises, you must take care both to initialise your hardware + * and to set up the interrupt handler in the right order. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If your interrupt is shared you must pass a non NULL dev_id + * as this is required when freeing the interrupt. + * + * Flags: + * + * SA_SHIRQ Interrupt is shared + * + * SA_INTERRUPT Disable local interrupts while processing + * + * SA_SAMPLE_RANDOM The interrupt can be used for entropy + * + */ + +int request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char * devname, + void *dev_id) +{ + int retval; + struct irqaction * action; + +#if 1 + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). 
+ */ + if (irqflags & SA_SHIRQ) { + if (!dev_id) + printk(KERN_ERR "Bad boy: %s called us without a dev_id!\n", devname); + } +#endif + + if (irq >= NR_IRQS) + return -EINVAL; + if (!handler) + return -EINVAL; + + action = xmalloc(struct irqaction); + if (!action) + return -ENOMEM; + + action->handler = handler; +#ifndef XEN + action->flags = irqflags; + action->mask = 0; +#endif + action->name = devname; +#ifndef XEN + action->next = NULL; +#endif + action->dev_id = dev_id; + + retval = setup_irq(irq, action); + if (retval) + xfree(action); + return retval; +} + +EXPORT_SYMBOL(request_irq); + +/** + * free_irq - free an interrupt + * @irq: Interrupt line to free + * @dev_id: Device identity to free + * + * Remove an interrupt handler. The handler is removed and if the + * interrupt line is no longer in use by any driver it is disabled. + * On a shared IRQ the caller must ensure the interrupt is disabled + * on the card it drives before calling this function. The function + * does not return until any executing interrupts for this IRQ + * have completed. + * + * This function must not be called from interrupt context. + */ + +#ifdef XEN +void free_irq(unsigned int irq) +#else +void free_irq(unsigned int irq, void *dev_id) +#endif +{ + irq_desc_t *desc; + struct irqaction **p; + unsigned long flags; + + if (irq >= NR_IRQS) + return; + + desc = irq_descp(irq); + spin_lock_irqsave(&desc->lock,flags); +#ifdef XEN + if (desc->action) { + struct irqaction * action = desc->action; + desc->action = NULL; +#else + p = &desc->action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; + + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!desc->action) { +#endif + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); +#ifndef XEN + } +#endif + spin_unlock_irqrestore(&desc->lock,flags); + + /* Wait to make sure it's not being used on another CPU */ + synchronize_irq(irq); + xfree(action); + return; + } + printk(KERN_ERR "Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&desc->lock,flags); +#ifndef XEN + return; + } +#endif +} + +EXPORT_SYMBOL(free_irq); + +/* + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_WAITING" cleared and the interrupt + * disabled. + */ + +static DECLARE_MUTEX(probe_sem); + +/** + * probe_irq_on - begin an interrupt autodetect + * + * Commence probing for an interrupt. The interrupts are scanned + * and a mask of potential interrupt lines is returned. + * + */ + +#ifndef XEN +unsigned long probe_irq_on(void) +{ + unsigned int i; + irq_desc_t *desc; + unsigned long val; + unsigned long delay; + + down(&probe_sem); + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_descp(i); + + spin_lock_irq(&desc->lock); + if (!desc->action) + desc->handler->startup(i); + spin_unlock_irq(&desc->lock); + } + + /* Wait for longstanding interrupts to trigger. 
*/ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ barrier(); + + /* + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) + */ + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_descp(i); + + spin_lock_irq(&desc->lock); + if (!desc->action) { + desc->status |= IRQ_AUTODETECT | IRQ_WAITING; + if (desc->handler->startup(i)) + desc->status |= IRQ_PENDING; + } + spin_unlock_irq(&desc->lock); + } + + /* + * Wait for spurious interrupts to trigger + */ + for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) + /* about 100ms delay */ barrier(); + + /* + * Now filter out any obviously spurious interrupts + */ + val = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_descp(i); + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + /* It triggered already - consider it spurious. */ + if (!(status & IRQ_WAITING)) { + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } else + if (i < 32) + val |= 1 << i; + } + spin_unlock_irq(&desc->lock); + } + + return val; +} + +EXPORT_SYMBOL(probe_irq_on); + +/** + * probe_irq_mask - scan a bitmap of interrupt lines + * @val: mask of interrupts to consider + * + * Scan the ISA bus interrupt lines and return a bitmap of + * active interrupts. The interrupt probe logic state is then + * returned to its previous value. + * + * Note: we need to scan all the irq's even though we will + * only return ISA irq numbers - just so that we reset them + * all to a known state. + */ +unsigned int probe_irq_mask(unsigned long val) +{ + int i; + unsigned int mask; + + mask = 0; + for (i = 0; i < 16; i++) { + irq_desc_t *desc = irq_descp(i); + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) + mask |= 1 << i; + + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + up(&probe_sem); + + return mask & val; +} +EXPORT_SYMBOL(probe_irq_mask); + +/** + * probe_irq_off - end an interrupt autodetect + * @val: mask of potential interrupts (unused) + * + * Scans the unused interrupt lines and returns the line which + * appears to have triggered the interrupt. If no interrupt was + * found then zero is returned. If more than one interrupt is + * found then minus the first candidate is returned to indicate + * their is doubt. + * + * The interrupt probe logic state is returned to its previous + * value. + * + * BUGS: When used in a module (which arguably shouldn't happen) + * nothing prevents two IRQ probe callers from overlapping. The + * results of this are non-optimal. 
+ */ + +int probe_irq_off(unsigned long val) +{ + int i, irq_found, nr_irqs; + + nr_irqs = 0; + irq_found = 0; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_descp(i); + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); + } + up(&probe_sem); + + if (nr_irqs > 1) + irq_found = -irq_found; + return irq_found; +} + +EXPORT_SYMBOL(probe_irq_off); +#endif + +int setup_irq(unsigned int irq, struct irqaction * new) +{ + int shared = 0; + unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_descp(irq); + +#ifndef XEN + if (desc->handler == &no_irq_type) + return -ENOSYS; + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. + */ + rand_initialize_irq(irq); + } + + if (new->flags & SA_PERCPU_IRQ) { + desc->status |= IRQ_PER_CPU; + desc->handler = &irq_type_ia64_lsapic; + } +#endif + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; + if ((old = *p) != NULL) { +#ifdef XEN + if (1) { + /* Can't share interrupts unless both agree to */ +#else + if (!(old->flags & new->flags & SA_SHIRQ)) { +#endif + spin_unlock_irqrestore(&desc->lock,flags); + return -EBUSY; + } + +#ifndef XEN + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; +#endif + } + + *p = new; + +#ifndef XEN + if (!shared) { +#else + { +#endif + desc->depth = 0; +#ifdef XEN + desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS); +#else + desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS); +#endif + desc->handler->startup(irq); + } + spin_unlock_irqrestore(&desc->lock,flags); + +#ifndef XEN + register_irq_proc(irq); +#endif + return 0; +} + +#ifndef XEN + +static struct proc_dir_entry * root_irq_dir; +static struct proc_dir_entry * irq_dir [NR_IRQS]; + +#ifdef CONFIG_SMP + +static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; + +static cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; + +static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; + +void set_irq_affinity_info (unsigned int irq, int hwid, int redir) +{ + cpumask_t mask = CPU_MASK_NONE; + + cpu_set(cpu_logical_id(hwid), mask); + + if (irq < NR_IRQS) { + irq_affinity[irq] = mask; + irq_redir[irq] = (char) (redir & 0xff); + } +} + +static int irq_affinity_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = sprintf(page, "%s", irq_redir[(long)data] ? 
"r " : ""); + + len += cpumask_scnprintf(page+len, count, irq_affinity[(long)data]); + if (count - len < 2) + return -EINVAL; + len += sprintf(page + len, "\n"); + return len; +} + +static int irq_affinity_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + unsigned int irq = (unsigned long) data; + int full_count = count, err; + cpumask_t new_value, tmp; +# define R_PREFIX_LEN 16 + char rbuf[R_PREFIX_LEN]; + int rlen; + int prelen; + irq_desc_t *desc = irq_descp(irq); + unsigned long flags; + + if (!desc->handler->set_affinity) + return -EIO; + + /* + * If string being written starts with a prefix of 'r' or 'R' + * and some limited number of spaces, set IA64_IRQ_REDIRECTED. + * If more than (R_PREFIX_LEN - 2) spaces are passed, they won't + * all be trimmed as part of prelen, the untrimmed spaces will + * cause the hex parsing to fail, and this write() syscall will + * fail with EINVAL. + */ + + if (!count) + return -EINVAL; + rlen = min(sizeof(rbuf)-1, count); + if (copy_from_user(rbuf, buffer, rlen)) + return -EFAULT; + rbuf[rlen] = 0; + prelen = 0; + if (tolower(*rbuf) == 'r') { + prelen = strspn(rbuf, "Rr "); + irq |= IA64_IRQ_REDIRECTED; + } + + err = cpumask_parse(buffer+prelen, count-prelen, new_value); + if (err) + return err; + + /* + * Do not allow disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. + */ + cpus_and(tmp, new_value, cpu_online_map); + if (cpus_empty(tmp)) + return -EINVAL; + + spin_lock_irqsave(&desc->lock, flags); + pending_irq_cpumask[irq] = new_value; + spin_unlock_irqrestore(&desc->lock, flags); + + return full_count; +} + +void move_irq(int irq) +{ + /* note - we hold desc->lock */ + cpumask_t tmp; + irq_desc_t *desc = irq_descp(irq); + + if (!cpus_empty(pending_irq_cpumask[irq])) { + cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); + if (unlikely(!cpus_empty(tmp))) { + desc->handler->set_affinity(irq, pending_irq_cpumask[irq]); + } + cpus_clear(pending_irq_cpumask[irq]); + } +} + + +#endif /* CONFIG_SMP */ +#endif + +#ifdef CONFIG_HOTPLUG_CPU +unsigned int vectors_in_migration[NR_IRQS]; + +/* + * Since cpu_online_map is already updated, we just need to check for + * affinity that has zeros + */ +static void migrate_irqs(void) +{ + cpumask_t mask; + irq_desc_t *desc; + int irq, new_cpu; + + for (irq=0; irq < NR_IRQS; irq++) { + desc = irq_descp(irq); + + /* + * No handling for now. + * TBD: Implement a disable function so we can now + * tell CPU not to respond to these local intr sources. + * such as ITV,CPEI,MCA etc. + */ + if (desc->status == IRQ_PER_CPU) + continue; + + cpus_and(mask, irq_affinity[irq], cpu_online_map); + if (any_online_cpu(mask) == NR_CPUS) { + /* + * Save it for phase 2 processing + */ + vectors_in_migration[irq] = irq; + + new_cpu = any_online_cpu(cpu_online_map); + mask = cpumask_of_cpu(new_cpu); + + /* + * Al three are essential, currently WARN_ON.. maybe panic? 
+ */ + if (desc->handler && desc->handler->disable && + desc->handler->enable && desc->handler->set_affinity) { + desc->handler->disable(irq); + desc->handler->set_affinity(irq, mask); + desc->handler->enable(irq); + } else { + WARN_ON((!(desc->handler) || !(desc->handler->disable) || + !(desc->handler->enable) || + !(desc->handler->set_affinity))); + } + } + } +} + +void fixup_irqs(void) +{ + unsigned int irq; + extern void ia64_process_pending_intr(void); + + ia64_set_itv(1<<16); + /* + * Phase 1: Locate irq's bound to this cpu and + * relocate them for cpu removal. + */ + migrate_irqs(); + + /* + * Phase 2: Perform interrupt processing for all entries reported in + * local APIC. + */ + ia64_process_pending_intr(); + + /* + * Phase 3: Now handle any interrupts not captured in local APIC. + * This is to account for cases that device interrupted during the time the + * rte was being disabled and re-programmed. + */ + for (irq=0; irq < NR_IRQS; irq++) { + if (vectors_in_migration[irq]) { + vectors_in_migration[irq]=0; + do_IRQ(irq, NULL); + } + } + + /* + * Now let processor die. We do irq disable and max_xtp() to + * ensure there is no more interrupts routed to this processor. + * But the local timer interrupt can have 1 pending which we + * take care in timer_interrupt(). + */ + max_xtp(); + local_irq_disable(); +} +#endif + +#ifndef XEN +static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); + if (count - len < 2) + return -EINVAL; + len += sprintf(page + len, "\n"); + return len; +} + +static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + cpumask_t *mask = (cpumask_t *)data; + unsigned long full_count = count, err; + cpumask_t new_value; + + err = cpumask_parse(buffer, count, new_value); + if (err) + return err; + + *mask = new_value; + return full_count; +} + +#define MAX_NAMELEN 10 + +static void register_irq_proc (unsigned int irq) +{ + char name [MAX_NAMELEN]; + + if (!root_irq_dir || (irq_descp(irq)->handler == &no_irq_type) || irq_dir[irq]) + return; + + memset(name, 0, MAX_NAMELEN); + sprintf(name, "%d", irq); + + /* create /proc/irq/1234 */ + irq_dir[irq] = proc_mkdir(name, root_irq_dir); + +#ifdef CONFIG_SMP + { + struct proc_dir_entry *entry; + + /* create /proc/irq/1234/smp_affinity */ + entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); + + if (entry) { + entry->nlink = 1; + entry->data = (void *)(long)irq; + entry->read_proc = irq_affinity_read_proc; + entry->write_proc = irq_affinity_write_proc; + } + + smp_affinity_entry[irq] = entry; + } +#endif +} + +cpumask_t prof_cpu_mask = CPU_MASK_ALL; + +void init_irq_proc (void) +{ + struct proc_dir_entry *entry; + int i; + + /* create /proc/irq */ + root_irq_dir = proc_mkdir("irq", 0); + + /* create /proc/irq/prof_cpu_mask */ + entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); + + if (!entry) + return; + + entry->nlink = 1; + entry->data = (void *)&prof_cpu_mask; + entry->read_proc = prof_cpu_mask_read_proc; + entry->write_proc = prof_cpu_mask_write_proc; + + /* + * Create entries for all existing IRQs. 
+ */ + for (i = 0; i < NR_IRQS; i++) { + if (irq_descp(i)->handler == &no_irq_type) + continue; + register_irq_proc(i); + } +} +#endif + + +#ifdef XEN +/* + * HANDLING OF GUEST-BOUND PHYSICAL IRQS + */ + +#define IRQ_MAX_GUESTS 7 +typedef struct { + u8 nr_guests; + u8 in_flight; + u8 shareable; + struct domain *guest[IRQ_MAX_GUESTS]; +} irq_guest_action_t; + +static void __do_IRQ_guest(int irq) +{ + irq_desc_t *desc = &irq_desc[irq]; + irq_guest_action_t *action = (irq_guest_action_t *)desc->action; + struct domain *d; + int i; + + for ( i = 0; i < action->nr_guests; i++ ) + { + d = action->guest[i]; + if ( !test_and_set_bit(irq, &d->pirq_mask) ) + action->in_flight++; + send_guest_pirq(d, irq); + } +} + +int pirq_guest_unmask(struct domain *d) +{ + irq_desc_t *desc; + int i, j, pirq; + u32 m; + shared_info_t *s = d->shared_info; + + for ( i = 0; i < ARRAY_SIZE(d->pirq_mask); i++ ) + { + m = d->pirq_mask[i]; + while ( (j = ffs(m)) != 0 ) + { + m &= ~(1 << --j); + pirq = (i << 5) + j; + desc = &irq_desc[pirq]; + spin_lock_irq(&desc->lock); + if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && + test_and_clear_bit(pirq, &d->pirq_mask) && + (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) + desc->handler->end(pirq); + spin_unlock_irq(&desc->lock); + } + } + + return 0; +} + +int pirq_guest_bind(struct vcpu *d, int irq, int will_share) +{ + irq_desc_t *desc = &irq_desc[irq]; + irq_guest_action_t *action; + unsigned long flags; + int rc = 0; + + if ( !IS_CAPABLE_PHYSDEV(d->domain) ) + return -EPERM; + + spin_lock_irqsave(&desc->lock, flags); + + action = (irq_guest_action_t *)desc->action; + + if ( !(desc->status & IRQ_GUEST) ) + { + if ( desc->action != NULL ) + { + DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n", + irq, desc->action->name); + rc = -EBUSY; + goto out; + } + + action = xmalloc(irq_guest_action_t); + if ( (desc->action = (struct irqaction *)action) == NULL ) + { + DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq); + rc = -ENOMEM; + goto out; + } + + action->nr_guests = 0; + action->in_flight = 0; + action->shareable = will_share; + + desc->depth = 0; + desc->status |= IRQ_GUEST; + desc->status &= ~IRQ_DISABLED; + desc->handler->startup(irq); + + /* Attempt to bind the interrupt target to the correct CPU. */ +#if 0 /* FIXME CONFIG_SMP ??? */ + if ( desc->handler->set_affinity != NULL ) + desc->handler->set_affinity( + irq, apicid_to_phys_cpu_present(d->processor)); +#endif + } + else if ( !will_share || !action->shareable ) + { + DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n", + irq); + rc = -EBUSY; + goto out; + } + + if ( action->nr_guests == IRQ_MAX_GUESTS ) + { + DPRINTK("Cannot bind IRQ %d to guest. 
Already at max share.\n", irq); + rc = -EBUSY; + goto out; + } + + action->guest[action->nr_guests++] = d; + + out: + spin_unlock_irqrestore(&desc->lock, flags); + return rc; +} + +int pirq_guest_unbind(struct domain *d, int irq) +{ + irq_desc_t *desc = &irq_desc[irq]; + irq_guest_action_t *action; + unsigned long flags; + int i; + + spin_lock_irqsave(&desc->lock, flags); + + action = (irq_guest_action_t *)desc->action; + + if ( test_and_clear_bit(irq, &d->pirq_mask) && + (--action->in_flight == 0) ) + desc->handler->end(irq); + + if ( action->nr_guests == 1 ) + { + desc->action = NULL; + xfree(action); + desc->depth = 1; + desc->status |= IRQ_DISABLED; + desc->status &= ~IRQ_GUEST; + desc->handler->shutdown(irq); + } + else + { + i = 0; + while ( action->guest[i] != d ) + i++; + memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); + action->nr_guests--; + } + + spin_unlock_irqrestore(&desc->lock, flags); + return 0; +} + +#endif + +#ifdef XEN +#ifdef IA64 +// this is a temporary hack until real console input is implemented +irqreturn_t guest_forward_keyboard_input(int irq, void *nada, struct pt_regs *regs) +{ + domain_pend_keyboard_interrupt(irq); +} + +void serial_input_init(void) +{ + int retval; + int irq = 0x30; // FIXME + + retval = request_irq(irq,guest_forward_keyboard_input,SA_INTERRUPT,"siminput",NULL); + if (retval) { + printk("serial_input_init: broken request_irq call\n"); + while(1); + } +} +#endif +#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/ivt.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/ivt.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1975 @@ + +#ifdef XEN +//#define CONFIG_DISABLE_VHPT // FIXME: change when VHPT is enabled?? +// these are all hacked out for now as the entire IVT +// will eventually be replaced... just want to use it +// for startup code to handle TLB misses +//#define ia64_leave_kernel 0 +//#define ia64_ret_from_syscall 0 +//#define ia64_handle_irq 0 +//#define ia64_fault 0 +#define ia64_illegal_op_fault 0 +#define ia64_prepare_handle_unaligned 0 +#define ia64_bad_break 0 +#define ia64_trace_syscall 0 +#define sys_call_table 0 +#define sys_ni_syscall 0 +#include <asm/vhpt.h> +#endif +/* + * arch/ia64/kernel/ivt.S + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@xxxxxxxxxx> + * David Mosberger <davidm@xxxxxxxxxx> + * Copyright (C) 2000, 2002-2003 Intel Co + * Asit Mallick <asit.k.mallick@xxxxxxxxx> + * Suresh Siddha <suresh.b.siddha@xxxxxxxxx> + * Kenneth Chen <kenneth.w.chen@xxxxxxxxx> + * Fenghua Yu <fenghua.yu@xxxxxxxxx> + * + * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP + * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now uses virtual PT. + */ +/* + * This file defines the interruption vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for critical + * interruptions like TLB misses. 
+ * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + * entry offset ----/ / / / / + * entry number ---------/ / / / + * size of the entry -------------/ / / + * vector name -------------------------------------/ / + * interruptions triggering this vector ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/break.h> +#include <asm/ia32.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> +#include <asm/errno.h> + +#if 1 +# define PSR_DEFAULT_BITS psr.ac +#else +# define PSR_DEFAULT_BITS 0 +#endif + +#if 0 + /* + * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't + * needed for something else before enabling this... + */ +# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 +#else +# define DBG_FAULT(i) +#endif + +#define MINSTATE_VIRT /* needed by minstate.h */ +#include "minstate.h" + +#define FAULT(n) \ + mov r31=pr; \ + mov r19=n;; /* prepare to save predicates */ \ + br.sptk.many dispatch_to_fault_handler + +#ifdef XEN +#define REFLECT(n) \ + mov r31=pr; \ + mov r19=n;; /* prepare to save predicates */ \ + br.sptk.many dispatch_reflection +#endif + + .section .text.ivt,"ax" + + .align 32768 // align on 32KB boundary + .global ia64_ivt +ia64_ivt: +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(vhpt_miss) + DBG_FAULT(0) + /* + * The VHPT vector is invoked when the TLB entry for the virtual page table + * is missing. This happens only as a result of a previous + * (the "original") TLB miss, which may either be caused by an instruction + * fetch or a data access (or non-access). + * + * What we do here is normal TLB miss handing for the _original_ miss, followed + * by inserting the TLB entry for the virtual page table page that the VHPT + * walker was attempting to access. The latter gets inserted as long + * as both L1 and L2 have valid mappings for the faulting address. + * The TLB entry for the original miss gets inserted only if + * the L3 entry indicates that the page is present. + * + * do_page_fault gets invoked in the following cases: + * - the faulting virtual address uses unimplemented address bits + * - the faulting virtual address has no L1, L2, or L3 mapping + */ + mov r16=cr.ifa // get address that caused the TLB miss +#ifdef CONFIG_HUGETLB_PAGE + movl r18=PAGE_SHIFT + mov r25=cr.itir +#endif + ;; + rsm psr.dt // use physical addressing for data + mov r31=pr // save the predicate registers +#ifdef XEN + movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;; +#else + mov r19=IA64_KR(PT_BASE) // get page table base address +#endif + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + shr r22=r21,3 +#ifdef CONFIG_HUGETLB_PAGE + extr.u r26=r25,2,6 + ;; + cmp.ne p8,p0=r18,r26 + sub r27=r26,r18 + ;; +(p8) dep r25=r18,r25,2,6 +(p8) shr r22=r22,r27 +#endif + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? 
+ shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + + srlz.d + LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir + + .pred.rel "mutex", p6, p7 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r22,PMD_SHIFT // shift L2 index into position + ;; + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL? + dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry + ;; +(p7) ld8 r18=[r21] // read the L3 PTE + mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss + ;; +(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? + mov r22=cr.iha // get the VHPT address that caused the TLB miss + ;; // avoid RAW on p7 +(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? + dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address + ;; +(p10) itc.i r18 // insert the instruction TLB entry +(p11) itc.d r18 // insert the data TLB entry +(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) + mov cr.ifa=r22 + +#ifdef CONFIG_HUGETLB_PAGE +(p8) mov cr.itir=r25 // change to default page-size for VHPT +#endif + + /* + * Now compute and insert the TLB entry for the virtual page table. We never + * execute in a page table page so there is no need to set the exception deferral + * bit. + */ + adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 + ;; +(p7) itc.d r24 + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + /* + * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g + * between reading the pagetable and the "itc". If so, flush the entry we + * inserted and retry. + */ + ld8 r25=[r21] // read L3 PTE again + ld8 r26=[r17] // read L2 entry again + ;; + cmp.ne p6,p7=r26,r20 // did L2 entry change + mov r27=PAGE_SHIFT<<2 + ;; +(p6) ptc.l r22,r27 // purge PTE page translation +(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change + ;; +(p6) ptc.l r16,r27 // purge translation +#endif + + mov pr=r31,-1 // restore predicate registers + rfi +END(vhpt_miss) + + .org ia64_ivt+0x400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(itlb_miss) + DBG_FAULT(1) +#ifdef XEN + VHPT_CCHAIN_LOOKUP(itlb_miss,i) +#ifdef VHPT_GLOBAL + br.cond.sptk page_fault + ;; +#endif +#endif + /* + * The ITLB handler accesses the L3 PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the L3 PTE read + * and go on normally after that. 
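
In outline, vhpt_miss above performs the following lookup in physical mode before inserting both the leaf translation and a mapping for the page-table page itself. This is a simplified, hypothetical sketch with generic index masks; the real handler also folds the region number into the pgd index and re-checks the entries against a racing ptc.g under CONFIG_SMP:

    static unsigned long lookup_pte(unsigned long *pgd, unsigned long ifa)
    {
        unsigned long *pmd, *pte;

        pgd += (ifa >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
        if (*pgd == 0)
            return 0;                           /* no L1 entry: page_fault   */
        pmd  = (unsigned long *)(*pgd & PAGE_MASK);
        pmd += (ifa >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
        if (*pmd == 0)
            return 0;                           /* no L2 entry: page_fault   */
        pte  = (unsigned long *)(*pmd & PAGE_MASK);
        pte += (ifa >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
        return *pte;                            /* caller checks _PAGE_P_BIT */
    }
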
+ */ + mov r16=cr.ifa // get virtual address + mov r29=b0 // save b0 + mov r31=pr // save predicates +.itlb_fault: + mov r17=cr.iha // get virtual address of L3 PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read L3 PTE + ;; + mov b0=r29 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt page_fault + ;; + itc.i r18 + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r19=[r17] // read L3 PTE again and see if same + mov r20=PAGE_SHIFT<<2 // setup page size for purge + ;; + cmp.ne p7,p0=r18,r19 + ;; +(p7) ptc.l r16,r20 +#endif + mov pr=r31,-1 + rfi +END(itlb_miss) + + .org ia64_ivt+0x0800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(dtlb_miss) + DBG_FAULT(2) +#ifdef XEN + VHPT_CCHAIN_LOOKUP(dtlb_miss,d) +#ifdef VHPT_GLOBAL + br.cond.sptk page_fault + ;; +#endif +#endif + /* + * The DTLB handler accesses the L3 PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the L3 PTE read + * and go on normally after that. + */ + mov r16=cr.ifa // get virtual address + mov r29=b0 // save b0 + mov r31=pr // save predicates +dtlb_fault: + mov r17=cr.iha // get virtual address of L3 PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read L3 PTE + ;; + mov b0=r29 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt page_fault + ;; + itc.d r18 + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r19=[r17] // read L3 PTE again and see if same + mov r20=PAGE_SHIFT<<2 // setup page size for purge + ;; + cmp.ne p7,p0=r18,r19 + ;; +(p7) ptc.l r16,r20 +#endif + mov pr=r31,-1 + rfi +END(dtlb_miss) + + .org ia64_ivt+0x0c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(alt_itlb_miss) + DBG_FAULT(3) +#ifdef XEN +//#ifdef VHPT_GLOBAL +// VHPT_CCHAIN_LOOKUP(alt_itlb_miss,i) +// br.cond.sptk page_fault +// ;; +//#endif +#endif +#ifdef XEN + mov r31=pr + mov r16=cr.ifa // get address that caused the TLB miss + ;; +late_alt_itlb_miss: + movl r17=PAGE_KERNEL + mov r21=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; +#else + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=PAGE_KERNEL + mov r21=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r31=pr + ;; +#endif +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r21 + ;; + cmp.gt p8,p0=6,r22 // user mode + ;; +(p8) thash r17=r16 + ;; +(p8) mov cr.iha=r17 +(p8) mov r29=b0 // save b0 +(p8) br.cond.dptk .itlb_fault +#endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits +#ifdef XEN + shr.u r18=r16,55 // move address bit 59 to bit 4 + ;; + and r18=0x10,r18 // bit 4=address-bit(59) +#else + shr.u r18=r16,57 // move address bit 61 to bit 4 + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) +#endif + cmp.ne p8,p0=r0,r23 // psr.cpl != 0? 
+ or r19=r17,r19 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p8) br.cond.spnt page_fault + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(alt_itlb_miss) + + .org ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(alt_dtlb_miss) + DBG_FAULT(4) +#ifdef XEN +//#ifdef VHPT_GLOBAL +// VHPT_CCHAIN_LOOKUP(alt_dtlb_miss,d) +// br.cond.sptk page_fault +// ;; +//#endif +#endif +#ifdef XEN + mov r31=pr + mov r16=cr.ifa // get address that caused the TLB miss + ;; +late_alt_dtlb_miss: + movl r17=PAGE_KERNEL + mov r20=cr.isr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r21=cr.ipsr + ;; +#else +#endif +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r21 + ;; + cmp.gt p8,p0=6,r22 // access to region 0-5 + ;; +(p8) thash r17=r16 + ;; +(p8) mov cr.iha=r17 +(p8) mov r29=b0 // save b0 +(p8) br.cond.dptk dtlb_fault +#endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl + and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field + tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? +#ifdef XEN + shr.u r18=r16,55 // move address bit 59 to bit 4 + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? + ;; + and r18=0x10,r18 // bit 4=address-bit(59) +#else + shr.u r18=r16,57 // move address bit 61 to bit 4 + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) +#endif + cmp.ne p8,p0=r0,r23 +(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field +(p8) br.cond.spnt page_fault +#ifdef XEN + ;; + // Test for Xen address, if not handle via page_fault + // note that 0xf000 (cached) and 0xe800 (uncached) addresses + // should be OK. + extr.u r22=r16,59,5;; + cmp.eq p8,p0=0x1e,r22 +(p8) br.cond.spnt 1f;; + cmp.ne p8,p0=0x1d,r22 +(p8) br.cond.sptk page_fault ;; +1: +#endif + + dep r21=-1,r21,IA64_PSR_ED_BIT,1 + or r19=r19,r17 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p6) mov cr.ipsr=r21 + ;; +(p7) itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(alt_dtlb_miss) + + .org ia64_ivt+0x1400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(nested_dtlb_miss) + /* + * In the absence of kernel bugs, we get here when the virtually mapped linear + * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction + * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page + * table is missing, a nested TLB miss fault is triggered and control is + * transferred to this point. When this happens, we lookup the pte for the + * faulting address by walking the page table in physical mode and return to the + * continuation point passed in register r30 (or call page_fault if the address is + * not mapped). 
+ * + * Input: r16: faulting address + * r29: saved b0 + * r30: continuation address + * r31: saved pr + * + * Output: r17: physical address of L3 PTE of faulting address + * r29: saved b0 + * r30: continuation address + * r31: saved pr + * + * Clobbered: b0, r18, r19, r21, psr.dt (cleared) + */ + rsm psr.dt // switch to using physical data addressing +#ifdef XEN + movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;; +#else + mov r19=IA64_KR(PT_BASE) // get the page table base address +#endif + shl r21=r16,3 // shift bit 60 into sign bit + ;; + shr.u r17=r16,61 // get the region number into r17 + ;; + cmp.eq p6,p7=5,r17 // is faulting address in region 5? + shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + + srlz.d + LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir + + .pred.rel "mutex", p6, p7 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? + shr.u r18=r16,PMD_SHIFT // shift L2 index into position + ;; + ld8 r17=[r17] // fetch the L1 entry (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry + ;; +(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) + shr.u r19=r16,PAGE_SHIFT // shift L3 index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry +(p6) br.cond.spnt page_fault + mov b0=r30 + br.sptk.many b0 // return to continuation point +END(nested_dtlb_miss) + + .org ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(ikey_miss) +#ifdef XEN + REFLECT(6) +#endif + DBG_FAULT(6) + FAULT(6) +END(ikey_miss) + + //----------------------------------------------------------------------------------- + // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) +ENTRY(page_fault) + ssm psr.dt + ;; + srlz.i + ;; + SAVE_MIN_WITH_COVER +#ifdef XEN + alloc r15=ar.pfs,0,0,4,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out3=cr.itir +#else + alloc r15=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=cr.isr +#endif + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collectin is on + ;; +(p15) ssm psr.i // restore psr.i + movl r14=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 // out2 = pointer to pt_regs + br.call.sptk.many b6=ia64_do_page_fault // ignore return address +END(page_fault) + + .org ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(dkey_miss) +#ifdef XEN + REFLECT(7) +#endif + DBG_FAULT(7) + FAULT(7) +END(dkey_miss) + + .org ia64_ivt+0x2000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(dirty_bit) +#ifdef XEN + REFLECT(8) +#endif + DBG_FAULT(8) + /* + * What we do here is to simply turn on the dirty bit in the PTE. 
We need to + * update both the page-table and the TLB entry. To efficiently access the PTE, + * we address it through the virtual page table. Most likely, the TLB entry for + * the relevant virtual page table page is still present in the TLB so we can + * normally do this without additional TLB misses. In case the necessary virtual + * page table TLB entry isn't present, we take a nested TLB miss hit where we look + * up the physical address of the L3 PTE and then continue at label 1 below. + */ + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault + mov r31=pr // save pr +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + ;; + cmpxchg8.acq r26=[r17],r25,ar.ccv + mov r24=PAGE_SHIFT<<2 + ;; + cmp.eq p6,p7=r26,r18 + ;; +(p6) itc.d r25 // install updated PTE + ;; + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov b0=r29 // restore b0 + mov ar.ccv=r28 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18 // install updated PTE +#endif + mov pr=r31,-1 // restore pr + rfi +END(dirty_bit) + + .org ia64_ivt+0x2400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(iaccess_bit) +#ifdef XEN + mov r31=pr; + mov r16=cr.isr + mov r17=cr.ifa + mov r19=9 + movl r20=0x2400 + br.sptk.many fast_access_reflect;; +#endif + DBG_FAULT(9) + // Like Entry 8, except for instruction access + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + mov r31=pr // save predicates +#ifdef CONFIG_ITANIUM + /* + * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. + */ + mov r17=cr.ipsr + ;; + mov r18=cr.iip + tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? 
+ ;; +(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa +#endif /* CONFIG_ITANIUM */ + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault) +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_A,r18 // set the accessed bit + ;; + cmpxchg8.acq r26=[r17],r25,ar.ccv + mov r24=PAGE_SHIFT<<2 + ;; + cmp.eq p6,p7=r26,r18 + ;; +(p6) itc.i r25 // install updated PTE + ;; + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov b0=r29 // restore b0 + mov ar.ccv=r28 +#else /* !CONFIG_SMP */ + ;; +1: ld8 r18=[r17] + ;; + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + itc.i r18 // install updated PTE +#endif /* !CONFIG_SMP */ + mov pr=r31,-1 + rfi +END(iaccess_bit) + + .org ia64_ivt+0x2800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(daccess_bit) +#ifdef XEN + mov r31=pr; + mov r16=cr.isr + mov r17=cr.ifa + mov r19=10 + movl r20=0x2800 + br.sptk.many fast_access_reflect;; +#endif + DBG_FAULT(10) + // Like Entry 8, except for data access + mov r16=cr.ifa // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + thash r17=r16 // compute virtual address of L3 PTE + mov r31=pr + mov r29=b0 // save b0 in case of nested fault) +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_A,r18 // set the dirty bit + ;; + cmpxchg8.acq r26=[r17],r25,ar.ccv + mov r24=PAGE_SHIFT<<2 + ;; + cmp.eq p6,p7=r26,r18 + ;; +(p6) itc.d r25 // install updated PTE + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + ;; + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov ar.ccv=r28 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_A,r18 // set the accessed bit + ;; + st8 [r17]=r18 // store back updated PTE + itc.d r18 // install updated PTE +#endif + mov b0=r29 // restore b0 + mov pr=r31,-1 + rfi +END(daccess_bit) + + .org ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(break_fault) + /* + * The streamlined system call entry/exit paths only save/restore the initial part + * of pt_regs. This implies that the callers of system-calls must adhere to the + * normal procedure calling conventions. + * + * Registers to be saved & restored: + * CR registers: cr.ipsr, cr.iip, cr.ifs + * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr + * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 + * Registers to be restored only: + * r8-r11: output value from the system call. + * + * During system call exit, scratch registers (including r15) are modified/cleared + * to prevent leaking bits from kernel to user level. 
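+ *
+ * Under Xen, the dispatch below works roughly as follows: an iim of zero is
+ * treated as a pseudo-cover and sent to dispatch_privop_fault, a break taken
+ * with vpsr.ic off is assumed to be a hand-coded hyperprivop, an iim equal to
+ * the vcpu's breakimm is a Xen-reserved break (dispatch_break_fault), and any
+ * other break is reflected back to the guest via fast_break_reflect.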
+ */ + DBG_FAULT(11) +#ifdef XEN + mov r16=cr.isr + mov r17=cr.iim + mov r31=pr + ;; + movl r18=XSI_PSR_IC + ;; + ld8 r19=[r18] + ;; + cmp.eq p7,p0=r0,r17 // is this a psuedo-cover? +(p7) br.spnt.many dispatch_privop_fault + ;; + // if vpsr.ic is off, we have a hyperprivop + // A hyperprivop is hand-coded assembly with psr.ic off + // which means no calls, no use of r1-r15 and no memory accesses + // except to pinned addresses! + cmp4.eq p7,p0=r0,r19 +(p7) br.sptk.many fast_hyperprivop + ;; + movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r22 = [r22] + ;; + adds r22=IA64_VCPU_BREAKIMM_OFFSET,r22;; + ld4 r23=[r22];; + cmp4.eq p6,p7=r23,r17 // Xen-reserved breakimm? +(p6) br.spnt.many dispatch_break_fault + ;; + br.sptk.many fast_break_reflect + ;; +#endif + movl r16=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r16=[r16] + mov r17=cr.iim + mov r18=__IA64_BREAK_SYSCALL + mov r21=ar.fpsr + mov r29=cr.ipsr + mov r19=b6 + mov r25=ar.unat + mov r27=ar.rsc + mov r26=ar.pfs + mov r28=cr.iip +#ifndef XEN + mov r31=pr // prepare to save predicates +#endif + mov r20=r1 + ;; + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 + cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so) +(p7) br.cond.spnt non_syscall + ;; + ld1 r17=[r16] // load current->thread.on_ustack flag + st1 [r16]=r0 // clear current->thread.on_ustack flag + add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT + ;; + invala + + /* adjust return address so we skip over the break instruction: */ + + extr.u r8=r29,41,2 // extract ei field from cr.ipsr + ;; + cmp.eq p6,p7=2,r8 // isr.ei==2? + mov r2=r1 // setup r2 for ia64_syscall_setup + ;; +(p6) mov r8=0 // clear ei to 0 +(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped +(p7) adds r8=1,r8 // increment ei to next slot + ;; + cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already? + dep r29=r8,r29,41,2 // insert new ei into cr.ipsr + ;; + + // switch from user to kernel RBS: + MINSTATE_START_SAVE_MIN_VIRT + br.call.sptk.many b7=ia64_syscall_setup + ;; + MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1 + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + mov r3=NR_syscalls - 1 + ;; +(p15) ssm psr.i // restore psr.i + // p10==true means out registers are more than 8 or r15's Nat is true +(p10) br.cond.spnt.many ia64_ret_from_syscall + ;; + movl r16=sys_call_table + + adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 + movl r2=ia64_ret_from_syscall + ;; + shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) + cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ? + mov rp=r2 // set the real return addr + ;; +(p6) ld8 r20=[r20] // load address of syscall entry point +(p7) movl r20=sys_ni_syscall + + add r2=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; + ld4 r2=[r2] // r2 = current_thread_info()->flags + ;; + and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit + ;; + cmp.eq p8,p0=r2,r0 + mov b6=r20 + ;; +(p8) br.call.sptk.many b6=b6 // ignore this return addr + br.cond.sptk ia64_trace_syscall + // NOT REACHED +END(break_fault) + + .org ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) +ENTRY(interrupt) + DBG_FAULT(12) + mov r31=pr // prepare to save predicates + ;; +#ifdef XEN + mov r30=cr.ivr // pass cr.ivr as first arg + // FIXME: this is a hack... 
use cpuinfo.ksoftirqd because its + // not used anywhere else and we need a place to stash ivr and + // there's no registers available unused by SAVE_MIN/REST + movl r29=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;; + st8 [r29]=r30;; + movl r28=slow_interrupt;; + mov r29=rp;; + mov rp=r28;; + br.cond.sptk.many fast_tick_reflect + ;; +slow_interrupt: + mov rp=r29;; +#endif + SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 + ssm psr.ic | PSR_DEFAULT_BITS + ;; + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; + SAVE_REST + ;; + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group +#ifdef XEN + movl out0=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;; + ld8 out0=[out0];; +#else + mov out0=cr.ivr // pass cr.ivr as first arg +#endif + add out1=16,sp // pass pointer to pt_regs as second arg + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_handle_irq +END(interrupt) + + .org ia64_ivt+0x3400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved + DBG_FAULT(13) + FAULT(13) + +#ifdef XEN + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + +GLOBAL_ENTRY(dispatch_break_fault) + SAVE_MIN_WITH_COVER + ;; +dispatch_break_fault_post_save: + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + adds out1=16,sp + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.sptk.many ia64_prepare_handle_break +END(dispatch_break_fault) +#endif + + .org ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + DBG_FAULT(14) + FAULT(14) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + * + * ia64_syscall_setup() is a separate subroutine so that it can + * allocate stacked registers so it can safely demine any + * potential NaT values from the input registers. 
+ * + * On entry: + * - executing on bank 0 or bank 1 register set (doesn't matter) + * - r1: stack pointer + * - r2: current task pointer + * - r3: preserved + * - r11: original contents (saved ar.pfs to be saved) + * - r12: original contents (sp to be saved) + * - r13: original contents (tp to be saved) + * - r15: original contents (syscall # to be saved) + * - r18: saved bsp (after switching to kernel stack) + * - r19: saved b6 + * - r20: saved r1 (gp) + * - r21: saved ar.fpsr + * - r22: kernel's register backing store base (krbs_base) + * - r23: saved ar.bspstore + * - r24: saved ar.rnat + * - r25: saved ar.unat + * - r26: saved ar.pfs + * - r27: saved ar.rsc + * - r28: saved cr.iip + * - r29: saved cr.ipsr + * - r31: saved pr + * - b0: original contents (to be saved) + * On exit: + * - executing on bank 1 registers + * - psr.ic enabled, interrupts restored + * - p10: TRUE if syscall is invoked with more than 8 out + * registers or r15's Nat is true + * - r1: kernel's gp + * - r3: preserved (same as on entry) + * - r8: -EINVAL if p10 is true + * - r12: points to kernel stack + * - r13: points to current task + * - p15: TRUE if interrupts need to be re-enabled + * - ar.fpsr: set to kernel settings + */ +GLOBAL_ENTRY(ia64_syscall_setup) +#ifndef XEN +#if PT(B6) != 0 +# error This code assumes that b6 is the first field in pt_regs. +#endif +#endif + st8 [r1]=r19 // save b6 + add r16=PT(CR_IPSR),r1 // initialize first base pointer + add r17=PT(R11),r1 // initialize second base pointer + ;; + alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable + st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr + tnat.nz p8,p0=in0 + + st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 + tnat.nz p9,p0=in1 +(pKStk) mov r18=r0 // make sure r18 isn't NaT + ;; + + st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs + st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip + mov r28=b0 // save b0 (2 cyc) + ;; + + st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat + dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] +(p8) mov in0=-1 + ;; + + st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs + extr.u r11=r19,7,7 // I0 // get sol of ar.pfs + and r8=0x7f,r19 // A // get sof of ar.pfs + + st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc + tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 +(p9) mov in1=-1 + ;; + +(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 + tnat.nz p10,p0=in2 + add r11=8,r11 + ;; +(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field +(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field + tnat.nz p11,p0=in3 + ;; +(p10) mov in2=-1 + tnat.nz p12,p0=in4 // [I0] +(p11) mov in3=-1 + ;; +(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat +(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore + shl r18=r18,16 // compute ar.rsc to be used for "loadrs" + ;; + st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates + st8 [r17]=r28,PT(R1)-PT(B0) // save b0 + tnat.nz p13,p0=in5 // [I0] + ;; + st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" + st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 +(p12) mov in4=-1 + ;; + +.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 +.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 +(p13) mov in5=-1 + ;; + st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr + tnat.nz p14,p0=in6 + cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 + ;; + stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) +(p9) tnat.nz p10,p0=r15 + adds r12=-16,r1 // 
switch to kernel memory stack (with 16 bytes of scratch) + + st8.spill [r17]=r15 // save r15 + tnat.nz p8,p0=in7 + nop.i 0 + + mov r13=r2 // establish `current' + movl r1=__gp // establish kernel global pointer + ;; +(p14) mov in6=-1 +(p8) mov in7=-1 + nop.i 0 + + cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value +(p10) mov r8=-EINVAL + br.ret.sptk.many b7 +END(ia64_syscall_setup) + + .org ia64_ivt+0x3c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + DBG_FAULT(15) + FAULT(15) + + /* + * Squatting in this space ... + * + * This special case dispatcher for illegal operation faults allows preserved + * registers to be modified through a callback function (asm only) that is handed + * back from the fault handler in r8. Up to three arguments can be passed to the + * callback function by returning an aggregate with the callback as its first + * element, followed by the arguments. + */ +ENTRY(dispatch_illegal_op_fault) + SAVE_MIN_WITH_COVER + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in insn group + mov out0=ar.ec + ;; + SAVE_REST + ;; + br.call.sptk.many rp=ia64_illegal_op_fault +.ret0: ;; + alloc r14=ar.pfs,0,0,3,0 // must be first in insn group + mov out0=r9 + mov out1=r10 + mov out2=r11 + movl r15=ia64_leave_kernel + ;; + mov rp=r15 + mov b6=r8 + ;; + cmp.ne p6,p0=0,r8 +(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel + br.sptk.many ia64_leave_kernel +END(dispatch_illegal_op_fault) + + .org ia64_ivt+0x4000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + DBG_FAULT(16) + FAULT(16) + +#ifdef XEN + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + +ENTRY(dispatch_privop_fault) + SAVE_MIN_WITH_COVER + ;; + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + adds out1=16,sp + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.itir + + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.sptk.many ia64_prepare_handle_privop +END(dispatch_privop_fault) +#endif + + + .org ia64_ivt+0x4400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + DBG_FAULT(17) + FAULT(17) + +ENTRY(non_syscall) + SAVE_MIN_WITH_COVER + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... 
+ + alloc r14=ar.pfs,0,0,2,0 + mov out0=cr.iim + add out1=16,sp + adds r3=8,r2 // set up second base pointer for SAVE_REST + + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + movl r15=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r15 + ;; + br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr +END(non_syscall) + + .org ia64_ivt+0x4800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + DBG_FAULT(18) + FAULT(18) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_unaligned_handler) + SAVE_MIN_WITH_COVER + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + adds out1=16,sp + + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.sptk.many ia64_prepare_handle_unaligned +END(dispatch_unaligned_handler) + + .org ia64_ivt+0x4c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + DBG_FAULT(19) + FAULT(19) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_to_fault_handler) + /* + * Input: + * psr.ic: off + * r19: fault vector number (e.g., 24 for General Exception) + * r31: contains saved predicates (pr) + */ + SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=r15 + mov out1=cr.isr + mov out2=cr.ifa + mov out3=cr.iim + mov out4=cr.itir + ;; + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_fault +END(dispatch_to_fault_handler) + +// +// --- End of long entries, Beginning of short entries +// + + .org ia64_ivt+0x5000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) +ENTRY(page_not_present) +#ifdef XEN + REFLECT(20) +#endif + DBG_FAULT(20) + mov r16=cr.ifa + rsm psr.dt + /* + * The Linux page fault handler doesn't expect non-present pages to be in + * the TLB. Flush the existing entry now, so we meet that expectation. 
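+ * (PAGE_SHIFT<<2 below is the purge size for ptc.l, i.e. the page size
+ * encoded in bits 7:2 of its second operand, the same encoding used by the
+ * other purges in this file.)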
+ */ + mov r17=PAGE_SHIFT<<2 + ;; + ptc.l r16,r17 + ;; + mov r31=pr + srlz.d + br.sptk.many page_fault +END(page_not_present) + + .org ia64_ivt+0x5100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) +ENTRY(key_permission) +#ifdef XEN + REFLECT(21) +#endif + DBG_FAULT(21) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(key_permission) + + .org ia64_ivt+0x5200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(iaccess_rights) +#ifdef XEN + REFLECT(22) +#endif + DBG_FAULT(22) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(iaccess_rights) + + .org ia64_ivt+0x5300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(daccess_rights) +#ifdef XEN + mov r31=pr; + mov r16=cr.isr + mov r17=cr.ifa + mov r19=23 + movl r20=0x5300 + br.sptk.many fast_access_reflect;; +#endif + DBG_FAULT(23) + mov r16=cr.ifa + rsm psr.dt + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(daccess_rights) + + .org ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(general_exception) + DBG_FAULT(24) + mov r16=cr.isr + mov r31=pr + ;; +#ifdef XEN + cmp4.ge p6,p0=0x20,r16 +(p6) br.sptk.many dispatch_privop_fault +#else + cmp4.eq p6,p0=0,r16 +(p6) br.sptk.many dispatch_illegal_op_fault +#endif + ;; + mov r19=24 // fault number + br.sptk.many dispatch_to_fault_handler +END(general_exception) + + .org ia64_ivt+0x5500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(disabled_fp_reg) +#ifdef XEN + REFLECT(25) +#endif + DBG_FAULT(25) + rsm psr.dfh // ensure we can access fph + ;; + srlz.d + mov r31=pr + mov r19=25 + br.sptk.many dispatch_to_fault_handler +END(disabled_fp_reg) + + .org ia64_ivt+0x5600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(nat_consumption) +#ifdef XEN + REFLECT(26) +#endif + DBG_FAULT(26) + FAULT(26) +END(nat_consumption) + + .org ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(speculation_vector) +#ifdef XEN + // this probably need not reflect... + REFLECT(27) +#endif + DBG_FAULT(27) + /* + * A [f]chk.[as] instruction needs to take the branch to the recovery code but + * this part of the architecture is not implemented in hardware on some CPUs, such + * as Itanium. Thus, in general we need to emulate the behavior. IIM contains + * the relative target (not yet sign extended). So after sign extending it we + * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, + * i.e., the slot to restart into. 
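+ *
+ * In C terms, the target computation below is roughly
+ *	new_iip = iip + (((long) iim << 43) >> 39);
+ * i.e. sign_extend(imm21) * 16, since the encoded offset is in units of
+ * 16-byte bundles.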
+ * + * cr.imm contains zero_ext(imm21) + */ + mov r18=cr.iim + ;; + mov r17=cr.iip + shl r18=r18,43 // put sign bit in position (43=64-21) + ;; + + mov r16=cr.ipsr + shr r18=r18,39 // sign extend (39=43-4) + ;; + + add r17=r17,r18 // now add the offset + ;; + mov cr.iip=r17 + dep r16=0,r16,41,2 // clear EI + ;; + + mov cr.ipsr=r16 + ;; + + rfi // and go back +END(speculation_vector) + + .org ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + DBG_FAULT(28) + FAULT(28) + + .org ia64_ivt+0x5900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(debug_vector) +#ifdef XEN + REFLECT(29) +#endif + DBG_FAULT(29) + FAULT(29) +END(debug_vector) + + .org ia64_ivt+0x5a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) +ENTRY(unaligned_access) +#ifdef XEN + REFLECT(30) +#endif + DBG_FAULT(30) + mov r16=cr.ipsr + mov r31=pr // prepare to save predicates + ;; + br.sptk.many dispatch_unaligned_handler +END(unaligned_access) + + .org ia64_ivt+0x5b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) +ENTRY(unsupported_data_reference) +#ifdef XEN + REFLECT(31) +#endif + DBG_FAULT(31) + FAULT(31) +END(unsupported_data_reference) + + .org ia64_ivt+0x5c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) +ENTRY(floating_point_fault) +#ifdef XEN + REFLECT(32) +#endif + DBG_FAULT(32) + FAULT(32) +END(floating_point_fault) + + .org ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(floating_point_trap) +#ifdef XEN + REFLECT(33) +#endif + DBG_FAULT(33) + FAULT(33) +END(floating_point_trap) + + .org ia64_ivt+0x5e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(lower_privilege_trap) +#ifdef XEN + REFLECT(34) +#endif + DBG_FAULT(34) + FAULT(34) +END(lower_privilege_trap) + + .org ia64_ivt+0x5f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(taken_branch_trap) +#ifdef XEN + REFLECT(35) +#endif + DBG_FAULT(35) + FAULT(35) +END(taken_branch_trap) + + .org ia64_ivt+0x6000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(single_step_trap) +#ifdef XEN + REFLECT(36) +#endif + DBG_FAULT(36) + FAULT(36) +END(single_step_trap) + + .org ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Reserved + DBG_FAULT(37) + FAULT(37) + + .org ia64_ivt+0x6200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + DBG_FAULT(38) + FAULT(38) + + .org ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 
bundles) Reserved + DBG_FAULT(39) + FAULT(39) + + .org ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + DBG_FAULT(40) + FAULT(40) + + .org ia64_ivt+0x6500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + DBG_FAULT(41) + FAULT(41) + + .org ia64_ivt+0x6600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + DBG_FAULT(42) + FAULT(42) + + .org ia64_ivt+0x6700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6700 Entry 43 (size 16 bundles) Reserved + DBG_FAULT(43) + FAULT(43) + + .org ia64_ivt+0x6800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + DBG_FAULT(44) + FAULT(44) + + .org ia64_ivt+0x6900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(ia32_exception) +#ifdef XEN + REFLECT(45) +#endif + DBG_FAULT(45) + FAULT(45) +END(ia32_exception) + + .org ia64_ivt+0x6a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(ia32_intercept) +#ifdef XEN + REFLECT(46) +#endif + DBG_FAULT(46) +#ifdef CONFIG_IA32_SUPPORT + mov r31=pr + mov r16=cr.isr + ;; + extr.u r17=r16,16,8 // get ISR.code + mov r18=ar.eflag + mov r19=cr.iim // old eflag value + ;; + cmp.ne p6,p0=2,r17 +(p6) br.cond.spnt 1f // not a system flag fault + xor r16=r18,r19 + ;; + extr.u r17=r16,18,1 // get the eflags.ac bit + ;; + cmp.eq p6,p0=0,r17 +(p6) br.cond.spnt 1f // eflags.ac bit didn't change + ;; + mov pr=r31,-1 // restore predicate registers + rfi + +1: +#endif // CONFIG_IA32_SUPPORT + FAULT(46) +END(ia32_intercept) + + .org ia64_ivt+0x6b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) +ENTRY(ia32_interrupt) +#ifdef XEN + REFLECT(47) +#endif + DBG_FAULT(47) +#ifdef CONFIG_IA32_SUPPORT + mov r31=pr + br.sptk.many dispatch_to_ia32_handler +#else + FAULT(47) +#endif +END(ia32_interrupt) + + .org ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + DBG_FAULT(48) + FAULT(48) + + .org ia64_ivt+0x6d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + DBG_FAULT(49) + FAULT(49) + + .org ia64_ivt+0x6e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + DBG_FAULT(50) + FAULT(50) + + .org ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + DBG_FAULT(51) + FAULT(51) + + .org ia64_ivt+0x7000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7000 Entry 52 (size 16 bundles) Reserved + DBG_FAULT(52) + FAULT(52) + + .org ia64_ivt+0x7100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7100 
Entry 53 (size 16 bundles) Reserved + DBG_FAULT(53) + FAULT(53) + + .org ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + DBG_FAULT(54) + FAULT(54) + + .org ia64_ivt+0x7300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + DBG_FAULT(55) + FAULT(55) + + .org ia64_ivt+0x7400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + DBG_FAULT(56) + FAULT(56) + + .org ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + DBG_FAULT(57) + FAULT(57) + + .org ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + DBG_FAULT(58) + FAULT(58) + + .org ia64_ivt+0x7700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + DBG_FAULT(59) + FAULT(59) + + .org ia64_ivt+0x7800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + DBG_FAULT(60) + FAULT(60) + + .org ia64_ivt+0x7900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + DBG_FAULT(61) + FAULT(61) + + .org ia64_ivt+0x7a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + DBG_FAULT(62) + FAULT(62) + + .org ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + DBG_FAULT(63) + FAULT(63) + + .org ia64_ivt+0x7c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + DBG_FAULT(64) + FAULT(64) + + .org ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + DBG_FAULT(65) + FAULT(65) + + .org ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + DBG_FAULT(66) + FAULT(66) + + .org ia64_ivt+0x7f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + DBG_FAULT(67) + FAULT(67) + +#ifdef XEN + .org ia64_ivt+0x8000 +GLOBAL_ENTRY(dispatch_reflection) + /* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out4=r15 + mov out0=cr.ifa + adds out1=16,sp + mov out2=cr.isr + mov out3=cr.iim +// mov out3=cr.itir + + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.sptk.many ia64_prepare_handle_reflection +END(dispatch_reflection) + +#define SAVE_MIN_COVER_DONE DO_SAVE_MIN(,mov r30=cr.ifs,) + +// same as dispatch_break_fault except cover has already been done 
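+// (SAVE_MIN_COVER_DONE, defined just above, is DO_SAVE_MIN with the cover
+// step omitted and cr.ifs read directly, since the caller has already
+// issued the cover.)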
+GLOBAL_ENTRY(dispatch_slow_hyperprivop) + SAVE_MIN_COVER_DONE + ;; + br.sptk.many dispatch_break_fault_post_save +END(dispatch_slow_hyperprivop) +#endif + +#ifdef CONFIG_IA32_SUPPORT + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + + // IA32 interrupt entry point + +ENTRY(dispatch_to_ia32_handler) + SAVE_MIN + ;; + mov r14=cr.isr + ssm psr.ic | PSR_DEFAULT_BITS + ;; + srlz.i // guarantee that interruption collection is on + ;; +(p15) ssm psr.i + adds r3=8,r2 // Base pointer for SAVE_REST + ;; + SAVE_REST + ;; + mov r15=0x80 + shr r14=r14,16 // Get interrupt number + ;; + cmp.ne p6,p0=r14,r15 +(p6) br.call.dpnt.many b6=non_ia32_syscall + + adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions + adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp + ;; + cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 + ld8 r8=[r14] // get r8 + ;; + st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP) + ;; + alloc r15=ar.pfs,0,0,6,0 // must first in an insn group + ;; + ld4 r8=[r14],8 // r8 == eax (syscall number) + mov r15=IA32_NR_syscalls + ;; + cmp.ltu.unc p6,p7=r8,r15 + ld4 out1=[r14],8 // r9 == ecx + ;; + ld4 out2=[r14],8 // r10 == edx + ;; + ld4 out0=[r14] // r11 == ebx + adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp + ;; + ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp + ;; + ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi + adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; + ld4 out4=[r14] // r15 == edi + movl r16=ia32_syscall_table + ;; +(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number + ld4 r2=[r2] // r2 = current_thread_info()->flags + ;; + ld8 r16=[r16] + and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit + ;; + mov b6=r16 + movl r15=ia32_ret_from_syscall + cmp.eq p8,p0=r2,r0 + ;; + mov rp=r15 +(p8) br.call.sptk.many b6=b6 + br.cond.sptk ia32_trace_syscall + +non_ia32_syscall: + alloc r15=ar.pfs,0,0,2,0 + mov out0=r14 // interrupt # + add out1=16,sp // pointer to pt_regs + ;; // avoid WAW on CFM + br.call.sptk.many rp=ia32_bad_interrupt +.ret1: movl r15=ia64_leave_kernel + ;; + mov rp=r15 + br.ret.sptk.many rp +END(dispatch_to_ia32_handler) + +#endif /* CONFIG_IA32_SUPPORT */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/mm_init.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/mm_init.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,549 @@ +/* + * Initialize MMU support. 
+ * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@xxxxxxxxxx> + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> + +#ifdef XEN +#include <xen/sched.h> +#endif +#include <linux/bootmem.h> +#include <linux/efi.h> +#include <linux/elf.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#ifndef XEN +#include <linux/personality.h> +#endif +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/swap.h> +#ifndef XEN +#include <linux/proc_fs.h> +#endif + +#ifndef XEN +#include <asm/a.out.h> +#endif +#include <asm/bitops.h> +#include <asm/dma.h> +#ifndef XEN +#include <asm/ia32.h> +#endif +#include <asm/io.h> +#include <asm/machvec.h> +#include <asm/numa.h> +#include <asm/patch.h> +#include <asm/pgalloc.h> +#include <asm/sal.h> +#include <asm/sections.h> +#include <asm/system.h> +#include <asm/tlb.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> +#include <asm/mca.h> + +#ifndef XEN +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +#endif + +extern void ia64_tlb_init (void); + +unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; + +#ifdef CONFIG_VIRTUAL_MEM_MAP +unsigned long vmalloc_end = VMALLOC_END_INIT; +EXPORT_SYMBOL(vmalloc_end); +struct page *vmem_map; +EXPORT_SYMBOL(vmem_map); +#endif + +static int pgt_cache_water[2] = { 25, 50 }; + +struct page *zero_page_memmap_ptr; /* map entry for zero page */ +EXPORT_SYMBOL(zero_page_memmap_ptr); + +#ifdef XEN +void *high_memory; +EXPORT_SYMBOL(high_memory); + +///////////////////////////////////////////// +// following from linux-2.6.7/mm/mmap.c +/* description of effects of mapping type and prot in current implementation. + * this is due to the limited x86 page protection hardware. The expected + * behavior is in parens: + * + * map_type prot + * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC + * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes + * w: (no) no w: (no) no w: (yes) yes w: (no) no + * x: (no) no x: (no) yes x: (no) yes x: (yes) yes + * + * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes + * w: (no) no w: (no) no w: (copy) copy w: (no) no + * x: (no) no x: (no) yes x: (no) yes x: (yes) yes + * + */ +pgprot_t protection_map[16] = { + __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, + __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 +}; + +void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) +{ + printf("insert_vm_struct: called, not implemented yet\n"); +} + +///////////////////////////////////////////// +//following from linux/mm/memory.c + +#ifndef __ARCH_HAS_4LEVEL_HACK +/* + * Allocate page upper directory. + * + * We've already handled the fast-path in-line, and we own the + * page table lock. + * + * On a two-level or three-level page table, this ends up actually being + * entirely optimized away. + */ +pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + pud_t *new; + + spin_unlock(&mm->page_table_lock); + new = pud_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pgd_present(*pgd)) { + pud_free(new); + goto out; + } + pgd_populate(mm, pgd, new); + out: + return pud_offset(pgd, address); +} + +/* + * Allocate page middle directory. + * + * We've already handled the fast-path in-line, and we own the + * page table lock. 
+ * + * On a two-level page table, this ends up actually being entirely + * optimized away. + */ +pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) +{ + pmd_t *new; + + spin_unlock(&mm->page_table_lock); + new = pmd_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pud_present(*pud)) { + pmd_free(new); + goto out; + } + pud_populate(mm, pud, new); + out: + return pmd_offset(pud, address); +} +#endif + +pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +{ + if (!pmd_present(*pmd)) { + struct page *new; + + spin_unlock(&mm->page_table_lock); + new = pte_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pmd_present(*pmd)) { + pte_free(new); + goto out; + } + inc_page_state(nr_page_table_pages); + pmd_populate(mm, pmd, new); + } +out: + return pte_offset_map(pmd, address); +} +///////////////////////////////////////////// +#endif /* XEN */ + +#if 0 +void +update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte) +{ + unsigned long addr; + struct page *page; + + if (!pte_exec(pte)) + return; /* not an executable page... */ + + page = pte_page(pte); + /* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */ + addr = (unsigned long) page_address(page); + + if (test_bit(PG_arch_1, &page->flags)) + return; /* i-cache is already coherent with d-cache */ + + flush_icache_range(addr, addr + PAGE_SIZE); + set_bit(PG_arch_1, &page->flags); /* mark page as clean */ +} +#endif + +inline void +ia64_set_rbs_bot (void) +{ +#ifdef XEN + unsigned stack_size = MAX_USER_STACK_SIZE; +#else + unsigned long stack_size = current->rlim[RLIMIT_STACK].rlim_max & -16; +#endif + + if (stack_size > MAX_USER_STACK_SIZE) + stack_size = MAX_USER_STACK_SIZE; + current->arch._thread.rbs_bot = STACK_TOP - stack_size; +} + +/* + * This performs some platform-dependent address space initialization. + * On IA-64, we want to setup the VM area for the register backing + * store (which grows upwards) and install the gateway page which is + * used for signal trampolines, etc. + */ +void +ia64_init_addr_space (void) +{ +#ifdef XEN +printf("ia64_init_addr_space: called, not implemented\n"); +#else + struct vm_area_struct *vma; + + ia64_set_rbs_bot(); + + /* + * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore + * the problem. When the process attempts to write to the register backing store + * for the first time, it will get a SEGFAULT in this case. 
+ */ + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_start = current->arch._thread.rbs_bot & PAGE_MASK; + vma->vm_end = vma->vm_start + PAGE_SIZE; + vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7]; + vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP; + insert_vm_struct(current->mm, vma); + } + + /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ + if (!(current->personality & MMAP_PAGE_ZERO)) { + vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + if (vma) { + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = current->mm; + vma->vm_end = PAGE_SIZE; + vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); + vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; + insert_vm_struct(current->mm, vma); + } + } +#endif +} + +setup_gate (void) +{ + printk("setup_gate not-implemented.\n"); +} + +void __devinit +ia64_mmu_init (void *my_cpu_data) +{ + unsigned long psr, pta, impl_va_bits; + extern void __devinit tlb_init (void); + int cpu; + +#ifdef CONFIG_DISABLE_VHPT +# define VHPT_ENABLE_BIT 0 +#else +# define VHPT_ENABLE_BIT 1 +#endif + + /* Pin mapping for percpu area into TLB */ + psr = ia64_clear_ic(); + ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR, + pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), + PERCPU_PAGE_SHIFT); + + ia64_set_psr(psr); + ia64_srlz_i(); + + /* + * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped + * address space. The IA-64 architecture guarantees that at least 50 bits of + * virtual address space are implemented but if we pick a large enough page size + * (e.g., 64KB), the mapped address space is big enough that it will overlap with + * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages, + * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a + * problem in practice. Alternatively, we could truncate the top of the mapped + * address space to not permit mappings that would overlap with the VMLPT. + * --davidm 00/12/06 + */ +# define pte_bits 3 +# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT) + /* + * The virtual page table has to cover the entire implemented address space within + * a region even though not all of this space may be mappable. The reason for + * this is that the Access bit and Dirty bit fault handlers perform + * non-speculative accesses to the virtual page table, so the address range of the + * virtual page table itself needs to be covered by virtual page table. + */ +# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) +# define POW2(n) (1ULL << (n)) + + impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61))); + + if (impl_va_bits < 51 || impl_va_bits > 61) + panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1); + +#ifdef XEN + vhpt_init(); +#endif +#if 0 + /* place the VMLPT at the end of each page-table mapped region: */ + pta = POW2(61) - POW2(vmlpt_bits); + + if (POW2(mapped_space_bits) >= pta) + panic("mm/init: overlap between virtually mapped linear page table and " + "mapped kernel space!"); + /* + * Set the (virtually mapped linear) page table address. Bit + * 8 selects between the short and long format, bits 2-7 the + * size of the table, and bit 0 whether the VHPT walker is + * enabled. 
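+ * For example, with 16KB pages (PAGE_SHIFT = 14), pte_bits = 3 and
+ * impl_va_bits = 51, vmlpt_bits = 51 - 14 + 3 = 40 and pta = 2^61 - 2^40,
+ * i.e. a 1TB virtually mapped linear page table at the very top of the
+ * region.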
+ */ + ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT); +#endif + ia64_tlb_init(); + +#ifdef CONFIG_HUGETLB_PAGE + ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2); + ia64_srlz_d(); +#endif + + cpu = smp_processor_id(); + +#ifndef XEN + /* mca handler uses cr.lid as key to pick the right entry */ + ia64_mca_tlb_list[cpu].cr_lid = ia64_getreg(_IA64_REG_CR_LID); + + /* insert this percpu data information into our list for MCA recovery purposes */ + ia64_mca_tlb_list[cpu].percpu_paddr = pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL)); + /* Also save per-cpu tlb flush recipe for use in physical mode mca handler */ + ia64_mca_tlb_list[cpu].ptce_base = local_cpu_data->ptce_base; + ia64_mca_tlb_list[cpu].ptce_count[0] = local_cpu_data->ptce_count[0]; + ia64_mca_tlb_list[cpu].ptce_count[1] = local_cpu_data->ptce_count[1]; + ia64_mca_tlb_list[cpu].ptce_stride[0] = local_cpu_data->ptce_stride[0]; + ia64_mca_tlb_list[cpu].ptce_stride[1] = local_cpu_data->ptce_stride[1]; +#endif +} + +#ifdef CONFIG_VIRTUAL_MEM_MAP + +int +create_mem_map_page_table (u64 start, u64 end, void *arg) +{ + unsigned long address, start_page, end_page; + struct page *map_start, *map_end; + int node; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + map_start = vmem_map + (__pa(start) >> PAGE_SHIFT); + map_end = vmem_map + (__pa(end) >> PAGE_SHIFT); + + start_page = (unsigned long) map_start & PAGE_MASK; + end_page = PAGE_ALIGN((unsigned long) map_end); + node = paddr_to_nid(__pa(start)); + + for (address = start_page; address < end_page; address += PAGE_SIZE) { + pgd = pgd_offset_k(address); + if (pgd_none(*pgd)) + pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)); + pmd = pmd_offset(pgd, address); + + if (pmd_none(*pmd)) + pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)); + pte = pte_offset_kernel(pmd, address); + + if (pte_none(*pte)) + set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT, + PAGE_KERNEL)); + } + return 0; +} + +struct memmap_init_callback_data { + struct page *start; + struct page *end; + int nid; + unsigned long zone; +}; + +static int +virtual_memmap_init (u64 start, u64 end, void *arg) +{ + struct memmap_init_callback_data *args; + struct page *map_start, *map_end; + + args = (struct memmap_init_callback_data *) arg; + + map_start = vmem_map + (__pa(start) >> PAGE_SHIFT); + map_end = vmem_map + (__pa(end) >> PAGE_SHIFT); + + if (map_start < args->start) + map_start = args->start; + if (map_end > args->end) + map_end = args->end; + + /* + * We have to initialize "out of bounds" struct page elements that fit completely + * on the same pages that were allocated for the "in bounds" elements because they + * may be referenced later (and found to be "reserved"). 
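+ * (Concretely, the adjustment just below rounds map_start down and map_end up to
+ * whole pages' worth of struct page entries, so any descriptor that shares a page
+ * with an in-bounds one gets initialized as well.)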
+ */ + map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page); + map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end) + / sizeof(struct page)); + + if (map_start < map_end) + memmap_init_zone(map_start, (unsigned long) (map_end - map_start), + args->nid, args->zone, page_to_pfn(map_start)); + return 0; +} + +void +memmap_init (struct page *start, unsigned long size, int nid, + unsigned long zone, unsigned long start_pfn) +{ + if (!vmem_map) + memmap_init_zone(start, size, nid, zone, start_pfn); + else { + struct memmap_init_callback_data args; + + args.start = start; + args.end = start + size; + args.nid = nid; + args.zone = zone; + + efi_memmap_walk(virtual_memmap_init, &args); + } +} + +int +ia64_pfn_valid (unsigned long pfn) +{ + char byte; + struct page *pg = pfn_to_page(pfn); + + return (__get_user(byte, (char *) pg) == 0) + && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK)) + || (__get_user(byte, (char *) (pg + 1) - 1) == 0)); +} +EXPORT_SYMBOL(ia64_pfn_valid); + +int +find_largest_hole (u64 start, u64 end, void *arg) +{ + u64 *max_gap = arg; + + static u64 last_end = PAGE_OFFSET; + + /* NOTE: this algorithm assumes efi memmap table is ordered */ + +#ifdef XEN +//printf("find_largest_hole: start=%lx,end=%lx,max_gap=%lx\n",start,end,*(unsigned long *)arg); +#endif + if (*max_gap < (start - last_end)) + *max_gap = start - last_end; + last_end = end; +#ifdef XEN +//printf("find_largest_hole2: max_gap=%lx,last_end=%lx\n",*max_gap,last_end); +#endif + return 0; +} +#endif /* CONFIG_VIRTUAL_MEM_MAP */ + +static int +count_reserved_pages (u64 start, u64 end, void *arg) +{ + unsigned long num_reserved = 0; + unsigned long *count = arg; + + for (; start < end; start += PAGE_SIZE) + if (PageReserved(virt_to_page(start))) + ++num_reserved; + *count += num_reserved; + return 0; +} + +/* + * Boot command-line option "nolwsys" can be used to disable the use of any light-weight + * system call handler. When this option is in effect, all fsyscalls will end up bubbling + * down into the kernel and calling the normal (heavy-weight) syscall handler. This is + * useful for performance testing, but conceivably could also come in handy for debugging + * purposes. + */ + +static int nolwsys; + +static int __init +nolwsys_setup (char *s) +{ + nolwsys = 1; + return 1; +} + +__setup("nolwsys", nolwsys_setup); + +void +mem_init (void) +{ +#ifdef CONFIG_PCI + /* + * This needs to be called _after_ the command line has been parsed but _before_ + * any drivers that may need the PCI DMA interface are initialized or bootmem has + * been freed. + */ + platform_dma_init(); +#endif + +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/pcdp.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/pcdp.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,120 @@ +/* + * Parse the EFI PCDP table to locate the console device. + * + * (c) Copyright 2002, 2003, 2004 Hewlett-Packard Development Company, L.P. + * Khalid Aziz <khalid.aziz@xxxxxx> + * Alex Williamson <alex.williamson@xxxxxx> + * Bjorn Helgaas <bjorn.helgaas@xxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/acpi.h> +#include <linux/console.h> +#include <linux/efi.h> +#include <linux/serial.h> +#ifdef XEN +#include <linux/errno.h> +#endif +#include "pcdp.h" + +static int __init +setup_serial_console(struct pcdp_uart *uart) +{ +#ifdef XEN + extern struct ns16550_defaults ns16550_com1; + ns16550_com1.baud = uart->baud; + ns16550_com1.io_base = uart->addr.address; + if (uart->bits) + ns16550_com1.data_bits = uart->bits; + return 0; +#else +#ifdef CONFIG_SERIAL_8250_CONSOLE + int mmio; + static char options[64]; + + mmio = (uart->addr.address_space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY); + snprintf(options, sizeof(options), "console=uart,%s,0x%lx,%lun%d", + mmio ? "mmio" : "io", uart->addr.address, uart->baud, + uart->bits ? uart->bits : 8); + + return early_serial_console_init(options); +#else + return -ENODEV; +#endif +#endif +} + +#ifndef XEN +static int __init +setup_vga_console(struct pcdp_vga *vga) +{ +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) + if (efi_mem_type(0xA0000) == EFI_CONVENTIONAL_MEMORY) { + printk(KERN_ERR "PCDP: VGA selected, but frame buffer is not MMIO!\n"); + return -ENODEV; + } + + conswitchp = &vga_con; + printk(KERN_INFO "PCDP: VGA console\n"); + return 0; +#else + return -ENODEV; +#endif +} +#endif + +int __init +efi_setup_pcdp_console(char *cmdline) +{ + struct pcdp *pcdp; + struct pcdp_uart *uart; + struct pcdp_device *dev, *end; + int i, serial = 0; + + pcdp = efi.hcdp; + if (!pcdp) + return -ENODEV; + +#ifndef XEN + printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, __pa(pcdp)); +#endif + + if (strstr(cmdline, "console=hcdp")) { + if (pcdp->rev < 3) + serial = 1; + } else if (strstr(cmdline, "console=")) { +#ifndef XEN + printk(KERN_INFO "Explicit \"console=\"; ignoring PCDP\n"); +#endif + return -ENODEV; + } + + if (pcdp->rev < 3 && efi_uart_console_only()) + serial = 1; + + for (i = 0, uart = pcdp->uart; i < pcdp->num_uarts; i++, uart++) { + if (uart->flags & PCDP_UART_PRIMARY_CONSOLE || serial) { + if (uart->type == PCDP_CONSOLE_UART) { + return setup_serial_console(uart); + } + } + } + +#ifndef XEN + end = (struct pcdp_device *) ((u8 *) pcdp + pcdp->length); + for (dev = (struct pcdp_device *) (pcdp->uart + pcdp->num_uarts); + dev < end; + dev = (struct pcdp_device *) ((u8 *) dev + dev->length)) { + if (dev->flags & PCDP_PRIMARY_CONSOLE) { + if (dev->type == PCDP_CONSOLE_VGA) { + return setup_vga_console((struct pcdp_vga *) dev); + } + } + } +#endif + + return -ENODEV; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/privop.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/privop.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1130 @@ +/* + * Privileged operation "API" handling functions. + * + * Copyright (C) 2004 Hewlett-Packard Co. 
+ * Dan Magenheimer (dan.magenheimer@xxxxxx) + * + */ + +#include <asm/privop.h> +#include <asm/vcpu.h> +#include <asm/processor.h> +#include <asm/delay.h> // Debug only +//#include <debug.h> + +long priv_verbose=0; + +/************************************************************************** +Hypercall bundle creation +**************************************************************************/ + + +void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, UINT64 ret) +{ + INST64_A5 slot0; + INST64_I19 slot1; + INST64_B4 slot2; + IA64_BUNDLE bundle; + + // slot1: mov r2 = hypnum (low 20 bits) + slot0.inst = 0; + slot0.qp = 0; slot0.r1 = 2; slot0.r3 = 0; slot0.major = 0x9; + slot0.imm7b = hypnum; slot0.imm9d = hypnum >> 7; + slot0.imm5c = hypnum >> 16; slot0.s = 0; + // slot1: break brkimm + slot1.inst = 0; + slot1.qp = 0; slot1.x6 = 0; slot1.x3 = 0; slot1.major = 0x0; + slot1.imm20 = brkimm; slot1.i = brkimm >> 20; + // if ret slot2: br.ret.sptk.many rp + // else slot2: br.cond.sptk.many rp + slot2.inst = 0; slot2.qp = 0; slot2.p = 1; slot2.b2 = 0; + slot2.wh = 0; slot2.d = 0; slot2.major = 0x0; + if (ret) { + slot2.btype = 4; slot2.x6 = 0x21; + } + else { + slot2.btype = 0; slot2.x6 = 0x20; + } + + bundle.i64[0] = 0; bundle.i64[1] = 0; + bundle.template = 0x11; + bundle.slot0 = slot0.inst; bundle.slot2 = slot2.inst; + bundle.slot1a = slot1.inst; bundle.slot1b = slot1.inst >> 18; + + *imva++ = bundle.i64[0]; *imva = bundle.i64[1]; +} + +/************************************************************************** +Privileged operation emulation routines +**************************************************************************/ + +IA64FAULT priv_rfi(VCPU *vcpu, INST64 inst) +{ + return vcpu_rfi(vcpu); +} + +IA64FAULT priv_bsw0(VCPU *vcpu, INST64 inst) +{ + return vcpu_bsw0(vcpu); +} + +IA64FAULT priv_bsw1(VCPU *vcpu, INST64 inst) +{ + return vcpu_bsw1(vcpu); +} + +IA64FAULT priv_cover(VCPU *vcpu, INST64 inst) +{ + return vcpu_cover(vcpu); +} + +IA64FAULT priv_ptc_l(VCPU *vcpu, INST64 inst) +{ + UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); + UINT64 addr_range; + + addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); + return vcpu_ptc_l(vcpu,vadr,addr_range); +} + +IA64FAULT priv_ptc_e(VCPU *vcpu, INST64 inst) +{ + UINT src = inst.M28.r3; + + // NOTE: ptc_e with source gr > 63 is emulated as a fc r(y-64) + if (src > 63) return(vcpu_fc(vcpu,vcpu_get_gr(vcpu,src - 64))); + return vcpu_ptc_e(vcpu,vcpu_get_gr(vcpu,src)); +} + +IA64FAULT priv_ptc_g(VCPU *vcpu, INST64 inst) +{ + UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); + UINT64 addr_range; + + addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); + return vcpu_ptc_g(vcpu,vadr,addr_range); +} + +IA64FAULT priv_ptc_ga(VCPU *vcpu, INST64 inst) +{ + UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); + UINT64 addr_range; + + addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); + return vcpu_ptc_ga(vcpu,vadr,addr_range); +} + +IA64FAULT priv_ptr_d(VCPU *vcpu, INST64 inst) +{ + UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); + UINT64 addr_range; + + addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); + return vcpu_ptr_d(vcpu,vadr,addr_range); +} + +IA64FAULT priv_ptr_i(VCPU *vcpu, INST64 inst) +{ + UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); + UINT64 addr_range; + + addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); + return vcpu_ptr_i(vcpu,vadr,addr_range); +} + +IA64FAULT priv_tpa(VCPU *vcpu, INST64 inst) +{ + UINT64 padr; + UINT fault; + UINT src = inst.M46.r3; + + // NOTE: tpa 
with source gr > 63 is emulated as a ttag rx=r(y-64) + if (src > 63) + fault = vcpu_ttag(vcpu,vcpu_get_gr(vcpu,src-64),&padr); + else fault = vcpu_tpa(vcpu,vcpu_get_gr(vcpu,src),&padr); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M46.r1, padr); + else return fault; +} + +IA64FAULT priv_tak(VCPU *vcpu, INST64 inst) +{ + UINT64 key; + UINT fault; + UINT src = inst.M46.r3; + + // NOTE: tak with source gr > 63 is emulated as a thash rx=r(y-64) + if (src > 63) + fault = vcpu_thash(vcpu,vcpu_get_gr(vcpu,src-64),&key); + else fault = vcpu_tak(vcpu,vcpu_get_gr(vcpu,src),&key); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M46.r1, key); + else return fault; +} + +/************************************ + * Insert translation register/cache +************************************/ + +IA64FAULT priv_itr_d(VCPU *vcpu, INST64 inst) +{ + UINT64 fault, itir, ifa, pte, slot; + + //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT); + if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT) + return(IA64_ILLOP_FAULT); + if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) + return(IA64_ILLOP_FAULT); + pte = vcpu_get_gr(vcpu,inst.M42.r2); + slot = vcpu_get_gr(vcpu,inst.M42.r3); + + return (vcpu_itr_d(vcpu,slot,pte,itir,ifa)); +} + +IA64FAULT priv_itr_i(VCPU *vcpu, INST64 inst) +{ + UINT64 fault, itir, ifa, pte, slot; + + //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT); + if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT) + return(IA64_ILLOP_FAULT); + if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) + return(IA64_ILLOP_FAULT); + pte = vcpu_get_gr(vcpu,inst.M42.r2); + slot = vcpu_get_gr(vcpu,inst.M42.r3); + + return (vcpu_itr_i(vcpu,slot,pte,itir,ifa)); +} + +IA64FAULT priv_itc_d(VCPU *vcpu, INST64 inst) +{ + UINT64 fault, itir, ifa, pte; + + //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT); + if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT) + return(IA64_ILLOP_FAULT); + if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) + return(IA64_ILLOP_FAULT); + pte = vcpu_get_gr(vcpu,inst.M41.r2); + + return (vcpu_itc_d(vcpu,pte,itir,ifa)); +} + +IA64FAULT priv_itc_i(VCPU *vcpu, INST64 inst) +{ + UINT64 fault, itir, ifa, pte; + + //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT); + if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT) + return(IA64_ILLOP_FAULT); + if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) + return(IA64_ILLOP_FAULT); + pte = vcpu_get_gr(vcpu,inst.M41.r2); + + return (vcpu_itc_i(vcpu,pte,itir,ifa)); +} + +/************************************* + * Moves to semi-privileged registers +*************************************/ + +IA64FAULT priv_mov_to_ar_imm(VCPU *vcpu, INST64 inst) +{ + // I27 and M30 are identical for these fields + UINT64 ar3 = inst.M30.ar3; + UINT64 imm = vcpu_get_gr(vcpu,inst.M30.imm); + return (vcpu_set_ar(vcpu,ar3,imm)); +} + +IA64FAULT priv_mov_to_ar_reg(VCPU *vcpu, INST64 inst) +{ + // I26 and M29 are identical for these fields + UINT64 ar3 = inst.M29.ar3; + + if (inst.M29.r2 > 63 && inst.M29.ar3 < 8) { // privified mov from kr + UINT64 val; + if (vcpu_get_ar(vcpu,ar3,&val) != IA64_ILLOP_FAULT) + return vcpu_set_gr(vcpu, inst.M29.r2-64, val); + else return IA64_ILLOP_FAULT; + } + else { + UINT64 r2 = vcpu_get_gr(vcpu,inst.M29.r2); + return (vcpu_set_ar(vcpu,ar3,r2)); + } +} + +/******************************** + * Moves to privileged registers +********************************/ + +IA64FAULT priv_mov_to_pkr(VCPU *vcpu, INST64 inst) +{ + UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); + UINT64 
r2 = vcpu_get_gr(vcpu,inst.M42.r2); + return (vcpu_set_pkr(vcpu,r3,r2)); +} + +IA64FAULT priv_mov_to_rr(VCPU *vcpu, INST64 inst) +{ + UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); + UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); + return (vcpu_set_rr(vcpu,r3,r2)); +} + +IA64FAULT priv_mov_to_dbr(VCPU *vcpu, INST64 inst) +{ + UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); + UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); + return (vcpu_set_dbr(vcpu,r3,r2)); +} + +IA64FAULT priv_mov_to_ibr(VCPU *vcpu, INST64 inst) +{ + UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); + UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); + return (vcpu_set_ibr(vcpu,r3,r2)); +} + +IA64FAULT priv_mov_to_pmc(VCPU *vcpu, INST64 inst) +{ + UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); + UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); + return (vcpu_set_pmc(vcpu,r3,r2)); +} + +IA64FAULT priv_mov_to_pmd(VCPU *vcpu, INST64 inst) +{ + UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); + UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); + return (vcpu_set_pmd(vcpu,r3,r2)); +} + +unsigned long to_cr_cnt[128] = { 0 }; + +IA64FAULT priv_mov_to_cr(VCPU *vcpu, INST64 inst) +{ + UINT64 val = vcpu_get_gr(vcpu, inst.M32.r2); + to_cr_cnt[inst.M32.cr3]++; + switch (inst.M32.cr3) { + case 0: return vcpu_set_dcr(vcpu,val); + case 1: return vcpu_set_itm(vcpu,val); + case 2: return vcpu_set_iva(vcpu,val); + case 8: return vcpu_set_pta(vcpu,val); + case 16:return vcpu_set_ipsr(vcpu,val); + case 17:return vcpu_set_isr(vcpu,val); + case 19:return vcpu_set_iip(vcpu,val); + case 20:return vcpu_set_ifa(vcpu,val); + case 21:return vcpu_set_itir(vcpu,val); + case 22:return vcpu_set_iipa(vcpu,val); + case 23:return vcpu_set_ifs(vcpu,val); + case 24:return vcpu_set_iim(vcpu,val); + case 25:return vcpu_set_iha(vcpu,val); + case 64:return vcpu_set_lid(vcpu,val); + case 65:return IA64_ILLOP_FAULT; + case 66:return vcpu_set_tpr(vcpu,val); + case 67:return vcpu_set_eoi(vcpu,val); + case 68:return IA64_ILLOP_FAULT; + case 69:return IA64_ILLOP_FAULT; + case 70:return IA64_ILLOP_FAULT; + case 71:return IA64_ILLOP_FAULT; + case 72:return vcpu_set_itv(vcpu,val); + case 73:return vcpu_set_pmv(vcpu,val); + case 74:return vcpu_set_cmcv(vcpu,val); + case 80:return vcpu_set_lrr0(vcpu,val); + case 81:return vcpu_set_lrr1(vcpu,val); + default: return IA64_ILLOP_FAULT; + } +} + +IA64FAULT priv_rsm(VCPU *vcpu, INST64 inst) +{ + UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; + return vcpu_reset_psr_sm(vcpu,imm24); +} + +IA64FAULT priv_ssm(VCPU *vcpu, INST64 inst) +{ + UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; + return vcpu_set_psr_sm(vcpu,imm24); +} + +/** + * @todo Check for reserved bits and return IA64_RSVDREG_FAULT. 
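+ * (As written, the guest-supplied value is handed straight to vcpu_set_psr_l(); a
+ * stricter version would mask the architecturally reserved PSR bits first and return
+ * IA64_RSVDREG_FAULT when any of them are set.)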
+ */ +IA64FAULT priv_mov_to_psr(VCPU *vcpu, INST64 inst) +{ + UINT64 val = vcpu_get_gr(vcpu, inst.M35.r2); + return vcpu_set_psr_l(vcpu,val); +} + +/********************************** + * Moves from privileged registers + **********************************/ + +IA64FAULT priv_mov_from_rr(VCPU *vcpu, INST64 inst) +{ + UINT64 val; + IA64FAULT fault; + + if (inst.M43.r1 > 63) { // privified mov from cpuid + fault = vcpu_get_cpuid(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M43.r1-64, val); + } + else { + fault = vcpu_get_rr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M43.r1, val); + } + return fault; +} + +IA64FAULT priv_mov_from_pkr(VCPU *vcpu, INST64 inst) +{ + UINT64 val; + IA64FAULT fault; + + fault = vcpu_get_pkr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M43.r1, val); + else return fault; +} + +IA64FAULT priv_mov_from_dbr(VCPU *vcpu, INST64 inst) +{ + UINT64 val; + IA64FAULT fault; + + fault = vcpu_get_dbr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M43.r1, val); + else return fault; +} + +IA64FAULT priv_mov_from_ibr(VCPU *vcpu, INST64 inst) +{ + UINT64 val; + IA64FAULT fault; + + fault = vcpu_get_ibr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M43.r1, val); + else return fault; +} + +IA64FAULT priv_mov_from_pmc(VCPU *vcpu, INST64 inst) +{ + UINT64 val; + IA64FAULT fault; + + if (inst.M43.r1 > 63) { // privified mov from pmd + fault = vcpu_get_pmd(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M43.r1-64, val); + } + else { + fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M43.r1, val); + } + return fault; +} + +unsigned long from_cr_cnt[128] = { 0 }; + +#define cr_get(cr) \ + ((fault = vcpu_get_##cr(vcpu,&val)) == IA64_NO_FAULT) ? 
\ + vcpu_set_gr(vcpu, tgt, val) : fault; + +IA64FAULT priv_mov_from_cr(VCPU *vcpu, INST64 inst) +{ + UINT64 tgt = inst.M33.r1; + UINT64 val; + IA64FAULT fault; + + from_cr_cnt[inst.M33.cr3]++; + switch (inst.M33.cr3) { + case 0: return cr_get(dcr); + case 1: return cr_get(itm); + case 2: return cr_get(iva); + case 8: return cr_get(pta); + case 16:return cr_get(ipsr); + case 17:return cr_get(isr); + case 19:return cr_get(iip); + case 20:return cr_get(ifa); + case 21:return cr_get(itir); + case 22:return cr_get(iipa); + case 23:return cr_get(ifs); + case 24:return cr_get(iim); + case 25:return cr_get(iha); + case 64:return cr_get(lid); + case 65:return cr_get(ivr); + case 66:return cr_get(tpr); + case 67:return vcpu_set_gr(vcpu,tgt,0L); + case 68:return cr_get(irr0); + case 69:return cr_get(irr1); + case 70:return cr_get(irr2); + case 71:return cr_get(irr3); + case 72:return cr_get(itv); + case 73:return cr_get(pmv); + case 74:return cr_get(cmcv); + case 80:return cr_get(lrr0); + case 81:return cr_get(lrr1); + default: return IA64_ILLOP_FAULT; + } + return IA64_ILLOP_FAULT; +} + +IA64FAULT priv_mov_from_psr(VCPU *vcpu, INST64 inst) +{ + UINT64 tgt = inst.M33.r1; + UINT64 val; + IA64FAULT fault; + + if ((fault = vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, tgt, val); + else return fault; +} + +/************************************************************************** +Privileged operation decode and dispatch routines +**************************************************************************/ + +IA64_SLOT_TYPE slot_types[0x20][3] = { + {M, I, I}, {M, I, I}, {M, I, I}, {M, I, I}, + {M, I, ILLEGAL}, {M, I, ILLEGAL}, + {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL}, + {M, M, I}, {M, M, I}, {M, M, I}, {M, M, I}, + {M, F, I}, {M, F, I}, + {M, M, F}, {M, M, F}, + {M, I, B}, {M, I, B}, + {M, B, B}, {M, B, B}, + {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL}, + {B, B, B}, {B, B, B}, + {M, M, B}, {M, M, B}, + {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL}, + {M, F, B}, {M, F, B}, + {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL} +}; + +// pointer to privileged emulation function +typedef IA64FAULT (*PPEFCN)(VCPU *vcpu, INST64 inst); + +PPEFCN Mpriv_funcs[64] = { + priv_mov_to_rr, priv_mov_to_dbr, priv_mov_to_ibr, priv_mov_to_pkr, + priv_mov_to_pmc, priv_mov_to_pmd, 0, 0, + 0, priv_ptc_l, priv_ptc_g, priv_ptc_ga, + priv_ptr_d, priv_ptr_i, priv_itr_d, priv_itr_i, + priv_mov_from_rr, priv_mov_from_dbr, priv_mov_from_ibr, priv_mov_from_pkr, + priv_mov_from_pmc, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, priv_tpa, priv_tak, + 0, 0, 0, 0, + priv_mov_from_cr, priv_mov_from_psr, 0, 0, + 0, 0, 0, 0, + priv_mov_to_cr, priv_mov_to_psr, priv_itc_d, priv_itc_i, + 0, 0, 0, 0, + priv_ptc_e, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +struct { + unsigned long mov_to_ar_imm; + unsigned long mov_to_ar_reg; + unsigned long mov_from_ar; + unsigned long ssm; + unsigned long rsm; + unsigned long rfi; + unsigned long bsw0; + unsigned long bsw1; + unsigned long cover; + unsigned long fc; + unsigned long cpuid; + unsigned long Mpriv_cnt[64]; +} privcnt = { 0 }; + +unsigned long privop_trace = 0; + +IA64FAULT +priv_handle_op(VCPU *vcpu, REGS *regs, int privlvl) +{ + IA64_BUNDLE bundle; + IA64_BUNDLE __get_domain_bundle(UINT64); + int slot; + IA64_SLOT_TYPE slot_type; + INST64 inst; + PPEFCN pfunc; + unsigned long ipsr = regs->cr_ipsr; + UINT64 iip = regs->cr_iip; + int x6; + + // make a local copy of the bundle containing the privop +#if 1 + bundle = __get_domain_bundle(iip); + 
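// A bundle of all zeroes is treated as meaning the page holding the privop is not mapped: force a data miss so the guest maps it and the instruction is retried. +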
if (!bundle.i64[0] && !bundle.i64[1]) +#else + if (__copy_from_user(&bundle,iip,sizeof(bundle))) +#endif + { +//printf("*** priv_handle_op: privop bundle @%p not mapped, retrying\n",iip); + return vcpu_force_data_miss(vcpu,regs->cr_iip); + } +#if 0 + if (iip==0xa000000100001820) { + static int firstpagefault = 1; + if (firstpagefault) { + printf("*** First time to domain page fault!\n"); firstpagefault=0; + } + } +#endif + if (privop_trace) { + static long i = 400; + //if (i > 0) printf("privop @%p\n",iip); + if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n", + iip,ia64_get_itc(),ia64_get_itm()); + i--; + } + slot = ((struct ia64_psr *)&ipsr)->ri; + if (!slot) inst.inst = (bundle.i64[0]>>5) & MASK_41; + else if (slot == 1) + inst.inst = ((bundle.i64[0]>>46) | bundle.i64[1]<<18) & MASK_41; + else if (slot == 2) inst.inst = (bundle.i64[1]>>23) & MASK_41; + else printf("priv_handle_op: illegal slot: %d\n", slot); + + slot_type = slot_types[bundle.template][slot]; + if (priv_verbose) { + printf("priv_handle_op: checking bundle at 0x%lx (op=0x%016lx) slot %d (type=%d)\n", + iip, (UINT64)inst.inst, slot, slot_type); + } + if (slot_type == B && inst.generic.major == 0 && inst.B8.x6 == 0x0) { + // break instr for privified cover + } + else if (privlvl != 2) return (IA64_ILLOP_FAULT); + switch (slot_type) { + case M: + if (inst.generic.major == 0) { +#if 0 + if (inst.M29.x6 == 0 && inst.M29.x3 == 0) { + privcnt.cover++; + return priv_cover(vcpu,inst); + } +#endif + if (inst.M29.x3 != 0) break; + if (inst.M30.x4 == 8 && inst.M30.x2 == 2) { + privcnt.mov_to_ar_imm++; + return priv_mov_to_ar_imm(vcpu,inst); + } + if (inst.M44.x4 == 6) { + privcnt.ssm++; + return priv_ssm(vcpu,inst); + } + if (inst.M44.x4 == 7) { + privcnt.rsm++; + return priv_rsm(vcpu,inst); + } + break; + } + else if (inst.generic.major != 1) break; + x6 = inst.M29.x6; + if (x6 == 0x2a) { + if (inst.M29.r2 > 63 && inst.M29.ar3 < 8) + privcnt.mov_from_ar++; // privified mov from kr + else privcnt.mov_to_ar_reg++; + return priv_mov_to_ar_reg(vcpu,inst); + } + if (inst.M29.x3 != 0) break; + if (!(pfunc = Mpriv_funcs[x6])) break; + if (x6 == 0x1e || x6 == 0x1f) { // tpa or tak are "special" + if (inst.M46.r3 > 63) { + if (x6 == 0x1e) x6 = 0x1b; + else x6 = 0x1a; + } + } + if (x6 == 52 && inst.M28.r3 > 63) + privcnt.fc++; + else if (x6 == 16 && inst.M43.r3 > 63) + privcnt.cpuid++; + else privcnt.Mpriv_cnt[x6]++; + return (*pfunc)(vcpu,inst); + break; + case B: + if (inst.generic.major != 0) break; + if (inst.B8.x6 == 0x08) { + IA64FAULT fault; + privcnt.rfi++; + fault = priv_rfi(vcpu,inst); + if (fault == IA64_NO_FAULT) fault = IA64_RFI_IN_PROGRESS; + return fault; + } + if (inst.B8.x6 == 0x0c) { + privcnt.bsw0++; + return priv_bsw0(vcpu,inst); + } + if (inst.B8.x6 == 0x0d) { + privcnt.bsw1++; + return priv_bsw1(vcpu,inst); + } + if (inst.B8.x6 == 0x0) { // break instr for privified cover + privcnt.cover++; + return priv_cover(vcpu,inst); + } + break; + case I: + if (inst.generic.major != 0) break; +#if 0 + if (inst.I26.x6 == 0 && inst.I26.x3 == 0) { + privcnt.cover++; + return priv_cover(vcpu,inst); + } +#endif + if (inst.I26.x3 != 0) break; // I26.x3 == I27.x3 + if (inst.I26.x6 == 0x2a) { + if (inst.I26.r2 > 63 && inst.I26.ar3 < 8) + privcnt.mov_from_ar++; // privified mov from kr + else privcnt.mov_to_ar_reg++; + return priv_mov_to_ar_reg(vcpu,inst); + } + if (inst.I27.x6 == 0x0a) { + privcnt.mov_to_ar_imm++; + return priv_mov_to_ar_imm(vcpu,inst); + } + break; + default: + break; + } + //printf("We who are about do die 
salute you\n"); + printf("handle_op: can't handle privop at 0x%lx (op=0x%016lx) slot %d (type=%d), ipsr=%p\n", + iip, (UINT64)inst.inst, slot, slot_type, ipsr); + //printf("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip)); + //thread_mozambique("privop fault\n"); + return (IA64_ILLOP_FAULT); +} + +/** Emulate a privileged operation. + * + * This should probably return 0 on success and the "trap number" + * (e.g. illegal operation for bad register, priv op for an + * instruction that isn't allowed, etc.) on "failure" + * + * @param vcpu virtual cpu + * @param isrcode interrupt service routine code + * @return fault + */ +IA64FAULT +priv_emulate(VCPU *vcpu, REGS *regs, UINT64 isr) +{ + IA64FAULT fault; + UINT64 ipsr = regs->cr_ipsr; + UINT64 isrcode = (isr >> 4) & 0xf; + int privlvl; + + // handle privops masked as illops? and breaks (6) + if (isrcode != 1 && isrcode != 2 && isrcode != 0 && isrcode != 6) { + printf("priv_emulate: isrcode != 0 or 1 or 2\n"); + printf("priv_emulate: returning ILLOP, not implemented!\n"); + while (1); + return IA64_ILLOP_FAULT; + } + //if (isrcode != 1 && isrcode != 2) return 0; + vcpu_set_regs(vcpu,regs); + privlvl = (ipsr & IA64_PSR_CPL) >> IA64_PSR_CPL0_BIT; + // its OK for a privified-cover to be executed in user-land + fault = priv_handle_op(vcpu,regs,privlvl); + if ((fault == IA64_NO_FAULT) || (fault == IA64_EXTINT_VECTOR)) { // success!! + // update iip/ipsr to point to the next instruction + (void)vcpu_increment_iip(vcpu); + } + if (fault == IA64_ILLOP_FAULT) + printf("priv_emulate: priv_handle_op fails, isr=%p\n",isr); + return fault; +} + + +// FIXME: Move these to include/public/arch-ia64? +#define HYPERPRIVOP_RFI 0x1 +#define HYPERPRIVOP_RSM_DT 0x2 +#define HYPERPRIVOP_SSM_DT 0x3 +#define HYPERPRIVOP_COVER 0x4 +#define HYPERPRIVOP_ITC_D 0x5 +#define HYPERPRIVOP_ITC_I 0x6 +#define HYPERPRIVOP_SSM_I 0x7 +#define HYPERPRIVOP_GET_IVR 0x8 +#define HYPERPRIVOP_GET_TPR 0x9 +#define HYPERPRIVOP_SET_TPR 0xa +#define HYPERPRIVOP_EOI 0xb +#define HYPERPRIVOP_SET_ITM 0xc +#define HYPERPRIVOP_THASH 0xd +#define HYPERPRIVOP_PTC_GA 0xe +#define HYPERPRIVOP_ITR_D 0xf +#define HYPERPRIVOP_GET_RR 0x10 +#define HYPERPRIVOP_SET_RR 0x11 +#define HYPERPRIVOP_MAX 0x11 + +char *hyperpriv_str[HYPERPRIVOP_MAX+1] = { + 0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i", + "=ivr", "=tpr", "tpr=", "eoi", "itm=", "thash", "ptc.ga", "itr.d", + "=rr", "rr=", + 0 +}; + +unsigned long slow_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 }; +unsigned long fast_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 }; + +/* hyperprivops are generally executed in assembly (with physical psr.ic off) + * so this code is primarily used for debugging them */ +int +ia64_hyperprivop(unsigned long iim, REGS *regs) +{ + struct vcpu *v = (struct domain *) current; + INST64 inst; + UINT64 val; + UINT64 itir, ifa; + +// FIXME: Handle faults appropriately for these + if (!iim || iim > HYPERPRIVOP_MAX) { + printf("bad hyperprivop; ignored\n"); + printf("iim=%d, iip=%p\n",iim,regs->cr_iip); + return 1; + } + slow_hyperpriv_cnt[iim]++; + switch(iim) { + case HYPERPRIVOP_RFI: + (void)vcpu_rfi(v); + return 0; // don't update iip + case HYPERPRIVOP_RSM_DT: + (void)vcpu_reset_psr_dt(v); + return 1; + case HYPERPRIVOP_SSM_DT: + (void)vcpu_set_psr_dt(v); + return 1; + case HYPERPRIVOP_COVER: + (void)vcpu_cover(v); + return 1; + case HYPERPRIVOP_ITC_D: + (void)vcpu_get_itir(v,&itir); + (void)vcpu_get_ifa(v,&ifa); + (void)vcpu_itc_d(v,regs->r8,itir,ifa); + return 1; + case HYPERPRIVOP_ITC_I: + (void)vcpu_get_itir(v,&itir); + 
(void)vcpu_get_ifa(v,&ifa); + (void)vcpu_itc_i(v,regs->r8,itir,ifa); + return 1; + case HYPERPRIVOP_SSM_I: + (void)vcpu_set_psr_i(v); + return 1; + case HYPERPRIVOP_GET_IVR: + (void)vcpu_get_ivr(v,&val); + regs->r8 = val; + return 1; + case HYPERPRIVOP_GET_TPR: + (void)vcpu_get_tpr(v,&val); + regs->r8 = val; + return 1; + case HYPERPRIVOP_SET_TPR: + (void)vcpu_set_tpr(v,regs->r8); + return 1; + case HYPERPRIVOP_EOI: + (void)vcpu_set_eoi(v,0L); + return 1; + case HYPERPRIVOP_SET_ITM: + (void)vcpu_set_itm(v,regs->r8); + return 1; + case HYPERPRIVOP_THASH: + (void)vcpu_thash(v,regs->r8,&val); + regs->r8 = val; + return 1; + case HYPERPRIVOP_PTC_GA: + (void)vcpu_ptc_ga(v,regs->r8,(1L << ((regs->r9 & 0xfc) >> 2))); + return 1; + case HYPERPRIVOP_ITR_D: + (void)vcpu_get_itir(v,&itir); + (void)vcpu_get_ifa(v,&ifa); + (void)vcpu_itr_d(v,regs->r8,regs->r9,itir,ifa); + return 1; + case HYPERPRIVOP_GET_RR: + (void)vcpu_get_rr(v,regs->r8,&val); + regs->r8 = val; + return 1; + case HYPERPRIVOP_SET_RR: + (void)vcpu_set_rr(v,regs->r8,regs->r9); + return 1; + } + return 0; +} + + +/************************************************************************** +Privileged operation instrumentation routines +**************************************************************************/ + +char *Mpriv_str[64] = { + "mov_to_rr", "mov_to_dbr", "mov_to_ibr", "mov_to_pkr", + "mov_to_pmc", "mov_to_pmd", "<0x06>", "<0x07>", + "<0x08>", "ptc_l", "ptc_g", "ptc_ga", + "ptr_d", "ptr_i", "itr_d", "itr_i", + "mov_from_rr", "mov_from_dbr", "mov_from_ibr", "mov_from_pkr", + "mov_from_pmc", "<0x15>", "<0x16>", "<0x17>", + "<0x18>", "<0x19>", "privified-thash", "privified-ttag", + "<0x1c>", "<0x1d>", "tpa", "tak", + "<0x20>", "<0x21>", "<0x22>", "<0x23>", + "mov_from_cr", "mov_from_psr", "<0x26>", "<0x27>", + "<0x28>", "<0x29>", "<0x2a>", "<0x2b>", + "mov_to_cr", "mov_to_psr", "itc_d", "itc_i", + "<0x30>", "<0x31>", "<0x32>", "<0x33>", + "ptc_e", "<0x35>", "<0x36>", "<0x37>", + "<0x38>", "<0x39>", "<0x3a>", "<0x3b>", + "<0x3c>", "<0x3d>", "<0x3e>", "<0x3f>" +}; + +#define RS "Rsvd" +char *cr_str[128] = { + "dcr","itm","iva",RS,RS,RS,RS,RS, + "pta",RS,RS,RS,RS,RS,RS,RS, + "ipsr","isr",RS,"iip","ifa","itir","iipa","ifs", + "iim","iha",RS,RS,RS,RS,RS,RS, + RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS, + RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS, + "lid","ivr","tpr","eoi","irr0","irr1","irr2","irr3", + "itv","pmv","cmcv",RS,RS,RS,RS,RS, + "lrr0","lrr1",RS,RS,RS,RS,RS,RS, + RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS, + RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS, + RS,RS,RS,RS,RS,RS,RS,RS +}; + +// FIXME: should use snprintf to ensure no buffer overflow +int dump_privop_counts(char *buf) +{ + int i, j; + UINT64 sum = 0; + char *s = buf; + + // this is ugly and should probably produce sorted output + // but it will have to do for now + sum += privcnt.mov_to_ar_imm; sum += privcnt.mov_to_ar_reg; + sum += privcnt.ssm; sum += privcnt.rsm; + sum += privcnt.rfi; sum += privcnt.bsw0; + sum += privcnt.bsw1; sum += privcnt.cover; + for (i=0; i < 64; i++) sum += privcnt.Mpriv_cnt[i]; + s += sprintf(s,"Privop statistics: (Total privops: %ld)\n",sum); + if (privcnt.mov_to_ar_imm) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_imm, + "mov_to_ar_imm", (privcnt.mov_to_ar_imm*100L)/sum); + if (privcnt.mov_to_ar_reg) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_reg, + "mov_to_ar_reg", (privcnt.mov_to_ar_reg*100L)/sum); + if (privcnt.mov_from_ar) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_from_ar, + 
"privified-mov_from_ar", (privcnt.mov_from_ar*100L)/sum); + if (privcnt.ssm) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.ssm, + "ssm", (privcnt.ssm*100L)/sum); + if (privcnt.rsm) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rsm, + "rsm", (privcnt.rsm*100L)/sum); + if (privcnt.rfi) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rfi, + "rfi", (privcnt.rfi*100L)/sum); + if (privcnt.bsw0) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw0, + "bsw0", (privcnt.bsw0*100L)/sum); + if (privcnt.bsw1) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw1, + "bsw1", (privcnt.bsw1*100L)/sum); + if (privcnt.cover) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cover, + "cover", (privcnt.cover*100L)/sum); + if (privcnt.fc) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.fc, + "privified-fc", (privcnt.fc*100L)/sum); + if (privcnt.cpuid) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cpuid, + "privified-getcpuid", (privcnt.cpuid*100L)/sum); + for (i=0; i < 64; i++) if (privcnt.Mpriv_cnt[i]) { + if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\n"); + else s += sprintf(s,"%10d %s [%d%%]\n", privcnt.Mpriv_cnt[i], + Mpriv_str[i], (privcnt.Mpriv_cnt[i]*100L)/sum); + if (i == 0x24) { // mov from CR + s += sprintf(s," ["); + for (j=0; j < 128; j++) if (from_cr_cnt[j]) { + if (!cr_str[j]) + s += sprintf(s,"PRIVSTRING NULL!!\n"); + s += sprintf(s,"%s(%d),",cr_str[j],from_cr_cnt[j]); + } + s += sprintf(s,"]\n"); + } + else if (i == 0x2c) { // mov to CR + s += sprintf(s," ["); + for (j=0; j < 128; j++) if (to_cr_cnt[j]) { + if (!cr_str[j]) + s += sprintf(s,"PRIVSTRING NULL!!\n"); + s += sprintf(s,"%s(%d),",cr_str[j],to_cr_cnt[j]); + } + s += sprintf(s,"]\n"); + } + } + return s - buf; +} + +int zero_privop_counts(char *buf) +{ + int i, j; + char *s = buf; + + // this is ugly and should probably produce sorted output + // but it will have to do for now + privcnt.mov_to_ar_imm = 0; privcnt.mov_to_ar_reg = 0; + privcnt.mov_from_ar = 0; + privcnt.ssm = 0; privcnt.rsm = 0; + privcnt.rfi = 0; privcnt.bsw0 = 0; + privcnt.bsw1 = 0; privcnt.cover = 0; + privcnt.fc = 0; privcnt.cpuid = 0; + for (i=0; i < 64; i++) privcnt.Mpriv_cnt[i] = 0; + for (j=0; j < 128; j++) from_cr_cnt[j] = 0; + for (j=0; j < 128; j++) to_cr_cnt[j] = 0; + s += sprintf(s,"All privop statistics zeroed\n"); + return s - buf; +} + +#ifdef PRIVOP_ADDR_COUNT + +extern struct privop_addr_count privop_addr_counter[]; + +void privop_count_addr(unsigned long iip, int inst) +{ + struct privop_addr_count *v = &privop_addr_counter[inst]; + int i; + + for (i = 0; i < PRIVOP_COUNT_NADDRS; i++) { + if (!v->addr[i]) { v->addr[i] = iip; v->count[i]++; return; } + else if (v->addr[i] == iip) { v->count[i]++; return; } + } + v->overflow++;; +} + +int dump_privop_addrs(char *buf) +{ + int i,j; + char *s = buf; + s += sprintf(s,"Privop addresses:\n"); + for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) { + struct privop_addr_count *v = &privop_addr_counter[i]; + s += sprintf(s,"%s:\n",v->instname); + for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) { + if (!v->addr[j]) break; + s += sprintf(s," @%p #%ld\n",v->addr[j],v->count[j]); + } + if (v->overflow) + s += sprintf(s," other #%ld\n",v->overflow); + } + return s - buf; +} + +void zero_privop_addrs(void) +{ + int i,j; + for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) { + struct privop_addr_count *v = &privop_addr_counter[i]; + for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) + v->addr[j] = v->count[j] = 0; + v->overflow = 0; + } +} +#endif + +extern unsigned long dtlb_translate_count; +extern unsigned long tr_translate_count; +extern unsigned long 
phys_translate_count; +extern unsigned long vhpt_translate_count; +extern unsigned long lazy_cover_count; +extern unsigned long idle_when_pending; +extern unsigned long pal_halt_light_count; +extern unsigned long context_switch_count; + +int dump_misc_stats(char *buf) +{ + char *s = buf; + s += sprintf(s,"Virtual TR translations: %d\n",tr_translate_count); + s += sprintf(s,"Virtual VHPT translations: %d\n",vhpt_translate_count); + s += sprintf(s,"Virtual DTLB translations: %d\n",dtlb_translate_count); + s += sprintf(s,"Physical translations: %d\n",phys_translate_count); + s += sprintf(s,"Idle when pending: %d\n",idle_when_pending); + s += sprintf(s,"PAL_HALT_LIGHT (no pending): %d\n",pal_halt_light_count); + s += sprintf(s,"context switches: %d\n",context_switch_count); + s += sprintf(s,"Lazy covers: %d\n",lazy_cover_count); + return s - buf; +} + +void zero_misc_stats(void) +{ + dtlb_translate_count = 0; + tr_translate_count = 0; + phys_translate_count = 0; + vhpt_translate_count = 0; + lazy_cover_count = 0; + pal_halt_light_count = 0; + idle_when_pending = 0; + context_switch_count = 0; +} + +int dump_hyperprivop_counts(char *buf) +{ + int i; + char *s = buf; + unsigned long total = 0; + for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += slow_hyperpriv_cnt[i]; + s += sprintf(s,"Slow hyperprivops (total %d):\n",total); + for (i = 1; i <= HYPERPRIVOP_MAX; i++) + if (slow_hyperpriv_cnt[i]) + s += sprintf(s,"%10d %s\n", + slow_hyperpriv_cnt[i], hyperpriv_str[i]); + total = 0; + for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += fast_hyperpriv_cnt[i]; + s += sprintf(s,"Fast hyperprivops (total %d):\n",total); + for (i = 1; i <= HYPERPRIVOP_MAX; i++) + if (fast_hyperpriv_cnt[i]) + s += sprintf(s,"%10d %s\n", + fast_hyperpriv_cnt[i], hyperpriv_str[i]); + return s - buf; +} + +void zero_hyperprivop_counts(void) +{ + int i; + for (i = 0; i <= HYPERPRIVOP_MAX; i++) slow_hyperpriv_cnt[i] = 0; + for (i = 0; i <= HYPERPRIVOP_MAX; i++) fast_hyperpriv_cnt[i] = 0; +} + +#define TMPBUFLEN 8*1024 +int dump_privop_counts_to_user(char __user *ubuf, int len) +{ + char buf[TMPBUFLEN]; + int n = dump_privop_counts(buf); + + n += dump_hyperprivop_counts(buf + n); + n += dump_reflect_counts(buf + n); +#ifdef PRIVOP_ADDR_COUNT + n += dump_privop_addrs(buf + n); +#endif + n += dump_misc_stats(buf + n); + if (len < TMPBUFLEN) return -1; + if (__copy_to_user(ubuf,buf,n)) return -1; + return n; +} + +int zero_privop_counts_to_user(char __user *ubuf, int len) +{ + char buf[TMPBUFLEN]; + int n = zero_privop_counts(buf); + + zero_hyperprivop_counts(); +#ifdef PRIVOP_ADDR_COUNT + zero_privop_addrs(); +#endif + zero_misc_stats(); + zero_reflect_counts(); + if (len < TMPBUFLEN) return -1; + if (__copy_to_user(ubuf,buf,n)) return -1; + return n; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/process.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/process.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,749 @@ +/* + * Miscellaneous process/domain related routines + * + * Copyright (C) 2004 Hewlett-Packard Co. 
+ * Dan Magenheimer (dan.magenheimer@xxxxxx) + * + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/errno.h> +#include <xen/sched.h> +#include <xen/smp.h> +#include <asm/ptrace.h> +#include <xen/delay.h> + +#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */ +#include <asm/sal.h> /* FOR struct ia64_sal_retval */ + +#include <asm/system.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/desc.h> +//#include <asm/ldt.h> +#include <xen/irq.h> +#include <xen/event.h> +#include <asm/regionreg.h> +#include <asm/privop.h> +#include <asm/vcpu.h> +#include <asm/ia64_int.h> +#include <asm/dom_fw.h> +#include "hpsim_ssc.h" + +extern unsigned long vcpu_get_itir_on_fault(struct vcpu *, UINT64); +extern struct ia64_sal_retval pal_emulator_static(UINT64); +extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64); + +extern unsigned long dom0_start, dom0_size; + +#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT) +// note IA64_PSR_PK removed from following, why is this necessary? +#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \ + IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \ + IA64_PSR_IT | IA64_PSR_BN) + +#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \ + IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \ + IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \ + IA64_PSR_CPL | IA64_PSR_MC | IA64_PSR_IS | \ + IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ + IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA) + +#define PSCB(x,y) VCPU(x,y) +#define PSCBX(x,y) x->arch.y + +extern unsigned long vcpu_verbose; + +long do_iopl(domid_t domain, unsigned int new_io_pl) +{ + dummy(); + return 0; +} + +void schedule_tail(struct vcpu *next) +{ + unsigned long rr7; + //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info); + //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info); +#ifdef CONFIG_VTI + /* rr7 will be postponed to last point when resuming back to guest */ + vmx_load_all_rr(current); +#else // CONFIG_VTI + if (rr7 = load_region_regs(current)) { + printk("schedule_tail: change to rr7 not yet implemented\n"); + } +#endif // CONFIG_VTI +} + +void tdpfoo(void) { } + +// given a domain virtual address, pte and pagesize, extract the metaphysical +// address, convert the pte for a physical address for (possibly different) +// Xen PAGE_SIZE and return modified pte. (NOTE: TLB insert should use +// PAGE_SIZE!) +unsigned long translate_domain_pte(unsigned long pteval, + unsigned long address, unsigned long itir) +{ + struct domain *d = current->domain; + unsigned long mask, pteval2, mpaddr; + unsigned long lookup_domain_mpa(struct domain *,unsigned long); + extern struct domain *dom0; + extern unsigned long dom0_start, dom0_size; + + // FIXME address had better be pre-validated on insert + mask = (1L << ((itir >> 2) & 0x3f)) - 1; + mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask); + if (d == dom0) { + if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { + //printk("translate_domain_pte: out-of-bounds dom0 mpaddr %p! 
itc=%lx...\n",mpaddr,ia64_get_itc()); + tdpfoo(); + } + } + else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) { + printf("translate_domain_pte: bad mpa=%p (> %p),vadr=%p,pteval=%p,itir=%p\n", + mpaddr,d->max_pages<<PAGE_SHIFT,address,pteval,itir); + tdpfoo(); + } + pteval2 = lookup_domain_mpa(d,mpaddr); + pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits + pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected) + pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2; + return pteval2; +} + +// given a current domain metaphysical address, return the physical address +unsigned long translate_domain_mpaddr(unsigned long mpaddr) +{ + extern unsigned long lookup_domain_mpa(struct domain *,unsigned long); + unsigned long pteval; + + if (current->domain == dom0) { + if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { + printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr %p! continuing...\n",mpaddr); + tdpfoo(); + } + } + pteval = lookup_domain_mpa(current->domain,mpaddr); + return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK)); +} + +unsigned long slow_reflect_count[0x80] = { 0 }; +unsigned long fast_reflect_count[0x80] = { 0 }; + +#define inc_slow_reflect_count(vec) slow_reflect_count[vec>>8]++; + +void zero_reflect_counts(void) +{ + int i; + for (i=0; i<0x80; i++) slow_reflect_count[i] = 0; + for (i=0; i<0x80; i++) fast_reflect_count[i] = 0; +} + +int dump_reflect_counts(char *buf) +{ + int i,j,cnt; + char *s = buf; + + s += sprintf(s,"Slow reflections by vector:\n"); + for (i = 0, j = 0; i < 0x80; i++) { + if (cnt = slow_reflect_count[i]) { + s += sprintf(s,"0x%02x00:%10d, ",i,cnt); + if ((j++ & 3) == 3) s += sprintf(s,"\n"); + } + } + if (j & 3) s += sprintf(s,"\n"); + s += sprintf(s,"Fast reflections by vector:\n"); + for (i = 0, j = 0; i < 0x80; i++) { + if (cnt = fast_reflect_count[i]) { + s += sprintf(s,"0x%02x00:%10d, ",i,cnt); + if ((j++ & 3) == 3) s += sprintf(s,"\n"); + } + } + if (j & 3) s += sprintf(s,"\n"); + return s - buf; +} + +void reflect_interruption(unsigned long ifa, unsigned long isr, unsigned long itiriim, struct pt_regs *regs, unsigned long vector) +{ + unsigned long vcpu_get_ipsr_int_state(struct vcpu *,unsigned long); + unsigned long vcpu_get_rr_ve(struct vcpu *,unsigned long); + struct domain *d = current->domain; + struct vcpu *v = current; + + if (vector == IA64_EXTINT_VECTOR) { + + extern unsigned long vcpu_verbose, privop_trace; + static first_extint = 1; + if (first_extint) { + printf("Delivering first extint to domain: ifa=%p, isr=%p, itir=%p, iip=%p\n",ifa,isr,itiriim,regs->cr_iip); + //privop_trace = 1; vcpu_verbose = 1; + first_extint = 0; + } + } + if (!PSCB(v,interrupt_collection_enabled)) { + if (!(PSCB(v,ipsr) & IA64_PSR_DT)) { + panic_domain(regs,"psr.dt off, trying to deliver nested dtlb!\n"); + } + vector &= ~0xf; + if (vector != IA64_DATA_TLB_VECTOR && + vector != IA64_ALT_DATA_TLB_VECTOR && + vector != IA64_VHPT_TRANS_VECTOR) { +panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%p,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n", + vector,regs->cr_ipsr,regs->cr_iip,ifa,isr,PSCB(v,iip)); + + } +//printf("Delivering NESTED DATA TLB fault\n"); + vector = IA64_DATA_NESTED_TLB_VECTOR; + regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL; + regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; +// NOTE: nested trap must NOT pass PSCB address + //regs->r31 = (unsigned long) &PSCB(v); + inc_slow_reflect_count(vector); + return; + + } + if ((vector & 0xf) == IA64_FORCED_IFA) + ifa = PSCB(v,tmp[0]); + vector &= ~0xf; + 
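// From here on, stash the fault state into the guest's shared save area (PSCB) and redirect cr.iip/cr.ipsr to the guest's IVT entry for this vector. +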
PSCB(v,ifa) = ifa; + if (vector < IA64_DATA_NESTED_TLB_VECTOR) /* VHPT miss, TLB miss, Alt TLB miss */ + vcpu_thash(v,ifa,&PSCB(current,iha)); + PSCB(v,unat) = regs->ar_unat; // not sure if this is really needed? + PSCB(v,precover_ifs) = regs->cr_ifs; + vcpu_bsw0(v); + PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr); + if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) + PSCB(v,iim) = itiriim; + else PSCB(v,itir) = vcpu_get_itir_on_fault(v,ifa); + PSCB(v,isr) = isr; // this is unnecessary except for interrupts! + PSCB(v,iip) = regs->cr_iip; + PSCB(v,ifs) = 0; + PSCB(v,incomplete_regframe) = 0; + + regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL; + regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; +#ifdef CONFIG_SMP +#warning "SMP FIXME: sharedinfo doesn't handle smp yet, need page per vcpu" +#endif + regs->r31 = &(((mapped_regs_t *)SHARED_ARCHINFO_ADDR)->ipsr); + + PSCB(v,interrupt_delivery_enabled) = 0; + PSCB(v,interrupt_collection_enabled) = 0; + + inc_slow_reflect_count(vector); +} + +void foodpi(void) {} + +unsigned long pending_false_positive = 0; + +// ONLY gets called from ia64_leave_kernel +// ONLY call with interrupts disabled?? (else might miss one?) +// NEVER successful if already reflecting a trap/fault because psr.i==0 +void deliver_pending_interrupt(struct pt_regs *regs) +{ + struct domain *d = current->domain; + struct vcpu *v = current; + // FIXME: Will this work properly if doing an RFI??? + if (!is_idle_task(d) && user_mode(regs)) { + //vcpu_poke_timer(v); + if (vcpu_deliverable_interrupts(v)) { + unsigned long isr = regs->cr_ipsr & IA64_PSR_RI; + if (vcpu_timer_pending_early(v)) +printf("*#*#*#* about to deliver early timer to domain %d!!!\n",v->domain->domain_id); + reflect_interruption(0,isr,0,regs,IA64_EXTINT_VECTOR); + } + else if (PSCB(v,pending_interruption)) + ++pending_false_positive; + } +} +unsigned long lazy_cover_count = 0; + +int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs) +{ + if (!PSCB(v,interrupt_collection_enabled)) { + PSCB(v,ifs) = regs->cr_ifs; + PSCB(v,incomplete_regframe) = 1; + regs->cr_ifs = 0; + lazy_cover_count++; + return(1); // retry same instruction with cr.ifs off + } + return(0); +} + +void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir) +{ + unsigned long iip = regs->cr_iip; + // FIXME should validate address here + unsigned long pteval; + unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL); + IA64FAULT fault; + + if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) return; + if ((isr & IA64_ISR_SP) + || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) + { + /* + * This fault was due to a speculative load or lfetch.fault, set the "ed" + * bit in the psr to ensure forward progress. (Target register will get a + * NaT for ld.s, lfetch will be canceled.) + */ + ia64_psr(regs)->ed = 1; + return; + } + + fault = vcpu_translate(current,address,is_data,&pteval,&itir); + if (fault == IA64_NO_FAULT) + { + pteval = translate_domain_pte(pteval,address,itir); + vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(itir>>2)&0x3f); + return; + } + else if (IS_VMM_ADDRESS(iip)) + { + if (!ia64_done_with_exception(regs)) { + // should never happen. 
If it does, region 0 addr may + // indicate a bad xen pointer + printk("*** xen_handle_domain_access: exception table" + " lookup failed, iip=%p, addr=%p, spinning...\n", + iip,address); + panic_domain(regs,"*** xen_handle_domain_access: exception table" + " lookup failed, iip=%p, addr=%p, spinning...\n", + iip,address); + } + return; + } + + reflect_interruption(address, isr, 0, regs, fault); +} + +void +ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + unsigned long iim, unsigned long itir, unsigned long arg5, + unsigned long arg6, unsigned long arg7, unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long code, error = isr; + char buf[128]; + int result, sig; + static const char *reason[] = { + "IA-64 Illegal Operation fault", + "IA-64 Privileged Operation fault", + "IA-64 Privileged Register fault", + "IA-64 Reserved Register/Field fault", + "Disabled Instruction Set Transition fault", + "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", + "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", + "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" + }; +#if 0 +printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n", + vector, ifa, regs->cr_iip, regs->cr_ipsr, isr); +#endif + + if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { + /* + * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel + * the lfetch. + */ + ia64_psr(regs)->ed = 1; + printf("ia64_fault: handled lfetch.fault\n"); + return; + } + + switch (vector) { + case 24: /* General Exception */ + code = (isr >> 4) & 0xf; + sprintf(buf, "General Exception: %s%s", reason[code], + (code == 3) ? ((isr & (1UL << 37)) + ? 
" (RSE access)" : " (data access)") : ""); + if (code == 8) { +# ifdef CONFIG_IA64_PRINT_HAZARDS + printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", + current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, + regs->pr); +# endif + printf("ia64_fault: returning on hazard\n"); + return; + } + break; + + case 25: /* Disabled FP-Register */ + if (isr & 2) { + //disabled_fph_fault(regs); + //return; + } + sprintf(buf, "Disabled FPL fault---not supposed to happen!"); + break; + + case 26: /* NaT Consumption */ + if (user_mode(regs)) { + void *addr; + + if (((isr >> 4) & 0xf) == 2) { + /* NaT page consumption */ + //sig = SIGSEGV; + //code = SEGV_ACCERR; + addr = (void *) ifa; + } else { + /* register NaT consumption */ + //sig = SIGILL; + //code = ILL_ILLOPN; + addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + } + //siginfo.si_signo = sig; + //siginfo.si_code = code; + //siginfo.si_errno = 0; + //siginfo.si_addr = addr; + //siginfo.si_imm = vector; + //siginfo.si_flags = __ISR_VALID; + //siginfo.si_isr = isr; + //force_sig_info(sig, &siginfo, current); + //return; + } //else if (ia64_done_with_exception(regs)) + //return; + sprintf(buf, "NaT consumption"); + break; + + case 31: /* Unsupported Data Reference */ + if (user_mode(regs)) { + //siginfo.si_signo = SIGILL; + //siginfo.si_code = ILL_ILLOPN; + //siginfo.si_errno = 0; + //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + //siginfo.si_imm = vector; + //siginfo.si_flags = __ISR_VALID; + //siginfo.si_isr = isr; + //force_sig_info(SIGILL, &siginfo, current); + //return; + } + sprintf(buf, "Unsupported data reference"); + break; + + case 29: /* Debug */ + case 35: /* Taken Branch Trap */ + case 36: /* Single Step Trap */ + //if (fsys_mode(current, regs)) {} + switch (vector) { + case 29: + //siginfo.si_code = TRAP_HWBKPT; +#ifdef CONFIG_ITANIUM + /* + * Erratum 10 (IFA may contain incorrect address) now has + * "NoFix" status. There are no plans for fixing this. + */ + if (ia64_psr(regs)->is == 0) + ifa = regs->cr_iip; +#endif + break; + case 35: ifa = 0; break; + case 36: ifa = 0; break; + //case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; + //case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; + } + //siginfo.si_signo = SIGTRAP; + //siginfo.si_errno = 0; + //siginfo.si_addr = (void *) ifa; + //siginfo.si_imm = 0; + //siginfo.si_flags = __ISR_VALID; + //siginfo.si_isr = isr; + //force_sig_info(SIGTRAP, &siginfo, current); + //return; + + case 32: /* fp fault */ + case 33: /* fp trap */ + //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr); + //if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { + //siginfo.si_signo = SIGFPE; + //siginfo.si_errno = 0; + //siginfo.si_code = FPE_FLTINV; + //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + //siginfo.si_flags = __ISR_VALID; + //siginfo.si_isr = isr; + //siginfo.si_imm = 0; + //force_sig_info(SIGFPE, &siginfo, current); + //} + //return; + sprintf(buf, "FP fault/trap"); + break; + + case 34: + if (isr & 0x2) { + /* Lower-Privilege Transfer Trap */ + /* + * Just clear PSR.lp and then return immediately: all the + * interesting work (e.g., signal delivery is done in the kernel + * exit path). + */ + //ia64_psr(regs)->lp = 0; + //return; + sprintf(buf, "Lower-Privilege Transfer trap"); + } else { + /* Unimplemented Instr. 
Address Trap */ + if (user_mode(regs)) { + //siginfo.si_signo = SIGILL; + //siginfo.si_code = ILL_BADIADDR; + //siginfo.si_errno = 0; + //siginfo.si_flags = 0; + //siginfo.si_isr = 0; + //siginfo.si_imm = 0; + //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); + //force_sig_info(SIGILL, &siginfo, current); + //return; + } + sprintf(buf, "Unimplemented Instruction Address fault"); + } + break; + + case 45: + printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", + regs->cr_iip, ifa, isr); + //force_sig(SIGSEGV, current); + break; + + case 46: + printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", + regs->cr_iip, ifa, isr, iim); + //force_sig(SIGSEGV, current); + return; + + case 47: + sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); + break; + + default: + sprintf(buf, "Fault %lu", vector); + break; + } + //die_if_kernel(buf, regs, error); +printk("ia64_fault: %s: reflecting\n",buf); +reflect_interruption(ifa,isr,iim,regs,IA64_GENEX_VECTOR); +//while(1); + //force_sig(SIGILL, current); +} + +unsigned long running_on_sim = 0; + +void +do_ssc(unsigned long ssc, struct pt_regs *regs) +{ + extern unsigned long lookup_domain_mpa(struct domain *,unsigned long); + unsigned long arg0, arg1, arg2, arg3, retval; + char buf[2]; +/**/ static int last_fd, last_count; // FIXME FIXME FIXME +/**/ // BROKEN FOR MULTIPLE DOMAINS & SMP +/**/ struct ssc_disk_stat { int fd; unsigned count;} *stat, last_stat; + extern unsigned long vcpu_verbose, privop_trace; + + arg0 = vcpu_get_gr(current,32); + switch(ssc) { + case SSC_PUTCHAR: + buf[0] = arg0; + buf[1] = '\0'; + printf(buf); + break; + case SSC_GETCHAR: + retval = ia64_ssc(0,0,0,0,ssc); + vcpu_set_gr(current,8,retval); + break; + case SSC_WAIT_COMPLETION: + if (arg0) { // metaphysical address + + arg0 = translate_domain_mpaddr(arg0); +/**/ stat = (struct ssc_disk_stat *)__va(arg0); +///**/ if (stat->fd == last_fd) stat->count = last_count; +/**/ stat->count = last_count; +//if (last_count >= PAGE_SIZE) printf("ssc_wait: stat->fd=%d,last_fd=%d,last_count=%d\n",stat->fd,last_fd,last_count); +///**/ retval = ia64_ssc(arg0,0,0,0,ssc); +/**/ retval = 0; + } + else retval = -1L; + vcpu_set_gr(current,8,retval); + break; + case SSC_OPEN: + arg1 = vcpu_get_gr(current,33); // access rights +if (!running_on_sim) { printf("SSC_OPEN, not implemented on hardware. 
(ignoring...)\n"); arg0 = 0; } + if (arg0) { // metaphysical address + arg0 = translate_domain_mpaddr(arg0); + retval = ia64_ssc(arg0,arg1,0,0,ssc); + } + else retval = -1L; + vcpu_set_gr(current,8,retval); + break; + case SSC_WRITE: + case SSC_READ: +//if (ssc == SSC_WRITE) printf("DOING AN SSC_WRITE\n"); + arg1 = vcpu_get_gr(current,33); + arg2 = vcpu_get_gr(current,34); + arg3 = vcpu_get_gr(current,35); + if (arg2) { // metaphysical address of descriptor + struct ssc_disk_req *req; + unsigned long mpaddr, paddr; + long len; + + arg2 = translate_domain_mpaddr(arg2); + req = (struct disk_req *)__va(arg2); + req->len &= 0xffffffffL; // avoid strange bug + len = req->len; +/**/ last_fd = arg1; +/**/ last_count = len; + mpaddr = req->addr; +//if (last_count >= PAGE_SIZE) printf("do_ssc: read fd=%d, addr=%p, len=%lx ",last_fd,mpaddr,len); + retval = 0; + if ((mpaddr & PAGE_MASK) != ((mpaddr+len-1) & PAGE_MASK)) { + // do partial page first + req->addr = translate_domain_mpaddr(mpaddr); + req->len = PAGE_SIZE - (req->addr & ~PAGE_MASK); + len -= req->len; mpaddr += req->len; + retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); + arg3 += req->len; // file offset +/**/ last_stat.fd = last_fd; +/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION); +//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)[part]=%x ",req->addr,req->len,retval); + } + if (retval >= 0) while (len > 0) { + req->addr = translate_domain_mpaddr(mpaddr); + req->len = (len > PAGE_SIZE) ? PAGE_SIZE : len; + len -= PAGE_SIZE; mpaddr += PAGE_SIZE; + retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); + arg3 += req->len; // file offset +// TEMP REMOVED AGAIN arg3 += req->len; // file offset +/**/ last_stat.fd = last_fd; +/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION); +//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)=%x ",req->addr,req->len,retval); + } + // set it back to the original value + req->len = last_count; + } + else retval = -1L; + vcpu_set_gr(current,8,retval); +//if (last_count >= PAGE_SIZE) printf("retval=%x\n",retval); + break; + case SSC_CONNECT_INTERRUPT: + arg1 = vcpu_get_gr(current,33); + arg2 = vcpu_get_gr(current,34); + arg3 = vcpu_get_gr(current,35); + if (!running_on_sim) { printf("SSC_CONNECT_INTERRUPT, not implemented on hardware. (ignoring...)\n"); break; } + (void)ia64_ssc(arg0,arg1,arg2,arg3,ssc); + break; + case SSC_NETDEV_PROBE: + vcpu_set_gr(current,8,-1L); + break; + default: + printf("ia64_handle_break: bad ssc code %lx, iip=%p, b0=%p... 
spinning\n",ssc,regs->cr_iip,regs->b0); + while(1); + break; + } + vcpu_increment_iip(current); +} + +int first_break = 1; + +void +ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim) +{ + struct domain *d = (struct domain *) current->domain; + struct vcpu *v = (struct domain *) current; + extern unsigned long running_on_sim; + + if (first_break) { + if (platform_is_hp_ski()) running_on_sim = 1; + else running_on_sim = 0; + first_break = 0; + } + if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant + if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs); + else do_ssc(vcpu_get_gr(current,36), regs); + } + else if (iim == d->arch.breakimm) { + if (ia64_hypercall(regs)) + vcpu_increment_iip(current); + } + else if (!PSCB(v,interrupt_collection_enabled)) { + if (ia64_hyperprivop(iim,regs)) + vcpu_increment_iip(current); + } + else reflect_interruption(ifa,isr,iim,regs,IA64_BREAK_VECTOR); +} + +void +ia64_handle_privop (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long itir) +{ + IA64FAULT vector; + struct domain *d = current->domain; + struct vcpu *v = current; + // FIXME: no need to pass itir in to this routine as we need to + // compute the virtual itir anyway (based on domain's RR.ps) + // AND ACTUALLY reflect_interruption doesn't use it anyway! + itir = vcpu_get_itir_on_fault(v,ifa); + vector = priv_emulate(current,regs,isr); + if (vector != IA64_NO_FAULT && vector != IA64_RFI_IN_PROGRESS) { + reflect_interruption(ifa,isr,itir,regs,vector); + } +} + +#define INTR_TYPE_MAX 10 +UINT64 int_counts[INTR_TYPE_MAX]; + +void +ia64_handle_reflection (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim, unsigned long vector) +{ + struct domain *d = (struct domain *) current->domain; + struct vcpu *v = (struct domain *) current; + unsigned long check_lazy_cover = 0; + unsigned long psr = regs->cr_ipsr; + unsigned long itir = vcpu_get_itir_on_fault(v,ifa); + + if (!(psr & IA64_PSR_CPL)) { + printk("ia64_handle_reflection: reflecting with priv=0!!\n"); + } + // FIXME: no need to pass itir in to this routine as we need to + // compute the virtual itir anyway (based on domain's RR.ps) + // AND ACTUALLY reflect_interruption doesn't use it anyway! + itir = vcpu_get_itir_on_fault(v,ifa); + switch(vector) { + case 8: + vector = IA64_DIRTY_BIT_VECTOR; break; + case 9: + vector = IA64_INST_ACCESS_BIT_VECTOR; break; + case 10: + check_lazy_cover = 1; + vector = IA64_DATA_ACCESS_BIT_VECTOR; break; + case 20: + check_lazy_cover = 1; + vector = IA64_PAGE_NOT_PRESENT_VECTOR; break; + case 22: + vector = IA64_INST_ACCESS_RIGHTS_VECTOR; break; + case 23: + check_lazy_cover = 1; + vector = IA64_DATA_ACCESS_RIGHTS_VECTOR; break; + case 25: + vector = IA64_DISABLED_FPREG_VECTOR; + break; + case 26: +printf("*** NaT fault... attempting to handle as privop\n"); +printf("isr=%p, ifa=%p,iip=%p,ipsr=%p\n",isr,ifa,regs->cr_iip,psr); + vector = priv_emulate(v,regs,isr); + if (vector == IA64_NO_FAULT) { +printf("*** Handled privop masquerading as NaT fault\n"); + return; + } + vector = IA64_NAT_CONSUMPTION_VECTOR; break; + case 27: +//printf("*** Handled speculation vector, itc=%lx!\n",ia64_get_itc()); + itir = iim; + vector = IA64_SPECULATION_VECTOR; break; + case 30: + // FIXME: Should we handle unaligned refs in Xen?? 
+ vector = IA64_UNALIGNED_REF_VECTOR; break; + default: + printf("ia64_handle_reflection: unhandled vector=0x%lx\n",vector); + while(vector); + return; + } + if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, isr, regs)) return; + reflect_interruption(ifa,isr,itir,regs,vector); +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/regionreg.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/regionreg.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,376 @@ +/* + * Region register and region id management + * + * Copyright (C) 2001-2004 Hewlett-Packard Co. + * Dan Magenheimer (dan.magenheimer@xxxxxx + * Bret Mckee (bret.mckee@xxxxxx) + * + */ + + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <asm/page.h> +#include <asm/regionreg.h> +#include <asm/vhpt.h> +#include <asm/vcpu.h> +extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info); + + +#define IA64_MIN_IMPL_RID_BITS (IA64_MIN_IMPL_RID_MSB+1) +#define IA64_MAX_IMPL_RID_BITS 24 + +#define MIN_RIDS (1 << IA64_MIN_IMPL_RID_BITS) +#define MIN_RID_MAX (MIN_RIDS - 1) +#define MIN_RID_MASK (MIN_RIDS - 1) +#define MAX_RIDS (1 << (IA64_MAX_IMPL_RID_BITS)) +#define MAX_RID (MAX_RIDS - 1) +#define MAX_RID_BLOCKS (1 << (IA64_MAX_IMPL_RID_BITS-IA64_MIN_IMPL_RID_BITS)) +#define RIDS_PER_RIDBLOCK MIN_RIDS + +#if 0 +// following already defined in include/asm-ia64/gcc_intrin.h +// it should probably be ifdef'd out from there to ensure all region +// register usage is encapsulated in this file +static inline unsigned long +ia64_get_rr (unsigned long rr) +{ + unsigned long r; + __asm__ __volatile__ (";;mov %0=rr[%1];;":"=r"(r):"r"(rr):"memory"); + return r; +} + +static inline void +ia64_set_rr (unsigned long rr, unsigned long rrv) +{ + __asm__ __volatile__ (";;mov rr[%0]=%1;;"::"r"(rr),"r"(rrv):"memory"); +} +#endif + +// use this to allocate a rid out of the "Xen reserved rid block" +unsigned long allocate_reserved_rid(void) +{ + static unsigned long currentrid = XEN_DEFAULT_RID; + unsigned long t = currentrid; + + unsigned long max = RIDS_PER_RIDBLOCK; + + if (++currentrid >= max) return(-1UL); + return t; +} + + +// returns -1 if none available +unsigned long allocate_metaphysical_rr(void) +{ + ia64_rr rrv; + + rrv.rid = allocate_reserved_rid(); + rrv.ps = PAGE_SHIFT; + rrv.ve = 0; + return rrv.rrval; +} + +int deallocate_metaphysical_rid(unsigned long rid) +{ + // fix this when the increment allocation mechanism is fixed. 
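	/* Nothing can actually be freed yet: metaphysical rids come from
	 * allocate_reserved_rid() above, which only bumps a counter and never
	 * reuses a value, so this routine is a no-op that always reports
	 * success. Presumably a real implementation would need some record of
	 * the reserved rid block (e.g. a bitmap) to hand rids back -- see the
	 * FIXME above. */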
+ return 1; +} + +/************************************* + Region Block setup/management +*************************************/ + +static int implemented_rid_bits = 0; +static struct domain *ridblock_owner[MAX_RID_BLOCKS] = { 0 }; + +void get_impl_rid_bits(void) +{ + // FIXME (call PAL) +//#ifdef CONFIG_MCKINLEY + implemented_rid_bits = IA64_MAX_IMPL_RID_BITS; +//#else +//#error "rid ranges won't work on Merced" +//#endif + if (implemented_rid_bits <= IA64_MIN_IMPL_RID_BITS || + implemented_rid_bits > IA64_MAX_IMPL_RID_BITS) + BUG(); +} + + +/* + * Allocate a power-of-two-sized chunk of region id space -- one or more + * "rid blocks" + */ +int allocate_rid_range(struct domain *d, unsigned long ridbits) +{ + int i, j, n_rid_blocks; + + if (implemented_rid_bits == 0) get_impl_rid_bits(); + + if (ridbits >= IA64_MAX_IMPL_RID_BITS) + ridbits = IA64_MAX_IMPL_RID_BITS - 1; + + if (ridbits < IA64_MIN_IMPL_RID_BITS) + ridbits = IA64_MIN_IMPL_RID_BITS; + + // convert to rid_blocks and find one + n_rid_blocks = ridbits - IA64_MIN_IMPL_RID_BITS + 1; + + // skip over block 0, reserved for "meta-physical mappings (and Xen)" + for (i = n_rid_blocks; i < MAX_RID_BLOCKS; i += n_rid_blocks) { + if (ridblock_owner[i] == NULL) { + for (j = i; j < i + n_rid_blocks; ++j) { + if (ridblock_owner[j]) break; + } + if (ridblock_owner[j] == NULL) break; + } + } + + if (i >= MAX_RID_BLOCKS) return 0; + + // found an unused block: + // (i << min_rid_bits) <= rid < ((i + n) << min_rid_bits) + // mark this block as owned + for (j = i; j < i + n_rid_blocks; ++j) ridblock_owner[j] = d; + + // setup domain struct + d->arch.rid_bits = ridbits; + d->arch.starting_rid = i << IA64_MIN_IMPL_RID_BITS; d->arch.ending_rid = (i+n_rid_blocks) << IA64_MIN_IMPL_RID_BITS; +printf("###allocating rid_range, domain %p: starting_rid=%lx, ending_rid=%lx\n", +d,d->arch.starting_rid, d->arch.ending_rid); + + return 1; +} + + +int deallocate_rid_range(struct domain *d) +{ + int i; + int rid_block_end = d->arch.ending_rid >> IA64_MIN_IMPL_RID_BITS; + int rid_block_start = d->arch.starting_rid >> IA64_MIN_IMPL_RID_BITS; + + return 1; // KLUDGE ALERT + // + // not all domains will have allocated RIDs (physical mode loaders for instance) + // + if (d->arch.rid_bits == 0) return 1; + +#ifdef DEBUG + for (i = rid_block_start; i < rid_block_end; ++i) { + ASSERT(ridblock_owner[i] == d); + } +#endif + + for (i = rid_block_start; i < rid_block_end; ++i) + ridblock_owner[i] = NULL; + + d->arch.rid_bits = 0; + d->arch.starting_rid = 0; + d->arch.ending_rid = 0; + return 1; +} + + +static inline void +set_rr_no_srlz(unsigned long rr, unsigned long rrval) +{ + ia64_set_rr(rr, vmMangleRID(rrval)); +} + +void +set_rr(unsigned long rr, unsigned long rrval) +{ + ia64_set_rr(rr, vmMangleRID(rrval)); + ia64_srlz_d(); +} + +unsigned long +get_rr(unsigned long rr) +{ + return vmUnmangleRID(ia64_get_rr(rr)); +} + +static inline int validate_page_size(unsigned long ps) +{ + switch(ps) { + case 12: case 13: case 14: case 16: case 18: + case 20: case 22: case 24: case 26: case 28: + return 1; + default: + return 0; + } +} + +// validates and changes a single region register +// in the currently executing domain +// Passing a value of -1 is a (successful) no-op +// NOTE: DOES NOT SET VCPU's rrs[x] value!! 
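/*
 * Illustrative sketch only, not part of this changeset: how the rrval that
 * set_one_rr() below operates on is packed, assuming the architected region
 * register layout (ve in bit 0, ps in bits 2-7, rid from bit 8 up) that the
 * ia64_rr union used throughout this file encapsulates. The helper name is
 * made up for the example.
 */
static inline unsigned long example_pack_rrval(unsigned long rid,
                                               unsigned long ps,
                                               unsigned long ve)
{
	/* same fields that set_one_rr() fills in through newrrv.rid/ps/ve */
	return (rid << 8) | ((ps & 0x3f) << 2) | (ve & 1);
}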
+int set_one_rr(unsigned long rr, unsigned long val) +{ + struct vcpu *v = current; + unsigned long rreg = REGION_NUMBER(rr); + ia64_rr rrv, newrrv, memrrv; + unsigned long newrid; + + if (val == -1) return 1; + + rrv.rrval = val; + newrrv.rrval = 0; + newrid = v->arch.starting_rid + rrv.rid; + + if (newrid > v->arch.ending_rid) { + printk("can't set rr%d to %lx, starting_rid=%lx," + "ending_rid=%lx, val=%lx\n", rreg, newrid, + v->arch.starting_rid,v->arch.ending_rid,val); + return 0; + } + +#ifdef CONFIG_VTI + memrrv.rrval = rrv.rrval; + if (rreg == 7) { + newrrv.rid = newrid; + newrrv.ve = VHPT_ENABLED_REGION_7; + newrrv.ps = IA64_GRANULE_SHIFT; + ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info, + v->vcpu_info->arch.privregs); + } + else { + newrrv.rid = newrid; + // FIXME? region 6 needs to be uncached for EFI to work + if (rreg == 6) newrrv.ve = VHPT_ENABLED_REGION_7; + else newrrv.ve = VHPT_ENABLED_REGION_0_TO_6; + newrrv.ps = PAGE_SHIFT; + if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval; + set_rr(rr,newrrv.rrval); + } +#else + memrrv.rrval = rrv.rrval; + newrrv.rid = newrid; + newrrv.ve = 1; // VHPT now enabled for region 7!! + newrrv.ps = PAGE_SHIFT; + if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval; + if (rreg == 7) ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info, + v->vcpu_info->arch.privregs); + else set_rr(rr,newrrv.rrval); +#endif + return 1; +} + +// set rr0 to the passed rid (for metaphysical mode so don't use domain offset +int set_metaphysical_rr0(void) +{ + struct vcpu *v = current; + ia64_rr rrv; + +// rrv.ve = 1; FIXME: TURN ME BACK ON WHEN VHPT IS WORKING + set_rr(0,v->arch.metaphysical_rr0); +} + +// validates/changes region registers 0-6 in the currently executing domain +// Note that this is the one and only SP API (other than executing a privop) +// for a domain to use to change region registers +int set_all_rr( u64 rr0, u64 rr1, u64 rr2, u64 rr3, + u64 rr4, u64 rr5, u64 rr6, u64 rr7) +{ + if (!set_one_rr(0x0000000000000000L, rr0)) return 0; + if (!set_one_rr(0x2000000000000000L, rr1)) return 0; + if (!set_one_rr(0x4000000000000000L, rr2)) return 0; + if (!set_one_rr(0x6000000000000000L, rr3)) return 0; + if (!set_one_rr(0x8000000000000000L, rr4)) return 0; + if (!set_one_rr(0xa000000000000000L, rr5)) return 0; + if (!set_one_rr(0xc000000000000000L, rr6)) return 0; + if (!set_one_rr(0xe000000000000000L, rr7)) return 0; + return 1; +} + +void init_all_rr(struct vcpu *v) +{ + ia64_rr rrv; + + rrv.rrval = 0; + rrv.rrval = v->domain->arch.metaphysical_rr0; + rrv.ps = PAGE_SHIFT; + rrv.ve = 1; +if (!v->vcpu_info) { printf("Stopping in init_all_rr\n"); dummy(); } + VCPU(v,rrs[0]) = -1; + VCPU(v,rrs[1]) = rrv.rrval; + VCPU(v,rrs[2]) = rrv.rrval; + VCPU(v,rrs[3]) = rrv.rrval; + VCPU(v,rrs[4]) = rrv.rrval; + VCPU(v,rrs[5]) = rrv.rrval; + rrv.ve = 0; + VCPU(v,rrs[6]) = rrv.rrval; +// v->shared_info->arch.rrs[7] = rrv.rrval; +} + + +/* XEN/ia64 INTERNAL ROUTINES */ + +unsigned long physicalize_rid(struct vcpu *v, unsigned long rrval) +{ + ia64_rr rrv; + + rrv.rrval = rrval; + rrv.rid += v->arch.starting_rid; + return rrv.rrval; +} + +unsigned long +virtualize_rid(struct vcpu *v, unsigned long rrval) +{ + ia64_rr rrv; + + rrv.rrval = rrval; + rrv.rid -= v->arch.starting_rid; + return rrv.rrval; +} + +// loads a thread's region register (0-6) state into +// the real physical region registers. 
Returns the +// (possibly mangled) bits to store into rr7 +// iff it is different than what is currently in physical +// rr7 (because we have to to assembly and physical mode +// to change rr7). If no change to rr7 is required, returns 0. +// +unsigned long load_region_regs(struct vcpu *v) +{ + unsigned long rr0, rr1,rr2, rr3, rr4, rr5, rr6, rr7; + // TODO: These probably should be validated + unsigned long bad = 0; + + if (VCPU(v,metaphysical_mode)) { + ia64_rr rrv; + + rrv.rrval = 0; + rrv.rid = v->domain->arch.metaphysical_rr0; + rrv.ps = PAGE_SHIFT; + rrv.ve = 1; + rr0 = rrv.rrval; + set_rr_no_srlz(0x0000000000000000L, rr0); + ia64_srlz_d(); + } + else { + rr0 = VCPU(v,rrs[0]); + if (!set_one_rr(0x0000000000000000L, rr0)) bad |= 1; + } + rr1 = VCPU(v,rrs[1]); + rr2 = VCPU(v,rrs[2]); + rr3 = VCPU(v,rrs[3]); + rr4 = VCPU(v,rrs[4]); + rr5 = VCPU(v,rrs[5]); + rr6 = VCPU(v,rrs[6]); + rr7 = VCPU(v,rrs[7]); + if (!set_one_rr(0x2000000000000000L, rr1)) bad |= 2; + if (!set_one_rr(0x4000000000000000L, rr2)) bad |= 4; + if (!set_one_rr(0x6000000000000000L, rr3)) bad |= 8; + if (!set_one_rr(0x8000000000000000L, rr4)) bad |= 0x10; + if (!set_one_rr(0xa000000000000000L, rr5)) bad |= 0x20; + if (!set_one_rr(0xc000000000000000L, rr6)) bad |= 0x40; + if (!set_one_rr(0xe000000000000000L, rr7)) bad |= 0x80; + if (bad) { + panic_domain(0,"load_region_regs: can't set! bad=%lx\n",bad); + } + return 0; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/sn_console.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/sn_console.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,84 @@ +/* + * C-Brick Serial Port (and console) driver for SGI Altix machines. + * + * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include <asm/acpi.h> +#include <asm/sn/sn_sal.h> +#include <xen/serial.h> + +void sn_putc(struct serial_port *, char); + +static struct uart_driver sn_sal_console = { + .putc = sn_putc, +}; + +/** + * early_sn_setup - early setup routine for SN platforms + * + * pulled from arch/ia64/sn/kernel/setup.c + */ +static void __init early_sn_setup(void) +{ + efi_system_table_t *efi_systab; + efi_config_table_t *config_tables; + struct ia64_sal_systab *sal_systab; + struct ia64_sal_desc_entry_point *ep; + char *p; + int i, j; + + /* + * Parse enough of the SAL tables to locate the SAL entry point. Since, console + * IO on SN2 is done via SAL calls, early_printk won't work without this. + * + * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c. + * Any changes to those file may have to be made hereas well. + */ + efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab); + config_tables = __va(efi_systab->tables); + for (i = 0; i < efi_systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == + 0) { + sal_systab = __va(config_tables[i].table); + p = (char *)(sal_systab + 1); + for (j = 0; j < sal_systab->entry_count; j++) { + if (*p == SAL_DESC_ENTRY_POINT) { + ep = (struct ia64_sal_desc_entry_point + *)p; + ia64_sal_handler_init(__va + (ep->sal_proc), + __va(ep->gp)); + return; + } + p += SAL_DESC_SIZE(*p); + } + } + } + /* Uh-oh, SAL not available?? 
*/ + printk(KERN_ERR "failed to find SAL entry point\n"); +} + +/** + * sn_serial_console_early_setup - Sets up early console output support + * + * pulled from drivers/serial/sn_console.c + */ +int __init sn_serial_console_early_setup(void) +{ + if (strcmp("sn2",acpi_get_sysname())) + return -1; + + early_sn_setup(); /* Find SAL entry points */ + serial_register_uart(0, &sn_sal_console, NULL); + + return 0; +} + +/* + * sn_putc - Send a character to the console, polled or interrupt mode + */ +void sn_putc(struct serial_port *port, char c) +{ + return ia64_sn_console_putc(c); +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/vcpu.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/vcpu.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,1843 @@ +/* + * Virtualized CPU functions + * + * Copyright (C) 2004 Hewlett-Packard Co. + * Dan Magenheimer (dan.magenheimer@xxxxxx) + * + */ + +#include <linux/sched.h> +#include <public/arch-ia64.h> +#include <asm/ia64_int.h> +#include <asm/vcpu.h> +#include <asm/regionreg.h> +#include <asm/tlb.h> +#include <asm/processor.h> +#include <asm/delay.h> +#include <asm/vmx_vcpu.h> + +typedef union { + struct ia64_psr ia64_psr; + unsigned long i64; +} PSR; + +//typedef struct pt_regs REGS; +//typedef struct domain VCPU; + +// this def for vcpu_regs won't work if kernel stack is present +#define vcpu_regs(vcpu) ((struct pt_regs *) vcpu->arch.regs) +#define PSCB(x,y) VCPU(x,y) +#define PSCBX(x,y) x->arch.y + +#define TRUE 1 +#define FALSE 0 +#define IA64_PTA_SZ_BIT 2 +#define IA64_PTA_VF_BIT 8 +#define IA64_PTA_BASE_BIT 15 +#define IA64_PTA_LFMT (1UL << IA64_PTA_VF_BIT) +#define IA64_PTA_SZ(x) (x##UL << IA64_PTA_SZ_BIT) + +#define STATIC + +#ifdef PRIVOP_ADDR_COUNT +struct privop_addr_count privop_addr_counter[PRIVOP_COUNT_NINSTS] = { + { "=ifa", { 0 }, { 0 }, 0 }, + { "thash", { 0 }, { 0 }, 0 }, + 0 +}; +extern void privop_count_addr(unsigned long addr, int inst); +#define PRIVOP_COUNT_ADDR(regs,inst) privop_count_addr(regs->cr_iip,inst) +#else +#define PRIVOP_COUNT_ADDR(x,y) do {} while (0) +#endif + +unsigned long dtlb_translate_count = 0; +unsigned long tr_translate_count = 0; +unsigned long phys_translate_count = 0; + +unsigned long vcpu_verbose = 0; +#define verbose(a...) 
do {if (vcpu_verbose) printf(a);} while(0) + +extern TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa); +extern TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa); + +/************************************************************************** + VCPU general register access routines +**************************************************************************/ + +UINT64 +vcpu_get_gr(VCPU *vcpu, unsigned reg) +{ + REGS *regs = vcpu_regs(vcpu); + UINT64 val; + + if (!reg) return 0; + getreg(reg,&val,0,regs); // FIXME: handle NATs later + return val; +} + +// returns: +// IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault +// IA64_NO_FAULT otherwise +IA64FAULT +vcpu_set_gr(VCPU *vcpu, unsigned reg, UINT64 value) +{ + REGS *regs = vcpu_regs(vcpu); + long sof = (regs->cr_ifs) & 0x7f; + + if (!reg) return IA64_ILLOP_FAULT; + if (reg >= sof + 32) return IA64_ILLOP_FAULT; + setreg(reg,value,0,regs); // FIXME: handle NATs later + return IA64_NO_FAULT; +} + +/************************************************************************** + VCPU privileged application register access routines +**************************************************************************/ + +IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + if (reg == 44) return (vcpu_set_itc(vcpu,val)); + else if (reg == 27) return (IA64_ILLOP_FAULT); + else if (reg == 24) + printf("warning: setting ar.eflg is a no-op; no IA-32 support\n"); + else if (reg > 7) return (IA64_ILLOP_FAULT); + else PSCB(vcpu,krs[reg]) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val) +{ + if (reg == 24) + printf("warning: getting ar.eflg is a no-op; no IA-32 support\n"); + else if (reg > 7) return (IA64_ILLOP_FAULT); + else *val = PSCB(vcpu,krs[reg]); + return IA64_NO_FAULT; +} + +/************************************************************************** + VCPU processor status register access routines +**************************************************************************/ + +void vcpu_set_metaphysical_mode(VCPU *vcpu, BOOLEAN newmode) +{ + /* only do something if mode changes */ + if (!!newmode ^ !!PSCB(vcpu,metaphysical_mode)) { + if (newmode) set_metaphysical_rr0(); + else if (PSCB(vcpu,rrs[0]) != -1) + set_one_rr(0, PSCB(vcpu,rrs[0])); + PSCB(vcpu,metaphysical_mode) = newmode; + } +} + +IA64FAULT vcpu_reset_psr_dt(VCPU *vcpu) +{ + vcpu_set_metaphysical_mode(vcpu,TRUE); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24) +{ + struct ia64_psr psr, imm, *ipsr; + REGS *regs = vcpu_regs(vcpu); + + //PRIVOP_COUNT_ADDR(regs,_RSM); + // TODO: All of these bits need to be virtualized + // TODO: Only allowed for current vcpu + __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); + ipsr = (struct ia64_psr *)®s->cr_ipsr; + imm = *(struct ia64_psr *)&imm24; + // interrupt flag + if (imm.i) PSCB(vcpu,interrupt_delivery_enabled) = 0; + if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 0; + // interrupt collection flag + //if (imm.ic) PSCB(vcpu,interrupt_delivery_enabled) = 0; + // just handle psr.up and psr.pp for now + if (imm24 & ~(IA64_PSR_BE | IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP + | IA64_PSR_I | IA64_PSR_IC | IA64_PSR_DT + | IA64_PSR_DFL | IA64_PSR_DFH)) + return (IA64_ILLOP_FAULT); + if (imm.dfh) ipsr->dfh = 0; + if (imm.dfl) ipsr->dfl = 0; + if (imm.pp) { ipsr->pp = 0; psr.pp = 0; } + if (imm.up) { ipsr->up = 0; psr.up = 0; } + if (imm.sp) { ipsr->sp = 0; psr.sp = 0; } + if (imm.be) ipsr->be = 0; + if (imm.dt) 
vcpu_set_metaphysical_mode(vcpu,TRUE); + __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory"); + return IA64_NO_FAULT; +} + +extern UINT64 vcpu_check_pending_interrupts(VCPU *vcpu); +#define SPURIOUS_VECTOR 0xf + +IA64FAULT vcpu_set_psr_dt(VCPU *vcpu) +{ + vcpu_set_metaphysical_mode(vcpu,FALSE); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_psr_i(VCPU *vcpu) +{ + PSCB(vcpu,interrupt_delivery_enabled) = 1; + PSCB(vcpu,interrupt_collection_enabled) = 1; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24) +{ + struct ia64_psr psr, imm, *ipsr; + REGS *regs = vcpu_regs(vcpu); + UINT64 mask, enabling_interrupts = 0; + + //PRIVOP_COUNT_ADDR(regs,_SSM); + // TODO: All of these bits need to be virtualized + __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); + imm = *(struct ia64_psr *)&imm24; + ipsr = (struct ia64_psr *)®s->cr_ipsr; + // just handle psr.sp,pp and psr.i,ic (and user mask) for now + mask = IA64_PSR_PP|IA64_PSR_SP|IA64_PSR_I|IA64_PSR_IC|IA64_PSR_UM | + IA64_PSR_DT|IA64_PSR_DFL|IA64_PSR_DFH; + if (imm24 & ~mask) return (IA64_ILLOP_FAULT); + if (imm.dfh) ipsr->dfh = 1; + if (imm.dfl) ipsr->dfl = 1; + if (imm.pp) { ipsr->pp = 1; psr.pp = 1; } + if (imm.sp) { ipsr->sp = 1; psr.sp = 1; } + if (imm.i) { + if (!PSCB(vcpu,interrupt_delivery_enabled)) { +//printf("vcpu_set_psr_sm: psr.ic 0->1 "); + enabling_interrupts = 1; + } + PSCB(vcpu,interrupt_delivery_enabled) = 1; + } + if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 1; + // TODO: do this faster + if (imm.mfl) { ipsr->mfl = 1; psr.mfl = 1; } + if (imm.mfh) { ipsr->mfh = 1; psr.mfh = 1; } + if (imm.ac) { ipsr->ac = 1; psr.ac = 1; } + if (imm.up) { ipsr->up = 1; psr.up = 1; } + if (imm.be) { + printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n"); + return (IA64_ILLOP_FAULT); + } + if (imm.dt) vcpu_set_metaphysical_mode(vcpu,FALSE); + __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory"); +#if 0 // now done with deliver_pending_interrupts + if (enabling_interrupts) { + if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) { +//printf("with interrupts pending\n"); + return IA64_EXTINT_VECTOR; + } +//else printf("but nothing pending\n"); + } +#endif + if (enabling_interrupts && + vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) + PSCB(vcpu,pending_interruption) = 1; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_psr_l(VCPU *vcpu, UINT64 val) +{ + struct ia64_psr psr, newpsr, *ipsr; + REGS *regs = vcpu_regs(vcpu); + UINT64 enabling_interrupts = 0; + + // TODO: All of these bits need to be virtualized + __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); + newpsr = *(struct ia64_psr *)&val; + ipsr = (struct ia64_psr *)®s->cr_ipsr; + // just handle psr.up and psr.pp for now + //if (val & ~(IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP)) return (IA64_ILLOP_FAULT); + // however trying to set other bits can't be an error as it is in ssm + if (newpsr.dfh) ipsr->dfh = 1; + if (newpsr.dfl) ipsr->dfl = 1; + if (newpsr.pp) { ipsr->pp = 1; psr.pp = 1; } + if (newpsr.up) { ipsr->up = 1; psr.up = 1; } + if (newpsr.sp) { ipsr->sp = 1; psr.sp = 1; } + if (newpsr.i) { + if (!PSCB(vcpu,interrupt_delivery_enabled)) + enabling_interrupts = 1; + PSCB(vcpu,interrupt_delivery_enabled) = 1; + } + if (newpsr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1; + if (newpsr.mfl) { ipsr->mfl = 1; psr.mfl = 1; } + if (newpsr.mfh) { ipsr->mfh = 1; psr.mfh = 1; } + if (newpsr.ac) { ipsr->ac = 1; psr.ac = 1; } + if (newpsr.up) { ipsr->up = 1; psr.up = 1; } + if (newpsr.dt && 
newpsr.rt) vcpu_set_metaphysical_mode(vcpu,FALSE); + else vcpu_set_metaphysical_mode(vcpu,TRUE); + if (newpsr.be) { + printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n"); + return (IA64_ILLOP_FAULT); + } + //__asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory"); +#if 0 // now done with deliver_pending_interrupts + if (enabling_interrupts) { + if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) + return IA64_EXTINT_VECTOR; + } +#endif + if (enabling_interrupts && + vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) + PSCB(vcpu,pending_interruption) = 1; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_get_psr(VCPU *vcpu, UINT64 *pval) +{ + UINT64 psr; + struct ia64_psr newpsr; + + // TODO: This needs to return a "filtered" view of + // the psr, not the actual psr. Probably the psr needs + // to be a field in regs (in addition to ipsr). + __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); + newpsr = *(struct ia64_psr *)&psr; + if (newpsr.cpl == 2) newpsr.cpl = 0; + if (PSCB(vcpu,interrupt_delivery_enabled)) newpsr.i = 1; + else newpsr.i = 0; + if (PSCB(vcpu,interrupt_collection_enabled)) newpsr.ic = 1; + else newpsr.ic = 0; + *pval = *(unsigned long *)&newpsr; + return IA64_NO_FAULT; +} + +BOOLEAN vcpu_get_psr_ic(VCPU *vcpu) +{ + return !!PSCB(vcpu,interrupt_collection_enabled); +} + +BOOLEAN vcpu_get_psr_i(VCPU *vcpu) +{ + return !!PSCB(vcpu,interrupt_delivery_enabled); +} + +UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr) +{ + UINT64 dcr = PSCBX(vcpu,dcr); + PSR psr = {0}; + + //printf("*** vcpu_get_ipsr_int_state (0x%016lx)...",prevpsr); + psr.i64 = prevpsr; + psr.ia64_psr.be = 0; if (dcr & IA64_DCR_BE) psr.ia64_psr.be = 1; + psr.ia64_psr.pp = 0; if (dcr & IA64_DCR_PP) psr.ia64_psr.pp = 1; + psr.ia64_psr.ic = PSCB(vcpu,interrupt_collection_enabled); + psr.ia64_psr.i = PSCB(vcpu,interrupt_delivery_enabled); + psr.ia64_psr.bn = PSCB(vcpu,banknum); + psr.ia64_psr.dt = 1; psr.ia64_psr.it = 1; psr.ia64_psr.rt = 1; + if (psr.ia64_psr.cpl == 2) psr.ia64_psr.cpl = 0; // !!!! 
fool domain + // psr.pk = 1; + //printf("returns 0x%016lx...",psr.i64); + return psr.i64; +} + +/************************************************************************** + VCPU control register access routines +**************************************************************************/ + +IA64FAULT vcpu_get_dcr(VCPU *vcpu, UINT64 *pval) +{ +extern unsigned long privop_trace; +//privop_trace=0; +//verbose("vcpu_get_dcr: called @%p\n",PSCB(vcpu,iip)); + // Reads of cr.dcr on Xen always have the sign bit set, so + // a domain can differentiate whether it is running on SP or not + *pval = PSCBX(vcpu,dcr) | 0x8000000000000000L; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_iva(VCPU *vcpu, UINT64 *pval) +{ + *pval = PSCBX(vcpu,iva) & ~0x7fffL; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_pta(VCPU *vcpu, UINT64 *pval) +{ + *pval = PSCB(vcpu,pta); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_ipsr(VCPU *vcpu, UINT64 *pval) +{ + //REGS *regs = vcpu_regs(vcpu); + //*pval = regs->cr_ipsr; + *pval = PSCB(vcpu,ipsr); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_isr(VCPU *vcpu, UINT64 *pval) +{ + *pval = PSCB(vcpu,isr); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_iip(VCPU *vcpu, UINT64 *pval) +{ + //REGS *regs = vcpu_regs(vcpu); + //*pval = regs->cr_iip; + *pval = PSCB(vcpu,iip); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_ifa(VCPU *vcpu, UINT64 *pval) +{ + UINT64 val = PSCB(vcpu,ifa); + REGS *regs = vcpu_regs(vcpu); + PRIVOP_COUNT_ADDR(regs,_GET_IFA); + *pval = val; + return (IA64_NO_FAULT); +} + +unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr) +{ + ia64_rr rr; + + rr.rrval = PSCB(vcpu,rrs)[vadr>>61]; + return(rr.ps); +} + +unsigned long vcpu_get_rr_rid(VCPU *vcpu,UINT64 vadr) +{ + ia64_rr rr; + + rr.rrval = PSCB(vcpu,rrs)[vadr>>61]; + return(rr.rid); +} + +unsigned long vcpu_get_itir_on_fault(VCPU *vcpu, UINT64 ifa) +{ + ia64_rr rr; + + rr.rrval = 0; + rr.ps = vcpu_get_rr_ps(vcpu,ifa); + rr.rid = vcpu_get_rr_rid(vcpu,ifa); + return (rr.rrval); +} + + +IA64FAULT vcpu_get_itir(VCPU *vcpu, UINT64 *pval) +{ + UINT64 val = PSCB(vcpu,itir); + *pval = val; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_iipa(VCPU *vcpu, UINT64 *pval) +{ + UINT64 val = PSCB(vcpu,iipa); + // SP entry code does not save iipa yet nor does it get + // properly delivered in the pscb + printf("*** vcpu_get_iipa: cr.iipa not fully implemented yet!!\n"); + *pval = val; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_ifs(VCPU *vcpu, UINT64 *pval) +{ + //PSCB(vcpu,ifs) = PSCB(vcpu)->regs.cr_ifs; + //*pval = PSCB(vcpu,regs).cr_ifs; + *pval = PSCB(vcpu,ifs); + PSCB(vcpu,incomplete_regframe) = 0; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_iim(VCPU *vcpu, UINT64 *pval) +{ + UINT64 val = PSCB(vcpu,iim); + *pval = val; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_iha(VCPU *vcpu, UINT64 *pval) +{ + //return vcpu_thash(vcpu,PSCB(vcpu,ifa),pval); + UINT64 val = PSCB(vcpu,iha); + REGS *regs = vcpu_regs(vcpu); + PRIVOP_COUNT_ADDR(regs,_THASH); + *pval = val; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_dcr(VCPU *vcpu, UINT64 val) +{ +extern unsigned long privop_trace; +//privop_trace=1; + // Reads of cr.dcr on SP always have the sign bit set, so + // a domain can differentiate whether it is running on SP or not + // Thus, writes of DCR should ignore the sign bit +//verbose("vcpu_set_dcr: called\n"); + PSCBX(vcpu,dcr) = val & ~0x8000000000000000L; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val) +{ + PSCBX(vcpu,iva) = val & ~0x7fffL; + return 
(IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_pta(VCPU *vcpu, UINT64 val) +{ + if (val & IA64_PTA_LFMT) { + printf("*** No support for VHPT long format yet!!\n"); + return (IA64_ILLOP_FAULT); + } + if (val & (0x3f<<9)) /* reserved fields */ return IA64_RSVDREG_FAULT; + if (val & 2) /* reserved fields */ return IA64_RSVDREG_FAULT; + PSCB(vcpu,pta) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_ipsr(VCPU *vcpu, UINT64 val) +{ + PSCB(vcpu,ipsr) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_isr(VCPU *vcpu, UINT64 val) +{ + PSCB(vcpu,isr) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_iip(VCPU *vcpu, UINT64 val) +{ + PSCB(vcpu,iip) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_increment_iip(VCPU *vcpu) +{ + REGS *regs = vcpu_regs(vcpu); + struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; + if (ipsr->ri == 2) { ipsr->ri=0; regs->cr_iip += 16; } + else ipsr->ri++; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_ifa(VCPU *vcpu, UINT64 val) +{ + PSCB(vcpu,ifa) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_itir(VCPU *vcpu, UINT64 val) +{ + PSCB(vcpu,itir) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_iipa(VCPU *vcpu, UINT64 val) +{ + // SP entry code does not save iipa yet nor does it get + // properly delivered in the pscb + printf("*** vcpu_set_iipa: cr.iipa not fully implemented yet!!\n"); + PSCB(vcpu,iipa) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_ifs(VCPU *vcpu, UINT64 val) +{ + //REGS *regs = vcpu_regs(vcpu); + PSCB(vcpu,ifs) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_iim(VCPU *vcpu, UINT64 val) +{ + PSCB(vcpu,iim) = val; + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_iha(VCPU *vcpu, UINT64 val) +{ + PSCB(vcpu,iha) = val; + return IA64_NO_FAULT; +} + +/************************************************************************** + VCPU interrupt control register access routines +**************************************************************************/ + +void vcpu_pend_unspecified_interrupt(VCPU *vcpu) +{ + PSCB(vcpu,pending_interruption) = 1; +} + +void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector) +{ + if (vector & ~0xff) { + printf("vcpu_pend_interrupt: bad vector\n"); + return; + } +#ifdef CONFIG_VTI + if ( VMX_DOMAIN(vcpu) ) { + set_bit(vector,VPD_CR(vcpu,irr)); + } else +#endif // CONFIG_VTI + { + /* if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return; */ + if (test_bit(vector,PSCBX(vcpu,irr))) { +//printf("vcpu_pend_interrupt: overrun\n"); + } + set_bit(vector,PSCBX(vcpu,irr)); + PSCB(vcpu,pending_interruption) = 1; + } + +#if 0 + /* Keir: I think you should unblock when an interrupt is pending. 
*/ + { + int running = test_bit(_VCPUF_running, &vcpu->vcpu_flags); + vcpu_unblock(vcpu); + if ( running ) + smp_send_event_check_cpu(vcpu->processor); + } +#endif +} + +void early_tick(VCPU *vcpu) +{ + UINT64 *p = &PSCBX(vcpu,irr[3]); + printf("vcpu_check_pending: about to deliver early tick\n"); + printf("&irr[0]=%p, irr[0]=0x%lx\n",p,*p); +} + +#define IA64_TPR_MMI 0x10000 +#define IA64_TPR_MIC 0x000f0 + +/* checks to see if a VCPU has any unmasked pending interrupts + * if so, returns the highest, else returns SPURIOUS_VECTOR */ +/* NOTE: Since this gets called from vcpu_get_ivr() and the + * semantics of "mov rx=cr.ivr" ignore the setting of the psr.i bit, + * this routine also ignores pscb.interrupt_delivery_enabled + * and this must be checked independently; see vcpu_deliverable interrupts() */ +UINT64 vcpu_check_pending_interrupts(VCPU *vcpu) +{ + UINT64 *p, *q, *r, bits, bitnum, mask, i, vector; + + p = &PSCBX(vcpu,irr[3]); + /* q = &PSCB(vcpu,delivery_mask[3]); */ + r = &PSCBX(vcpu,insvc[3]); + for (i = 3; ; p--, q--, r--, i--) { + bits = *p /* & *q */; + if (bits) break; // got a potential interrupt + if (*r) { + // nothing in this word which is pending+inservice + // but there is one inservice which masks lower + return SPURIOUS_VECTOR; + } + if (i == 0) { + // checked all bits... nothing pending+inservice + return SPURIOUS_VECTOR; + } + } + // have a pending,deliverable interrupt... see if it is masked + bitnum = ia64_fls(bits); +//printf("XXXXXXX vcpu_check_pending_interrupts: got bitnum=%p...",bitnum); + vector = bitnum+(i*64); + mask = 1L << bitnum; +//printf("XXXXXXX vcpu_check_pending_interrupts: got vector=%p...",vector); + if (*r >= mask) { + // masked by equal inservice +//printf("but masked by equal inservice\n"); + return SPURIOUS_VECTOR; + } + if (PSCB(vcpu,tpr) & IA64_TPR_MMI) { + // tpr.mmi is set +//printf("but masked by tpr.mmi\n"); + return SPURIOUS_VECTOR; + } + if (((PSCB(vcpu,tpr) & IA64_TPR_MIC) + 15) >= vector) { + //tpr.mic masks class +//printf("but masked by tpr.mic\n"); + return SPURIOUS_VECTOR; + } + +//printf("returned to caller\n"); +#if 0 +if (vector == (PSCB(vcpu,itv) & 0xff)) { + UINT64 now = ia64_get_itc(); + UINT64 itm = PSCBX(vcpu,domain_itm); + if (now < itm) early_tick(vcpu); + +} +#endif + return vector; +} + +UINT64 vcpu_deliverable_interrupts(VCPU *vcpu) +{ + return (vcpu_get_psr_i(vcpu) && + vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR); +} + +UINT64 vcpu_deliverable_timer(VCPU *vcpu) +{ + return (vcpu_get_psr_i(vcpu) && + vcpu_check_pending_interrupts(vcpu) == PSCB(vcpu,itv)); +} + +IA64FAULT vcpu_get_lid(VCPU *vcpu, UINT64 *pval) +{ +extern unsigned long privop_trace; +//privop_trace=1; + //TODO: Implement this + printf("vcpu_get_lid: WARNING: Getting cr.lid always returns zero\n"); + //*pval = 0; + *pval = ia64_getreg(_IA64_REG_CR_LID); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval) +{ + int i; + UINT64 vector, mask; + +#define HEARTBEAT_FREQ 16 // period in seconds +#ifdef HEARTBEAT_FREQ +#define N_DOMS 16 // period in seconds + static long count[N_DOMS] = { 0 }; + static long nonclockcount[N_DOMS] = { 0 }; + REGS *regs = vcpu_regs(vcpu); + unsigned domid = vcpu->domain->domain_id; +#endif +#ifdef IRQ_DEBUG + static char firstivr = 1; + static char firsttime[256]; + if (firstivr) { + int i; + for (i=0;i<256;i++) firsttime[i]=1; + firstivr=0; + } +#endif + + vector = vcpu_check_pending_interrupts(vcpu); + if (vector == SPURIOUS_VECTOR) { + PSCB(vcpu,pending_interruption) = 0; + *pval = vector; 
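	/* Note that SPURIOUS_VECTOR can be returned here even with bits set
	 * in irr[]: vcpu_check_pending_interrupts() above also honors the
	 * in-service bits and tpr. With IA64_TPR_MIC = 0x000f0, the test
	 * ((tpr & IA64_TPR_MIC) + 15) >= vector masks every vector whose
	 * priority class (vector/16) is at or below tpr.mic -- e.g. with
	 * tpr.mic = 3, vectors 0x00-0x3f are masked. */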
+ return IA64_NO_FAULT; + } +#ifdef HEARTBEAT_FREQ + if (domid >= N_DOMS) domid = N_DOMS-1; + if (vector == (PSCB(vcpu,itv) & 0xff)) { + if (!(++count[domid] & ((HEARTBEAT_FREQ*1024)-1))) { + printf("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n", + domid, count[domid], nonclockcount[domid]); + //count[domid] = 0; + //dump_runq(); + } + } + else nonclockcount[domid]++; +#endif + // now have an unmasked, pending, deliverable vector! + // getting ivr has "side effects" +#ifdef IRQ_DEBUG + if (firsttime[vector]) { + printf("*** First get_ivr on vector=%d,itc=%lx\n", + vector,ia64_get_itc()); + firsttime[vector]=0; + } +#endif + i = vector >> 6; + mask = 1L << (vector & 0x3f); +//printf("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %ld\n",vector); + PSCBX(vcpu,insvc[i]) |= mask; + PSCBX(vcpu,irr[i]) &= ~mask; + //PSCB(vcpu,pending_interruption)--; + *pval = vector; + // if delivering a timer interrupt, remember domain_itm + if (vector == (PSCB(vcpu,itv) & 0xff)) { + PSCBX(vcpu,domain_itm_last) = PSCBX(vcpu,domain_itm); + } + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_get_tpr(VCPU *vcpu, UINT64 *pval) +{ + *pval = PSCB(vcpu,tpr); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_eoi(VCPU *vcpu, UINT64 *pval) +{ + *pval = 0L; // reads of eoi always return 0 + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_irr0(VCPU *vcpu, UINT64 *pval) +{ +#ifndef IRR_USE_FIXED + printk("vcpu_get_irr: called, not implemented yet\n"); + return IA64_ILLOP_FAULT; +#else + *pval = vcpu->irr[0]; + return (IA64_NO_FAULT); +#endif +} + +IA64FAULT vcpu_get_irr1(VCPU *vcpu, UINT64 *pval) +{ +#ifndef IRR_USE_FIXED + printk("vcpu_get_irr: called, not implemented yet\n"); + return IA64_ILLOP_FAULT; +#else + *pval = vcpu->irr[1]; + return (IA64_NO_FAULT); +#endif +} + +IA64FAULT vcpu_get_irr2(VCPU *vcpu, UINT64 *pval) +{ +#ifndef IRR_USE_FIXED + printk("vcpu_get_irr: called, not implemented yet\n"); + return IA64_ILLOP_FAULT; +#else + *pval = vcpu->irr[2]; + return (IA64_NO_FAULT); +#endif +} + +IA64FAULT vcpu_get_irr3(VCPU *vcpu, UINT64 *pval) +{ +#ifndef IRR_USE_FIXED + printk("vcpu_get_irr: called, not implemented yet\n"); + return IA64_ILLOP_FAULT; +#else + *pval = vcpu->irr[3]; + return (IA64_NO_FAULT); +#endif +} + +IA64FAULT vcpu_get_itv(VCPU *vcpu, UINT64 *pval) +{ + *pval = PSCB(vcpu,itv); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_pmv(VCPU *vcpu, UINT64 *pval) +{ + *pval = PSCB(vcpu,pmv); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_cmcv(VCPU *vcpu, UINT64 *pval) +{ + *pval = PSCB(vcpu,cmcv); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_lrr0(VCPU *vcpu, UINT64 *pval) +{ + // fix this when setting values other than m-bit is supported + printf("vcpu_get_lrr0: Unmasked interrupts unsupported\n"); + *pval = (1L << 16); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_lrr1(VCPU *vcpu, UINT64 *pval) +{ + // fix this when setting values other than m-bit is supported + printf("vcpu_get_lrr1: Unmasked interrupts unsupported\n"); + *pval = (1L << 16); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_lid(VCPU *vcpu, UINT64 val) +{ + printf("vcpu_set_lid: Setting cr.lid is unsupported\n"); + return (IA64_ILLOP_FAULT); +} + +IA64FAULT vcpu_set_tpr(VCPU *vcpu, UINT64 val) +{ + if (val & 0xff00) return IA64_RSVDREG_FAULT; + PSCB(vcpu,tpr) = val; + if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) + PSCB(vcpu,pending_interruption) = 1; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val) +{ + UINT64 *p, bits, vec, bitnum; + int i; + + p = &PSCBX(vcpu,insvc[3]); + 
for (i = 3; (i >= 0) && !(bits = *p); i--, p--); + if (i < 0) { + printf("Trying to EOI interrupt when none are in-service.\r\n"); + return; + } + bitnum = ia64_fls(bits); + vec = bitnum + (i*64); + /* clear the correct bit */ + bits &= ~(1L << bitnum); + *p = bits; + /* clearing an eoi bit may unmask another pending interrupt... */ + if (PSCB(vcpu,interrupt_delivery_enabled)) { // but only if enabled... + // worry about this later... Linux only calls eoi + // with interrupts disabled + printf("Trying to EOI interrupt with interrupts enabled\r\n"); + } + if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) + PSCB(vcpu,pending_interruption) = 1; +//printf("YYYYY vcpu_set_eoi: Successful\n"); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_lrr0(VCPU *vcpu, UINT64 val) +{ + if (!(val & (1L << 16))) { + printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n"); + return (IA64_ILLOP_FAULT); + } + // no place to save this state but nothing to do anyway + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_lrr1(VCPU *vcpu, UINT64 val) +{ + if (!(val & (1L << 16))) { + printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n"); + return (IA64_ILLOP_FAULT); + } + // no place to save this state but nothing to do anyway + return (IA64_NO_FAULT); +} + +// parameter is a time interval specified in cycles +void vcpu_enable_timer(VCPU *vcpu,UINT64 cycles) +{ + PSCBX(vcpu,xen_timer_interval) = cycles; + vcpu_set_next_timer(vcpu); + printf("vcpu_enable_timer(%d): interval set to %d cycles\n", + PSCBX(vcpu,xen_timer_interval)); + __set_bit(PSCB(vcpu,itv), PSCB(vcpu,delivery_mask)); +} + +IA64FAULT vcpu_set_itv(VCPU *vcpu, UINT64 val) +{ +extern unsigned long privop_trace; +//privop_trace=1; + if (val & 0xef00) return (IA64_ILLOP_FAULT); + PSCB(vcpu,itv) = val; + if (val & 0x10000) { +printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 0\n",val,PSCBX(vcpu,domain_itm)); + PSCBX(vcpu,domain_itm) = 0; + } + else vcpu_enable_timer(vcpu,1000000L); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_pmv(VCPU *vcpu, UINT64 val) +{ + if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT; + PSCB(vcpu,pmv) = val; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_cmcv(VCPU *vcpu, UINT64 val) +{ + if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT; + PSCB(vcpu,cmcv) = val; + return (IA64_NO_FAULT); +} + +/************************************************************************** + VCPU temporary register access routines +**************************************************************************/ +UINT64 vcpu_get_tmp(VCPU *vcpu, UINT64 index) +{ + if (index > 7) return 0; + return PSCB(vcpu,tmp[index]); +} + +void vcpu_set_tmp(VCPU *vcpu, UINT64 index, UINT64 val) +{ + if (index <= 7) PSCB(vcpu,tmp[index]) = val; +} + +/************************************************************************** +Interval timer routines +**************************************************************************/ + +BOOLEAN vcpu_timer_disabled(VCPU *vcpu) +{ + UINT64 itv = PSCB(vcpu,itv); + return(!itv || !!(itv & 0x10000)); +} + +BOOLEAN vcpu_timer_inservice(VCPU *vcpu) +{ + UINT64 itv = PSCB(vcpu,itv); + return (test_bit(itv, PSCBX(vcpu,insvc))); +} + +BOOLEAN vcpu_timer_expired(VCPU *vcpu) +{ + unsigned long domain_itm = PSCBX(vcpu,domain_itm); + unsigned long now = ia64_get_itc(); + + if (!domain_itm) return FALSE; + if (now < domain_itm) return FALSE; + if (vcpu_timer_disabled(vcpu)) return FALSE; + return TRUE; +} + +void vcpu_safe_set_itm(unsigned long val) +{ + unsigned long epsilon = 100; + UINT64 
now = ia64_get_itc(); + + local_irq_disable(); + while (1) { +//printf("*** vcpu_safe_set_itm: Setting itm to %lx, itc=%lx\n",val,now); + ia64_set_itm(val); + if (val > (now = ia64_get_itc())) break; + val = now + epsilon; + epsilon <<= 1; + } + local_irq_enable(); +} + +void vcpu_set_next_timer(VCPU *vcpu) +{ + UINT64 d = PSCBX(vcpu,domain_itm); + //UINT64 s = PSCBX(vcpu,xen_itm); + UINT64 s = local_cpu_data->itm_next; + UINT64 now = ia64_get_itc(); + //UINT64 interval = PSCBX(vcpu,xen_timer_interval); + + /* gloss over the wraparound problem for now... we know it exists + * but it doesn't matter right now */ + +#if 0 + /* ensure at least next SP tick is in the future */ + if (!interval) PSCBX(vcpu,xen_itm) = now + +#if 0 + (running_on_sim() ? SIM_DEFAULT_CLOCK_RATE : + DEFAULT_CLOCK_RATE); +#else + 3000000; +//printf("vcpu_set_next_timer: HACK!\n"); +#endif +#if 0 + if (PSCBX(vcpu,xen_itm) < now) + while (PSCBX(vcpu,xen_itm) < now + (interval>>1)) + PSCBX(vcpu,xen_itm) += interval; +#endif +#endif + + if (is_idle_task(vcpu->domain)) { + printf("****** vcpu_set_next_timer called during idle!!\n"); + } + //s = PSCBX(vcpu,xen_itm); + if (d && (d > now) && (d < s)) { + vcpu_safe_set_itm(d); + //using_domain_as_itm++; + } + else { + vcpu_safe_set_itm(s); + //using_xen_as_itm++; + } +} + +IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT64 val) +{ + UINT now = ia64_get_itc(); + + //if (val < now) val = now + 1000; +//printf("*** vcpu_set_itm: called with %lx\n",val); + PSCBX(vcpu,domain_itm) = val; + vcpu_set_next_timer(vcpu); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_itc(VCPU *vcpu, UINT64 val) +{ + + UINT64 oldnow = ia64_get_itc(); + UINT64 olditm = PSCBX(vcpu,domain_itm); + unsigned long d = olditm - oldnow; + unsigned long x = local_cpu_data->itm_next - oldnow; + + UINT64 newnow = val, min_delta; + +#define DISALLOW_SETTING_ITC_FOR_NOW +#ifdef DISALLOW_SETTING_ITC_FOR_NOW +printf("vcpu_set_itc: Setting ar.itc is currently disabled\n"); +#else + local_irq_disable(); + if (olditm) { +printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d); + PSCBX(vcpu,domain_itm) = newnow + d; + } + local_cpu_data->itm_next = newnow + x; + d = PSCBX(vcpu,domain_itm); + x = local_cpu_data->itm_next; + + ia64_set_itc(newnow); + if (d && (d > newnow) && (d < x)) { + vcpu_safe_set_itm(d); + //using_domain_as_itm++; + } + else { + vcpu_safe_set_itm(x); + //using_xen_as_itm++; + } + local_irq_enable(); +#endif + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_itm(VCPU *vcpu, UINT64 *pval) +{ + //FIXME: Implement this + printf("vcpu_get_itm: Getting cr.itm is unsupported... 
continuing\n"); + return (IA64_NO_FAULT); + //return (IA64_ILLOP_FAULT); +} + +IA64FAULT vcpu_get_itc(VCPU *vcpu, UINT64 *pval) +{ + //TODO: Implement this + printf("vcpu_get_itc: Getting ar.itc is unsupported\n"); + return (IA64_ILLOP_FAULT); +} + +void vcpu_pend_timer(VCPU *vcpu) +{ + UINT64 itv = PSCB(vcpu,itv) & 0xff; + + if (vcpu_timer_disabled(vcpu)) return; + //if (vcpu_timer_inservice(vcpu)) return; + if (PSCBX(vcpu,domain_itm_last) == PSCBX(vcpu,domain_itm)) { + // already delivered an interrupt for this so + // don't deliver another + return; + } +#if 0 + // attempt to flag "timer tick before its due" source + { + UINT64 itm = PSCBX(vcpu,domain_itm); + UINT64 now = ia64_get_itc(); + if (now < itm) printf("******* vcpu_pend_timer: pending before due!\n"); + } +#endif + vcpu_pend_interrupt(vcpu, itv); +} + +// returns true if ready to deliver a timer interrupt too early +UINT64 vcpu_timer_pending_early(VCPU *vcpu) +{ + UINT64 now = ia64_get_itc(); + UINT64 itm = PSCBX(vcpu,domain_itm); + + if (vcpu_timer_disabled(vcpu)) return 0; + if (!itm) return 0; + return (vcpu_deliverable_timer(vcpu) && (now < itm)); +} + +//FIXME: This is a hack because everything dies if a timer tick is lost +void vcpu_poke_timer(VCPU *vcpu) +{ + UINT64 itv = PSCB(vcpu,itv) & 0xff; + UINT64 now = ia64_get_itc(); + UINT64 itm = PSCBX(vcpu,domain_itm); + UINT64 irr; + + if (vcpu_timer_disabled(vcpu)) return; + if (!itm) return; + if (itv != 0xefL) { + printf("vcpu_poke_timer: unimplemented itv=%lx!\n",itv); + while(1); + } + // using 0xef instead of itv so can get real irr + if (now > itm && !test_bit(0xefL, PSCBX(vcpu,insvc))) { + if (!test_bit(0xefL,PSCBX(vcpu,irr))) { + irr = ia64_getreg(_IA64_REG_CR_IRR3); + if (irr & (1L<<(0xef-0xc0))) return; +if (now-itm>0x800000) +printf("*** poking timer: now=%lx,vitm=%lx,xitm=%lx,itm=%lx\n",now,itm,local_cpu_data->itm_next,ia64_get_itm()); + vcpu_pend_timer(vcpu); + } + } +} + + +/************************************************************************** +Privileged operation emulation routines +**************************************************************************/ + +IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa) +{ + PSCB(vcpu,tmp[0]) = ifa; // save ifa in vcpu structure, then specify IA64_FORCED_IFA + return (vcpu_get_rr_ve(vcpu,ifa) ? IA64_DATA_TLB_VECTOR : IA64_ALT_DATA_TLB_VECTOR) | IA64_FORCED_IFA; +} + + +IA64FAULT vcpu_rfi(VCPU *vcpu) +{ + // TODO: Only allowed for current vcpu + PSR psr; + UINT64 int_enable, regspsr = 0; + UINT64 ifs; + REGS *regs = vcpu_regs(vcpu); + extern void dorfirfi(void); + + psr.i64 = PSCB(vcpu,ipsr); + if (psr.ia64_psr.cpl < 3) psr.ia64_psr.cpl = 2; + if (psr.ia64_psr.i) PSCB(vcpu,interrupt_delivery_enabled) = 1; + int_enable = psr.ia64_psr.i; + if (psr.ia64_psr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1; + if (psr.ia64_psr.dt && psr.ia64_psr.rt && psr.ia64_psr.it) vcpu_set_metaphysical_mode(vcpu,FALSE); + else vcpu_set_metaphysical_mode(vcpu,TRUE); + psr.ia64_psr.ic = 1; psr.ia64_psr.i = 1; + psr.ia64_psr.dt = 1; psr.ia64_psr.rt = 1; psr.ia64_psr.it = 1; + psr.ia64_psr.bn = 1; + //psr.pk = 1; // checking pkeys shouldn't be a problem but seems broken + if (psr.ia64_psr.be) { + printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n"); + return (IA64_ILLOP_FAULT); + } + PSCB(vcpu,incomplete_regframe) = 0; // is this necessary? 
+ ifs = PSCB(vcpu,ifs); + //if ((ifs & regs->cr_ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) { + //if ((ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) { + if (ifs & regs->cr_ifs & 0x8000000000000000L) { + // TODO: validate PSCB(vcpu,iip) + // TODO: PSCB(vcpu,ipsr) = psr; + PSCB(vcpu,ipsr) = psr.i64; + // now set up the trampoline + regs->cr_iip = *(unsigned long *)dorfirfi; // function pointer!! + __asm__ __volatile ("mov %0=psr;;":"=r"(regspsr)::"memory"); + regs->cr_ipsr = regspsr & ~(IA64_PSR_I | IA64_PSR_IC | IA64_PSR_BN); + } + else { + regs->cr_ipsr = psr.i64; + regs->cr_iip = PSCB(vcpu,iip); + } + PSCB(vcpu,interrupt_collection_enabled) = 1; + vcpu_bsw1(vcpu); + PSCB(vcpu,interrupt_delivery_enabled) = int_enable; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_cover(VCPU *vcpu) +{ + // TODO: Only allowed for current vcpu + REGS *regs = vcpu_regs(vcpu); + + if (!PSCB(vcpu,interrupt_collection_enabled)) { + if (!PSCB(vcpu,incomplete_regframe)) + PSCB(vcpu,ifs) = regs->cr_ifs; + else PSCB(vcpu,incomplete_regframe) = 0; + } + regs->cr_ifs = 0; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval) +{ + UINT64 pta = PSCB(vcpu,pta); + UINT64 pta_sz = (pta & IA64_PTA_SZ(0x3f)) >> IA64_PTA_SZ_BIT; + UINT64 pta_base = pta & ~((1UL << IA64_PTA_BASE_BIT)-1); + UINT64 Mask = (1L << pta_sz) - 1; + UINT64 Mask_60_15 = (Mask >> 15) & 0x3fffffffffff; + UINT64 compMask_60_15 = ~Mask_60_15; + //UINT64 rr_ps = RR_TO_PS(get_rr(vadr)); + UINT64 rr_ps = vcpu_get_rr_ps(vcpu,vadr); + UINT64 VHPT_offset = (vadr >> rr_ps) << 3; + UINT64 VHPT_addr1 = vadr & 0xe000000000000000L; + UINT64 VHPT_addr2a = + ((pta_base >> 15) & 0x3fffffffffff) & compMask_60_15; + UINT64 VHPT_addr2b = + ((VHPT_offset >> 15) & 0x3fffffffffff) & Mask_60_15;; + UINT64 VHPT_addr3 = VHPT_offset & 0x7fff; + UINT64 VHPT_addr = VHPT_addr1 | ((VHPT_addr2a | VHPT_addr2b) << 15) | + VHPT_addr3; + +#if 0 + if (VHPT_addr1 == 0xe000000000000000L) { + printf("vcpu_thash: thash unsupported with rr7 @%lx\n", + PSCB(vcpu,iip)); + return (IA64_ILLOP_FAULT); + } +#endif +//verbose("vcpu_thash: vadr=%p, VHPT_addr=%p\n",vadr,VHPT_addr); + *pval = VHPT_addr; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *padr) +{ + printf("vcpu_ttag: ttag instruction unsupported\n"); + return (IA64_ILLOP_FAULT); +} + +#define itir_ps(itir) ((itir >> 2) & 0x3f) +#define itir_mask(itir) (~((1UL << itir_ps(itir)) - 1)) + +unsigned long vhpt_translate_count = 0; + +IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir) +{ + unsigned long pta, pta_mask, iha, pte, ps; + TR_ENTRY *trp; + ia64_rr rr; + + if (!(address >> 61)) { + if (!PSCB(vcpu,metaphysical_mode)) { + REGS *regs = vcpu_regs(vcpu); + unsigned long viip = PSCB(vcpu,iip); + unsigned long vipsr = PSCB(vcpu,ipsr); + unsigned long iip = regs->cr_iip; + unsigned long ipsr = regs->cr_ipsr; + printk("vcpu_translate: bad address %p, viip=%p, vipsr=%p, iip=%p, ipsr=%p continuing\n", address, viip, vipsr, iip, ipsr); + } + + *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX; + *itir = PAGE_SHIFT << 2; + phys_translate_count++; + return IA64_NO_FAULT; + } + + /* check translation registers */ + if ((trp = match_tr(vcpu,address))) { + tr_translate_count++; + *pteval = trp->page_flags; + *itir = trp->itir; + return IA64_NO_FAULT; + } + + /* check 1-entry TLB */ + if ((trp = match_dtlb(vcpu,address))) { + dtlb_translate_count++; + *pteval = trp->page_flags; + *itir = 
trp->itir; + return IA64_NO_FAULT; + } + + /* check guest VHPT */ + pta = PSCB(vcpu,pta); + rr.rrval = PSCB(vcpu,rrs)[address>>61]; + if (rr.ve && (pta & IA64_PTA_VE)) + { + if (pta & IA64_PTA_VF) + { + /* long format VHPT - not implemented */ + return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR); + } + else + { + /* short format VHPT */ + + /* avoid recursively walking VHPT */ + pta_mask = (itir_mask(pta) << 3) >> 3; + if (((address ^ pta) & pta_mask) == 0) + return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR); + + vcpu_thash(vcpu, address, &iha); + if (__copy_from_user(&pte, (void *)iha, sizeof(pte)) != 0) + return IA64_VHPT_TRANS_VECTOR; + + /* + * Optimisation: this VHPT walker aborts on not-present pages + * instead of inserting a not-present translation, this allows + * vectoring directly to the miss handler. + \ */ + if (pte & _PAGE_P) + { + *pteval = pte; + *itir = vcpu_get_itir_on_fault(vcpu,address); + vhpt_translate_count++; + return IA64_NO_FAULT; + } + return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR); + } + } + return (is_data ? IA64_ALT_DATA_TLB_VECTOR : IA64_ALT_INST_TLB_VECTOR); +} + +IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr) +{ + UINT64 pteval, itir, mask; + IA64FAULT fault; + + fault = vcpu_translate(vcpu, vadr, 1, &pteval, &itir); + if (fault == IA64_NO_FAULT) + { + mask = itir_mask(itir); + *padr = (pteval & _PAGE_PPN_MASK & mask) | (vadr & ~mask); + return (IA64_NO_FAULT); + } + else + { + PSCB(vcpu,tmp[0]) = vadr; // save ifa in vcpu structure, then specify IA64_FORCED_IFA + return (fault | IA64_FORCED_IFA); + } +} + +IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key) +{ + printf("vcpu_tak: tak instruction unsupported\n"); + return (IA64_ILLOP_FAULT); + // HACK ALERT: tak does a thash for now + //return vcpu_thash(vcpu,vadr,key); +} + +/************************************************************************** + VCPU debug breakpoint register access routines +**************************************************************************/ + +IA64FAULT vcpu_set_dbr(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + // TODO: unimplemented DBRs return a reserved register fault + // TODO: Should set Logical CPU state, not just physical + ia64_set_dbr(reg,val); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_ibr(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + // TODO: unimplemented IBRs return a reserved register fault + // TODO: Should set Logical CPU state, not just physical + ia64_set_ibr(reg,val); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_dbr(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + // TODO: unimplemented DBRs return a reserved register fault + UINT64 val = ia64_get_dbr(reg); + *pval = val; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_ibr(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + // TODO: unimplemented IBRs return a reserved register fault + UINT64 val = ia64_get_ibr(reg); + *pval = val; + return (IA64_NO_FAULT); +} + +/************************************************************************** + VCPU performance monitor register access routines +**************************************************************************/ + +IA64FAULT vcpu_set_pmc(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + // TODO: Should set Logical CPU state, not just physical + // NOTE: Writes to unimplemented PMC registers are discarded + ia64_set_pmc(reg,val); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_set_pmd(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + // TODO: Should set Logical CPU state, not just physical + // NOTE: Writes to unimplemented 
PMD registers are discarded + ia64_set_pmd(reg,val); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_pmc(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + // NOTE: Reads from unimplemented PMC registers return zero + UINT64 val = (UINT64)ia64_get_pmc(reg); + *pval = val; + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_pmd(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + // NOTE: Reads from unimplemented PMD registers return zero + UINT64 val = (UINT64)ia64_get_pmd(reg); + *pval = val; + return (IA64_NO_FAULT); +} + +/************************************************************************** + VCPU banked general register access routines +**************************************************************************/ + +IA64FAULT vcpu_bsw0(VCPU *vcpu) +{ + // TODO: Only allowed for current vcpu + REGS *regs = vcpu_regs(vcpu); + unsigned long *r = &regs->r16; + unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]); + unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]); + int i; + + if (PSCB(vcpu,banknum)) { + for (i = 0; i < 16; i++) { *b1++ = *r; *r++ = *b0++; } + PSCB(vcpu,banknum) = 0; + } + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_bsw1(VCPU *vcpu) +{ + // TODO: Only allowed for current vcpu + REGS *regs = vcpu_regs(vcpu); + unsigned long *r = &regs->r16; + unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]); + unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]); + int i; + + if (!PSCB(vcpu,banknum)) { + for (i = 0; i < 16; i++) { *b0++ = *r; *r++ = *b1++; } + PSCB(vcpu,banknum) = 1; + } + return (IA64_NO_FAULT); +} + +/************************************************************************** + VCPU cpuid access routines +**************************************************************************/ + + +IA64FAULT vcpu_get_cpuid(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + // FIXME: This could get called as a result of a rsvd-reg fault + // if reg > 3 + switch(reg) { + case 0: + memcpy(pval,"Xen/ia64",8); + break; + case 1: + *pval = 0; + break; + case 2: + *pval = 0; + break; + case 3: + *pval = ia64_get_cpuid(3); + break; + case 4: + *pval = ia64_get_cpuid(4); + break; + default: + if (reg > (ia64_get_cpuid(3) & 0xff)) + return IA64_RSVDREG_FAULT; + *pval = ia64_get_cpuid(reg); + break; + } + return (IA64_NO_FAULT); +} + +/************************************************************************** + VCPU region register access routines +**************************************************************************/ + +unsigned long vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr) +{ + ia64_rr rr; + + rr.rrval = PSCB(vcpu,rrs)[vadr>>61]; + return(rr.ve); +} + +IA64FAULT vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + PSCB(vcpu,rrs)[reg>>61] = val; + // warning: set_one_rr() does it "live" + set_one_rr(reg,val); + return (IA64_NO_FAULT); +} + +IA64FAULT vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + UINT val = PSCB(vcpu,rrs)[reg>>61]; + *pval = val; + return (IA64_NO_FAULT); +} + +/************************************************************************** + VCPU protection key register access routines +**************************************************************************/ + +IA64FAULT vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ +#ifndef PKR_USE_FIXED + printk("vcpu_get_pkr: called, not implemented yet\n"); + return IA64_ILLOP_FAULT; +#else + UINT64 val = (UINT64)ia64_get_pkr(reg); + *pval = val; + return (IA64_NO_FAULT); +#endif +} + +IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val) +{ +#ifndef PKR_USE_FIXED + printk("vcpu_set_pkr: called, not implemented yet\n"); + return IA64_ILLOP_FAULT; +#else +// if
(reg >= NPKRS) return (IA64_ILLOP_FAULT); + vcpu->pkrs[reg] = val; + ia64_set_pkr(reg,val); + return (IA64_NO_FAULT); +#endif +} + +/************************************************************************** + VCPU translation register access routines +**************************************************************************/ + +static void vcpu_purge_tr_entry(TR_ENTRY *trp) +{ + trp->p = 0; +} + +static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 ifa) +{ + UINT64 ps; + + trp->itir = itir; + trp->rid = virtualize_rid(current, get_rr(ifa) & RR_RID_MASK); + trp->p = 1; + ps = trp->ps; + trp->page_flags = pte; + if (trp->pl < 2) trp->pl = 2; + trp->vadr = ifa & ~0xfff; + if (ps > 12) { // "ignore" relevant low-order bits + trp->ppn &= ~((1UL<<(ps-12))-1); + trp->vadr &= ~((1UL<<ps)-1); + } +} + +TR_ENTRY *vcpu_match_tr_entry(VCPU *vcpu, TR_ENTRY *trp, UINT64 ifa, int count) +{ + unsigned long rid = (get_rr(ifa) & RR_RID_MASK); + int i; + + for (i = 0; i < count; i++, trp++) { + if (!trp->p) continue; + if (physicalize_rid(vcpu,trp->rid) != rid) continue; + if (ifa < trp->vadr) continue; + if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue; + //if (trp->key && !match_pkr(vcpu,trp->key)) continue; + return trp; + } + return 0; +} + +TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa) +{ + TR_ENTRY *trp; + + trp = vcpu_match_tr_entry(vcpu,vcpu->arch.dtrs,ifa,NDTRS); + if (trp) return trp; + trp = vcpu_match_tr_entry(vcpu,vcpu->arch.itrs,ifa,NITRS); + if (trp) return trp; + return 0; +} + +IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte, + UINT64 itir, UINT64 ifa) +{ + TR_ENTRY *trp; + + if (slot >= NDTRS) return IA64_RSVDREG_FAULT; + trp = &PSCBX(vcpu,dtrs[slot]); +//printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa); + vcpu_set_tr_entry(trp,pte,itir,ifa); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte, + UINT64 itir, UINT64 ifa) +{ + TR_ENTRY *trp; + + if (slot >= NITRS) return IA64_RSVDREG_FAULT; + trp = &PSCBX(vcpu,itrs[slot]); +//printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa); + vcpu_set_tr_entry(trp,pte,itir,ifa); + return IA64_NO_FAULT; +} + +/************************************************************************** + VCPU translation cache access routines +**************************************************************************/ + +void foobar(void) { /*vcpu_verbose = 1;*/ } + +extern struct domain *dom0; + +void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps) +{ + unsigned long psr; + unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT; + + // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK! + // FIXME, must be inlined or potential for nested fault here! + if ((vcpu->domain==dom0) && (logps < PAGE_SHIFT)) { + printf("vcpu_itc_no_srlz: domain0 use of smaller page size!\n"); + //FIXME: kill domain here + while(1); + } + psr = ia64_clear_ic(); + ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings + ia64_set_psr(psr); + // ia64_srlz_i(); // no srls req'd, will rfi later +#ifdef VHPT_GLOBAL + if (vcpu->domain==dom0 && ((vaddr >> 61) == 7)) { + // FIXME: this is dangerous... vhpt_flush_address ensures these + // addresses never get flushed. More work needed if this + // ever happens. 
+//printf("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps); + if (logps > PAGE_SHIFT) vhpt_multiple_insert(vaddr,pte,logps); + else vhpt_insert(vaddr,pte,logps<<2); + } + // even if domain pagesize is larger than PAGE_SIZE, just put + // PAGE_SIZE mapping in the vhpt for now, else purging is complicated + else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2); +#endif + if (IorD & 0x4) return; // don't place in 1-entry TLB + if (IorD & 0x1) { + vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr); + PSCBX(vcpu,itlb_pte) = mp_pte; + } + if (IorD & 0x2) { + vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr); + PSCBX(vcpu,dtlb_pte) = mp_pte; + } +} + +// NOTE: returns a physical pte, NOT a "metaphysical" pte, so do not check +// the physical address contained for correctness +TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa) +{ + TR_ENTRY *trp; + + if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1)) + return (&vcpu->arch.dtlb); + return 0UL; +} + +IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) +{ + unsigned long pteval, logps = (itir >> 2) & 0x3f; + unsigned long translate_domain_pte(UINT64,UINT64,UINT64); + + if (logps < PAGE_SHIFT) { + printf("vcpu_itc_d: domain trying to use smaller page size!\n"); + //FIXME: kill domain here + while(1); + } + //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize + pteval = translate_domain_pte(pte,ifa,itir); + if (!pteval) return IA64_ILLOP_FAULT; + vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) +{ + unsigned long pteval, logps = (itir >> 2) & 0x3f; + unsigned long translate_domain_pte(UINT64,UINT64,UINT64); + + // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK! + if (logps < PAGE_SHIFT) { + printf("vcpu_itc_i: domain trying to use smaller page size!\n"); + //FIXME: kill domain here + while(1); + } + //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize + pteval = translate_domain_pte(pte,ifa,itir); + // FIXME: what to do if bad physical address? (machine check?) + if (!pteval) return IA64_ILLOP_FAULT; + vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 addr_range) +{ + printk("vcpu_ptc_l: called, not implemented yet\n"); + return IA64_ILLOP_FAULT; +} + +// At privlvl=0, fc performs no access rights or protection key checks, while +// at privlvl!=0, fc performs access rights checks as if it were a 1-byte +// read but no protection key check. Thus in order to avoid an unexpected +// access rights fault, we have to translate the virtual address to a +// physical address (possibly via a metaphysical address) and do the fc +// on the physical address, which is guaranteed to flush the same cache line +IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vadr) +{ + // TODO: Only allowed for current vcpu + UINT64 mpaddr, paddr; + IA64FAULT fault; + unsigned long translate_domain_mpaddr(unsigned long); + IA64FAULT vcpu_tpa(VCPU *, UINT64, UINT64 *); + + fault = vcpu_tpa(vcpu, vadr, &mpaddr); + if (fault == IA64_NO_FAULT) { + paddr = translate_domain_mpaddr(mpaddr); + ia64_fc(__va(paddr)); + } + return fault; +} + +int ptce_count = 0; +IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr) +{ + // Note that this only needs to be called once, i.e. 
the + // architected loop to purge the entire TLB, should use + // base = stride1 = stride2 = 0, count0 = count 1 = 1 + +#ifdef VHPT_GLOBAL + vhpt_flush(); // FIXME: This is overdoing it +#endif + local_flush_tlb_all(); + // just invalidate the "whole" tlb + vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); + vcpu_purge_tr_entry(&PSCBX(vcpu,itlb)); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 addr_range) +{ + printk("vcpu_ptc_g: called, not implemented yet\n"); + return IA64_ILLOP_FAULT; +} + +IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) +{ + extern ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits); + // FIXME: validate not flushing Xen addresses + // if (Xen address) return(IA64_ILLOP_FAULT); + // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE +//printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range); +#ifdef VHPT_GLOBAL + vhpt_flush_address(vadr,addr_range); +#endif + ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT); + vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); + vcpu_purge_tr_entry(&PSCBX(vcpu,itlb)); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) +{ + printf("vcpu_ptr_d: Purging TLB is unsupported\n"); + return (IA64_ILLOP_FAULT); +} + +IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) +{ + printf("vcpu_ptr_i: Purging TLB is unsupported\n"); + return (IA64_ILLOP_FAULT); +} + +void vcpu_set_regs(VCPU *vcpu, REGS *regs) +{ + vcpu->arch.regs = regs; +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/vhpt.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/vhpt.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,151 @@ +/* + * Initialize VHPT support. + * + * Copyright (C) 2004 Hewlett-Packard Co + * Dan Magenheimer <dan.magenheimer@xxxxxx> + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/init.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/dma.h> +#include <asm/vhpt.h> + +unsigned long vhpt_paddr, vhpt_pend, vhpt_pte; + +void vhpt_flush(void) +{ + struct vhpt_lf_entry *v = (void *)VHPT_ADDR; + int i, cnt = 0; +#if 0 +static int firsttime = 2; + +if (firsttime) firsttime--; +else { +printf("vhpt_flush: *********************************************\n"); +printf("vhpt_flush: *********************************************\n"); +printf("vhpt_flush: *********************************************\n"); +printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n"); +printf("vhpt_flush: *********************************************\n"); +printf("vhpt_flush: *********************************************\n"); +printf("vhpt_flush: *********************************************\n"); +} +#endif + for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) { + v->itir = 0; + v->CChain = 0; + v->page_flags = 0; + v->ti_tag = INVALID_TI_TAG; + } + // initialize cache too??? 
+} + +#ifdef VHPT_GLOBAL +void vhpt_flush_address(unsigned long vadr, unsigned long addr_range) +{ + unsigned long ps; + struct vhpt_lf_entry *vlfe; + + if ((vadr >> 61) == 7) { + // no vhpt for region 7 yet, see vcpu_itc_no_srlz + printf("vhpt_flush_address: region 7, spinning...\n"); + while(1); + } +#if 0 + // this only seems to occur at shutdown, but it does occur + if ((!addr_range) || addr_range & (addr_range - 1)) { + printf("vhpt_flush_address: weird range, spinning...\n"); + while(1); + } +//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range); +#endif + while ((long)addr_range > 0) { + vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr); + // FIXME: for now, just blow it away even if it belongs to + // another domain. Later, use ttag to check for match +//if (!(vlfe->ti_tag & INVALID_TI_TAG)) { +//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr); +//} + vlfe->ti_tag |= INVALID_TI_TAG; + addr_range -= PAGE_SIZE; + vadr += PAGE_SIZE; + } +} +#endif + +void vhpt_map(void) +{ + unsigned long psr; + + psr = ia64_clear_ic(); + ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, vhpt_pte, VHPT_SIZE_LOG2); + ia64_set_psr(psr); + ia64_srlz_i(); +} + +void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long logps) +{ + unsigned long mask = (1L << logps) - 1; + extern long running_on_sim; + int i; + + if (logps-PAGE_SHIFT > 10 && !running_on_sim) { + // if this happens, we may want to revisit this algorithm + printf("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n"); + while(1); + } + if (logps-PAGE_SHIFT > 2) { + // FIXME: Should add counter here to see how often this + // happens (e.g. for 16MB pages!) and determine if it + // is a performance problem. On a quick look, it takes + // about 39000 instrs for a 16MB page and it seems to occur + // only a few times/second, so OK for now. + // An alternate solution would be to just insert the one + // 16KB in the vhpt (but with the full mapping)? + //printf("vhpt_multiple_insert: logps-PAGE_SHIFT==%d," + //"va=%p, pa=%p, pa-masked=%p\n", + //logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK, + //(pte&_PFN_MASK)&~mask); + } + vaddr &= ~mask; + pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK); + for (i = 1L << (logps-PAGE_SHIFT); i > 0; i--) { + vhpt_insert(vaddr,pte,logps<<2); + vaddr += PAGE_SIZE; + } +} + +void vhpt_init(void) +{ + unsigned long vhpt_total_size, vhpt_alignment, vhpt_imva; +#if !VHPT_ENABLED + return; +#endif + // allocate a huge chunk of physical memory.... how??? + vhpt_total_size = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB + vhpt_alignment = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB + printf("vhpt_init: vhpt size=%p, align=%p\n",vhpt_total_size,vhpt_alignment); + /* This allocation only holds true if vhpt table is unique for + * all domains. Or else later new vhpt table should be allocated + * from domain heap when each domain is created. Assume xen buddy + * allocator can provide natural aligned page by order? 
+ */ + vhpt_imva = alloc_xenheap_pages(VHPT_SIZE_LOG2 - PAGE_SHIFT); + if (!vhpt_imva) { + printf("vhpt_init: can't allocate VHPT!\n"); + while(1); + } + vhpt_paddr = __pa(vhpt_imva); + vhpt_pend = vhpt_paddr + vhpt_total_size - 1; + printf("vhpt_init: vhpt paddr=%p, end=%p\n",vhpt_paddr,vhpt_pend); + vhpt_pte = pte_val(pfn_pte(vhpt_paddr >> PAGE_SHIFT, PAGE_KERNEL)); + vhpt_map(); + ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | + VHPT_ENABLED); + vhpt_flush(); +} + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xen.lds.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/xen.lds.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,251 @@ +#include <linux/config.h> + +#include <asm/cache.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/pgtable.h> + +#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) +#include <asm-generic/vmlinux.lds.h> + +OUTPUT_FORMAT("elf64-ia64-little") +OUTPUT_ARCH(ia64) +ENTRY(phys_start) +jiffies = jiffies_64; +PHDRS { + code PT_LOAD; + percpu PT_LOAD; + data PT_LOAD; +} +SECTIONS +{ + /* Sections to be discarded */ + /DISCARD/ : { + *(.exit.text) + *(.exit.data) + *(.exitcall.exit) + *(.IA_64.unwind.exit.text) + *(.IA_64.unwind_info.exit.text) + } + + v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ + phys_start = _start - LOAD_OFFSET; + + code : { } :code + . = KERNEL_START; + + _text = .; + _stext = .; + + .text : AT(ADDR(.text) - LOAD_OFFSET) + { + *(.text.ivt) + *(.text) + SCHED_TEXT + LOCK_TEXT + *(.gnu.linkonce.t*) + } + .text2 : AT(ADDR(.text2) - LOAD_OFFSET) + { *(.text2) } +#ifdef CONFIG_SMP + .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) + { *(.text.lock) } +#endif + _etext = .; + + /* Read-only data */ + + /* Exception table */ + . = ALIGN(16); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) + { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET) + { + __start___vtop_patchlist = .; + *(.data.patch.vtop) + __end___vtop_patchlist = .; + } + + .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET) + { + __start___mckinley_e9_bundles = .; + *(.data.patch.mckinley_e9) + __end___mckinley_e9_bundles = .; + } + + /* Global data */ + _data = .; + +#if defined(CONFIG_IA64_GENERIC) + /* Machine Vector */ + . = ALIGN(16); + .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) + { + machvec_start = .; + *(.machvec) + machvec_end = .; + } +#endif + + /* Unwind info & table: */ + . = ALIGN(8); + .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) + { *(.IA_64.unwind_info*) } + .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) + { + __start_unwind = .; + *(.IA_64.unwind*) + __end_unwind = .; + } + + RODATA + + .opd : AT(ADDR(.opd) - LOAD_OFFSET) + { *(.opd) } + + /* Initialization code and data: */ + + . = ALIGN(PAGE_SIZE); + __init_begin = .; + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) + { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) + { *(.init.data) } + + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) + { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } + + . 
= ALIGN(16); + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) + { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) + { + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + } + __con_initcall_start = .; + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) + { *(.con_initcall.init) } + __con_initcall_end = .; + __security_initcall_start = .; + .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) + { *(.security_initcall.init) } + __security_initcall_end = .; + . = ALIGN(PAGE_SIZE); + __init_end = .; + + /* The initial task and kernel stack */ + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) + { *(.data.init_task) } + + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) + { *(__special_page_section) + __start_gate_section = .; + *(.data.gate) + __stop_gate_section = .; + } + . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */ + + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) + { *(.data.cacheline_aligned) } + + /* Per-cpu data: */ + percpu : { } :percpu + . = ALIGN(PERCPU_PAGE_SIZE); + __phys_per_cpu_start = .; + .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) + { + __per_cpu_start = .; + *(.data.percpu) + __per_cpu_end = .; + } + . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */ + + data : { } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) + { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) + { *(.got.plt) *(.got) } + __gp = ADDR(.got) + 0x200000; + /* We want the small data sections together, so single-instruction offsets + can access them all, and initialized data all before uninitialized, so + we can shorten the on-disk segment size. */ + .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) + { *(.sdata) *(.sdata1) *(.srdata) } + _edata = .; + _bss = .; + .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) + { *(.sbss) *(.scommon) } + .bss : AT(ADDR(.bss) - LOAD_OFFSET) + { *(.bss) *(COMMON) } + + _end = .; + + code : { } :code + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. 
*/ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* These must appear regardless of . */ + /* Discard them for now since Intel SoftSDV cannot handle them. + .comment 0 : { *(.comment) } + .note 0 : { *(.note) } + */ + /DISCARD/ : { *(.comment) } + /DISCARD/ : { *(.note) } +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenasm.S --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/xenasm.S Thu Sep 1 18:46:28 2005 @@ -0,0 +1,501 @@ +/* + * Assembly support routines for Xen/ia64 + * + * Copyright (C) 2004 Hewlett-Packard Co + * Dan Magenheimer <dan.magenheimer@xxxxxx> + */ + +#include <linux/config.h> +#include <asm/asmmacro.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/vhpt.h> + +#if 0 +// FIXME: there's gotta be a better way... +// ski and spaski are different... moved to xenmisc.c +#define RunningOnHpSki(rx,ry,pn) \ + addl rx = 2, r0; \ + addl ry = 3, r0; \ + ;; \ + mov rx = cpuid[rx]; \ + mov ry = cpuid[ry]; \ + ;; \ + cmp.eq pn,p0 = 0, rx; \ + ;; \ + (pn) movl rx = 0x7000004 ; \ + ;; \ + (pn) cmp.ge pn,p0 = ry, rx; \ + ;; + +//int platform_is_hp_ski(void) +GLOBAL_ENTRY(platform_is_hp_ski) + mov r8 = 0 + RunningOnHpSki(r3,r9,p8) +(p8) mov r8 = 1 + br.ret.sptk.many b0 +END(platform_is_hp_ski) +#endif + +// Change rr7 to the passed value while ensuring +// Xen is mapped into the new region. +// in0: new rr7 value +// in1: Xen virtual address of shared info (to be pinned) +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) +// FIXME? Note that this turns off the DB bit (debug) +#define PSR_BITS_TO_SET IA64_PSR_BN + +//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info); +GLOBAL_ENTRY(ia64_new_rr7) + // not sure this unwind statement is correct... 
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1) + alloc loc1 = ar.pfs, 3, 8, 0, 0 +1: { + mov r28 = in0 // copy procedure index + mov r8 = ip // save ip to compute branch + mov loc0 = rp // save rp + };; + .body + movl loc2=PERCPU_ADDR + ;; + tpa loc2=loc2 // grab this BEFORE changing rr7 + ;; +#if VHPT_ENABLED + movl loc6=VHPT_ADDR + ;; + tpa loc6=loc6 // grab this BEFORE changing rr7 + ;; +#endif + mov loc5=in1 + ;; + tpa loc5=loc5 // grab this BEFORE changing rr7 + ;; + mov loc7=in2 // arch_vcpu_info_t + ;; + tpa loc7=loc7 // grab this BEFORE changing rr7 + ;; + mov loc3 = psr // save psr + adds r8 = 1f-1b,r8 // calculate return address for call + ;; + tpa r8=r8 // convert rp to physical + ;; + mov loc4=ar.rsc // save RSE configuration + ;; + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + movl r16=PSR_BITS_TO_CLEAR + movl r17=PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.many rp=ia64_switch_mode_phys +1: + // now in physical mode with psr.i/ic off so do rr7 switch + dep r16=-1,r0,61,3 + ;; + mov rr[r16]=in0 + srlz.d + ;; + + // re-pin mappings for kernel text and data + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + movl r17=KERNEL_START + ;; + rsm psr.i | psr.ic + ;; + srlz.i + ;; + ptr.i r17,r18 + ptr.d r17,r18 + ;; + mov cr.itir=r18 + mov cr.ifa=r17 + mov r16=IA64_TR_KERNEL + //mov r3=ip + movl r18=PAGE_KERNEL + ;; + dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT + ;; + or r18=r2,r18 + ;; + srlz.i + ;; + itr.i itr[r16]=r18 + ;; + itr.d dtr[r16]=r18 + ;; + + // re-pin mappings for stack (current), per-cpu, vhpt, and shared info + + // unless overlaps with KERNEL_TR + dep r18=0,r13,0,KERNEL_TR_PAGE_SHIFT + ;; + cmp.eq p7,p0=r17,r18 +(p7) br.cond.sptk .stack_overlaps + ;; + movl r25=PAGE_KERNEL + dep r21=0,r13,60,4 // physical address of "current" + ;; + or r23=r25,r21 // construct PA | page properties + mov r25=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r13,r25 + ;; + mov cr.itir=r25 + mov cr.ifa=r13 // VA of next task... + ;; + mov r25=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r25]=r23 // wire in new mapping... + ;; +.stack_overlaps: + + movl r22=PERCPU_ADDR + ;; + movl r25=PAGE_KERNEL + ;; + mov r21=loc2 // saved percpu physical address + ;; + or r23=r25,r21 // construct PA | page properties + mov r24=PERCPU_PAGE_SHIFT<<2 + ;; + ptr.d r22,r24 + ;; + mov cr.itir=r24 + mov cr.ifa=r22 + ;; + mov r25=IA64_TR_PERCPU_DATA + ;; + itr.d dtr[r25]=r23 // wire in new mapping... + ;; + +#if VHPT_ENABLED + movl r22=VHPT_ADDR + ;; + movl r25=PAGE_KERNEL + ;; + mov r21=loc6 // saved vhpt physical address + ;; + or r23=r25,r21 // construct PA | page properties + mov r24=VHPT_PAGE_SHIFT<<2 + ;; + ptr.d r22,r24 + ;; + mov cr.itir=r24 + mov cr.ifa=r22 + ;; + mov r25=IA64_TR_VHPT + ;; + itr.d dtr[r25]=r23 // wire in new mapping... + ;; +#endif + + movl r22=SHAREDINFO_ADDR + ;; + movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW) + ;; + mov r21=loc5 // saved sharedinfo physical address + ;; + or r23=r25,r21 // construct PA | page properties + mov r24=PAGE_SHIFT<<2 + ;; + ptr.d r22,r24 + ;; + mov cr.itir=r24 + mov cr.ifa=r22 + ;; + mov r25=IA64_TR_SHARED_INFO + ;; + itr.d dtr[r25]=r23 // wire in new mapping... 
+ ;; + // Map for arch_vcpu_info_t + movl r22=SHARED_ARCHINFO_ADDR + ;; + movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW) + ;; + mov r21=loc7 // saved sharedinfo physical address + ;; + or r23=r25,r21 // construct PA | page properties + mov r24=PAGE_SHIFT<<2 + ;; + ptr.d r22,r24 + ;; + mov cr.itir=r24 + mov cr.ifa=r22 + ;; + mov r25=IA64_TR_ARCH_INFO + ;; + itr.d dtr[r25]=r23 // wire in new mapping... + ;; + + // done, switch back to virtual and return + mov r16=loc3 // r16= original psr + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode + mov psr.l = loc3 // restore init PSR + + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // seralize restoration of psr.l + br.ret.sptk.many rp +END(ia64_new_rr7) + +#include "minstate.h" + +GLOBAL_ENTRY(ia64_prepare_handle_privop) + .prologue + /* + * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 + */ + mov r16=r0 + DO_SAVE_SWITCH_STACK + br.call.sptk.many rp=ia64_handle_privop // stack frame setup in ivt +.ret22: .body + DO_LOAD_SWITCH_STACK + br.cond.sptk.many rp // goes to ia64_leave_kernel +END(ia64_prepare_handle_privop) + +GLOBAL_ENTRY(ia64_prepare_handle_break) + .prologue + /* + * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 + */ + mov r16=r0 + DO_SAVE_SWITCH_STACK + br.call.sptk.many rp=ia64_handle_break // stack frame setup in ivt +.ret23: .body + DO_LOAD_SWITCH_STACK + br.cond.sptk.many rp // goes to ia64_leave_kernel +END(ia64_prepare_handle_break) + +GLOBAL_ENTRY(ia64_prepare_handle_reflection) + .prologue + /* + * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 + */ + mov r16=r0 + DO_SAVE_SWITCH_STACK + br.call.sptk.many rp=ia64_handle_reflection // stack frame setup in ivt +.ret24: .body + DO_LOAD_SWITCH_STACK + br.cond.sptk.many rp // goes to ia64_leave_kernel +END(ia64_prepare_handle_reflection) + +GLOBAL_ENTRY(__get_domain_bundle) + EX(.failure_in_get_bundle,ld8 r8=[r32],8) + ;; + EX(.failure_in_get_bundle,ld8 r9=[r32]) + ;; + br.ret.sptk.many rp + ;; +.failure_in_get_bundle: + mov r8=0 + ;; + mov r9=0 + ;; + br.ret.sptk.many rp + ;; +END(__get_domain_bundle) + +GLOBAL_ENTRY(dorfirfi) + movl r16 = XSI_IIP + movl r17 = XSI_IPSR + movl r18 = XSI_IFS + ;; + ld8 r16 = [r16] + ld8 r17 = [r17] + ld8 r18 = [r18] + ;; + mov cr.iip=r16 + mov cr.ipsr=r17 + mov cr.ifs=r18 + ;; + // fall through +END(dorfirfi) + +GLOBAL_ENTRY(dorfi) + rfi + ;; +END(dorfirfi) + +// +// Long's Peak UART Offsets +// +#define COM_TOP 0xff5e0000 +#define COM_BOT 0xff5e2000 + +// UART offsets +#define UART_TX 0 /* Out: Transmit buffer (DLAB=0) */ +#define UART_INT_ENB 1 /* interrupt enable (DLAB=0) */ +#define UART_INT_ID 2 /* Interrupt ID register */ +#define UART_LINE_CTL 3 /* Line control register */ +#define UART_MODEM_CTL 4 /* Modem Control Register */ +#define UART_LSR 5 /* In: Line Status Register */ +#define UART_MSR 6 /* Modem status register */ +#define UART_DLATCH_LOW UART_TX +#define UART_DLATCH_HIGH UART_INT_ENB +#define COM1 0x3f8 +#define COM2 0x2F8 +#define COM3 0x3E8 + +/* interrupt enable bits (offset 1) */ +#define DATA_AVAIL_INT 1 +#define XMIT_HOLD_EMPTY_INT 2 +#define LINE_STAT_INT 4 +#define MODEM_STAT_INT 8 + +/* line status bits (offset 5) */ +#define REC_DATA_READY 1 +#define OVERRUN 2 +#define PARITY_ERROR 4 +#define FRAMING_ERROR 8 +#define BREAK_INTERRUPT 0x10 +#define XMIT_HOLD_EMPTY 0x20 +#define XMIT_SHIFT_EMPTY 0x40 + +// Write a single character +// input: r32 = character to be written +// 
output: none +GLOBAL_ENTRY(longs_peak_putc) + rsm psr.dt + movl r16 = 0x8000000000000000 + COM_TOP + UART_LSR + ;; + srlz.i + ;; + +.Chk_THRE_p: + ld1.acq r18=[r16] + ;; + + and r18 = XMIT_HOLD_EMPTY, r18 + ;; + cmp4.eq p6,p0=0,r18 + ;; + +(p6) br .Chk_THRE_p + ;; + movl r16 = 0x8000000000000000 + COM_TOP + UART_TX + ;; + st1.rel [r16]=r32 + ;; + ssm psr.dt + ;; + srlz.i + ;; + br.ret.sptk.many b0 +END(longs_peak_putc) + +/* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */ +GLOBAL_ENTRY(pal_emulator_static) + mov r8=-1 + mov r9=256 + ;; + cmp.gtu p7,p8=r9,r32 /* r32 <= 255? */ +(p7) br.cond.sptk.few static + ;; + mov r9=512 + ;; + cmp.gtu p7,p8=r9,r32 +(p7) br.cond.sptk.few stacked + ;; +static: cmp.eq p7,p8=6,r32 /* PAL_PTCE_INFO */ +(p8) br.cond.sptk.few 1f + ;; + mov r8=0 /* status = 0 */ + movl r9=0x100000000 /* tc.base */ + movl r10=0x0000000200000003 /* count[0], count[1] */ + movl r11=0x1000000000002000 /* stride[0], stride[1] */ + br.ret.sptk.few rp +1: cmp.eq p7,p8=14,r32 /* PAL_FREQ_RATIOS */ +(p8) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x900000002 /* proc_ratio (1/100) */ + movl r10=0x100000100 /* bus_ratio<<32 (1/256) */ + movl r11=0x900000002 /* itc_ratio<<32 (1/100) */ + ;; +1: cmp.eq p7,p8=19,r32 /* PAL_RSE_INFO */ +(p8) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + mov r9=96 /* num phys stacked */ + mov r10=0 /* hints */ + mov r11=0 + br.ret.sptk.few rp +1: cmp.eq p7,p8=1,r32 /* PAL_CACHE_FLUSH */ +(p8) br.cond.sptk.few 1f +#if 0 + mov r9=ar.lc + movl r8=524288 /* flush 512k million cache lines (16MB) */ + ;; + mov ar.lc=r8 + movl r8=0xe000000000000000 + ;; +.loop: fc r8 + add r8=32,r8 + br.cloop.sptk.few .loop + sync.i + ;; + srlz.i + ;; + mov ar.lc=r9 + mov r8=r0 + ;; +1: cmp.eq p7,p8=15,r32 /* PAL_PERF_MON_INFO */ +(p8) br.cond.sptk.few 1f + mov r8=0 /* status = 0 */ + movl r9 =0x08122f04 /* generic=4 width=47 retired=8 cycles=18 */ + mov r10=0 /* reserved */ + mov r11=0 /* reserved */ + mov r16=0xffff /* implemented PMC */ + mov r17=0x3ffff /* implemented PMD */ + add r18=8,r29 /* second index */ + ;; + st8 [r29]=r16,16 /* store implemented PMC */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r17,16 /* store implemented PMD */ + st8 [r18]=r0,16 /* clear remaining bits */ + mov r16=0xf0 /* cycles count capable PMC */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + mov r17=0xf0 /* retired bundles capable PMC */ + ;; + st8 [r29]=r16,16 /* store cycles capable */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r17,16 /* store retired bundle capable */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; + st8 [r29]=r0,16 /* clear remaining bits */ + st8 [r18]=r0,16 /* clear remaining bits */ + ;; +1: br.cond.sptk.few rp +#else +1: +#endif +stacked: + br.ret.sptk.few rp +END(pal_emulator_static) + +GLOBAL_ENTRY(vhpt_insert) +// alloc loc0 = ar.pfs, 3, 1, 0, 0 + mov r16=r32 + mov r26=r33 + mov r27=r34 + ;; + VHPT_INSERT() +// VHPT_INSERT1() ... add collision chains later +// mov ar.pfs = loc0 + br.ret.sptk.few rp + ;; +END(vhpt_insert) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenirq.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/xenirq.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,78 @@ +/* + * Xen irq routines + * + * Copyright (C) 2005 Hewlett-Packard Co. 
+ * Dan Magenheimer (dan.magenheimer@xxxxxx) + * + */ + +#include <asm/ptrace.h> +#include <asm/hw_irq.h> +#include <asm/delay.h> + + +void +xen_debug_irq(ia64_vector vector, struct pt_regs *regs) +{ +//FIXME: For debug only, can be removed + static char firstirq = 1; + static char firsttime[256]; + static char firstpend[256]; + if (firstirq) { + int i; + for (i=0;i<256;i++) firsttime[i] = 1; + for (i=0;i<256;i++) firstpend[i] = 1; + firstirq = 0; + } + if (firsttime[vector]) { + printf("**** (entry) First received int on vector=%d,itc=%lx\n", + (unsigned long) vector, ia64_get_itc()); + firsttime[vector] = 0; + } +} + + +int +xen_do_IRQ(ia64_vector vector) +{ + if (vector != 0xef) { + extern void vcpu_pend_interrupt(void *, int); +#if 0 + if (firsttime[vector]) { + printf("**** (iterate) First received int on vector=%d,itc=%lx\n", + (unsigned long) vector, ia64_get_itc()); + firsttime[vector] = 0; + } + if (firstpend[vector]) { + printf("**** First pended int on vector=%d,itc=%lx\n", + (unsigned long) vector,ia64_get_itc()); + firstpend[vector] = 0; + } +#endif + //FIXME: TEMPORARY HACK!!!! + vcpu_pend_interrupt(dom0->vcpu[0],vector); + vcpu_wake(dom0->vcpu[0]); + return(1); + } + return(0); +} + +/* From linux/kernel/softirq.c */ +#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED +# define invoke_softirq() __do_softirq() +#else +# define invoke_softirq() do_softirq() +#endif + +/* + * Exit an interrupt context. Process softirqs if needed and possible: + */ +void irq_exit(void) +{ + //account_system_vtime(current); + //sub_preempt_count(IRQ_EXIT_OFFSET); + if (!in_interrupt() && local_softirq_pending()) + invoke_softirq(); + //preempt_enable_no_resched(); +} +/* end from linux/kernel/softirq.c */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenmem.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/xenmem.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,86 @@ +/* + * Xen memory allocator routines + * + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer <dan.magenheimer@xxxxxx> + * Copyright (C) 2005 Intel Corp. + * + * Routines used by ia64 machines with contiguous (or virtually contiguous) + * memory. + */ + +#include <linux/config.h> +#include <asm/pgtable.h> +#include <xen/mm.h> + +extern struct page *zero_page_memmap_ptr; +struct pfn_info *frame_table; +unsigned long frame_table_size; +unsigned long max_page; + +struct page *mem_map; +#define MAX_DMA_ADDRESS ~0UL // FIXME??? + +#ifdef CONFIG_VIRTUAL_MEM_MAP +static unsigned long num_dma_physpages; +#endif + +/* + * Set up the page tables. + */ +#ifdef CONFIG_VTI +unsigned long *mpt_table; +unsigned long mpt_table_size; +#endif // CONFIG_VTI + +void +paging_init (void) +{ + struct pfn_info *pg; + +#ifdef CONFIG_VTI + unsigned int mpt_order; + /* Create machine to physical mapping table + * NOTE: similar to frame table, later we may need virtually + * mapped mpt table if large hole exists. 
Also MAX_ORDER needs + * to be changed in common code, which only support 16M by far + */ + mpt_table_size = max_page * sizeof(unsigned long); + mpt_order = get_order(mpt_table_size); + ASSERT(mpt_order <= MAX_ORDER); + if ((mpt_table = alloc_xenheap_pages(mpt_order)) == NULL) + panic("Not enough memory to bootstrap Xen.\n"); + + printk("machine to physical table: 0x%lx\n", (u64)mpt_table); + memset(mpt_table, INVALID_M2P_ENTRY, mpt_table_size); +#endif // CONFIG_VTI + + /* Other mapping setup */ + + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); +} + +/* FIXME: postpone support to machines with big holes between physical memorys. + * Current hack allows only efi memdesc upto 4G place. (See efi.c) + */ +#ifndef CONFIG_VIRTUAL_MEM_MAP +#define FT_ALIGN_SIZE (16UL << 20) +void __init init_frametable(void) +{ + unsigned long i, pfn; + frame_table_size = max_page * sizeof(struct pfn_info); + frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK; + + /* Request continuous trunk from boot allocator, since HV + * address is identity mapped */ + pfn = alloc_boot_pages( + frame_table_size >> PAGE_SHIFT, FT_ALIGN_SIZE >> PAGE_SHIFT); + if (pfn == 0) + panic("Not enough memory for frame table.\n"); + + frame_table = __va(pfn << PAGE_SHIFT); + memset(frame_table, 0, frame_table_size); + printk("size of frame_table: %lukB\n", + frame_table_size >> 10); +} +#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xenmisc.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/xenmisc.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,391 @@ +/* + * Xen misc + * + * Functions/decls that are/may be needed to link with Xen because + * of x86 dependencies + * + * Copyright (C) 2004 Hewlett-Packard Co. + * Dan Magenheimer (dan.magenheimer@xxxxxx) + * + */ + +#include <linux/config.h> +#include <xen/sched.h> +#include <linux/efi.h> +#include <asm/processor.h> +#include <xen/serial.h> +#include <asm/io.h> +#include <xen/softirq.h> + +efi_memory_desc_t ia64_efi_io_md; +EXPORT_SYMBOL(ia64_efi_io_md); +unsigned long wait_init_idle; +int phys_proc_id[NR_CPUS]; +unsigned long loops_per_jiffy = (1<<12); // from linux/init/main.c + +void unw_init(void) { printf("unw_init() skipped (NEED FOR KERNEL UNWIND)\n"); } +void ia64_mca_init(void) { printf("ia64_mca_init() skipped (Machine check abort handling)\n"); } +void ia64_mca_cpu_init(void *x) { } +void ia64_patch_mckinley_e9(unsigned long a, unsigned long b) { } +void ia64_patch_vtop(unsigned long a, unsigned long b) { } +void hpsim_setup(char **x) +{ +#ifdef CONFIG_SMP + init_smp_config(); +#endif +} + +// called from mem_init... 
don't think s/w I/O tlb is needed in Xen +//void swiotlb_init(void) { } ...looks like it IS needed + +long +is_platform_hp_ski(void) +{ + int i; + long cpuid[6]; + + for (i = 0; i < 5; ++i) + cpuid[i] = ia64_get_cpuid(i); + if ((cpuid[0] & 0xff) != 'H') return 0; + if ((cpuid[3] & 0xff) != 0x4) return 0; + if (((cpuid[3] >> 8) & 0xff) != 0x0) return 0; + if (((cpuid[3] >> 16) & 0xff) != 0x0) return 0; + if (((cpuid[3] >> 24) & 0x7) != 0x7) return 0; + return 1; +} + +long +platform_is_hp_ski(void) +{ + extern long running_on_sim; + return running_on_sim; +} + +/* calls in xen/common code that are unused on ia64 */ + +void sync_lazy_execstate_cpu(unsigned int cpu) {} + +#ifdef CONFIG_VTI +int grant_table_create(struct domain *d) { return 0; } +void grant_table_destroy(struct domain *d) { return; } +#endif + +struct pt_regs *guest_cpu_user_regs(void) { return ia64_task_regs(current); } + +void raise_actimer_softirq(void) +{ + raise_softirq(AC_TIMER_SOFTIRQ); +} + +#ifndef CONFIG_VTI +unsigned long +__gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) +{ + if (d == dom0) + return(gpfn); + else { + unsigned long pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT); + if (!pte) { +printk("__gpfn_to_mfn_foreign: bad gpfn. spinning...\n"); +while(1); + return 0; + } + return ((pte & _PFN_MASK) >> PAGE_SHIFT); + } +} + +u32 +__mfn_to_gpfn(struct domain *d, unsigned long frame) +{ + // FIXME: is this right? +if ((frame << PAGE_SHIFT) & _PAGE_PPN_MASK) { +printk("__mfn_to_gpfn: bad frame. spinning...\n"); +while(1); +} + return frame; +} +#endif + +#ifndef CONFIG_VTI +unsigned long __hypercall_create_continuation( + unsigned int op, unsigned int nr_args, ...) +{ + printf("__hypercall_create_continuation: not implemented!!!\n"); +} +#endif + +/////////////////////////////// + +/////////////////////////////// +// from arch/x86/apic.c +/////////////////////////////// + +extern unsigned long domain0_ready; + +int reprogram_ac_timer(s_time_t timeout) +{ + struct vcpu *v = current; + +#ifdef CONFIG_VTI +// if(VMX_DOMAIN(v)) + return 1; +#endif // CONFIG_VTI + if (!domain0_ready) return 1; + local_cpu_data->itm_next = timeout; + if (is_idle_task(v->domain)) vcpu_safe_set_itm(timeout); + else vcpu_set_next_timer(current); + return 1; +} + +/////////////////////////////// +// from arch/ia64/page_alloc.c +/////////////////////////////// +DEFINE_PER_CPU(struct page_state, page_states) = {0}; +unsigned long totalram_pages; + +void __mod_page_state(unsigned long offset, unsigned long delta) +{ + unsigned long flags; + void* ptr; + + local_irq_save(flags); + ptr = &__get_cpu_var(page_states); + *(unsigned long*)(ptr + offset) += delta; + local_irq_restore(flags); +} + +/////////////////////////////// +// from arch/x86/flushtlb.c +/////////////////////////////// + +u32 tlbflush_clock; +u32 tlbflush_time[NR_CPUS]; + +/////////////////////////////// +// from arch/x86/memory.c +/////////////////////////////// + +void init_percpu_info(void) +{ + dummy(); + //memset(percpu_info, 0, sizeof(percpu_info)); +} + +void free_page_type(struct pfn_info *page, unsigned int type) +{ + dummy(); +} + +/////////////////////////////// +//// misc memory stuff +/////////////////////////////// + +unsigned long __get_free_pages(unsigned int mask, unsigned int order) +{ + void *p = alloc_xenheap_pages(order); + + memset(p,0,PAGE_SIZE<<order); + return (unsigned long)p; +} + +void __free_pages(struct page *page, unsigned int order) +{ + if (order) BUG(); + free_xenheap_page(page); +} + +void *pgtable_quicklist_alloc(void) +{ + return 
alloc_xenheap_pages(0); +} + +void pgtable_quicklist_free(void *pgtable_entry) +{ + free_xenheap_page(pgtable_entry); +} + +/////////////////////////////// +// from arch/ia64/traps.c +/////////////////////////////// + +void show_registers(struct pt_regs *regs) +{ + printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n"); +} + +int is_kernel_text(unsigned long addr) +{ + extern char _stext[], _etext[]; + if (addr >= (unsigned long) _stext && + addr <= (unsigned long) _etext) + return 1; + + return 0; +} + +unsigned long kernel_text_end(void) +{ + extern char _etext[]; + return (unsigned long) _etext; +} + +/////////////////////////////// +// from common/keyhandler.c +/////////////////////////////// +void dump_pageframe_info(struct domain *d) +{ + printk("dump_pageframe_info not implemented\n"); +} + +/////////////////////////////// +// called from arch/ia64/head.S +/////////////////////////////// + +void console_print(char *msg) +{ + printk("console_print called, how did start_kernel return???\n"); +} + +void kernel_thread_helper(void) +{ + printk("kernel_thread_helper not implemented\n"); + dummy(); +} + +void sys_exit(void) +{ + printk("sys_exit not implemented\n"); + dummy(); +} + +//////////////////////////////////// +// called from unaligned.c +//////////////////////////////////// + +void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */ +{ + printk("die_if_kernel: called, not implemented\n"); +} + +long +ia64_peek (struct task_struct *child, struct switch_stack *child_stack, + unsigned long user_rbs_end, unsigned long addr, long *val) +{ + printk("ia64_peek: called, not implemented\n"); +} + +long +ia64_poke (struct task_struct *child, struct switch_stack *child_stack, + unsigned long user_rbs_end, unsigned long addr, long val) +{ + printk("ia64_poke: called, not implemented\n"); +} + +void +ia64_sync_fph (struct task_struct *task) +{ + printk("ia64_sync_fph: called, not implemented\n"); +} + +void +ia64_flush_fph (struct task_struct *task) +{ + printk("ia64_flush_fph: called, not implemented\n"); +} + +//////////////////////////////////// +// called from irq_ia64.c:init_IRQ() +// (because CONFIG_IA64_HP_SIM is specified) +//////////////////////////////////// +void hpsim_irq_init(void) { } + + +// accomodate linux extable.c +//const struct exception_table_entry * +void *search_module_extables(unsigned long addr) { return NULL; } +void *__module_text_address(unsigned long addr) { return NULL; } +void *module_text_address(unsigned long addr) { return NULL; } + +void cs10foo(void) {} +void cs01foo(void) {} + +unsigned long context_switch_count = 0; + +void context_switch(struct vcpu *prev, struct vcpu *next) +{ +//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); +//printk("@@@@@@ context switch from domain %d (%x) to domain %d (%x)\n", +//prev->domain->domain_id,(long)prev&0xffffff,next->domain->domain_id,(long)next&0xffffff); +//if (prev->domain->domain_id == 1 && next->domain->domain_id == 0) cs10foo(); +//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo(); +//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id); +#ifdef CONFIG_VTI + vtm_domain_out(prev); +#endif + context_switch_count++; + switch_to(prev,next,prev); +#ifdef CONFIG_VTI + vtm_domain_in(current); +#endif + +// leave this debug for now: it acts as a heartbeat when more than +// one domain is active +{ +static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50}; +static int i = 100; +int id = ((struct vcpu 
*)current)->domain->domain_id & 0xf; +if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; } +if (!i--) { printk("+",id); i = 1000000; } +} + +#ifdef CONFIG_VTI + if (VMX_DOMAIN(current)) + vmx_load_all_rr(current); +#else + if (!is_idle_task(current->domain)) { + load_region_regs(current); + if (vcpu_timer_expired(current)) vcpu_pend_timer(current); + } + if (vcpu_timer_expired(current)) vcpu_pend_timer(current); +#endif +} + +void context_switch_finalise(struct vcpu *next) +{ + /* nothing to do */ +} + +void continue_running(struct vcpu *same) +{ + /* nothing to do */ +} + +void panic_domain(struct pt_regs *regs, const char *fmt, ...) +{ + va_list args; + char buf[128]; + struct vcpu *v = current; + static volatile int test = 1; // so can continue easily in debug + extern spinlock_t console_lock; + unsigned long flags; + +loop: + printf("$$$$$ PANIC in domain %d (k6=%p): ", + v->domain->domain_id, + __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT]); + va_start(args, fmt); + (void)vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + printf(buf); + if (regs) show_registers(regs); + domain_pause_by_systemcontroller(current->domain); + v->domain->shutdown_code = SHUTDOWN_crash; + set_bit(_DOMF_shutdown, v->domain->domain_flags); + if (v->domain->domain_id == 0) { + int i = 1000000000L; + // if domain0 crashes, just periodically print out panic + // message to make post-mortem easier + while(i--); + goto loop; + } +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xensetup.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/xensetup.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,389 @@ +/****************************************************************************** + * xensetup.c + * Copyright (c) 2004-2005 Hewlett-Packard Co + * Dan Magenheimer <dan.magenheimer@xxxxxx> + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/errno.h> +//#include <xen/spinlock.h> +#include <xen/multiboot.h> +#include <xen/sched.h> +#include <xen/mm.h> +//#include <xen/delay.h> +#include <xen/compile.h> +//#include <xen/console.h> +#include <xen/serial.h> +#include <xen/trace.h> +#include <asm/meminit.h> +#include <asm/page.h> +#include <asm/setup.h> +#include <xen/string.h> + +unsigned long xenheap_phys_end; + +char saved_command_line[COMMAND_LINE_SIZE]; + +struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu }; + +cpumask_t cpu_present_map; + +#ifdef CLONE_DOMAIN0 +struct domain *clones[CLONE_DOMAIN0]; +#endif +extern unsigned long domain0_ready; + +int find_max_pfn (unsigned long, unsigned long, void *); +void start_of_day(void); + +/* opt_nosmp: If true, secondary processors are ignored. */ +static int opt_nosmp = 0; +boolean_param("nosmp", opt_nosmp); + +/* maxcpus: maximum number of CPUs to activate. */ +static unsigned int max_cpus = NR_CPUS; +integer_param("maxcpus", max_cpus); + +/* + * opt_xenheap_megabytes: Size of Xen heap in megabytes, including: + * xen image + * bootmap bits + * xen heap + * Note: To allow xenheap size configurable, the prerequisite is + * to configure elilo allowing relocation defaultly. Then since + * elilo chooses 256M as alignment when relocating, alignment issue + * on IPF can be addressed. + */ +unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB; +unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE; +extern long running_on_sim; +unsigned long xen_pstart; + +static int +xen_count_pages(u64 start, u64 end, void *arg) +{ + unsigned long *count = arg; + + /* FIXME: do we need consider difference between DMA-usable memory and + * normal memory? 
Seems that HV has no requirement to operate DMA which + * is owned by Dom0? */ + *count += (end - start) >> PAGE_SHIFT; + return 0; +} + +/* Find first hole after trunk for xen image */ +static int +xen_find_first_hole(u64 start, u64 end, void *arg) +{ + unsigned long *first_hole = arg; + + if ((*first_hole) == 0) { + if ((start <= KERNEL_START) && (KERNEL_START < end)) + *first_hole = __pa(end); + } + + return 0; +} + +static void __init do_initcalls(void) +{ + initcall_t *call; + for ( call = &__initcall_start; call < &__initcall_end; call++ ) + (*call)(); +} + +/* + * IPF loader only supports one commaind line currently, for + * both xen and guest kernel. This function provides pre-parse + * to mixed command line, to split it into two parts. + * + * User should split the parameters by "--", with strings after + * spliter for guest kernel. Missing "--" means whole line belongs + * to guest. Example: + * "com2=57600,8n1 console=com2 -- console=ttyS1 console=tty + * root=/dev/sda3 ro" + */ +static char null[4] = { 0 }; + +void early_cmdline_parse(char **cmdline_p) +{ + char *guest_cmd; + char *split = "--"; + + if (*cmdline_p == NULL) { + *cmdline_p = &null[0]; + saved_command_line[0] = '\0'; + return; + } + + guest_cmd = strstr(*cmdline_p, split); + /* If no spliter, whole line is for guest */ + if (guest_cmd == NULL) { + guest_cmd = *cmdline_p; + *cmdline_p = &null[0]; + } else { + *guest_cmd = '\0'; /* Split boot parameters for xen and guest */ + guest_cmd += strlen(split); + while (*guest_cmd == ' ') guest_cmd++; + } + + strlcpy(saved_command_line, guest_cmd, COMMAND_LINE_SIZE); + return; +} + +struct ns16550_defaults ns16550_com1 = { + .baud = BAUD_AUTO, + .data_bits = 8, + .parity = 'n', + .stop_bits = 1 +}; + +struct ns16550_defaults ns16550_com2 = { + .baud = BAUD_AUTO, + .data_bits = 8, + .parity = 'n', + .stop_bits = 1 +}; + +void start_kernel(void) +{ + unsigned char *cmdline; + void *heap_start; + int i; + unsigned long max_mem, nr_pages, firsthole_start; + unsigned long dom0_memory_start, dom0_memory_end; + unsigned long initial_images_start, initial_images_end; + + running_on_sim = is_platform_hp_ski(); + /* Kernel may be relocated by EFI loader */ + xen_pstart = ia64_tpa(KERNEL_START); + + /* Must do this early -- e.g., spinlocks rely on get_current(). */ + //set_current(&idle0_vcpu); + ia64_r13 = (void *)&idle0_vcpu; + idle0_vcpu.domain = &idle0_domain; + + early_setup_arch(&cmdline); + + /* We initialise the serial devices very early so we can get debugging. */ + if (running_on_sim) hpsim_serial_init(); + else { + ns16550_init(0, &ns16550_com1); + /* Also init com2 for Tiger4. */ + ns16550_com2.io_base = 0x2f8; + ns16550_com2.irq = 3; + ns16550_init(1, &ns16550_com2); + } + serial_init_preirq(); + + init_console(); + set_printk_prefix("(XEN) "); + + /* xenheap should be in same TR-covered range with xen image */ + xenheap_phys_end = xen_pstart + xenheap_size; + printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n", + xen_pstart, xenheap_phys_end); + + /* Find next hole */ + firsthole_start = 0; + efi_memmap_walk(xen_find_first_hole, &firsthole_start); + + initial_images_start = xenheap_phys_end; + initial_images_end = initial_images_start + ia64_boot_param->initrd_size; + + /* Later may find another memory trunk, even away from xen image... 
*/ + if (initial_images_end > firsthole_start) { + printk("Not enough memory to stash the DOM0 kernel image.\n"); + printk("First hole:0x%lx, relocation end: 0x%lx\n", + firsthole_start, initial_images_end); + for ( ; ; ); + } + + /* This copy is time consuming, but elilo may load Dom0 image + * within xenheap range */ + printk("ready to move Dom0 to 0x%lx...", initial_images_start); + memmove(__va(initial_images_start), + __va(ia64_boot_param->initrd_start), + ia64_boot_param->initrd_size); + ia64_boot_param->initrd_start = initial_images_start; + printk("Done\n"); + + /* first find highest page frame number */ + max_page = 0; + efi_memmap_walk(find_max_pfn, &max_page); + printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page); + + heap_start = memguard_init(ia64_imva(&_end)); + printf("Before heap_start: 0x%lx\n", heap_start); + heap_start = __va(init_boot_allocator(__pa(heap_start))); + printf("After heap_start: 0x%lx\n", heap_start); + + reserve_memory(); + + efi_memmap_walk(filter_rsvd_memory, init_boot_pages); + efi_memmap_walk(xen_count_pages, &nr_pages); + + printk("System RAM: %luMB (%lukB)\n", + nr_pages >> (20 - PAGE_SHIFT), + nr_pages << (PAGE_SHIFT - 10)); + + init_frametable(); + + ia64_fph_enable(); + __ia64_init_fpu(); + + alloc_dom0(); +#ifdef DOMU_BUILD_STAGING + alloc_domU_staging(); +#endif + + end_boot_allocator(); + + init_xenheap_pages(__pa(heap_start), xenheap_phys_end); + printk("Xen heap: %luMB (%lukB)\n", + (xenheap_phys_end-__pa(heap_start)) >> 20, + (xenheap_phys_end-__pa(heap_start)) >> 10); + + late_setup_arch(&cmdline); + setup_per_cpu_areas(); + mem_init(); + +printk("About to call scheduler_init()\n"); + scheduler_init(); + local_irq_disable(); +printk("About to call xen_time_init()\n"); + xen_time_init(); +#ifdef CONFIG_VTI + init_xen_time(); /* initialise the time */ +#endif // CONFIG_VTI +printk("About to call ac_timer_init()\n"); + ac_timer_init(); +// init_xen_time(); ??? + +#ifdef CONFIG_SMP + if ( opt_nosmp ) + { + max_cpus = 0; + smp_num_siblings = 1; + //boot_cpu_data.x86_num_cores = 1; + } + + smp_prepare_cpus(max_cpus); + + /* We aren't hotplug-capable yet. */ + //BUG_ON(!cpus_empty(cpu_present_map)); + for_each_cpu ( i ) + cpu_set(i, cpu_present_map); + + //BUG_ON(!local_irq_is_enabled()); + +printk("num_online_cpus=%d, max_cpus=%d\n",num_online_cpus(),max_cpus); + for_each_present_cpu ( i ) + { + if ( num_online_cpus() >= max_cpus ) + break; + if ( !cpu_online(i) ) { +printk("About to call __cpu_up(%d)\n",i); + __cpu_up(i); + } + } + + printk("Brought up %ld CPUs\n", (long)num_online_cpus()); + smp_cpus_done(max_cpus); +#endif + + + // FIXME: Should the following be swapped and moved later? + schedulers_start(); + do_initcalls(); +printk("About to call sort_main_extable()\n"); + sort_main_extable(); + + /* surrender usage of kernel registers to domain, use percpu area instead */ + __get_cpu_var(cpu_kr)._kr[IA64_KR_IO_BASE] = ia64_get_kr(IA64_KR_IO_BASE); + __get_cpu_var(cpu_kr)._kr[IA64_KR_PER_CPU_DATA] = ia64_get_kr(IA64_KR_PER_CPU_DATA); + __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT_STACK] = ia64_get_kr(IA64_KR_CURRENT_STACK); + __get_cpu_var(cpu_kr)._kr[IA64_KR_FPU_OWNER] = ia64_get_kr(IA64_KR_FPU_OWNER); + __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT] = ia64_get_kr(IA64_KR_CURRENT); + __get_cpu_var(cpu_kr)._kr[IA64_KR_PT_BASE] = ia64_get_kr(IA64_KR_PT_BASE); + + /* Create initial domain 0. 
*/ +printk("About to call do_createdomain()\n"); + dom0 = do_createdomain(0, 0); + init_task.domain = &idle0_domain; + init_task.processor = 0; +// init_task.mm = &init_mm; + init_task.domain->arch.mm = &init_mm; +// init_task.thread = INIT_THREAD; + //arch_do_createdomain(current); +#ifdef CLONE_DOMAIN0 + { + int i; + for (i = 0; i < CLONE_DOMAIN0; i++) { + clones[i] = do_createdomain(i+1, 0); + if ( clones[i] == NULL ) + panic("Error creating domain0 clone %d\n",i); + } + } +#endif + if ( dom0 == NULL ) + panic("Error creating domain 0\n"); + + set_bit(_DOMF_privileged, &dom0->domain_flags); + + /* + * We're going to setup domain0 using the module(s) that we stashed safely + * above our heap. The second module, if present, is an initrd ramdisk. + */ +printk("About to call construct_dom0()\n"); + dom0_memory_start = __va(ia64_boot_param->initrd_start); + dom0_memory_end = ia64_boot_param->initrd_size; + if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end, + 0, + 0, + 0) != 0) + panic("Could not set up DOM0 guest OS\n"); +#ifdef CLONE_DOMAIN0 + { + int i; + dom0_memory_start = __va(ia64_boot_param->initrd_start); + dom0_memory_end = ia64_boot_param->initrd_size; + for (i = 0; i < CLONE_DOMAIN0; i++) { +printk("CONSTRUCTING DOMAIN0 CLONE #%d\n",i+1); + if ( construct_domU(clones[i], dom0_memory_start, dom0_memory_end, + 0, + 0, + 0) != 0) + panic("Could not set up DOM0 clone %d\n",i); + } + } +#endif + + /* The stash space for the initial kernel image can now be freed up. */ + init_domheap_pages(ia64_boot_param->initrd_start, + ia64_boot_param->initrd_start + ia64_boot_param->initrd_size); + if (!running_on_sim) // slow on ski and pages are pre-initialized to zero + scrub_heap_pages(); + +printk("About to call init_trace_bufs()\n"); + init_trace_bufs(); + + /* Give up the VGA console if DOM0 is configured to grab it. 
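+ * (The console_endboot() call below is wrapped in #ifndef IA64, so it
+ * is skipped on builds that define IA64.)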
*/ +#ifndef IA64 + console_endboot(cmdline && strstr(cmdline, "tty0")); +#endif + +#ifdef CLONE_DOMAIN0 + { + int i; + for (i = 0; i < CLONE_DOMAIN0; i++) + domain_unpause_by_systemcontroller(clones[i]); + } +#endif + domain_unpause_by_systemcontroller(dom0); + domain0_ready = 1; + local_irq_enable(); +printk("About to call startup_cpu_idle_loop()\n"); + startup_cpu_idle_loop(); +} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen/xentime.c --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/arch/ia64/xen/xentime.c Thu Sep 1 18:46:28 2005 @@ -0,0 +1,382 @@ +/* + * xen/arch/ia64/time.c + * + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer <dan.magenheimer@xxxxxx> + */ + +#include <linux/config.h> + +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/profile.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/interrupt.h> +#include <linux/efi.h> +#include <linux/profile.h> +#include <linux/timex.h> + +#include <asm/machvec.h> +#include <asm/delay.h> +#include <asm/hw_irq.h> +#include <asm/ptrace.h> +#include <asm/sal.h> +#include <asm/sections.h> +#include <asm/system.h> +#ifdef XEN +#include <asm/vcpu.h> +#include <linux/jiffies.h> // not included by xen/sched.h +#endif +#include <xen/softirq.h> + +#ifdef XEN +seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; +#endif + +#define TIME_KEEPER_ID 0 +extern unsigned long wall_jiffies; + +static s_time_t stime_irq; /* System time at last 'time update' */ + +unsigned long domain0_ready = 0; + +#ifndef CONFIG_VTI +static inline u64 get_time_delta(void) +{ + return ia64_get_itc(); +} +#else // CONFIG_VTI +static s_time_t stime_irq = 0x0; /* System time at last 'time update' */ +unsigned long itc_scale; +unsigned long itc_at_irq; +static unsigned long wc_sec, wc_nsec; /* UTC time at last 'time update'. */ +//static rwlock_t time_lock = RW_LOCK_UNLOCKED; +static irqreturn_t vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs); + +static inline u64 get_time_delta(void) +{ + s64 delta_itc; + u64 delta, cur_itc; + + cur_itc = ia64_get_itc(); + + delta_itc = (s64)(cur_itc - itc_at_irq); + if ( unlikely(delta_itc < 0) ) delta_itc = 0; + delta = ((u64)delta_itc) * itc_scale; + delta = delta >> 32; + + return delta; +} + +u64 tick_to_ns(u64 tick) +{ + return (tick * itc_scale) >> 32; +} +#endif // CONFIG_VTI + +s_time_t get_s_time(void) +{ + s_time_t now; + unsigned long flags; + + read_lock_irqsave(&xtime_lock, flags); + + now = stime_irq + get_time_delta(); + + /* Ensure that the returned system time is monotonically increasing. */ + { + static s_time_t prev_now = 0; + if ( unlikely(now < prev_now) ) + now = prev_now; + prev_now = now; + } + + read_unlock_irqrestore(&xtime_lock, flags); + + return now; +} + +void update_dom_time(struct vcpu *v) +{ +// FIXME: implement this? +// printf("update_dom_time: called, not implemented, skipping\n"); + return; +} + +/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */ +void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base) +{ +#ifdef CONFIG_VTI + u64 _nsecs; + + write_lock_irq(&xtime_lock); + + _nsecs = (u64)nsecs + (s64)(stime_irq - system_time_base); + while ( _nsecs >= 1000000000 ) + { + _nsecs -= 1000000000; + secs++; + } + + wc_sec = secs; + wc_nsec = (unsigned long)_nsecs; + + write_unlock_irq(&xtime_lock); + + update_dom_time(current->domain); +#else +// FIXME: Should this be do_settimeofday (from linux)??? 
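+// Until that question is resolved, the non-VTI build treats do_settime()
+// as a stub: it reports the call and then calls dummy().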
+ printf("do_settime: called, not implemented, stopping\n"); + dummy(); +#endif +} + +irqreturn_t +xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned long new_itm; + +#define HEARTBEAT_FREQ 16 // period in seconds +#ifdef HEARTBEAT_FREQ + static long count = 0; + if (!(++count & ((HEARTBEAT_FREQ*1024)-1))) { + printf("Heartbeat... iip=%p,psr.i=%d,pend=%d\n", + regs->cr_iip, + VCPU(current,interrupt_delivery_enabled), + VCPU(current,pending_interruption)); + count = 0; + } +#endif +#ifndef XEN + if (unlikely(cpu_is_offline(smp_processor_id()))) { + return IRQ_HANDLED; + } +#endif +#ifdef XEN + if (current->domain == dom0) { + // FIXME: there's gotta be a better way of doing this... + // We have to ensure that domain0 is launched before we + // call vcpu_timer_expired on it + //domain0_ready = 1; // moved to xensetup.c + VCPU(current,pending_interruption) = 1; + } + if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) { + vcpu_pend_timer(dom0->vcpu[0]); + //vcpu_set_next_timer(dom0->vcpu[0]); + vcpu_wake(dom0->vcpu[0]); + } + if (!is_idle_task(current->domain) && current->domain != dom0) { + if (vcpu_timer_expired(current)) { + vcpu_pend_timer(current); + // ensure another timer interrupt happens even if domain doesn't + vcpu_set_next_timer(current); + vcpu_wake(current); + } + } + raise_actimer_softirq(); +#endif + +#ifndef XEN + platform_timer_interrupt(irq, dev_id, regs); +#endif + + new_itm = local_cpu_data->itm_next; + + if (!time_after(ia64_get_itc(), new_itm)) +#ifdef XEN + return; +#else + printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", + ia64_get_itc(), new_itm); +#endif + +#ifdef XEN +// printf("GOT TO HERE!!!!!!!!!!!\n"); + //while(1); +#else + profile_tick(CPU_PROFILING, regs); +#endif + + while (1) { +#ifndef XEN + update_process_times(user_mode(regs)); +#endif + + new_itm += local_cpu_data->itm_delta; + + if (smp_processor_id() == TIME_KEEPER_ID) { + /* + * Here we are in the timer irq handler. We have irqs locally + * disabled, but we don't know if the timer_bh is running on + * another CPU. We need to avoid to SMP race by acquiring the + * xtime_lock. + */ +#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN + write_seqlock(&xtime_lock); +#endif +#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN + do_timer(regs); +#endif + local_cpu_data->itm_next = new_itm; +#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN + write_sequnlock(&xtime_lock); +#endif + } else + local_cpu_data->itm_next = new_itm; + + if (time_after(new_itm, ia64_get_itc())) + break; + } + + do { + /* + * If we're too close to the next clock tick for + * comfort, we increase the safety margin by + * intentionally dropping the next tick(s). We do NOT + * update itm.next because that would force us to call + * do_timer() which in turn would let our clock run + * too fast (with the potentially devastating effect + * of losing monotony of time). 
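+ * Concretely: new_itm is advanced in itm_delta steps until it is more
+ * than itm_delta/2 ahead of the current ITC, and only then written
+ * back with ia64_set_itm().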
+ */ + while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) + new_itm += local_cpu_data->itm_delta; +//#ifdef XEN +// vcpu_set_next_timer(current); +//#else +//printf("***** timer_interrupt: Setting itm to %lx\n",new_itm); + ia64_set_itm(new_itm); +//#endif + /* double check, in case we got hit by a (slow) PMI: */ + } while (time_after_eq(ia64_get_itc(), new_itm)); + return IRQ_HANDLED; +} + +static struct irqaction xen_timer_irqaction = { +#ifdef CONFIG_VTI + .handler = vmx_timer_interrupt, +#else // CONFIG_VTI + .handler = xen_timer_interrupt, +#endif // CONFIG_VTI +#ifndef XEN + .flags = SA_INTERRUPT, +#endif + .name = "timer" +}; + +void __init +xen_time_init (void) +{ + register_percpu_irq(IA64_TIMER_VECTOR, &xen_timer_irqaction); + ia64_init_itm(); +} + + +#ifdef CONFIG_VTI + +/* Late init function (after all CPUs are booted). */ +int __init init_xen_time() +{ + struct timespec tm; + + itc_scale = 1000000000UL << 32 ; + itc_scale /= local_cpu_data->itc_freq; + + /* System time ticks from zero. */ + stime_irq = (s_time_t)0; + itc_at_irq = ia64_get_itc(); + + /* Wallclock time starts as the initial RTC time. */ + efi_gettimeofday(&tm); + wc_sec = tm.tv_sec; + wc_nsec = tm.tv_nsec; + + + printk("Time init:\n"); + printk(".... System Time: %ldns\n", NOW()); + printk(".... scale: %16lX\n", itc_scale); + printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_nsec/1000); + + return 0; +} + +static irqreturn_t +vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned long new_itm; + struct vcpu *v = current; + + + new_itm = local_cpu_data->itm_next; + + if (!time_after(ia64_get_itc(), new_itm)) + return; + + while (1) { +#ifdef CONFIG_SMP + /* + * For UP, this is done in do_timer(). Weird, but + * fixing that would require updates to all + * platforms. + */ + update_process_times(user_mode(v, regs)); +#endif + new_itm += local_cpu_data->itm_delta; + + if (smp_processor_id() == TIME_KEEPER_ID) { + /* + * Here we are in the timer irq handler. We have irqs locally + * disabled, but we don't know if the timer_bh is running on + * another CPU. We need to avoid to SMP race by acquiring the + * xtime_lock. + */ + local_cpu_data->itm_next = new_itm; + + write_lock_irq(&xtime_lock); + /* Update jiffies counter. */ + (*(unsigned long *)&jiffies_64)++; + + /* Update wall time. */ + wc_nsec += 1000000000/HZ; + if ( wc_nsec >= 1000000000 ) + { + wc_nsec -= 1000000000; + wc_sec++; + } + + /* Updates system time (nanoseconds since boot). */ + stime_irq += MILLISECS(1000/HZ); + itc_at_irq = ia64_get_itc(); + + write_unlock_irq(&xtime_lock); + + } else + local_cpu_data->itm_next = new_itm; + + if (time_after(new_itm, ia64_get_itc())) + break; + } + + do { + /* + * If we're too close to the next clock tick for + * comfort, we increase the safety margin by + * intentionally dropping the next tick(s). We do NOT + * update itm.next because that would force us to call + * do_timer() which in turn would let our clock run + * too fast (with the potentially devastating effect + * of losing monotony of time). 
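+ * (Same catch-up strategy as xen_timer_interrupt() above; this VTI
+ * variant additionally updates jiffies, the wallclock and stime_irq
+ * under xtime_lock in the loop before reaching this point.)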
+ */ + while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) + new_itm += local_cpu_data->itm_delta; + ia64_set_itm(new_itm); + /* double check, in case we got hit by a (slow) PMI: */ + } while (time_after_eq(ia64_get_itc(), new_itm)); + raise_softirq(AC_TIMER_SOFTIRQ); + + return IRQ_HANDLED; +} +#endif // CONFIG_VTI + diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux-xen/asm/pgtable.h --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Thu Sep 1 18:46:28 2005 @@ -0,0 +1,577 @@ +#ifndef _ASM_IA64_PGTABLE_H +#define _ASM_IA64_PGTABLE_H + +/* + * This file contains the functions and defines necessary to modify and use + * the IA-64 page table tree. + * + * This hopefully works with any (fixed) IA-64 page-size, as defined + * in <asm/page.h>. + * + * Copyright (C) 1998-2005 Hewlett-Packard Co + * David Mosberger-Tang <davidm@xxxxxxxxxx> + */ + +#include <linux/config.h> + +#include <asm/mman.h> +#include <asm/page.h> +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/types.h> +#ifdef XEN +#ifndef __ASSEMBLY__ +#include <xen/sched.h> /* needed for mm_struct (via asm/domain.h) */ +#endif +#endif + +#define IA64_MAX_PHYS_BITS 50 /* max. number of physical address bits (architected) */ + +/* + * First, define the various bits in a PTE. Note that the PTE format + * matches the VHPT short format, the firt doubleword of the VHPD long + * format, and the first doubleword of the TLB insertion format. + */ +#define _PAGE_P_BIT 0 +#define _PAGE_A_BIT 5 +#define _PAGE_D_BIT 6 + +#define _PAGE_P (1 << _PAGE_P_BIT) /* page present bit */ +#define _PAGE_MA_WB (0x0 << 2) /* write back memory attribute */ +#define _PAGE_MA_UC (0x4 << 2) /* uncacheable memory attribute */ +#define _PAGE_MA_UCE (0x5 << 2) /* UC exported attribute */ +#define _PAGE_MA_WC (0x6 << 2) /* write coalescing memory attribute */ +#define _PAGE_MA_NAT (0x7 << 2) /* not-a-thing attribute */ +#define _PAGE_MA_MASK (0x7 << 2) +#define _PAGE_PL_0 (0 << 7) /* privilege level 0 (kernel) */ +#define _PAGE_PL_1 (1 << 7) /* privilege level 1 (unused) */ +#define _PAGE_PL_2 (2 << 7) /* privilege level 2 (unused) */ +#define _PAGE_PL_3 (3 << 7) /* privilege level 3 (user) */ +#define _PAGE_PL_MASK (3 << 7) +#define _PAGE_AR_R (0 << 9) /* read only */ +#define _PAGE_AR_RX (1 << 9) /* read & execute */ +#define _PAGE_AR_RW (2 << 9) /* read & write */ +#define _PAGE_AR_RWX (3 << 9) /* read, write & execute */ +#define _PAGE_AR_R_RW (4 << 9) /* read / read & write */ +#define _PAGE_AR_RX_RWX (5 << 9) /* read & exec / read, write & exec */ +#define _PAGE_AR_RWX_RW (6 << 9) /* read, write & exec / read & write */ +#define _PAGE_AR_X_RX (7 << 9) /* exec & promote / read & exec */ +#define _PAGE_AR_MASK (7 << 9) +#define _PAGE_AR_SHIFT 9 +#define _PAGE_A (1 << _PAGE_A_BIT) /* page accessed bit */ +#define _PAGE_D (1 << _PAGE_D_BIT) /* page dirty bit */ +#define _PAGE_PPN_MASK (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL) +#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */ +#define _PAGE_PROTNONE (__IA64_UL(1) << 63) + +/* Valid only for a PTE with the present bit cleared: */ +#define _PAGE_FILE (1 << 1) /* see swap & file pte remarks below */ + +#define _PFN_MASK _PAGE_PPN_MASK +/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */ +#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED) + +#define _PAGE_SIZE_4K 12 +#define _PAGE_SIZE_8K 13 +#define _PAGE_SIZE_16K 
14 +#define _PAGE_SIZE_64K 16 +#define _PAGE_SIZE_256K 18 +#define _PAGE_SIZE_1M 20 +#define _PAGE_SIZE_4M 22 +#define _PAGE_SIZE_16M 24 +#define _PAGE_SIZE_64M 26 +#define _PAGE_SIZE_256M 28 +#define _PAGE_SIZE_1G 30 +#define _PAGE_SIZE_4G 32 + +#define __ACCESS_BITS _PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB +#define __DIRTY_BITS_NO_ED _PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB +#define __DIRTY_BITS _PAGE_ED | __DIRTY_BITS_NO_ED + +/* + * Definitions for first level: + * + * PGDIR_SHIFT determines what a first-level page table entry can map. + */ +#define PGDIR_SHIFT (PAGE_SHIFT + 2*(PAGE_SHIFT-3)) +#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3)) +#define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */ +#define FIRST_USER_ADDRESS 0 + +/* + * Definitions for second level: + * + * PMD_SHIFT determines the size of the area a second-level page table + * can map. + */ +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3)) + +/* + * Definitions for third level: + */ +#define PTRS_PER_PTE (__IA64_UL(1) << (PAGE_SHIFT-3)) + +/* + * All the normal masks have the "page accessed" bits on, as any time + * they are used, the page is accessed. They are cleared only by the + * page-out routines. + */ +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_A) +#define PAGE_SHARED __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) +#define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +#define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +#define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) +#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) +#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) +#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) + +# ifndef __ASSEMBLY__ + +#include <asm/bitops.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> +#include <asm/processor.h> + +/* + * Next come the mappings that determine how mmap() protection bits + * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented. The + * _P version gets used for a private shared memory segment, the _S + * version gets used for a shared memory segment with MAP_SHARED on. + * In a private shared memory segment, we do a copy-on-write if a task + * attempts to write to the page. 
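+ * For example, a private PROT_READ|PROT_WRITE mapping starts out with
+ * the read-only __P011 entry and only becomes writable via the
+ * copy-on-write fault path, whereas the MAP_SHARED equivalent
+ * (__S011) is writable from the start.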
+ */ + /* xwr */ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 PAGE_READONLY /* write to priv pg -> copy & make writable */ +#define __P011 PAGE_READONLY /* ditto */ +#define __P100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX) +#define __P101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) +#define __P110 PAGE_COPY_EXEC +#define __P111 PAGE_COPY_EXEC + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_SHARED /* we don't have (and don't need) write-only */ +#define __S011 PAGE_SHARED +#define __S100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX) +#define __S101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) +#define __S110 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX) +#define __S111 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX) + +#define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) +#define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pte_ERROR(e) printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) + + +/* + * Some definitions to translate between mem_map, PTEs, and page addresses: + */ + + +/* Quick test to see if ADDR is a (potentially) valid physical address. */ +static inline long +ia64_phys_addr_valid (unsigned long addr) +{ + return (addr & (local_cpu_data->unimpl_pa_mask)) == 0; +} + +/* + * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel + * memory. For the return value to be meaningful, ADDR must be >= + * PAGE_OFFSET. This operation can be relatively expensive (e.g., + * require a hash-, or multi-level tree-lookup or something of that + * sort) but it guarantees to return TRUE only if accessing the page + * at that address does not cause an error. Note that there may be + * addresses for which kern_addr_valid() returns FALSE even though an + * access would not cause an error (e.g., this is typically true for + * memory mapped I/O regions. + * + * XXX Need to implement this for IA-64. + */ +#define kern_addr_valid(addr) (1) + + +/* + * Now come the defines and routines to manage and access the three-level + * page table. + */ + +/* + * On some architectures, special things need to be done when setting + * the PTE in a page table. Nothing special needs to be on IA-64. + */ +#define set_pte(ptep, pteval) (*(ptep) = (pteval)) +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) + +#define RGN_SIZE (1UL << 61) +#define RGN_KERNEL 7 + +#define VMALLOC_START 0xa000000200000000UL +#ifdef CONFIG_VIRTUAL_MEM_MAP +# define VMALLOC_END_INIT (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9))) +# define VMALLOC_END vmalloc_end + extern unsigned long vmalloc_end; +#else +# define VMALLOC_END (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9))) +#endif + +/* fs/proc/kcore.c */ +#define kc_vaddr_to_offset(v) ((v) - 0xa000000000000000UL) +#define kc_offset_to_vaddr(o) ((o) + 0xa000000000000000UL) + +/* + * Conversion functions: convert page frame number (pfn) and a protection value to a page + * table entry (pte). + */ +#define pfn_pte(pfn, pgprot) \ +({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; }) + +/* Extract pfn from pte. 
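+ * (The inverse of pfn_pte() above: pte_pfn(pfn_pte(pfn, prot)) == pfn
+ * as long as the physical address pfn << PAGE_SHIFT fits within
+ * IA64_MAX_PHYS_BITS.)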
*/ +#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT) + +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +/* This takes a physical page address that is used by the remapping functions */ +#define mk_pte_phys(physpage, pgprot) \ +({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; }) + +#define pte_modify(_pte, newprot) \ + (__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK))) + +#define page_pte_prot(page,prot) mk_pte(page, prot) +#define page_pte(page) page_pte_prot(page, __pgprot(0)) + +#define pte_none(pte) (!pte_val(pte)) +#define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE)) +#define pte_clear(mm,addr,pte) (pte_val(*(pte)) = 0UL) +/* pte_page() returns the "struct page *" corresponding to the PTE: */ +#define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET)) + +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd))) +#define pmd_present(pmd) (pmd_val(pmd) != 0UL) +#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) +#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) +#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET)) + +#define pud_none(pud) (!pud_val(pud)) +#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud))) +#define pud_present(pud) (pud_val(pud) != 0UL) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) + +#define pud_page(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) + +/* + * The following have defined behavior only work if pte_present() is true. + */ +#define pte_user(pte) ((pte_val(pte) & _PAGE_PL_MASK) == _PAGE_PL_3) +#define pte_read(pte) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) < 6) +#define pte_write(pte) ((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4) +#define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) +#define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) +#define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) +#define pte_file(pte) ((pte_val(pte) & _PAGE_FILE) != 0) +/* + * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the + * access rights: + */ +#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW)) +#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_AR_RW)) +#define pte_mkexec(pte) (__pte(pte_val(pte) | _PAGE_AR_RX)) +#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A)) +#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) +#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) +#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) +#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P)) + +/* + * Macro to a page protection value as "uncacheable". Note that "protection" is really a + * misnomer here as the protection value contains the memory attribute bits, dirty bits, + * and various other bits as well. + */ +#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) + +/* + * Macro to make mark a page protection value as "write-combining". + * Note that "protection" is really a misnomer here as the protection + * value contains the memory attribute bits, dirty bits, and various + * other bits as well. Accesses through a write-combining translation + * works bypasses the caches, but does allow for consecutive writes to + * be combined into single (but larger) write transactions. 
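+ * e.g. pgprot_writecombine(PAGE_KERNEL) keeps PAGE_KERNEL's access
+ * rights but replaces its _PAGE_MA_WB attribute with _PAGE_MA_WC.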
+ */ +#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC) + +static inline unsigned long +pgd_index (unsigned long address) +{ + unsigned long region = address >> 61; + unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1); + + return (region << (PAGE_SHIFT - 6)) | l1index; +} + +/* The offset in the 1-level directory is given by the 3 region bits + (61..63) and the level-1 bits. */ +static inline pgd_t* +pgd_offset (struct mm_struct *mm, unsigned long address) +{ + return mm->pgd + pgd_index(address); +} + +/* In the kernel's mapped region we completely ignore the region number + (since we know it's in region number 5). */ +#define pgd_offset_k(addr) \ + (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) + +/* Look up a pgd entry in the gate area. On IA-64, the gate-area + resides in the kernel-mapped segment, hence we use pgd_offset_k() + here. */ +#define pgd_offset_gate(mm, addr) pgd_offset_k(addr) + +/* Find an entry in the second-level page table.. */ +#define pmd_offset(dir,addr) \ + ((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + +/* + * Find an entry in the third-level page table. This looks more complicated than it + * should be because some platforms place page tables in high memory. + */ +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr)) +#define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr) +#define pte_offset_map_nested(dir,addr) pte_offset_map(dir, addr) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +/* atomic versions of the some PTE manipulations: */ + +static inline int +ptep_test_and_clear_young (struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + if (!pte_young(*ptep)) + return 0; + return test_and_clear_bit(_PAGE_A_BIT, ptep); +#else + pte_t pte = *ptep; + if (!pte_young(pte)) + return 0; + set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte)); + return 1; +#endif +} + +static inline int +ptep_test_and_clear_dirty (struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + if (!pte_dirty(*ptep)) + return 0; + return test_and_clear_bit(_PAGE_D_BIT, ptep); +#else + pte_t pte = *ptep; + if (!pte_dirty(pte)) + return 0; + set_pte_at(vma->vm_mm, addr, ptep, pte_mkclean(pte)); + return 1; +#endif +} + +static inline pte_t +ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + return __pte(xchg((long *) ptep, 0)); +#else + pte_t pte = *ptep; + pte_clear(mm, addr, ptep); + return pte; +#endif +} + +static inline void +ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + unsigned long new, old; + + do { + old = pte_val(*ptep); + new = pte_val(pte_wrprotect(__pte (old))); + } while (cmpxchg((unsigned long *) ptep, old, new) != old); +#else + pte_t old_pte = *ptep; + set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); +#endif +} + +static inline int +pte_same (pte_t a, pte_t b) +{ + return pte_val(a) == pte_val(b); +} + +#define update_mmu_cache(vma, address, pte) do { } while (0) + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern void paging_init (void); + +/* + * Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of + * bits in the swap-type field of the swap pte. 
It would be nice to + * enforce that, but we can't easily include <linux/swap.h> here. + * (Of course, better still would be to define MAX_SWAPFILES_SHIFT here...). + * + * Format of swap pte: + * bit 0 : present bit (must be zero) + * bit 1 : _PAGE_FILE (must be zero) + * bits 2- 8: swap-type + * bits 9-62: swap offset + * bit 63 : _PAGE_PROTNONE bit + * + * Format of file pte: + * bit 0 : present bit (must be zero) + * bit 1 : _PAGE_FILE (must be one) + * bits 2-62: file_offset/PAGE_SIZE + * bit 63 : _PAGE_PROTNONE bit + */ +#define __swp_type(entry) (((entry).val >> 2) & 0x7f) +#define __swp_offset(entry) (((entry).val << 1) >> 10) +#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +#define PTE_FILE_MAX_BITS 61 +#define pte_to_pgoff(pte) ((pte_val(pte) << 1) >> 3) +#define pgoff_to_pte(off) ((pte_t) { ((off) << 2) | _PAGE_FILE }) + +/* XXX is this right? */ +#define io_remap_page_range(vma, vaddr, paddr, size, prot) \ + remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot) + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +#define MK_IOSPACE_PFN(space, pfn) (pfn) +#define GET_IOSPACE(pfn) 0 +#define GET_PFN(pfn) (pfn) + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +extern struct page *zero_page_memmap_ptr; +#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr) + +/* We provide our own get_unmapped_area to cope with VA holes for userland */ +#define HAVE_ARCH_UNMAPPED_AREA + +#ifdef CONFIG_HUGETLB_PAGE +#define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3)) +#define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT) +#define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1)) +struct mmu_gather; +void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, + unsigned long end, unsigned long floor, unsigned long ceiling); +#endif + +/* + * IA-64 doesn't have any external MMU info: the page tables contain all the necessary + * information. However, we use this routine to take care of any (delayed) i-cache + * flushing that may be necessary. + */ +extern void lazy_mmu_prot_update (pte_t pte); + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +/* + * Update PTEP with ENTRY, which is guaranteed to be a less + * restrictive PTE. That is, ENTRY may have the ACCESSED, DIRTY, and + * WRITABLE bits turned on, when the value at PTEP did not. The + * WRITABLE bit may only be turned if SAFELY_WRITABLE is TRUE. + * + * SAFELY_WRITABLE is TRUE if we can update the value at PTEP without + * having to worry about races. On SMP machines, there are only two + * cases where this is true: + * + * (1) *PTEP has the PRESENT bit turned OFF + * (2) ENTRY has the DIRTY bit turned ON + * + * On ia64, we could implement this routine with a cmpxchg()-loop + * which ORs in the _PAGE_A/_PAGE_D bit if they're set in ENTRY. + * However, like on x86, we can get a more streamlined version by + * observing that it is OK to drop ACCESSED bit updates when + * SAFELY_WRITABLE is FALSE. Besides being rare, all that would do is + * result in an extra Access-bit fault, which would then turn on the + * ACCESSED bit in the low-level fault handler (iaccess_bit or + * daccess_bit in ivt.S). 
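+ * Hence the SMP variant below only installs ENTRY (and flushes the
+ * TLB entry) when SAFELY_WRITABLE is true; otherwise it deliberately
+ * does nothing and lets the low-level fault handlers set the bits
+ * later.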
+ */ +#ifdef CONFIG_SMP +# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ +do { \ + if (__safely_writable) { \ + set_pte(__ptep, __entry); \ + flush_tlb_page(__vma, __addr); \ + } \ +} while (0) +#else +# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ + ptep_establish(__vma, __addr, __ptep, __entry) +#endif + +# ifdef CONFIG_VIRTUAL_MEM_MAP + /* arch mem_map init routine is needed due to holes in a virtual mem_map */ +# define __HAVE_ARCH_MEMMAP_INIT + extern void memmap_init (unsigned long size, int nid, unsigned long zone, + unsigned long start_pfn); +# endif /* CONFIG_VIRTUAL_MEM_MAP */ +# endif /* !__ASSEMBLY__ */ + +/* + * Identity-mapped regions use a large page size. We'll call such large pages + * "granules". If you can think of a better name that's unambiguous, let me + * know... + */ +#if defined(CONFIG_IA64_GRANULE_64MB) +# define IA64_GRANULE_SHIFT _PAGE_SIZE_64M +#elif defined(CONFIG_IA64_GRANULE_16MB) +# define IA64_GRANULE_SHIFT _PAGE_SIZE_16M +#endif +#define IA64_GRANULE_SIZE (1 << IA64_GRANULE_SHIFT) +/* + * log2() of the page size we use to map the kernel image (IA64_TR_KERNEL): + */ +#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M +#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT) + +/* + * No page table caches to initialise + */ +#define pgtable_cache_init() do { } while (0) + +/* These tell get_user_pages() that the first gate page is accessible from user-level. */ +#define FIXADDR_USER_START GATE_ADDR +#ifdef HAVE_BUGGY_SEGREL +# define FIXADDR_USER_END (GATE_ADDR + 2*PAGE_SIZE) +#else +# define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE) +#endif + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define __HAVE_ARCH_PTE_SAME +#define __HAVE_ARCH_PGD_OFFSET_GATE +#define __HAVE_ARCH_LAZY_MMU_PROT_UPDATE + +#include <asm-generic/pgtable-nopud.h> +#include <asm-generic/pgtable.h> + +#endif /* _ASM_IA64_PGTABLE_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm-generic/unaligned.h --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm-generic/unaligned.h Thu Sep 1 18:46:28 2005 @@ -0,0 +1,122 @@ +#ifndef _ASM_GENERIC_UNALIGNED_H_ +#define _ASM_GENERIC_UNALIGNED_H_ + +/* + * For the benefit of those who are trying to port Linux to another + * architecture, here are some C-language equivalents. + * + * This is based almost entirely upon Richard Henderson's + * asm-alpha/unaligned.h implementation. Some comments were + * taken from David Mosberger's asm-ia64/unaligned.h header. + */ + +#include <linux/types.h> + +/* + * The main single-value unaligned transfer routines. + */ +#define get_unaligned(ptr) \ + ((__typeof__(*(ptr)))__get_unaligned((ptr), sizeof(*(ptr)))) +#define put_unaligned(x,ptr) \ + __put_unaligned((unsigned long)(x), (ptr), sizeof(*(ptr))) + +/* + * This function doesn't actually exist. The idea is that when + * someone uses the macros below with an unsupported size (datatype), + * the linker will alert us to the problem via an unresolved reference + * error. 
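+ * Typical (illustrative) use, for the supported sizes 1, 2, 4 and 8:
+ *
+ *	__u32 v = get_unaligned((__u32 *)src);	// unaligned 32-bit load
+ *	put_unaligned(v, (__u32 *)dst);		// unaligned 32-bit store
+ *
+ * where src and dst are hypothetical, possibly misaligned pointers.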
+ */ +extern void bad_unaligned_access_length(void) __attribute__((noreturn)); + +struct __una_u64 { __u64 x __attribute__((packed)); }; +struct __una_u32 { __u32 x __attribute__((packed)); }; +struct __una_u16 { __u16 x __attribute__((packed)); }; + +/* + * Elemental unaligned loads + */ + +static inline unsigned long __uldq(const __u64 *addr) +{ + const struct __una_u64 *ptr = (const struct __una_u64 *) addr; + return ptr->x; +} + +static inline unsigned long __uldl(const __u32 *addr) +{ + const struct __una_u32 *ptr = (const struct __una_u32 *) addr; + return ptr->x; +} + +static inline unsigned long __uldw(const __u16 *addr) +{ + const struct __una_u16 *ptr = (const struct __una_u16 *) addr; + return ptr->x; +} + +/* + * Elemental unaligned stores + */ + +static inline void __ustq(__u64 val, __u64 *addr) +{ + struct __una_u64 *ptr = (struct __una_u64 *) addr; + ptr->x = val; +} + +static inline void __ustl(__u32 val, __u32 *addr) +{ + struct __una_u32 *ptr = (struct __una_u32 *) addr; + ptr->x = val; +} + +static inline void __ustw(__u16 val, __u16 *addr) +{ + struct __una_u16 *ptr = (struct __una_u16 *) addr; + ptr->x = val; +} + +#define __get_unaligned(ptr, size) ({ \ + const void *__gu_p = ptr; \ + unsigned long val; \ + switch (size) { \ + case 1: \ + val = *(const __u8 *)__gu_p; \ + break; \ + case 2: \ + val = __uldw(__gu_p); \ + break; \ + case 4: \ + val = __uldl(__gu_p); \ + break; \ + case 8: \ + val = __uldq(__gu_p); \ + break; \ + default: \ + bad_unaligned_access_length(); \ + }; \ + val; \ +}) + +#define __put_unaligned(val, ptr, size) \ +do { \ + void *__gu_p = ptr; \ + switch (size) { \ + case 1: \ + *(__u8 *)__gu_p = val; \ + break; \ + case 2: \ + __ustw(val, __gu_p); \ + break; \ + case 4: \ + __ustl(val, __gu_p); \ + break; \ + case 8: \ + __ustq(val, __gu_p); \ + break; \ + default: \ + bad_unaligned_access_length(); \ + }; \ +} while(0) + +#endif /* _ASM_GENERIC_UNALIGNED_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/numnodes.h --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/asm/numnodes.h Thu Sep 1 18:46:28 2005 @@ -0,0 +1,15 @@ +#ifndef _ASM_MAX_NUMNODES_H +#define _ASM_MAX_NUMNODES_H + +#ifdef CONFIG_IA64_DIG +/* Max 8 Nodes */ +#define NODES_SHIFT 3 +#elif defined(CONFIG_IA64_HP_ZX1) || defined(CONFIG_IA64_HP_ZX1_SWIOTLB) +/* Max 32 Nodes */ +#define NODES_SHIFT 5 +#elif defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) +/* Max 256 Nodes */ +#define NODES_SHIFT 8 +#endif + +#endif /* _ASM_MAX_NUMNODES_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/time.h --- /dev/null Thu Sep 1 17:09:27 2005 +++ b/xen/include/asm-ia64/linux/time.h Thu Sep 1 18:46:28 2005 @@ -0,0 +1,181 @@ +#ifndef _LINUX_TIME_H +#define _LINUX_TIME_H + +#include <linux/types.h> + +#ifdef __KERNEL__ +#include <linux/seqlock.h> +#endif + +#ifndef _STRUCT_TIMESPEC +#define _STRUCT_TIMESPEC +struct timespec { + time_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; +#endif /* _STRUCT_TIMESPEC */ + +struct timeval { + time_t tv_sec; /* seconds */ + suseconds_t tv_usec; /* microseconds */ +}; + +struct timezone { + int tz_minuteswest; /* minutes west of Greenwich */ + int tz_dsttime; /* type of dst correction */ +}; + +#ifdef __KERNEL__ + +/* Parameters used to convert the timespec values */ +#ifndef USEC_PER_SEC +#define USEC_PER_SEC (1000000L) +#endif + +#ifndef NSEC_PER_SEC +#define NSEC_PER_SEC (1000000000L) +#endif + +#ifndef NSEC_PER_USEC +#define NSEC_PER_USEC (1000L) +#endif + +static __inline__ 
int timespec_equal(struct timespec *a, struct timespec *b) +{ + return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); +} + +/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. + * + * [For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10.] + * + * This algorithm was first published by Gauss (I think). + * + * WARNING: this function will overflow on 2106-02-07 06:28:16 on + * machines were long is 32-bit! (However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +static inline unsigned long +mktime (unsigned int year, unsigned int mon, + unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; + } + + return ((( + (unsigned long) (year/4 - year/100 + year/400 + 367*mon/12 + day) + + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +extern struct timespec xtime; +extern struct timespec wall_to_monotonic; +extern seqlock_t xtime_lock; + +static inline unsigned long get_seconds(void) +{ + return xtime.tv_sec; +} + +struct timespec current_kernel_time(void); + +#define CURRENT_TIME (current_kernel_time()) +#define CURRENT_TIME_SEC ((struct timespec) { xtime.tv_sec, 0 }) + +extern void do_gettimeofday(struct timeval *tv); +extern int do_settimeofday(struct timespec *tv); +extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); +extern void clock_was_set(void); // call when ever the clock is set +extern int do_posix_clock_monotonic_gettime(struct timespec *tp); +extern long do_nanosleep(struct timespec *t); +extern long do_utimes(char __user * filename, struct timeval * times); +struct itimerval; +extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); +extern int do_getitimer(int which, struct itimerval *value); +extern void getnstimeofday (struct timespec *tv); + +extern struct timespec timespec_trunc(struct timespec t, unsigned gran); + +static inline void +set_normalized_timespec (struct timespec *ts, time_t sec, long nsec) +{ + while (nsec > NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +#endif /* __KERNEL__ */ + +#define NFDBITS __NFDBITS + +#define FD_SETSIZE __FD_SETSIZE +#define FD_SET(fd,fdsetp) __FD_SET(fd,fdsetp) +#define FD_CLR(fd,fdsetp) __FD_CLR(fd,fdsetp) +#define FD_ISSET(fd,fdsetp) __FD_ISSET(fd,fdsetp) +#define FD_ZERO(fdsetp) __FD_ZERO(fdsetp) + +/* + * Names of the interval timers, and structure + * defining a timer setting. + */ +#define ITIMER_REAL 0 +#define ITIMER_VIRTUAL 1 +#define ITIMER_PROF 2 + +struct itimerspec { + struct timespec it_interval; /* timer period */ + struct timespec it_value; /* timer expiration */ +}; + +struct itimerval { + struct timeval it_interval; /* timer interval */ + struct timeval it_value; /* current value */ +}; + + +/* + * The IDs of the various system clocks (for POSIX.1b interval timers). 
+ */ +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_REALTIME_HR 4 +#define CLOCK_MONOTONIC_HR 5 + +/* + * The IDs of various hardware clocks + */ + + +#define CLOCK_SGI_CYCLE 10 +#define MAX_CLOCKS 16 +#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC | \ + CLOCK_REALTIME_HR | CLOCK_MONOTONIC_HR) +#define CLOCKS_MONO (CLOCK_MONOTONIC & CLOCK_MONOTONIC_HR) + +/* + * The various flags for setting POSIX.1b interval timers. + */ + +#define TIMER_ABSTIME 0x01 + + +#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/acpi.c --- a/xen/arch/ia64/acpi.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,678 +0,0 @@ -/* - * acpi.c - Architecture-Specific Low-Level ACPI Support - * - * Copyright (C) 1999 VA Linux Systems - * Copyright (C) 1999,2000 Walt Drummond <drummond@xxxxxxxxxxx> - * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. - * David Mosberger-Tang <davidm@xxxxxxxxxx> - * Copyright (C) 2000 Intel Corp. - * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@xxxxxxxxx> - * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx> - * Copyright (C) 2001 Jenna Hall <jenna.s.hall@xxxxxxxxx> - * Copyright (C) 2001 Takayoshi Kochi <t-kochi@xxxxxxxxxxxxx> - * Copyright (C) 2002 Erich Focht <efocht@xxxxxxxxxx> - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/smp.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/irq.h> -#include <linux/acpi.h> -#include <linux/efi.h> -#include <linux/mmzone.h> -#include <asm/io.h> -//#include <asm/iosapic.h> -#include <asm/machvec.h> -#include <asm/page.h> -#include <asm/system.h> -#include <asm/numa.h> -#include <asm/sal.h> -//#include <asm/cyclone.h> - -#define BAD_MADT_ENTRY(entry, end) ( \ - (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ - ((acpi_table_entry_header *)entry)->length != sizeof(*entry)) - -#define PREFIX "ACPI: " - -void (*pm_idle) (void); -EXPORT_SYMBOL(pm_idle); -void (*pm_power_off) (void); - -unsigned char acpi_kbd_controller_present = 1; -unsigned char acpi_legacy_devices; - -const char * -acpi_get_sysname (void) -{ -/* #ifdef CONFIG_IA64_GENERIC */ - unsigned long rsdp_phys; - struct acpi20_table_rsdp *rsdp; - struct acpi_table_xsdt *xsdt; - struct acpi_table_header *hdr; - - rsdp_phys = acpi_find_rsdp(); - if (!rsdp_phys) { - printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n"); - return "dig"; - } - - rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys); - if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) { - printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); - return "dig"; - } - - xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address); - hdr = &xsdt->header; - if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) { - printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); - return "dig"; - } - - if (!strcmp(hdr->oem_id, "HP")) { - return "hpzx1"; - } - else if (!strcmp(hdr->oem_id, "SGI")) { - return "sn2"; - } - - return "dig"; -/* -#else -# if defined (CONFIG_IA64_HP_SIM) - return "hpsim"; -# elif defined (CONFIG_IA64_HP_ZX1) - return "hpzx1"; -# elif defined (CONFIG_IA64_SGI_SN2) - return "sn2"; -# elif defined (CONFIG_IA64_DIG) - return "dig"; -# else -# error Unknown platform. Fix acpi.c. -# endif -#endif -*/ -} - -#ifdef CONFIG_ACPI_BOOT - -#define ACPI_MAX_PLATFORM_INTERRUPTS 256 - -#if 0 -/* Array to record platform interrupt vectors for generic interrupt routing. */ -int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = { - [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1 -}; - -enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; - -/* - * Interrupt routing API for device drivers. Provides interrupt vector for - * a generic platform event. Currently only CPEI is implemented. 
- */ -int -acpi_request_vector (u32 int_type) -{ - int vector = -1; - - if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) { - /* corrected platform error interrupt */ - vector = platform_intr_list[int_type]; - } else - printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n"); - return vector; -} -#endif -char * -__acpi_map_table (unsigned long phys_addr, unsigned long size) -{ - return __va(phys_addr); -} - -/* -------------------------------------------------------------------------- - Boot-time Table Parsing - -------------------------------------------------------------------------- */ - -static int total_cpus __initdata; -static int available_cpus __initdata; -struct acpi_table_madt * acpi_madt __initdata; -static u8 has_8259; - -#if 0 -static int __init -acpi_parse_lapic_addr_ovr ( - acpi_table_entry_header *header, const unsigned long end) -{ - struct acpi_table_lapic_addr_ovr *lapic; - - lapic = (struct acpi_table_lapic_addr_ovr *) header; - - if (BAD_MADT_ENTRY(lapic, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - if (lapic->address) { - iounmap((void *) ipi_base_addr); - ipi_base_addr = (unsigned long) ioremap(lapic->address, 0); - } - return 0; -} - - -static int __init -acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end) -{ - struct acpi_table_lsapic *lsapic; - - lsapic = (struct acpi_table_lsapic *) header; - - if (BAD_MADT_ENTRY(lsapic, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - printk(KERN_INFO "CPU %d (0x%04x)", total_cpus, (lsapic->id << 8) | lsapic->eid); - - if (!lsapic->flags.enabled) - printk(" disabled"); - else { - printk(" enabled"); -#ifdef CONFIG_SMP - smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid; - if (hard_smp_processor_id() - == (unsigned int) smp_boot_data.cpu_phys_id[available_cpus]) - printk(" (BSP)"); -#endif - ++available_cpus; - } - - printk("\n"); - - total_cpus++; - return 0; -} - - -static int __init -acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end) -{ - struct acpi_table_lapic_nmi *lacpi_nmi; - - lacpi_nmi = (struct acpi_table_lapic_nmi*) header; - - if (BAD_MADT_ENTRY(lacpi_nmi, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - /* TBD: Support lapic_nmi entries */ - return 0; -} - - -static int __init -acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end) -{ - struct acpi_table_iosapic *iosapic; - - iosapic = (struct acpi_table_iosapic *) header; - - if (BAD_MADT_ENTRY(iosapic, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - iosapic_init(iosapic->address, iosapic->global_irq_base); - - return 0; -} - - -static int __init -acpi_parse_plat_int_src ( - acpi_table_entry_header *header, const unsigned long end) -{ - struct acpi_table_plat_int_src *plintsrc; - int vector; - - plintsrc = (struct acpi_table_plat_int_src *) header; - - if (BAD_MADT_ENTRY(plintsrc, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - /* - * Get vector assignment for this interrupt, set attributes, - * and program the IOSAPIC routing table. - */ - vector = iosapic_register_platform_intr(plintsrc->type, - plintsrc->global_irq, - plintsrc->iosapic_vector, - plintsrc->eid, - plintsrc->id, - (plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, - (plintsrc->flags.trigger == 1) ? 
IOSAPIC_EDGE : IOSAPIC_LEVEL); - - platform_intr_list[plintsrc->type] = vector; - return 0; -} - - -static int __init -acpi_parse_int_src_ovr ( - acpi_table_entry_header *header, const unsigned long end) -{ - struct acpi_table_int_src_ovr *p; - - p = (struct acpi_table_int_src_ovr *) header; - - if (BAD_MADT_ENTRY(p, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - iosapic_override_isa_irq(p->bus_irq, p->global_irq, - (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, - (p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); - return 0; -} - - -static int __init -acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end) -{ - struct acpi_table_nmi_src *nmi_src; - - nmi_src = (struct acpi_table_nmi_src*) header; - - if (BAD_MADT_ENTRY(nmi_src, end)) - return -EINVAL; - - acpi_table_print_madt_entry(header); - - /* TBD: Support nimsrc entries */ - return 0; -} -/* Hook from generic ACPI tables.c */ -void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ - if (!strncmp(oem_id, "IBM", 3) && - (!strncmp(oem_table_id, "SERMOW", 6))){ - - /* Unfortunatly ITC_DRIFT is not yet part of the - * official SAL spec, so the ITC_DRIFT bit is not - * set by the BIOS on this hardware. - */ - sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; - - /*Start cyclone clock*/ - cyclone_setup(0); - } -} - -static int __init -acpi_parse_madt (unsigned long phys_addr, unsigned long size) -{ - if (!phys_addr || !size) - return -EINVAL; - - acpi_madt = (struct acpi_table_madt *) __va(phys_addr); - - /* remember the value for reference after free_initmem() */ -#ifdef CONFIG_ITANIUM - has_8259 = 1; /* Firmware on old Itanium systems is broken */ -#else - has_8259 = acpi_madt->flags.pcat_compat; -#endif - iosapic_system_init(has_8259); - - /* Get base address of IPI Message Block */ - - if (acpi_madt->lapic_address) - ipi_base_addr = (unsigned long) ioremap(acpi_madt->lapic_address, 0); - - printk(KERN_INFO PREFIX "Local APIC address 0x%lx\n", ipi_base_addr); - - acpi_madt_oem_check(acpi_madt->header.oem_id, - acpi_madt->header.oem_table_id); - - return 0; -} -#endif - -#ifdef CONFIG_ACPI_NUMA - -#undef SLIT_DEBUG - -#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32) - -static int __initdata srat_num_cpus; /* number of cpus */ -static u32 __initdata pxm_flag[PXM_FLAG_LEN]; -#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) -#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) -/* maps to convert between proximity domain and logical node ID */ -int __initdata pxm_to_nid_map[MAX_PXM_DOMAINS]; -int __initdata nid_to_pxm_map[MAX_NUMNODES]; -static struct acpi_table_slit __initdata *slit_table; - -/* - * ACPI 2.0 SLIT (System Locality Information Table) - * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf - */ -void __init -acpi_numa_slit_init (struct acpi_table_slit *slit) -{ - u32 len; - - len = sizeof(struct acpi_table_header) + 8 - + slit->localities * slit->localities; - if (slit->header.length != len) { - printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", - len, slit->header.length); - memset(numa_slit, 10, sizeof(numa_slit)); - return; - } - slit_table = slit; -} - -void __init -acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa) -{ - /* record this node in proximity bitmap */ - pxm_bit_set(pa->proximity_domain); - - node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid); - /* nid should be overridden as logical node id later */ - 
node_cpuid[srat_num_cpus].nid = pa->proximity_domain; - srat_num_cpus++; -} - -void __init -acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma) -{ - unsigned long paddr, size; - u8 pxm; - struct node_memblk_s *p, *q, *pend; - - pxm = ma->proximity_domain; - - /* fill node memory chunk structure */ - paddr = ma->base_addr_hi; - paddr = (paddr << 32) | ma->base_addr_lo; - size = ma->length_hi; - size = (size << 32) | ma->length_lo; - - /* Ignore disabled entries */ - if (!ma->flags.enabled) - return; - - /* record this node in proximity bitmap */ - pxm_bit_set(pxm); - - /* Insertion sort based on base address */ - pend = &node_memblk[num_node_memblks]; - for (p = &node_memblk[0]; p < pend; p++) { - if (paddr < p->start_paddr) - break; - } - if (p < pend) { - for (q = pend - 1; q >= p; q--) - *(q + 1) = *q; - } - p->start_paddr = paddr; - p->size = size; - p->nid = pxm; - num_node_memblks++; -} - -void __init -acpi_numa_arch_fixup (void) -{ - int i, j, node_from, node_to; - - /* If there's no SRAT, fix the phys_id */ - if (srat_num_cpus == 0) { - node_cpuid[0].phys_id = hard_smp_processor_id(); - return; - } - - /* calculate total number of nodes in system from PXM bitmap */ - numnodes = 0; /* init total nodes in system */ - - memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); - memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); - for (i = 0; i < MAX_PXM_DOMAINS; i++) { - if (pxm_bit_test(i)) { - pxm_to_nid_map[i] = numnodes; - node_set_online(numnodes); - nid_to_pxm_map[numnodes++] = i; - } - } - - /* set logical node id in memory chunk structure */ - for (i = 0; i < num_node_memblks; i++) - node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid]; - - /* assign memory bank numbers for each chunk on each node */ - for (i = 0; i < numnodes; i++) { - int bank; - - bank = 0; - for (j = 0; j < num_node_memblks; j++) - if (node_memblk[j].nid == i) - node_memblk[j].bank = bank++; - } - - /* set logical node id in cpu structure */ - for (i = 0; i < srat_num_cpus; i++) - node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid]; - - printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes); - printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks); - - if (!slit_table) return; - memset(numa_slit, -1, sizeof(numa_slit)); - for (i=0; i<slit_table->localities; i++) { - if (!pxm_bit_test(i)) - continue; - node_from = pxm_to_nid_map[i]; - for (j=0; j<slit_table->localities; j++) { - if (!pxm_bit_test(j)) - continue; - node_to = pxm_to_nid_map[j]; - node_distance(node_from, node_to) = - slit_table->entry[i*slit_table->localities + j]; - } - } - -#ifdef SLIT_DEBUG - printk("ACPI 2.0 SLIT locality table:\n"); - for (i = 0; i < numnodes; i++) { - for (j = 0; j < numnodes; j++) - printk("%03d ", node_distance(i,j)); - printk("\n"); - } -#endif -} -#endif /* CONFIG_ACPI_NUMA */ - -#if 0 -unsigned int -acpi_register_gsi (u32 gsi, int polarity, int trigger) -{ - return acpi_register_irq(gsi, polarity, trigger); -} -EXPORT_SYMBOL(acpi_register_gsi); -static int __init -acpi_parse_fadt (unsigned long phys_addr, unsigned long size) -{ - struct acpi_table_header *fadt_header; - struct fadt_descriptor_rev2 *fadt; - - if (!phys_addr || !size) - return -EINVAL; - - fadt_header = (struct acpi_table_header *) __va(phys_addr); - if (fadt_header->revision != 3) - return -ENODEV; /* Only deal with ACPI 2.0 FADT */ - - fadt = (struct fadt_descriptor_rev2 *) fadt_header; - - if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER)) - acpi_kbd_controller_present = 0; - - if 
(fadt->iapc_boot_arch & BAF_LEGACY_DEVICES) - acpi_legacy_devices = 1; - - acpi_register_gsi(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE); - return 0; -} -#endif - -unsigned long __init -acpi_find_rsdp (void) -{ - unsigned long rsdp_phys = 0; - - if (efi.acpi20) - rsdp_phys = __pa(efi.acpi20); - else if (efi.acpi) - printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n"); - return rsdp_phys; -} - -#if 0 -int __init -acpi_boot_init (void) -{ - - /* - * MADT - * ---- - * Parse the Multiple APIC Description Table (MADT), if exists. - * Note that this table provides platform SMP configuration - * information -- the successor to MPS tables. - */ - - if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) { - printk(KERN_ERR PREFIX "Can't find MADT\n"); - goto skip_madt; - } - - /* Local APIC */ - - if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0) - printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); - - if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1) - printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n"); - - if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0) - printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); - - /* I/O APIC */ - - if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) - printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n"); - - /* System-Level Interrupt Routing */ - - if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0) - printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n"); - - if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0) - printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); - - if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0) - printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); - skip_madt: - - /* - * FADT says whether a legacy keyboard controller is present. - * The FADT also contains an SCI_INT line, by which the system - * gets interrupts such as power and sleep buttons. If it's not - * on a Legacy interrupt, it needs to be setup. - */ - if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1) - printk(KERN_ERR PREFIX "Can't find FADT\n"); - -#ifdef CONFIG_SMP - if (available_cpus == 0) { - printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); - printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id()); - smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id(); - available_cpus = 1; /* We've got at least one of these, no? 
*/ - } - smp_boot_data.cpu_count = available_cpus; - - smp_build_cpu_map(); -# ifdef CONFIG_ACPI_NUMA - if (srat_num_cpus == 0) { - int cpu, i = 1; - for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) - if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id()) - node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu]; - } - build_cpu_to_node_map(); -# endif -#endif - /* Make boot-up look pretty */ - printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); - return 0; -} -int -acpi_gsi_to_irq (u32 gsi, unsigned int *irq) -{ - int vector; - - if (has_8259 && gsi < 16) - *irq = isa_irq_to_vector(gsi); - else { - vector = gsi_to_vector(gsi); - if (vector == -1) - return -1; - - *irq = vector; - } - return 0; -} - -int -acpi_register_irq (u32 gsi, u32 polarity, u32 trigger) -{ - if (has_8259 && gsi < 16) - return isa_irq_to_vector(gsi); - - return iosapic_register_intr(gsi, - (polarity == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, - (trigger == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); -} -EXPORT_SYMBOL(acpi_register_irq); -#endif -#endif /* CONFIG_ACPI_BOOT */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/dom0_ops.c --- a/xen/arch/ia64/dom0_ops.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,237 +0,0 @@ -/****************************************************************************** - * Arch-specific dom0_ops.c - * - * Process command requests from domain-0 guest OS. - * - * Copyright (c) 2002, K A Fraser - */ - -#include <xen/config.h> -#include <xen/types.h> -#include <xen/lib.h> -#include <xen/mm.h> -#include <public/dom0_ops.h> -#include <xen/sched.h> -#include <xen/event.h> -#include <asm/pdb.h> -#include <xen/trace.h> -#include <xen/console.h> -#include <public/sched_ctl.h> - -long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op) -{ - long ret = 0; - - if ( !IS_PRIV(current->domain) ) - return -EPERM; - - switch ( op->cmd ) - { - case DOM0_GETPAGEFRAMEINFO: - { - struct pfn_info *page; - unsigned long pfn = op->u.getpageframeinfo.pfn; - domid_t dom = op->u.getpageframeinfo.domain; - struct domain *d; - - ret = -EINVAL; - - if ( unlikely(pfn >= max_page) || - unlikely((d = find_domain_by_id(dom)) == NULL) ) - break; - - page = &frame_table[pfn]; - - if ( likely(get_page(page, d)) ) - { - ret = 0; - - op->u.getpageframeinfo.type = NOTAB; - - if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) - { - switch ( page->u.inuse.type_info & PGT_type_mask ) - { - default: - panic("No such page type\n"); - break; - } - } - - put_page(page); - } - - put_domain(d); - - copy_to_user(u_dom0_op, op, sizeof(*op)); - } - break; - - case DOM0_GETPAGEFRAMEINFO2: - { -#define GPF2_BATCH 128 - int n,j; - int num = op->u.getpageframeinfo2.num; - domid_t dom = op->u.getpageframeinfo2.domain; - unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array; - struct domain *d; - unsigned long *l_arr; - ret = -ESRCH; - - if ( unlikely((d = find_domain_by_id(dom)) == NULL) ) - break; - - if ( unlikely(num > 1024) ) - { - ret = -E2BIG; - break; - } - - l_arr = (unsigned long *)alloc_xenheap_page(); - - ret = 0; - for( n = 0; n < num; ) - { - int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n); - - if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) ) - { - ret = -EINVAL; - break; - } - - for( j = 0; j < k; j++ ) - { - struct pfn_info *page; - unsigned long mfn = l_arr[j]; - - if ( unlikely(mfn >= max_page) ) - goto e2_err; - - page = &frame_table[mfn]; - - if ( likely(get_page(page, d)) ) - { - unsigned long 
type = 0; - - switch( page->u.inuse.type_info & PGT_type_mask ) - { - default: - panic("No such page type\n"); - break; - } - - if ( page->u.inuse.type_info & PGT_pinned ) - type |= LPINTAB; - l_arr[j] |= type; - put_page(page); - } - else - { - e2_err: - l_arr[j] |= XTAB; - } - - } - - if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) ) - { - ret = -EINVAL; - break; - } - - n += j; - } - - free_xenheap_page((unsigned long)l_arr); - - put_domain(d); - } - break; -#ifndef CONFIG_VTI - /* - * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 - - * it actually allocates and maps pages. - */ - case DOM0_GETMEMLIST: - { - unsigned long i; - struct domain *d = find_domain_by_id(op->u.getmemlist.domain); - unsigned long start_page = op->u.getmemlist.max_pfns >> 32; - unsigned long nr_pages = op->u.getmemlist.max_pfns & 0xffffffff; - unsigned long pfn; - unsigned long *buffer = op->u.getmemlist.buffer; - struct page *page; - - ret = -EINVAL; - if ( d != NULL ) - { - ret = 0; - - for ( i = start_page; i < (start_page + nr_pages); i++ ) - { - page = map_new_domain_page(d, i << PAGE_SHIFT); - if ( page == NULL ) - { - ret = -ENOMEM; - break; - } - pfn = page_to_pfn(page); - if ( put_user(pfn, buffer) ) - { - ret = -EFAULT; - break; - } - buffer++; - } - - op->u.getmemlist.num_pfns = i - start_page; - copy_to_user(u_dom0_op, op, sizeof(*op)); - - put_domain(d); - } - } - break; -#else - case DOM0_GETMEMLIST: - { - int i; - struct domain *d = find_domain_by_id(op->u.getmemlist.domain); - unsigned long max_pfns = op->u.getmemlist.max_pfns; - unsigned long pfn; - unsigned long *buffer = op->u.getmemlist.buffer; - struct list_head *list_ent; - - ret = -EINVAL; - if (!d) { - ret = 0; - - spin_lock(&d->page_alloc_lock); - list_ent = d->page_list.next; - for (i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++) { - pfn = list_entry(list_ent, struct pfn_info, list) - - frame_table; - if (put_user(pfn, buffer)) { - ret = -EFAULT; - break; - } - buffer++; - list_ent = frame_table[pfn].list.next; - } - spin_unlock(&d->page_alloc_lock); - - op->u.getmemlist.num_pfns = i; - copy_to_user(u_dom0_op, op, sizeof(*op)); - - put_domain(d); - } - } - break; -#endif // CONFIG_VTI - default: - ret = -ENOSYS; - - } - - return ret; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/dom_fw.c --- a/xen/arch/ia64/dom_fw.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,688 +0,0 @@ -/* - * Xen domain firmware emulation support - * Copyright (C) 2004 Hewlett-Packard Co. - * Dan Magenheimer (dan.magenheimer@xxxxxx) - * - */ - -#include <xen/config.h> -#include <asm/system.h> -#include <asm/pgalloc.h> - -#include <linux/efi.h> -#include <asm/io.h> -#include <asm/pal.h> -#include <asm/sal.h> -#include <xen/acpi.h> - -#include <asm/dom_fw.h> - -struct ia64_boot_param *dom_fw_init(struct domain *, char *,int,char *,int); -extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr); -extern struct domain *dom0; -extern unsigned long dom0_start; - -extern unsigned long running_on_sim; - - -unsigned long dom_fw_base_mpa = -1; -unsigned long imva_fw_base = -1; - -// return domain (meta)physical address for a given imva -// this function is a call-back from dom_fw_init -unsigned long dom_pa(unsigned long imva) -{ - if (dom_fw_base_mpa == -1 || imva_fw_base == -1) { - printf("dom_pa: uninitialized! (spinning...)\n"); - while(1); - } - if (imva - imva_fw_base > PAGE_SIZE) { - printf("dom_pa: bad offset! 
imva=%p, imva_fw_base=%p (spinning...)\n",imva,imva_fw_base); - while(1); - } - return dom_fw_base_mpa + (imva - imva_fw_base); -} - -// builds a hypercall bundle at domain physical address -void dom_efi_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall) -{ - unsigned long imva; - - if (d == dom0) paddr += dom0_start; - imva = domain_mpa_to_imva(d,paddr); - build_hypercall_bundle(imva,d->arch.breakimm,hypercall,1); -} - - -// builds a hypercall bundle at domain physical address -void dom_fw_hypercall_patch(struct domain *d, unsigned long paddr, unsigned long hypercall,unsigned long ret) -{ - unsigned long imva; - - if (d == dom0) paddr += dom0_start; - imva = domain_mpa_to_imva(d,paddr); - build_hypercall_bundle(imva,d->arch.breakimm,hypercall,ret); -} - - -// FIXME: This is really a hack: Forcing the boot parameter block -// at domain mpaddr 0 page, then grabbing only the low bits of the -// Xen imva, which is the offset into the page -unsigned long dom_fw_setup(struct domain *d, char *args, int arglen) -{ - struct ia64_boot_param *bp; - - dom_fw_base_mpa = 0; - if (d == dom0) dom_fw_base_mpa += dom0_start; - imva_fw_base = domain_mpa_to_imva(d,dom_fw_base_mpa); - bp = dom_fw_init(d,args,arglen,imva_fw_base,PAGE_SIZE); - return dom_pa((unsigned long)bp); -} - - -/* the following heavily leveraged from linux/arch/ia64/hp/sim/fw-emu.c */ - -#define MB (1024*1024UL) - -#define NUM_EFI_SYS_TABLES 6 -#define PASS_THRU_IOPORT_SPACE -#ifdef PASS_THRU_IOPORT_SPACE -# define NUM_MEM_DESCS 4 -#else -# define NUM_MEM_DESCS 3 -#endif - - -#define SECS_PER_HOUR (60 * 60) -#define SECS_PER_DAY (SECS_PER_HOUR * 24) - -/* Compute the `struct tm' representation of *T, - offset OFFSET seconds east of UTC, - and store year, yday, mon, mday, wday, hour, min, sec into *TP. - Return nonzero if successful. */ -int -offtime (unsigned long t, efi_time_t *tp) -{ - const unsigned short int __mon_yday[2][13] = - { - /* Normal years. */ - { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, - /* Leap years. */ - { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } - }; - long int days, rem, y; - const unsigned short int *ip; - - days = t / SECS_PER_DAY; - rem = t % SECS_PER_DAY; - while (rem < 0) { - rem += SECS_PER_DAY; - --days; - } - while (rem >= SECS_PER_DAY) { - rem -= SECS_PER_DAY; - ++days; - } - tp->hour = rem / SECS_PER_HOUR; - rem %= SECS_PER_HOUR; - tp->minute = rem / 60; - tp->second = rem % 60; - /* January 1, 1970 was a Thursday. */ - y = 1970; - -# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) -# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) -# define __isleap(year) \ - ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) - - while (days < 0 || days >= (__isleap (y) ? 366 : 365)) { - /* Guess a corrected year, assuming 365 days per year. */ - long int yg = y + days / 365 - (days % 365 < 0); - - /* Adjust DAYS and Y to match the guessed year. */ - days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1) - - LEAPS_THRU_END_OF (y - 1)); - y = yg; - } - tp->year = y; - ip = __mon_yday[__isleap(y)]; - for (y = 11; days < (long int) ip[y]; --y) - continue; - days -= ip[y]; - tp->month = y + 1; - tp->day = days + 1; - return 1; -} - -extern struct ia64_pal_retval pal_emulator_static (unsigned long); - -/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. 
*/ - -#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3) - -#define REG_OFFSET(addr) (0x00000000000000FF & (addr)) -#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr)) -#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr)) - -#ifndef XEN -static efi_status_t -fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc) -{ -#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) - struct { - int tv_sec; /* must be 32bits to work */ - int tv_usec; - } tv32bits; - - ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD); - - memset(tm, 0, sizeof(*tm)); - offtime(tv32bits.tv_sec, tm); - - if (tc) - memset(tc, 0, sizeof(*tc)); -#else -# error Not implemented yet... -#endif - return EFI_SUCCESS; -} - -static void -efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data) -{ -#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC) - ssc(status, 0, 0, 0, SSC_EXIT); -#else -# error Not implemented yet... -#endif -} - -static efi_status_t -efi_unimplemented (void) -{ - return EFI_UNSUPPORTED; -} -#endif /* !XEN */ - -struct sal_ret_values -sal_emulator (long index, unsigned long in1, unsigned long in2, - unsigned long in3, unsigned long in4, unsigned long in5, - unsigned long in6, unsigned long in7) -{ - long r9 = 0; - long r10 = 0; - long r11 = 0; - long status; - - /* - * Don't do a "switch" here since that gives us code that - * isn't self-relocatable. - */ - status = 0; - if (index == SAL_FREQ_BASE) { - if (!running_on_sim) - status = ia64_sal_freq_base(in1,&r9,&r10); - else switch (in1) { - case SAL_FREQ_BASE_PLATFORM: - r9 = 200000000; - break; - - case SAL_FREQ_BASE_INTERVAL_TIMER: - r9 = 700000000; - break; - - case SAL_FREQ_BASE_REALTIME_CLOCK: - r9 = 1; - break; - - default: - status = -1; - break; - } - } else if (index == SAL_PCI_CONFIG_READ) { - if (current->domain == dom0) { - u64 value; - // note that args 2&3 are swapped!! - status = ia64_sal_pci_config_read(in1,in3,in2,&value); - r9 = value; - } - else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_READ\n"); - } else if (index == SAL_PCI_CONFIG_WRITE) { - if (current->domain == dom0) { - if (((in1 & ~0xffffffffUL) && (in4 == 0)) || - (in4 > 1) || - (in2 > 8) || (in2 & (in2-1))) - printf("*** SAL_PCI_CONF_WRITE?!?(adr=%p,typ=%p,sz=%p,val=%p)\n",in1,in4,in2,in3); - // note that args are in a different order!! - status = ia64_sal_pci_config_write(in1,in4,in2,in3); - } - else printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_WRITE\n"); - } else if (index == SAL_SET_VECTORS) { - printf("*** CALLED SAL_SET_VECTORS. IGNORED...\n"); - } else if (index == SAL_GET_STATE_INFO) { - printf("*** CALLED SAL_GET_STATE_INFO. IGNORED...\n"); - } else if (index == SAL_GET_STATE_INFO_SIZE) { - printf("*** CALLED SAL_GET_STATE_INFO_SIZE. IGNORED...\n"); - } else if (index == SAL_CLEAR_STATE_INFO) { - printf("*** CALLED SAL_CLEAR_STATE_INFO. IGNORED...\n"); - } else if (index == SAL_MC_RENDEZ) { - printf("*** CALLED SAL_MC_RENDEZ. IGNORED...\n"); - } else if (index == SAL_MC_SET_PARAMS) { - printf("*** CALLED SAL_MC_SET_PARAMS. IGNORED...\n"); - } else if (index == SAL_CACHE_FLUSH) { - printf("*** CALLED SAL_CACHE_FLUSH. IGNORED...\n"); - } else if (index == SAL_CACHE_INIT) { - printf("*** CALLED SAL_CACHE_INIT. IGNORED...\n"); - } else if (index == SAL_UPDATE_PAL) { - printf("*** CALLED SAL_UPDATE_PAL. IGNORED...\n"); - } else { - printf("*** CALLED SAL_ WITH UNKNOWN INDEX. 
IGNORED...\n"); - status = -1; - } - return ((struct sal_ret_values) {status, r9, r10, r11}); -} - -struct ia64_pal_retval -xen_pal_emulator(unsigned long index, unsigned long in1, - unsigned long in2, unsigned long in3) -{ - long r9 = 0; - long r10 = 0; - long r11 = 0; - long status = -1; - -#define USE_PAL_EMULATOR -#ifdef USE_PAL_EMULATOR - return pal_emulator_static(index); -#endif - if (running_on_sim) return pal_emulator_static(index); - if (index >= PAL_COPY_PAL) { - printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n", - index); - } - else switch (index) { - case PAL_MEM_ATTRIB: - status = ia64_pal_mem_attrib(&r9); - break; - case PAL_FREQ_BASE: - status = ia64_pal_freq_base(&r9); - break; - case PAL_PROC_GET_FEATURES: - status = ia64_pal_proc_get_features(&r9,&r10,&r11); - break; - case PAL_BUS_GET_FEATURES: - status = ia64_pal_bus_get_features(&r9,&r10,&r11); - break; - case PAL_FREQ_RATIOS: - status = ia64_pal_freq_ratios(&r9,&r10,&r11); - break; - case PAL_PTCE_INFO: - { - // return hard-coded xen-specific values because ptc.e - // is emulated on xen to always flush everything - // these values result in only one ptc.e instruction - status = 0; r9 = 0; r10 = (1L << 32) | 1L; r11 = 0; - } - break; - case PAL_VERSION: - status = ia64_pal_version(&r9,&r10); - break; - case PAL_VM_PAGE_SIZE: - status = ia64_pal_vm_page_size(&r9,&r10); - break; - case PAL_DEBUG_INFO: - status = ia64_pal_debug_info(&r9,&r10); - break; - case PAL_CACHE_SUMMARY: - status = ia64_pal_cache_summary(&r9,&r10); - break; - case PAL_VM_SUMMARY: - // FIXME: what should xen return for these, figure out later - // For now, linux does the right thing if pal call fails - // In particular, rid_size must be set properly! - //status = ia64_pal_vm_summary(&r9,&r10); - break; - case PAL_RSE_INFO: - status = ia64_pal_rse_info(&r9,&r10); - break; - case PAL_VM_INFO: - status = ia64_pal_vm_info(in1,in2,&r9,&r10); - break; - case PAL_REGISTER_INFO: - status = ia64_pal_register_info(in1,&r9,&r10); - break; - case PAL_CACHE_FLUSH: - /* FIXME */ - printk("PAL_CACHE_FLUSH NOT IMPLEMENTED!\n"); - BUG(); - break; - case PAL_PERF_MON_INFO: - { - unsigned long pm_buffer[16]; - int i; - status = ia64_pal_perf_mon_info(pm_buffer,&r9); - if (status != 0) { - while(1) - printk("PAL_PERF_MON_INFO fails ret=%d\n",status); - break; - } - if (copy_to_user((void __user *)in1,pm_buffer,128)) { - while(1) - printk("xen_pal_emulator: PAL_PERF_MON_INFO " - "can't copy to user!!!!\n"); - status = -1; - break; - } - } - break; - case PAL_CACHE_INFO: - { - pal_cache_config_info_t ci; - status = ia64_pal_cache_config_info(in1,in2,&ci); - if (status != 0) break; - r9 = ci.pcci_info_1.pcci1_data; - r10 = ci.pcci_info_2.pcci2_data; - } - break; - case PAL_VM_TR_READ: /* FIXME: vcpu_get_tr?? */ - printk("PAL_VM_TR_READ NOT IMPLEMENTED, IGNORED!\n"); - break; - case PAL_HALT_INFO: /* inappropriate info for guest? 
*/ - printk("PAL_HALT_INFO NOT IMPLEMENTED, IGNORED!\n"); - break; - default: - printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %d!!!!\n", - index); - break; - } - return ((struct ia64_pal_retval) {status, r9, r10, r11}); -} - -#define NFUNCPTRS 20 - -void print_md(efi_memory_desc_t *md) -{ -#if 1 - printk("domain mem: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", - md->type, md->attribute, md->phys_addr, - md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), - md->num_pages >> (20 - EFI_PAGE_SHIFT)); -#endif -} - -#define LSAPIC_NUM 16 // TEMP -static u32 lsapic_flag=1; - -/* Provide only one LP to guest */ -static int -acpi_update_lsapic (acpi_table_entry_header *header) -{ - struct acpi_table_lsapic *lsapic; - - lsapic = (struct acpi_table_lsapic *) header; - if (!lsapic) - return -EINVAL; - - if (lsapic->flags.enabled && lsapic_flag) { - printk("enable lsapic entry: 0x%lx\n", (u64)lsapic); - lsapic_flag = 0; /* disable all the following processros */ - } else if (lsapic->flags.enabled) { - printk("DISABLE lsapic entry: 0x%lx\n", (u64)lsapic); - lsapic->flags.enabled = 0; - } else - printk("lsapic entry is already disabled: 0x%lx\n", (u64)lsapic); - - return 0; -} - -static int -acpi_update_madt_checksum (unsigned long phys_addr, unsigned long size) -{ - u8 checksum=0; - u8* ptr; - int len; - struct acpi_table_madt* acpi_madt; - - if (!phys_addr || !size) - return -EINVAL; - - acpi_madt = (struct acpi_table_madt *) __va(phys_addr); - acpi_madt->header.checksum=0; - - /* re-calculate MADT checksum */ - ptr = (u8*)acpi_madt; - len = acpi_madt->header.length; - while (len>0){ - checksum = (u8)( checksum + (*ptr++) ); - len--; - } - acpi_madt->header.checksum = 0x0 - checksum; - - return 0; -} - -/* base is physical address of acpi table */ -void touch_acpi_table(void) -{ - u64 count = 0; - count = acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, NR_CPUS); - if ( count < 1) - printk("Error parsing MADT - no LAPIC entires\n"); - printk("Total %d lsapic entry\n", count); - acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum); - - return; -} - - -struct ia64_boot_param * -dom_fw_init (struct domain *d, char *args, int arglen, char *fw_mem, int fw_mem_size) -{ - efi_system_table_t *efi_systab; - efi_runtime_services_t *efi_runtime; - efi_config_table_t *efi_tables; - struct ia64_sal_systab *sal_systab; - efi_memory_desc_t *efi_memmap, *md; - unsigned long *pal_desc, *sal_desc; - struct ia64_sal_desc_entry_point *sal_ed; - struct ia64_boot_param *bp; - unsigned long *pfn; - unsigned char checksum = 0; - char *cp, *cmd_line, *fw_vendor; - int i = 0; - unsigned long maxmem = d->max_pages * PAGE_SIZE; - unsigned long start_mpaddr = ((d==dom0)?dom0_start:0); - -# define MAKE_MD(typ, attr, start, end, abs) \ - do { \ - md = efi_memmap + i++; \ - md->type = typ; \ - md->pad = 0; \ - md->phys_addr = abs ? start : start_mpaddr + start; \ - md->virt_addr = 0; \ - md->num_pages = (end - start) >> 12; \ - md->attribute = attr; \ - print_md(md); \ - } while (0) - -/* FIXME: should check size but for now we have a whole MB to play with. - And if stealing code from fw-emu.c, watch out for new fw_vendor on the end! 
- if (fw_mem_size < sizeof(fw_mem_proto)) { - printf("sys_fw_init: insufficient space for fw_mem\n"); - return 0; - } -*/ - memset(fw_mem, 0, fw_mem_size); - -#ifdef XEN -#else - pal_desc = (unsigned long *) &pal_emulator_static; - sal_desc = (unsigned long *) &sal_emulator; -#endif - - cp = fw_mem; - efi_systab = (void *) cp; cp += sizeof(*efi_systab); - efi_runtime = (void *) cp; cp += sizeof(*efi_runtime); - efi_tables = (void *) cp; cp += NUM_EFI_SYS_TABLES * sizeof(*efi_tables); - sal_systab = (void *) cp; cp += sizeof(*sal_systab); - sal_ed = (void *) cp; cp += sizeof(*sal_ed); - efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap); - bp = (void *) cp; cp += sizeof(*bp); - pfn = (void *) cp; cp += NFUNCPTRS * 2 * sizeof(pfn); - cmd_line = (void *) cp; - - if (args) { - if (arglen >= 1024) - arglen = 1023; - memcpy(cmd_line, args, arglen); - } else { - arglen = 0; - } - cmd_line[arglen] = '\0'; - - memset(efi_systab, 0, sizeof(efi_systab)); - efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; - efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; - efi_systab->hdr.headersize = sizeof(efi_systab->hdr); - cp = fw_vendor = &cmd_line[arglen] + (2-(arglen&1)); // round to 16-bit boundary -#define FW_VENDOR "X\0e\0n\0/\0i\0a\0\066\0\064\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" - cp += sizeof(FW_VENDOR) + (8-((unsigned long)cp & 7)); // round to 64-bit boundary - - memcpy(fw_vendor,FW_VENDOR,sizeof(FW_VENDOR)); - efi_systab->fw_vendor = dom_pa(fw_vendor); - - efi_systab->fw_revision = 1; - efi_systab->runtime = (void *) dom_pa(efi_runtime); - efi_systab->nr_tables = NUM_EFI_SYS_TABLES; - efi_systab->tables = dom_pa(efi_tables); - - efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; - efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; - efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); -#define EFI_HYPERCALL_PATCH(tgt,call) do { \ - dom_efi_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call); \ - tgt = dom_pa(pfn); \ - *pfn++ = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \ - *pfn++ = 0; \ - } while (0) - - EFI_HYPERCALL_PATCH(efi_runtime->get_time,EFI_GET_TIME); - EFI_HYPERCALL_PATCH(efi_runtime->set_time,EFI_SET_TIME); - EFI_HYPERCALL_PATCH(efi_runtime->get_wakeup_time,EFI_GET_WAKEUP_TIME); - EFI_HYPERCALL_PATCH(efi_runtime->set_wakeup_time,EFI_SET_WAKEUP_TIME); - EFI_HYPERCALL_PATCH(efi_runtime->set_virtual_address_map,EFI_SET_VIRTUAL_ADDRESS_MAP); - EFI_HYPERCALL_PATCH(efi_runtime->get_variable,EFI_GET_VARIABLE); - EFI_HYPERCALL_PATCH(efi_runtime->get_next_variable,EFI_GET_NEXT_VARIABLE); - EFI_HYPERCALL_PATCH(efi_runtime->set_variable,EFI_SET_VARIABLE); - EFI_HYPERCALL_PATCH(efi_runtime->get_next_high_mono_count,EFI_GET_NEXT_HIGH_MONO_COUNT); - EFI_HYPERCALL_PATCH(efi_runtime->reset_system,EFI_RESET_SYSTEM); - - efi_tables[0].guid = SAL_SYSTEM_TABLE_GUID; - efi_tables[0].table = dom_pa(sal_systab); - for (i = 1; i < NUM_EFI_SYS_TABLES; i++) { - efi_tables[i].guid = NULL_GUID; - efi_tables[i].table = 0; - } - if (d == dom0) { - printf("Domain0 EFI passthrough:"); - i = 1; - if (efi.mps) { - efi_tables[i].guid = MPS_TABLE_GUID; - efi_tables[i].table = __pa(efi.mps); - printf(" MPS=%0xlx",efi_tables[i].table); - i++; - } - - touch_acpi_table(); - - if (efi.acpi20) { - efi_tables[i].guid = ACPI_20_TABLE_GUID; - efi_tables[i].table = __pa(efi.acpi20); - printf(" ACPI 2.0=%0xlx",efi_tables[i].table); - i++; - } - if (efi.acpi) { - efi_tables[i].guid = ACPI_TABLE_GUID; - efi_tables[i].table = __pa(efi.acpi); - printf(" 
ACPI=%0xlx",efi_tables[i].table); - i++; - } - if (efi.smbios) { - efi_tables[i].guid = SMBIOS_TABLE_GUID; - efi_tables[i].table = __pa(efi.smbios); - printf(" SMBIOS=%0xlx",efi_tables[i].table); - i++; - } - if (efi.hcdp) { - efi_tables[i].guid = HCDP_TABLE_GUID; - efi_tables[i].table = __pa(efi.hcdp); - printf(" HCDP=%0xlx",efi_tables[i].table); - i++; - } - printf("\n"); - } - - /* fill in the SAL system table: */ - memcpy(sal_systab->signature, "SST_", 4); - sal_systab->size = sizeof(*sal_systab); - sal_systab->sal_rev_minor = 1; - sal_systab->sal_rev_major = 0; - sal_systab->entry_count = 1; - - strcpy(sal_systab->oem_id, "Xen/ia64"); - strcpy(sal_systab->product_id, "Xen/ia64"); - - /* fill in an entry point: */ - sal_ed->type = SAL_DESC_ENTRY_POINT; -#define FW_HYPERCALL_PATCH(tgt,call,ret) do { \ - dom_fw_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call,ret); \ - tgt = FW_HYPERCALL_##call##_PADDR + ((d==dom0)?dom0_start:0); \ - } while (0) - FW_HYPERCALL_PATCH(sal_ed->pal_proc,PAL_CALL,0); - FW_HYPERCALL_PATCH(sal_ed->sal_proc,SAL_CALL,1); - sal_ed->gp = 0; // will be ignored - - for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp) - checksum += *cp; - - sal_systab->checksum = -checksum; - - /* simulate 1MB free memory at physical address zero */ - i = 0; - MAKE_MD(EFI_BOOT_SERVICES_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0); - /* hypercall patches live here, masquerade as reserved PAL memory */ - MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB,HYPERCALL_START,HYPERCALL_END, 0); - MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 0); -#ifdef PASS_THRU_IOPORT_SPACE - if (d == dom0 && !running_on_sim) { - /* pass through the I/O port space */ - efi_memory_desc_t *efi_get_io_md(void); - efi_memory_desc_t *ia64_efi_io_md = efi_get_io_md(); - u32 type; - u64 iostart, ioend, ioattr; - - type = ia64_efi_io_md->type; - iostart = ia64_efi_io_md->phys_addr; - ioend = ia64_efi_io_md->phys_addr + - (ia64_efi_io_md->num_pages << 12); - ioattr = ia64_efi_io_md->attribute; - MAKE_MD(type,ioattr,iostart,ioend, 1); - } - else - MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); -#endif - - bp->efi_systab = dom_pa(fw_mem); - bp->efi_memmap = dom_pa(efi_memmap); - bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t); - bp->efi_memdesc_size = sizeof(efi_memory_desc_t); - bp->efi_memdesc_version = 1; - bp->command_line = dom_pa(cmd_line); - bp->console_info.num_cols = 80; - bp->console_info.num_rows = 25; - bp->console_info.orig_x = 0; - bp->console_info.orig_y = 24; - bp->fpswa = 0; - - return bp; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/domain.c --- a/xen/arch/ia64/domain.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1103 +0,0 @@ -/* - * Copyright (C) 1995 Linus Torvalds - * - * Pentium III FXSR, SSE support - * Gareth Hughes <gareth@xxxxxxxxxxx>, May 2000 - * - * Copyright (C) 2005 Intel Co - * Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx> - * - * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx> Add CONFIG_VTI domain support - */ - -#include <xen/config.h> -#include <xen/lib.h> -#include <xen/errno.h> -#include <xen/sched.h> -#include <xen/smp.h> -#include <xen/delay.h> -#include <xen/softirq.h> -#include <xen/mm.h> -#include <asm/ptrace.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/desc.h> -//#include <asm/mpspec.h> -#include <xen/irq.h> -#include <xen/event.h> -//#include <xen/shadow.h> -#include <xen/console.h> - -#include <xen/elf.h> -//#include <asm/page.h> -#include <asm/pgalloc.h> -#include 
<asm/dma.h> /* for MAX_DMA_ADDRESS */ - -#include <asm/asm-offsets.h> /* for IA64_THREAD_INFO_SIZE */ - -#include <asm/vcpu.h> /* for function declarations */ -#include <public/arch-ia64.h> -#include <asm/vmx.h> -#include <asm/vmx_vcpu.h> -#include <asm/vmx_vpd.h> -#include <asm/pal.h> -#include <public/io/ioreq.h> - -#define CONFIG_DOMAIN0_CONTIGUOUS -unsigned long dom0_start = -1L; -unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable -//FIXME: alignment should be 256MB, lest Linux use a 256MB page size -unsigned long dom0_align = 256*1024*1024; -#ifdef DOMU_BUILD_STAGING -unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable -unsigned long domU_staging_start; -unsigned long domU_staging_align = 64*1024; -unsigned long *domU_staging_area; -#endif - -// initialized by arch/ia64/setup.c:find_initrd() -unsigned long initrd_start = 0, initrd_end = 0; - -#define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend)) - -//extern int loadelfimage(char *); -extern int readelfimage_base_and_size(char *, unsigned long, - unsigned long *, unsigned long *, unsigned long *); - -unsigned long map_domain_page0(struct domain *); -extern unsigned long dom_fw_setup(struct domain *, char *, int); - -/* this belongs in include/asm, but there doesn't seem to be a suitable place */ -void free_perdomain_pt(struct domain *d) -{ - printf("free_perdomain_pt: not implemented\n"); - //free_page((unsigned long)d->mm.perdomain_pt); -} - -int hlt_counter; - -void disable_hlt(void) -{ - hlt_counter++; -} - -void enable_hlt(void) -{ - hlt_counter--; -} - -static void default_idle(void) -{ - if ( hlt_counter == 0 ) - { - local_irq_disable(); - if ( !softirq_pending(smp_processor_id()) ) - safe_halt(); - //else - local_irq_enable(); - } -} - -void continue_cpu_idle_loop(void) -{ - int cpu = smp_processor_id(); - for ( ; ; ) - { -#ifdef IA64 -// __IRQ_STAT(cpu, idle_timestamp) = jiffies -#else - irq_stat[cpu].idle_timestamp = jiffies; -#endif - while ( !softirq_pending(cpu) ) - default_idle(); - raise_softirq(SCHEDULE_SOFTIRQ); - do_softirq(); - } -} - -void startup_cpu_idle_loop(void) -{ - /* Just some sanity to ensure that the scheduler is set up okay. */ - ASSERT(current->domain == IDLE_DOMAIN_ID); - raise_softirq(SCHEDULE_SOFTIRQ); - do_softirq(); - - /* - * Declares CPU setup done to the boot processor. - * Therefore memory barrier to ensure state is visible. - */ - smp_mb(); -#if 0 -//do we have to ensure the idle task has a shared page so that, for example, -//region registers can be loaded from it. Apparently not... - idle0_task.shared_info = (void *)alloc_xenheap_page(); - memset(idle0_task.shared_info, 0, PAGE_SIZE); - /* pin mapping */ - // FIXME: Does this belong here? Or do only at domain switch time? - { - /* WARNING: following must be inlined to avoid nested fault */ - unsigned long psr = ia64_clear_ic(); - ia64_itr(0x2, IA64_TR_SHARED_INFO, SHAREDINFO_ADDR, - pte_val(pfn_pte(ia64_tpa(idle0_task.shared_info) >> PAGE_SHIFT, PAGE_KERNEL)), - PAGE_SHIFT); - ia64_set_psr(psr); - ia64_srlz_i(); - } -#endif - - continue_cpu_idle_loop(); -} - -struct vcpu *arch_alloc_vcpu_struct(void) -{ - /* Per-vp stack is used here. 
So we need keep vcpu - * same page as per-vp stack */ - return alloc_xenheap_pages(KERNEL_STACK_SIZE_ORDER); -} - -void arch_free_vcpu_struct(struct vcpu *v) -{ - free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER); -} - -static void init_switch_stack(struct vcpu *v) -{ - struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; - struct switch_stack *sw = (struct switch_stack *) regs - 1; - extern void ia64_ret_from_clone; - - memset(sw, 0, sizeof(struct switch_stack) + sizeof(struct pt_regs)); - sw->ar_bspstore = (unsigned long)v + IA64_RBS_OFFSET; - sw->b0 = (unsigned long) &ia64_ret_from_clone; - sw->ar_fpsr = FPSR_DEFAULT; - v->arch._thread.ksp = (unsigned long) sw - 16; - // stay on kernel stack because may get interrupts! - // ia64_ret_from_clone (which b0 gets in new_thread) switches - // to user stack - v->arch._thread.on_ustack = 0; - memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96); -} - -void arch_do_createdomain(struct vcpu *v) -{ - struct domain *d = v->domain; - struct thread_info *ti = alloc_thread_info(v); - - /* Clear thread_info to clear some important fields, like preempt_count */ - memset(ti, 0, sizeof(struct thread_info)); - init_switch_stack(v); - - d->shared_info = (void *)alloc_xenheap_page(); - if (!d->shared_info) { - printk("ERROR/HALTING: CAN'T ALLOC PAGE\n"); - while (1); - } - memset(d->shared_info, 0, PAGE_SIZE); - d->shared_info->vcpu_data[0].arch.privregs = - alloc_xenheap_pages(get_order(sizeof(mapped_regs_t))); - printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[0].arch.privregs); - memset(d->shared_info->vcpu_data[0].arch.privregs, 0, PAGE_SIZE); - v->vcpu_info = &(d->shared_info->vcpu_data[0]); - - d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME - -#ifdef CONFIG_VTI - /* Per-domain vTLB and vhpt implementation. Now vmx domain will stick - * to this solution. Maybe it can be deferred until we know created - * one as vmx domain */ - v->arch.vtlb = init_domain_tlb(v); -#endif - - /* We may also need emulation rid for region4, though it's unlikely - * to see guest issue uncacheable access in metaphysical mode. But - * keep such info here may be more sane. 
- */ - if (((d->arch.metaphysical_rr0 = allocate_metaphysical_rr()) == -1UL) - || ((d->arch.metaphysical_rr4 = allocate_metaphysical_rr()) == -1UL)) - BUG(); - VCPU(v, metaphysical_mode) = 1; - v->arch.metaphysical_rr0 = d->arch.metaphysical_rr0; - v->arch.metaphysical_rr4 = d->arch.metaphysical_rr4; - v->arch.metaphysical_saved_rr0 = d->arch.metaphysical_rr0; - v->arch.metaphysical_saved_rr4 = d->arch.metaphysical_rr4; -#define DOMAIN_RID_BITS_DEFAULT 18 - if (!allocate_rid_range(d,DOMAIN_RID_BITS_DEFAULT)) // FIXME - BUG(); - v->arch.starting_rid = d->arch.starting_rid; - v->arch.ending_rid = d->arch.ending_rid; - // the following will eventually need to be negotiated dynamically - d->xen_vastart = XEN_START_ADDR; - d->xen_vaend = XEN_END_ADDR; - d->shared_info_va = SHAREDINFO_ADDR; - d->arch.breakimm = 0x1000; - v->arch.breakimm = d->arch.breakimm; - - d->arch.mm = xmalloc(struct mm_struct); - if (unlikely(!d->arch.mm)) { - printk("Can't allocate mm_struct for domain %d\n",d->domain_id); - return -ENOMEM; - } - memset(d->arch.mm, 0, sizeof(*d->arch.mm)); - d->arch.mm->pgd = pgd_alloc(d->arch.mm); - if (unlikely(!d->arch.mm->pgd)) { - printk("Can't allocate pgd for domain %d\n",d->domain_id); - return -ENOMEM; - } -} - -void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) -{ - struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; - - printf("arch_getdomaininfo_ctxt\n"); - c->regs = *regs; - c->vcpu.evtchn_vector = v->vcpu_info->arch.evtchn_vector; -#if 0 - if (c->vcpu.privregs && copy_to_user(c->vcpu.privregs, - v->vcpu_info->arch.privregs, sizeof(mapped_regs_t))) { - printk("Bad ctxt address: 0x%lx\n", c->vcpu.privregs); - return -EFAULT; - } -#endif - - c->shared = v->domain->shared_info->arch; -} - -int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c) -{ - struct pt_regs *regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; - struct domain *d = v->domain; - int i, rc, ret; - unsigned long progress = 0; - - printf("arch_set_info_guest\n"); - if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) - return 0; - - if (c->flags & VGCF_VMX_GUEST) { - if (!vmx_enabled) { - printk("No VMX hardware feature for vmx domain.\n"); - return -EINVAL; - } - - vmx_setup_platform(v, c); - } - - *regs = c->regs; - new_thread(v, regs->cr_iip, 0, 0); - - v->vcpu_info->arch.evtchn_vector = c->vcpu.evtchn_vector; - if ( c->vcpu.privregs && copy_from_user(v->vcpu_info->arch.privregs, - c->vcpu.privregs, sizeof(mapped_regs_t))) { - printk("Bad ctxt address in arch_set_info_guest: 0x%lx\n", c->vcpu.privregs); - return -EFAULT; - } - - v->arch.domain_itm_last = -1L; - d->shared_info->arch = c->shared; - - /* Don't redo final setup */ - set_bit(_VCPUF_initialised, &v->vcpu_flags); - return 0; -} - -void arch_do_boot_vcpu(struct vcpu *v) -{ - struct domain *d = v->domain; - printf("arch_do_boot_vcpu: not implemented\n"); - - d->shared_info->vcpu_data[v->vcpu_id].arch.privregs = - alloc_xenheap_pages(get_order(sizeof(mapped_regs_t))); - printf("arch_vcpu_info=%p\n", d->shared_info->vcpu_data[v->vcpu_id].arch.privregs); - memset(d->shared_info->vcpu_data[v->vcpu_id].arch.privregs, 0, PAGE_SIZE); - return; -} - -void domain_relinquish_resources(struct domain *d) -{ - /* FIXME */ - printf("domain_relinquish_resources: not implemented\n"); -} - -// heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread() -// and linux/arch/ia64/kernel/process.c:kernel_thread() -void new_thread(struct vcpu *v, - unsigned long start_pc, - 
unsigned long start_stack, - unsigned long start_info) -{ - struct domain *d = v->domain; - struct pt_regs *regs; - struct ia64_boot_param *bp; - extern char saved_command_line[]; - - -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (d == dom0) start_pc += dom0_start; -#endif - - regs = (struct pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; - if (VMX_DOMAIN(v)) { - /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */ - regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */ - } else { - regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) - | IA64_PSR_BITS_TO_SET | IA64_PSR_BN - & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS); - regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2 - } - regs->cr_iip = start_pc; - regs->cr_ifs = 1UL << 63; /* or clear? */ - regs->ar_fpsr = FPSR_DEFAULT; - - if (VMX_DOMAIN(v)) { -#ifdef CONFIG_VTI - vmx_init_all_rr(v); - if (d == dom0) - VMX_VPD(v,vgr[12]) = dom_fw_setup(d,saved_command_line,256L); - /* Virtual processor context setup */ - VMX_VPD(v, vpsr) = IA64_PSR_BN; - VPD_CR(v, dcr) = 0; -#endif - } else { - init_all_rr(v); - if (d == dom0) - regs->r28 = dom_fw_setup(d,saved_command_line,256L); - else { - regs->ar_rsc |= (2 << 2); /* force PL2/3 */ - regs->r28 = dom_fw_setup(d,"nomca nosmp xencons=tty0 console=tty0 root=/dev/hda1",256L); //FIXME - } - VCPU(v, banknum) = 1; - VCPU(v, metaphysical_mode) = 1; - d->shared_info->arch.flags = (d == dom0) ? (SIF_INITDOMAIN|SIF_PRIVILEGED|SIF_BLK_BE_DOMAIN|SIF_NET_BE_DOMAIN|SIF_USB_BE_DOMAIN) : 0; - } -} - -static struct page * map_new_domain0_page(unsigned long mpaddr) -{ - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - printk("map_new_domain0_page: bad domain0 mpaddr %p!\n",mpaddr); -printk("map_new_domain0_page: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size); - while(1); - } - return pfn_to_page((mpaddr >> PAGE_SHIFT)); -} - -/* allocate new page for domain and map it to the specified metaphysical addr */ -struct page * map_new_domain_page(struct domain *d, unsigned long mpaddr) -{ - struct mm_struct *mm = d->arch.mm; - struct page *p = (struct page *)0; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; -extern unsigned long vhpt_paddr, vhpt_pend; - - if (!mm->pgd) { - printk("map_new_domain_page: domain pgd must exist!\n"); - return(p); - } - pgd = pgd_offset(mm,mpaddr); - if (pgd_none(*pgd)) - pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr)); - - pud = pud_offset(pgd, mpaddr); - if (pud_none(*pud)) - pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr)); - - pmd = pmd_offset(pud, mpaddr); - if (pmd_none(*pmd)) - pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr)); -// pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr)); - - pte = pte_offset_map(pmd, mpaddr); - if (pte_none(*pte)) { -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (d == dom0) p = map_new_domain0_page(mpaddr); - else -#endif - { - p = alloc_domheap_page(d); - // zero out pages for security reasons - memset(__va(page_to_phys(p)),0,PAGE_SIZE); - } - if (unlikely(!p)) { -printf("map_new_domain_page: Can't alloc!!!! 
Aaaargh!\n"); - return(p); - } -if (unlikely(page_to_phys(p) > vhpt_paddr && page_to_phys(p) < vhpt_pend)) { - printf("map_new_domain_page: reassigned vhpt page %p!!\n",page_to_phys(p)); -} - set_pte(pte, pfn_pte(page_to_phys(p) >> PAGE_SHIFT, - __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX))); - } - else printk("map_new_domain_page: mpaddr %lx already mapped!\n",mpaddr); - return p; -} - -/* map a physical address to the specified metaphysical addr */ -void map_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr) -{ - struct mm_struct *mm = d->arch.mm; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - if (!mm->pgd) { - printk("map_domain_page: domain pgd must exist!\n"); - return; - } - pgd = pgd_offset(mm,mpaddr); - if (pgd_none(*pgd)) - pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr)); - - pud = pud_offset(pgd, mpaddr); - if (pud_none(*pud)) - pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr)); - - pmd = pmd_offset(pud, mpaddr); - if (pmd_none(*pmd)) - pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm,mpaddr)); -// pmd_populate(mm, pmd, pte_alloc_one(mm,mpaddr)); - - pte = pte_offset_map(pmd, mpaddr); - if (pte_none(*pte)) { - set_pte(pte, pfn_pte(physaddr >> PAGE_SHIFT, - __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX))); - } - else printk("map_domain_page: mpaddr %lx already mapped!\n",mpaddr); -} - -void mpafoo(unsigned long mpaddr) -{ - extern unsigned long privop_trace; - if (mpaddr == 0x3800) - privop_trace = 1; -} - -unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr) -{ - struct mm_struct *mm = d->arch.mm; - pgd_t *pgd = pgd_offset(mm, mpaddr); - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (d == dom0) { - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - //printk("lookup_domain_mpa: bad dom0 mpaddr %p!\n",mpaddr); -//printk("lookup_domain_mpa: start=%p,end=%p!\n",dom0_start,dom0_start+dom0_size); - mpafoo(mpaddr); - } - pte_t pteval = pfn_pte(mpaddr >> PAGE_SHIFT, - __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)); - pte = &pteval; - return *(unsigned long *)pte; - } -#endif -tryagain: - if (pgd_present(*pgd)) { - pud = pud_offset(pgd,mpaddr); - if (pud_present(*pud)) { - pmd = pmd_offset(pud,mpaddr); - if (pmd_present(*pmd)) { - pte = pte_offset_map(pmd,mpaddr); - if (pte_present(*pte)) { -//printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte)); - return *(unsigned long *)pte; - } - } - } - } - /* if lookup fails and mpaddr is "legal", "create" the page */ - if ((mpaddr >> PAGE_SHIFT) < d->max_pages) { - if (map_new_domain_page(d,mpaddr)) goto tryagain; - } - printk("lookup_domain_mpa: bad mpa %p (> %p\n", - mpaddr,d->max_pages<<PAGE_SHIFT); - mpafoo(mpaddr); - return 0; -} - -// FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE -#ifndef CONFIG_VTI -unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr) -{ - unsigned long pte = lookup_domain_mpa(d,mpaddr); - unsigned long imva; - - pte &= _PAGE_PPN_MASK; - imva = __va(pte); - imva |= mpaddr & ~PAGE_MASK; - return(imva); -} -#else // CONFIG_VTI -unsigned long domain_mpa_to_imva(struct domain *d, unsigned long mpaddr) -{ - unsigned long imva = __gpa_to_mpa(d, mpaddr); - - return __va(imva); -} -#endif // CONFIG_VTI - -// remove following line if not privifying in memory -//#define HAVE_PRIVIFY_MEMORY -#ifndef HAVE_PRIVIFY_MEMORY -#define privify_memory(x,y) do {} while(0) -#endif - -// see arch/x86/xxx/domain_build.c -int elf_sanity_check(Elf_Ehdr *ehdr) -{ - return 
(IS_ELF(*ehdr)); -} - -static void copy_memory(void *dst, void *src, int size) -{ - int remain; - - if (IS_XEN_ADDRESS(dom0,src)) { - memcpy(dst,src,size); - } - else { - printf("About to call __copy_from_user(%p,%p,%d)\n", - dst,src,size); - while (remain = __copy_from_user(dst,src,size)) { - printf("incomplete user copy, %d remain of %d\n", - remain,size); - dst += size - remain; src += size - remain; - size -= remain; - } - } -} - -void loaddomainelfimage(struct domain *d, unsigned long image_start) -{ - char *elfbase = image_start; - //Elf_Ehdr *ehdr = (Elf_Ehdr *)image_start; - Elf_Ehdr ehdr; - Elf_Phdr phdr; - int h, filesz, memsz, paddr; - unsigned long elfaddr, dom_mpaddr, dom_imva; - struct page *p; - unsigned long pteval; - - copy_memory(&ehdr,image_start,sizeof(Elf_Ehdr)); - for ( h = 0; h < ehdr.e_phnum; h++ ) { - copy_memory(&phdr,elfbase + ehdr.e_phoff + (h*ehdr.e_phentsize), - sizeof(Elf_Phdr)); - //if ( !is_loadable_phdr(phdr) ) - if ((phdr.p_type != PT_LOAD)) { - continue; - } - filesz = phdr.p_filesz; memsz = phdr.p_memsz; - elfaddr = elfbase + phdr.p_offset; - dom_mpaddr = phdr.p_paddr; -//printf("p_offset: %x, size=%x\n",elfaddr,filesz); -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (d == dom0) { - if (dom_mpaddr+memsz>dom0_size || dom_mpaddr+filesz>dom0_size) { - printf("Domain0 doesn't fit in allocated space!\n"); - while(1); - } - dom_imva = __va(dom_mpaddr + dom0_start); - copy_memory(dom_imva,elfaddr,filesz); - if (memsz > filesz) memset(dom_imva+filesz,0,memsz-filesz); -//FIXME: This test for code seems to find a lot more than objdump -x does - if (phdr.p_flags & PF_X) privify_memory(dom_imva,filesz); - } - else -#endif - while (memsz > 0) { -#ifdef DOMU_AUTO_RESTART - pteval = lookup_domain_mpa(d,dom_mpaddr); - if (pteval) dom_imva = __va(pteval & _PFN_MASK); - else { printf("loaddomainelfimage: BAD!\n"); while(1); } -#else - p = map_new_domain_page(d,dom_mpaddr); - if (unlikely(!p)) BUG(); - dom_imva = __va(page_to_phys(p)); -#endif - if (filesz > 0) { - if (filesz >= PAGE_SIZE) - copy_memory(dom_imva,elfaddr,PAGE_SIZE); - else { // copy partial page, zero the rest of page - copy_memory(dom_imva,elfaddr,filesz); - memset(dom_imva+filesz,0,PAGE_SIZE-filesz); - } -//FIXME: This test for code seems to find a lot more than objdump -x does - if (phdr.p_flags & PF_X) - privify_memory(dom_imva,PAGE_SIZE); - } - else if (memsz > 0) // always zero out entire page - memset(dom_imva,0,PAGE_SIZE); - memsz -= PAGE_SIZE; filesz -= PAGE_SIZE; - elfaddr += PAGE_SIZE; dom_mpaddr += PAGE_SIZE; - } - } -} - -int -parsedomainelfimage(char *elfbase, unsigned long elfsize, unsigned long *entry) -{ - Elf_Ehdr ehdr; - - copy_memory(&ehdr,elfbase,sizeof(Elf_Ehdr)); - - if ( !elf_sanity_check(&ehdr) ) { - printk("ELF sanity check failed.\n"); - return -EINVAL; - } - - if ( (ehdr.e_phoff + (ehdr.e_phnum * ehdr.e_phentsize)) > elfsize ) - { - printk("ELF program headers extend beyond end of image.\n"); - return -EINVAL; - } - - if ( (ehdr.e_shoff + (ehdr.e_shnum * ehdr.e_shentsize)) > elfsize ) - { - printk("ELF section headers extend beyond end of image.\n"); - return -EINVAL; - } - -#if 0 - /* Find the section-header strings table. 
*/ - if ( ehdr.e_shstrndx == SHN_UNDEF ) - { - printk("ELF image has no section-header strings table (shstrtab).\n"); - return -EINVAL; - } -#endif - - *entry = ehdr.e_entry; -printf("parsedomainelfimage: entry point = %p\n",*entry); - - return 0; -} - - -void alloc_dom0(void) -{ -#ifdef CONFIG_DOMAIN0_CONTIGUOUS - if (platform_is_hp_ski()) { - dom0_size = 128*1024*1024; //FIXME: Should be configurable - } - printf("alloc_dom0: starting (initializing %d MB...)\n",dom0_size/(1024*1024)); - - /* FIXME: The first trunk (say 256M) should always be assigned to - * Dom0, since Dom0's physical == machine address for DMA purpose. - * Some old version linux, like 2.4, assumes physical memory existing - * in 2nd 64M space. - */ - dom0_start = alloc_boot_pages( - dom0_size >> PAGE_SHIFT, dom0_align >> PAGE_SHIFT); - dom0_start <<= PAGE_SHIFT; - if (!dom0_start) { - printf("construct_dom0: can't allocate contiguous memory size=%p\n", - dom0_size); - while(1); - } - printf("alloc_dom0: dom0_start=%p\n",dom0_start); -#else - dom0_start = 0; -#endif - -} - -#ifdef DOMU_BUILD_STAGING -void alloc_domU_staging(void) -{ - domU_staging_size = 32*1024*1024; //FIXME: Should be configurable - printf("alloc_domU_staging: starting (initializing %d MB...)\n",domU_staging_size/(1024*1024)); - domU_staging_start = alloc_boot_pages( - domU_staging_size >> PAGE_SHIFT, domU_staging_align >> PAGE_SHIFT); - domU_staging_start <<= PAGE_SHIFT; - if (!domU_staging_size) { - printf("alloc_domU_staging: can't allocate, spinning...\n"); - while(1); - } - else domU_staging_area = (unsigned long *)__va(domU_staging_start); - printf("alloc_domU_staging: domU_staging_area=%p\n",domU_staging_area); - -} - -unsigned long -domU_staging_read_8(unsigned long at) -{ - // no way to return errors so just do it - return domU_staging_area[at>>3]; - -} - -unsigned long -domU_staging_write_32(unsigned long at, unsigned long a, unsigned long b, - unsigned long c, unsigned long d) -{ - if (at + 32 > domU_staging_size) return -1; - if (at & 0x1f) return -1; - at >>= 3; - domU_staging_area[at++] = a; - domU_staging_area[at++] = b; - domU_staging_area[at++] = c; - domU_staging_area[at] = d; - return 0; - -} -#endif - -/* - * Domain 0 has direct access to all devices absolutely. However - * the major point of this stub here, is to allow alloc_dom_mem - * handled with order > 0 request. Dom0 requires that bit set to - * allocate memory for other domains. - */ -void physdev_init_dom0(struct domain *d) -{ - set_bit(_DOMF_physdev_access, &d->domain_flags); -} - -extern unsigned long running_on_sim; -unsigned int vmx_dom0 = 0; -int construct_dom0(struct domain *d, - unsigned long image_start, unsigned long image_len, - unsigned long initrd_start, unsigned long initrd_len, - char *cmdline) -{ - char *dst; - int i, rc; - unsigned long pfn, mfn; - unsigned long nr_pt_pages; - unsigned long count; - unsigned long alloc_start, alloc_end; - struct pfn_info *page = NULL; - start_info_t *si; - struct vcpu *v = d->vcpu[0]; - - struct domain_setup_info dsi; - unsigned long p_start; - unsigned long pkern_start; - unsigned long pkern_entry; - unsigned long pkern_end; - unsigned long ret, progress = 0; - -//printf("construct_dom0: starting\n"); - /* Sanity! 
*/ -#ifndef CLONE_DOMAIN0 - if ( d != dom0 ) - BUG(); - if ( test_bit(_DOMF_constructed, &d->domain_flags) ) - BUG(); -#endif - - memset(&dsi, 0, sizeof(struct domain_setup_info)); - - printk("*** LOADING DOMAIN 0 ***\n"); - - alloc_start = dom0_start; - alloc_end = dom0_start + dom0_size; - d->tot_pages = d->max_pages = dom0_size/PAGE_SIZE; - image_start = __va(ia64_boot_param->initrd_start); - image_len = ia64_boot_param->initrd_size; -//printk("image_start=%lx, image_len=%lx\n",image_start,image_len); -//printk("First word of image: %lx\n",*(unsigned long *)image_start); - -//printf("construct_dom0: about to call parseelfimage\n"); - dsi.image_addr = (unsigned long)image_start; - dsi.image_len = image_len; - rc = parseelfimage(&dsi); - if ( rc != 0 ) - return rc; - -#ifdef CONFIG_VTI - /* Temp workaround */ - if (running_on_sim) - dsi.xen_section_string = (char *)1; - - /* Check whether dom0 is vti domain */ - if ((!vmx_enabled) && !dsi.xen_section_string) { - printk("Lack of hardware support for unmodified vmx dom0\n"); - panic(""); - } - - if (vmx_enabled && !dsi.xen_section_string) { - printk("Dom0 is vmx domain!\n"); - vmx_dom0 = 1; - } -#endif - - p_start = dsi.v_start; - pkern_start = dsi.v_kernstart; - pkern_end = dsi.v_kernend; - pkern_entry = dsi.v_kernentry; - -//printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",p_start,pkern_start,pkern_end,pkern_entry); - - if ( (p_start & (PAGE_SIZE-1)) != 0 ) - { - printk("Initial guest OS must load to a page boundary.\n"); - return -EINVAL; - } - - printk("METAPHYSICAL MEMORY ARRANGEMENT:\n" - " Kernel image: %lx->%lx\n" - " Entry address: %lx\n" - " Init. ramdisk: (NOT IMPLEMENTED YET)\n", - pkern_start, pkern_end, pkern_entry); - - if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) ) - { - printk("Initial guest OS requires too much space\n" - "(%luMB is greater than %luMB limit)\n", - (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20); - return -ENOMEM; - } - - // if high 3 bits of pkern start are non-zero, error - - // if pkern end is after end of metaphysical memory, error - // (we should be able to deal with this... later) - - - // - -#if 0 - strcpy(d->name,"Domain0"); -#endif - - /* Mask all upcalls... */ - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; - -#ifdef CONFIG_VTI - /* Construct a frame-allocation list for the initial domain, since these - * pages are allocated by boot allocator and pfns are not set properly - */ - for ( mfn = (alloc_start>>PAGE_SHIFT); - mfn < (alloc_end>>PAGE_SHIFT); - mfn++ ) - { - page = &frame_table[mfn]; - page_set_owner(page, d); - page->u.inuse.type_info = 0; - page->count_info = PGC_allocated | 1; - list_add_tail(&page->list, &d->page_list); - - /* Construct 1:1 mapping */ - machine_to_phys_mapping[mfn] = mfn; - } - - /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt - * for dom0 - */ - d->arch.pmt = NULL; -#endif - - /* Copy the OS image. */ - loaddomainelfimage(d,image_start); - - /* Copy the initial ramdisk. */ - //if ( initrd_len != 0 ) - // memcpy((void *)vinitrd_start, initrd_start, initrd_len); - - /* Sync d/i cache conservatively */ - ret = ia64_pal_cache_flush(4, 0, &progress, NULL); - if (ret != PAL_STATUS_SUCCESS) - panic("PAL CACHE FLUSH failed for dom0.\n"); - printk("Sync i/d cache for dom0 image SUCC\n"); - -#if 0 - /* Set up start info area. 
*/ - //si = (start_info_t *)vstartinfo_start; - memset(si, 0, PAGE_SIZE); - si->nr_pages = d->tot_pages; - si->shared_info = virt_to_phys(d->shared_info); - si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; - //si->pt_base = vpt_start; - //si->nr_pt_frames = nr_pt_pages; - //si->mfn_list = vphysmap_start; - - if ( initrd_len != 0 ) - { - //si->mod_start = vinitrd_start; - si->mod_len = initrd_len; - printk("Initrd len 0x%lx, start at 0x%08lx\n", - si->mod_len, si->mod_start); - } - - dst = si->cmd_line; - if ( cmdline != NULL ) - { - for ( i = 0; i < 255; i++ ) - { - if ( cmdline[i] == '\0' ) - break; - *dst++ = cmdline[i]; - } - } - *dst = '\0'; - - zap_low_mappings(); /* Do the same for the idle page tables. */ -#endif - - /* Give up the VGA console if DOM0 is configured to grab it. */ - if (cmdline != NULL) - console_endboot(strstr(cmdline, "tty0") != NULL); - - /* VMX specific construction for Dom0, if hardware supports VMX - * and Dom0 is unmodified image - */ - printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d); - if (vmx_dom0) - vmx_final_setup_domain(dom0); - - set_bit(_DOMF_constructed, &d->domain_flags); - - new_thread(v, pkern_entry, 0, 0); - physdev_init_dom0(d); - - // FIXME: Hack for keyboard input -#ifdef CLONE_DOMAIN0 -if (d == dom0) -#endif - serial_input_init(); - if (d == dom0) { - VCPU(v, delivery_mask[0]) = -1L; - VCPU(v, delivery_mask[1]) = -1L; - VCPU(v, delivery_mask[2]) = -1L; - VCPU(v, delivery_mask[3]) = -1L; - } - else __set_bit(0x30, VCPU(v, delivery_mask)); - - return 0; -} - -// FIXME: When dom0 can construct domains, this goes away (or is rewritten) -int construct_domU(struct domain *d, - unsigned long image_start, unsigned long image_len, - unsigned long initrd_start, unsigned long initrd_len, - char *cmdline) -{ - int i, rc; - struct vcpu *v = d->vcpu[0]; - unsigned long pkern_entry; - -#ifndef DOMU_AUTO_RESTART - if ( test_bit(_DOMF_constructed, &d->domain_flags) ) BUG(); -#endif - - printk("*** LOADING DOMAIN %d ***\n",d->domain_id); - - d->max_pages = dom0_size/PAGE_SIZE; // FIXME: use dom0 size - // FIXME: use domain0 command line - rc = parsedomainelfimage(image_start, image_len, &pkern_entry); - printk("parsedomainelfimage returns %d\n",rc); - if ( rc != 0 ) return rc; - - /* Mask all upcalls... */ - for ( i = 0; i < MAX_VIRT_CPUS; i++ ) - d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1; - - /* Copy the OS image. 
*/ - printk("calling loaddomainelfimage(%p,%p)\n",d,image_start); - loaddomainelfimage(d,image_start); - printk("loaddomainelfimage returns\n"); - - set_bit(_DOMF_constructed, &d->domain_flags); - - printk("calling new_thread, entry=%p\n",pkern_entry); -#ifdef DOMU_AUTO_RESTART - v->domain->arch.image_start = image_start; - v->domain->arch.image_len = image_len; - v->domain->arch.entry = pkern_entry; -#endif - new_thread(v, pkern_entry, 0, 0); - printk("new_thread returns\n"); - __set_bit(0x30, VCPU(v, delivery_mask)); - - return 0; -} - -#ifdef DOMU_AUTO_RESTART -void reconstruct_domU(struct vcpu *v) -{ - /* re-copy the OS image to reset data values to original */ - printk("reconstruct_domU: restarting domain %d...\n", - v->domain->domain_id); - loaddomainelfimage(v->domain,v->domain->arch.image_start); - new_thread(v, v->domain->arch.entry, 0, 0); -} -#endif - -// FIXME: When dom0 can construct domains, this goes away (or is rewritten) -int launch_domainU(unsigned long size) -{ -#ifdef CLONE_DOMAIN0 - static int next = CLONE_DOMAIN0+1; -#else - static int next = 1; -#endif - - struct domain *d = do_createdomain(next,0); - if (!d) { - printf("launch_domainU: couldn't create\n"); - return 1; - } - else next++; - if (construct_domU(d, (unsigned long)domU_staging_area, size,0,0,0)) { - printf("launch_domainU: couldn't construct(id=%d,%lx,%lx)\n", - d->domain_id,domU_staging_area,size); - return 2; - } - domain_unpause_by_systemcontroller(d); -} - -void machine_restart(char * __unused) -{ - if (platform_is_hp_ski()) dummy(); - printf("machine_restart called: spinning....\n"); - while(1); -} - -void machine_halt(void) -{ - if (platform_is_hp_ski()) dummy(); - printf("machine_halt called: spinning....\n"); - while(1); -} - -void dummy_called(char *function) -{ - if (platform_is_hp_ski()) asm("break 0;;"); - printf("dummy called in %s: spinning....\n", function); - while(1); -} - - -#if 0 -void switch_to(struct vcpu *prev, struct vcpu *next) -{ - struct vcpu *last; - - __switch_to(prev,next,last); - //set_current(next); -} -#endif - -void domain_pend_keyboard_interrupt(int irq) -{ - vcpu_pend_interrupt(dom0->vcpu[0],irq); -} - -void vcpu_migrate_cpu(struct vcpu *v, int newcpu) -{ - if ( v->processor == newcpu ) - return; - - set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags); - v->processor = newcpu; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/grant_table.c --- a/xen/arch/ia64/grant_table.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1288 +0,0 @@ -#ifndef CONFIG_VTI -// temporarily in arch/ia64 until can merge into common/grant_table.c -/****************************************************************************** - * common/grant_table.c - * - * Mechanism for granting foreign access to page frames, and receiving - * page-ownership transfers. - * - * Copyright (c) 2005 Christopher Clark - * Copyright (c) 2004 K A Fraser - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#define GRANT_DEBUG 0 -#define GRANT_DEBUG_VERBOSE 0 - -#include <xen/config.h> -#include <xen/lib.h> -#include <xen/sched.h> -#include <xen/shadow.h> -#include <xen/mm.h> -#ifdef __ia64__ -#define __addr_ok(a) 1 // FIXME-ia64: a variant of access_ok?? -// FIXME-ia64: need to implement real cmpxchg_user on ia64 -//#define cmpxchg_user(_p,_o,_n) ((*_p == _o) ? ((*_p = _n), 0) : ((_o = *_p), 0)) -// FIXME-ia64: these belong in an asm/grant_table.h... PAGE_SIZE different -#undef ORDER_GRANT_FRAMES -//#undef NUM_GRANT_FRAMES -#define ORDER_GRANT_FRAMES 0 -//#define NUM_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES) -#endif - -#define PIN_FAIL(_lbl, _rc, _f, _a...) \ - do { \ - DPRINTK( _f, ## _a ); \ - rc = (_rc); \ - goto _lbl; \ - } while ( 0 ) - -static inline int -get_maptrack_handle( - grant_table_t *t) -{ - unsigned int h; - if ( unlikely((h = t->maptrack_head) == t->maptrack_limit) ) - return -1; - t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT; - t->map_count++; - return h; -} - -static inline void -put_maptrack_handle( - grant_table_t *t, int handle) -{ - t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT; - t->maptrack_head = handle; - t->map_count--; -} - -static int -__gnttab_activate_grant_ref( - struct domain *mapping_d, /* IN */ - struct vcpu *mapping_ed, - struct domain *granting_d, - grant_ref_t ref, - u16 dev_hst_ro_flags, - unsigned long host_virt_addr, - unsigned long *pframe ) /* OUT */ -{ - domid_t sdom; - u16 sflags; - active_grant_entry_t *act; - grant_entry_t *sha; - s16 rc = 1; - unsigned long frame = 0; - int retries = 0; - - /* - * Objectives of this function: - * . Make the record ( granting_d, ref ) active, if not already. - * . Update shared grant entry of owner, indicating frame is mapped. - * . Increment the owner act->pin reference counts. - * . get_page on shared frame if new mapping. - * . get_page_type if this is first RW mapping of frame. - * . Add PTE to virtual address space of mapping_d, if necessary. - * Returns: - * . -ve: error - * . 1: ok - * . 0: ok and TLB invalidate of host_virt_addr needed. - * - * On success, *pframe contains mfn. - */ - - /* - * We bound the number of times we retry CMPXCHG on memory locations that - * we share with a guest OS. The reason is that the guest can modify that - * location at a higher rate than we can read-modify-CMPXCHG, so the guest - * could cause us to livelock. There are a few cases where it is valid for - * the guest to race our updates (e.g., to change the GTF_readonly flag), - * so we allow a few retries before failing. - */ - - act = &granting_d->grant_table->active[ref]; - sha = &granting_d->grant_table->shared[ref]; - - spin_lock(&granting_d->grant_table->lock); - - if ( act->pin == 0 ) - { - /* CASE 1: Activating a previously inactive entry. */ - - sflags = sha->flags; - sdom = sha->domid; - - for ( ; ; ) - { - u32 scombo, prev_scombo, new_scombo; - - if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) || - unlikely(sdom != mapping_d->domain_id) ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", - sflags, sdom, mapping_d->domain_id); - - /* Merge two 16-bit values into a 32-bit combined update. */ - /* NB. Endianness! 
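The loop being entered here packs the 16-bit domid and 16-bit flags of the shared grant entry into one 32-bit word so that both can be checked and updated with a single compare-and-swap against a guest that may be rewriting the same entry concurrently; as the comment above explains, the number of retries is capped so a hostile or busy guest cannot livelock the hypervisor. A standalone sketch of that bounded-retry pattern using C11 atomics (the packing mirrors scombo; everything else is illustrative, not the Xen cmpxchg_user path):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Shared word: low 16 bits = flags, high 16 bits = domid (the scombo layout). */
    static int set_flag_checked(_Atomic uint32_t *shared, uint16_t expected_dom,
                                uint32_t flag_bit)
    {
        uint32_t seen = atomic_load(shared);
        int retries;

        for (retries = 0; retries < 4; retries++) {
            if ((uint16_t)(seen >> 16) != expected_dom)
                return -1;                  /* entry no longer names us */

            uint32_t want = seen | flag_bit;
            /* On failure 'seen' is refreshed with the value actually found,
             * just like "prev_scombo is updated in place to seen value". */
            if (atomic_compare_exchange_strong(shared, &seen, want))
                return 0;                   /* combined update landed */
        }
        return -1;                          /* shared entry is unstable */
    }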
*/ - prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; - - new_scombo = scombo | GTF_reading; - if ( !(dev_hst_ro_flags & GNTMAP_readonly) ) - { - new_scombo |= GTF_writing; - if ( unlikely(sflags & GTF_readonly) ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Attempt to write-pin a r/o grant entry.\n"); - } - - /* NB. prev_scombo is updated in place to seen value. */ - if ( unlikely(cmpxchg_user((u32 *)&sha->flags, - prev_scombo, - new_scombo)) ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Fault while modifying shared flags and domid.\n"); - - /* Did the combined update work (did we see what we expected?). */ - if ( likely(prev_scombo == scombo) ) - break; - - if ( retries++ == 4 ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Shared grant entry is unstable.\n"); - - /* Didn't see what we expected. Split out the seen flags & dom. */ - /* NB. Endianness! */ - sflags = (u16)prev_scombo; - sdom = (u16)(prev_scombo >> 16); - } - - /* rmb(); */ /* not on x86 */ - - frame = __gpfn_to_mfn_foreign(granting_d, sha->frame); - -#ifdef __ia64__ -// FIXME-ia64: any error checking need to be done here? -#else - if ( unlikely(!pfn_valid(frame)) || - unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ? - get_page(&frame_table[frame], granting_d) : - get_page_and_type(&frame_table[frame], granting_d, - PGT_writable_page))) ) - { - clear_bit(_GTF_writing, &sha->flags); - clear_bit(_GTF_reading, &sha->flags); - PIN_FAIL(unlock_out, GNTST_general_error, - "Could not pin the granted frame (%lx)!\n", frame); - } -#endif - - if ( dev_hst_ro_flags & GNTMAP_device_map ) - act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? - GNTPIN_devr_inc : GNTPIN_devw_inc; - if ( dev_hst_ro_flags & GNTMAP_host_map ) - act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? - GNTPIN_hstr_inc : GNTPIN_hstw_inc; - act->domid = sdom; - act->frame = frame; - } - else - { - /* CASE 2: Active modications to an already active entry. */ - - /* - * A cheesy check for possible pin-count overflow. - * A more accurate check cannot be done with a single comparison. - */ - if ( (act->pin & 0x80808080U) != 0 ) - PIN_FAIL(unlock_out, ENOSPC, - "Risk of counter overflow %08x\n", act->pin); - - frame = act->frame; - - if ( !(dev_hst_ro_flags & GNTMAP_readonly) && - !((sflags = sha->flags) & GTF_writing) ) - { - for ( ; ; ) - { - u16 prev_sflags; - - if ( unlikely(sflags & GTF_readonly) ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Attempt to write-pin a r/o grant entry.\n"); - - prev_sflags = sflags; - - /* NB. prev_sflags is updated in place to seen value. */ - if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, - prev_sflags | GTF_writing)) ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Fault while modifying shared flags.\n"); - - if ( likely(prev_sflags == sflags) ) - break; - - if ( retries++ == 4 ) - PIN_FAIL(unlock_out, GNTST_general_error, - "Shared grant entry is unstable.\n"); - - sflags = prev_sflags; - } - -#ifdef __ia64__ -// FIXME-ia64: any error checking need to be done here? -#else - if ( unlikely(!get_page_type(&frame_table[frame], - PGT_writable_page)) ) - { - clear_bit(_GTF_writing, &sha->flags); - PIN_FAIL(unlock_out, GNTST_general_error, - "Attempt to write-pin a unwritable page.\n"); - } -#endif - } - - if ( dev_hst_ro_flags & GNTMAP_device_map ) - act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? - GNTPIN_devr_inc : GNTPIN_devw_inc; - - if ( dev_hst_ro_flags & GNTMAP_host_map ) - act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? 
- GNTPIN_hstr_inc : GNTPIN_hstw_inc; - } - - /* - * At this point: - * act->pin updated to reflect mapping. - * sha->flags updated to indicate to granting domain mapping done. - * frame contains the mfn. - */ - - spin_unlock(&granting_d->grant_table->lock); - -#ifdef __ia64__ -// FIXME-ia64: any error checking need to be done here? -#else - if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) ) - { - /* Write update into the pagetable. */ - l1_pgentry_t pte; - pte = l1e_from_pfn(frame, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY); - if ( !(dev_hst_ro_flags & GNTMAP_readonly) ) - l1e_add_flags(pte,_PAGE_RW); - rc = update_grant_va_mapping( host_virt_addr, pte, - mapping_d, mapping_ed ); - - /* - * IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB. - * This is done in the outer gnttab_map_grant_ref. - */ - - if ( rc < 0 ) - { - /* Failure: undo and abort. */ - - spin_lock(&granting_d->grant_table->lock); - - if ( dev_hst_ro_flags & GNTMAP_readonly ) - { - act->pin -= GNTPIN_hstr_inc; - } - else - { - act->pin -= GNTPIN_hstw_inc; - if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) - { - clear_bit(_GTF_writing, &sha->flags); - put_page_type(&frame_table[frame]); - } - } - - if ( act->pin == 0 ) - { - clear_bit(_GTF_reading, &sha->flags); - put_page(&frame_table[frame]); - } - - spin_unlock(&granting_d->grant_table->lock); - } - - } -#endif - - *pframe = frame; - return rc; - - unlock_out: - spin_unlock(&granting_d->grant_table->lock); - return rc; -} - -/* - * Returns 0 if TLB flush / invalidate required by caller. - * va will indicate the address to be invalidated. - */ -static int -__gnttab_map_grant_ref( - gnttab_map_grant_ref_t *uop, - unsigned long *va) -{ - domid_t dom; - grant_ref_t ref; - struct domain *ld, *rd; - struct vcpu *led; - u16 dev_hst_ro_flags; - int handle; - unsigned long frame = 0, host_virt_addr; - int rc; - - led = current; - ld = led->domain; - - /* Bitwise-OR avoids short-circuiting which screws control flow. */ - if ( unlikely(__get_user(dom, &uop->dom) | - __get_user(ref, &uop->ref) | - __get_user(host_virt_addr, &uop->host_addr) | - __get_user(dev_hst_ro_flags, &uop->flags)) ) - { - DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n"); - return -EFAULT; /* don't set status */ - } - - - if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map)) && - unlikely(!__addr_ok(host_virt_addr))) - { - DPRINTK("Bad virtual address (%lx) or flags (%x).\n", - host_virt_addr, dev_hst_ro_flags); - (void)__put_user(GNTST_bad_virt_addr, &uop->handle); - return GNTST_bad_gntref; - } - - if ( unlikely(ref >= NR_GRANT_ENTRIES) || - unlikely((dev_hst_ro_flags & - (GNTMAP_device_map|GNTMAP_host_map)) == 0) ) - { - DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags); - (void)__put_user(GNTST_bad_gntref, &uop->handle); - return GNTST_bad_gntref; - } - - if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || - unlikely(ld == rd) ) - { - if ( rd != NULL ) - put_domain(rd); - DPRINTK("Could not find domain %d\n", dom); - (void)__put_user(GNTST_bad_domain, &uop->handle); - return GNTST_bad_domain; - } - - /* Get a maptrack handle. */ - if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) ) - { - int i; - grant_mapping_t *new_mt; - grant_table_t *lgt = ld->grant_table; - - /* Grow the maptrack table. 
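The growth path that follows kicks in when get_maptrack_handle() finds the free list empty (maptrack_head == maptrack_limit): the table is reallocated at twice the size, the old entries are copied over, every slot of the new upper half is threaded onto the free list through the high bits of ref_and_flags, and the order and limit are doubled. A compressed sketch of the whole handle allocator, growth included (plain C types and calloc stand in for the Xen structures and page allocator):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    #define REF_SHIFT 8            /* illustrative stand-in for MAPTRACK_REF_SHIFT */

    struct track {
        uint32_t *ent;             /* a free slot stores "next free index" << REF_SHIFT */
        unsigned int head;         /* first free index; head == limit means empty */
        unsigned int limit;        /* number of slots */
    };

    static int get_handle(struct track *t)
    {
        unsigned int h = t->head;
        if (h == t->limit)
            return -1;             /* exhausted: caller grows the table */
        t->head = t->ent[h] >> REF_SHIFT;
        return (int)h;
    }

    static void put_handle(struct track *t, unsigned int h)
    {
        t->ent[h] = t->head << REF_SHIFT;  /* freed slot records the old head */
        t->head = h;
    }

    static int grow(struct track *t)
    {
        unsigned int i, nlimit = t->limit * 2;
        uint32_t *bigger = calloc(nlimit, sizeof(*bigger));
        if (bigger == NULL)
            return -1;
        memcpy(bigger, t->ent, t->limit * sizeof(*bigger));
        for (i = t->limit; i < nlimit; i++)    /* chain the new upper half */
            bigger[i] = (i + 1) << REF_SHIFT;
        free(t->ent);
        t->ent = bigger;
        /* head already equals the old limit (the list was empty), so the
         * next get_handle() hands out the first slot of the new half. */
        t->limit = nlimit;
        return 0;
    }

As in the original, the last new entry points at the new limit, which again encodes "list empty" once every slot has been handed out.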
*/ - new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1); - if ( new_mt == NULL ) - { - put_domain(rd); - DPRINTK("No more map handles available\n"); - (void)__put_user(GNTST_no_device_space, &uop->handle); - return GNTST_no_device_space; - } - - memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order); - for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ ) - new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; - - free_xenheap_pages(lgt->maptrack, lgt->maptrack_order); - lgt->maptrack = new_mt; - lgt->maptrack_order += 1; - lgt->maptrack_limit <<= 1; - - printk("Doubled maptrack size\n"); - handle = get_maptrack_handle(ld->grant_table); - } - -#if GRANT_DEBUG_VERBOSE - DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n", - ref, dom, dev_hst_ro_flags); -#endif - - if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref, - dev_hst_ro_flags, - host_virt_addr, &frame))) - { - /* - * Only make the maptrack live _after_ writing the pte, in case we - * overwrite the same frame number, causing a maptrack walk to find it - */ - ld->grant_table->maptrack[handle].domid = dom; - - ld->grant_table->maptrack[handle].ref_and_flags - = (ref << MAPTRACK_REF_SHIFT) | - (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK); - - (void)__put_user(frame, &uop->dev_bus_addr); - - if ( dev_hst_ro_flags & GNTMAP_host_map ) - *va = host_virt_addr; - - (void)__put_user(handle, &uop->handle); - } - else - { - (void)__put_user(rc, &uop->handle); - put_maptrack_handle(ld->grant_table, handle); - } - - put_domain(rd); - return rc; -} - -static long -gnttab_map_grant_ref( - gnttab_map_grant_ref_t *uop, unsigned int count) -{ - int i, flush = 0; - unsigned long va = 0; - - for ( i = 0; i < count; i++ ) - if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 ) - flush++; - -#ifdef __ia64__ -// FIXME-ia64: probably need to do something here to avoid stale mappings? -#else - if ( flush == 1 ) - flush_tlb_one_mask(current->domain->cpumask, va); - else if ( flush != 0 ) - flush_tlb_mask(current->domain->cpumask); -#endif - - return 0; -} - -static int -__gnttab_unmap_grant_ref( - gnttab_unmap_grant_ref_t *uop, - unsigned long *va) -{ - domid_t dom; - grant_ref_t ref; - u16 handle; - struct domain *ld, *rd; - - active_grant_entry_t *act; - grant_entry_t *sha; - grant_mapping_t *map; - u16 flags; - s16 rc = 1; - unsigned long frame, virt; - - ld = current->domain; - - /* Bitwise-OR avoids short-circuiting which screws control flow. 
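Both this read block and the one in the map path pull the guest-supplied fields out with __get_user() calls joined by a single '|': every field is fetched and the non-zero return codes simply accumulate, whereas a short-circuiting '||' would stop at the first fault and skip the remaining reads, the behaviour the comment is warning about. The same idea with toy readers (purely illustrative; the real __get_user() reads from a guest address):

    /* Toy stand-ins for __get_user(): 0 on success, -1 on a faulting access. */
    static int read_u16(unsigned short *dst, const unsigned short *src)
    {
        if (src == NULL) return -1;
        *dst = *src;
        return 0;
    }
    static int read_u64(unsigned long *dst, const unsigned long *src)
    {
        if (src == NULL) return -1;
        *dst = *src;
        return 0;
    }

    /* '|' evaluates both reads and merges their error codes; '||' would skip
     * the second read as soon as the first one failed. */
    static int read_request(unsigned short *dom, const unsigned short *udom,
                            unsigned long *addr, const unsigned long *uaddr)
    {
        if (read_u16(dom, udom) | read_u64(addr, uaddr))
            return -1;     /* the callers above return -EFAULT at this point */
        return 0;
    }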
*/ - if ( unlikely(__get_user(virt, &uop->host_addr) | - __get_user(frame, &uop->dev_bus_addr) | - __get_user(handle, &uop->handle)) ) - { - DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n"); - return -EFAULT; /* don't set status */ - } - - map = &ld->grant_table->maptrack[handle]; - - if ( unlikely(handle >= ld->grant_table->maptrack_limit) || - unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) ) - { - DPRINTK("Bad handle (%d).\n", handle); - (void)__put_user(GNTST_bad_handle, &uop->status); - return GNTST_bad_handle; - } - - dom = map->domid; - ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; - flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK; - - if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || - unlikely(ld == rd) ) - { - if ( rd != NULL ) - put_domain(rd); - DPRINTK("Could not find domain %d\n", dom); - (void)__put_user(GNTST_bad_domain, &uop->status); - return GNTST_bad_domain; - } - -#if GRANT_DEBUG_VERBOSE - DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n", - ref, dom, handle); -#endif - - act = &rd->grant_table->active[ref]; - sha = &rd->grant_table->shared[ref]; - - spin_lock(&rd->grant_table->lock); - - if ( frame == 0 ) - { - frame = act->frame; - } - else - { - if ( unlikely(frame != act->frame) ) - PIN_FAIL(unmap_out, GNTST_general_error, - "Bad frame number doesn't match gntref.\n"); - if ( flags & GNTMAP_device_map ) - act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc - : GNTPIN_devw_inc; - - map->ref_and_flags &= ~GNTMAP_device_map; - (void)__put_user(0, &uop->dev_bus_addr); - - /* Frame is now unmapped for device access. */ - } - - if ( (virt != 0) && - (flags & GNTMAP_host_map) && - ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0)) - { -#ifdef __ia64__ -// FIXME-ia64: any error checking need to be done here? -#else - l1_pgentry_t *pl1e; - unsigned long _ol1e; - - pl1e = &linear_pg_table[l1_linear_offset(virt)]; - - if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) ) - { - DPRINTK("Could not find PTE entry for address %lx\n", virt); - rc = -EINVAL; - goto unmap_out; - } - - /* - * Check that the virtual address supplied is actually mapped to - * act->frame. - */ - if ( unlikely((_ol1e >> PAGE_SHIFT) != frame )) - { - DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n", - _ol1e, virt, frame); - rc = -EINVAL; - goto unmap_out; - } - - /* Delete pagetable entry. */ - if ( unlikely(__put_user(0, (unsigned long *)pl1e))) - { - DPRINTK("Cannot delete PTE entry at %p for virtual address %lx\n", - pl1e, virt); - rc = -EINVAL; - goto unmap_out; - } -#endif - - map->ref_and_flags &= ~GNTMAP_host_map; - - act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc - : GNTPIN_hstw_inc; - - rc = 0; - *va = virt; - } - - if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) - { - map->ref_and_flags = 0; - put_maptrack_handle(ld->grant_table, handle); - } - -#ifdef __ia64__ -// FIXME-ia64: any error checking need to be done here? 
I think not and then -// this can probably be macro-ized into nothingness -#else - /* If just unmapped a writable mapping, mark as dirtied */ - if ( unlikely(shadow_mode_log_dirty(rd)) && - !( flags & GNTMAP_readonly ) ) - mark_dirty(rd, frame); -#endif - - /* If the last writable mapping has been removed, put_page_type */ - if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) && - ( !( flags & GNTMAP_readonly ) ) ) - { - clear_bit(_GTF_writing, &sha->flags); - put_page_type(&frame_table[frame]); - } - - if ( act->pin == 0 ) - { - clear_bit(_GTF_reading, &sha->flags); - put_page(&frame_table[frame]); - } - - unmap_out: - (void)__put_user(rc, &uop->status); - spin_unlock(&rd->grant_table->lock); - put_domain(rd); - return rc; -} - -static long -gnttab_unmap_grant_ref( - gnttab_unmap_grant_ref_t *uop, unsigned int count) -{ - int i, flush = 0; - unsigned long va = 0; - - for ( i = 0; i < count; i++ ) - if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 ) - flush++; - -#ifdef __ia64__ -// FIXME-ia64: probably need to do something here to avoid stale mappings? -#else - if ( flush == 1 ) - flush_tlb_one_mask(current->domain->cpumask, va); - else if ( flush != 0 ) - flush_tlb_mask(current->domain->cpumask); -#endif - - return 0; -} - -static long -gnttab_setup_table( - gnttab_setup_table_t *uop, unsigned int count) -{ - gnttab_setup_table_t op; - struct domain *d; - int i; - unsigned long addr; - - if ( count != 1 ) - return -EINVAL; - - if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) - { - DPRINTK("Fault while reading gnttab_setup_table_t.\n"); - return -EFAULT; - } - - if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) ) - { - DPRINTK("Xen only supports up to %d grant-table frames per domain.\n", - NR_GRANT_FRAMES); - (void)put_user(GNTST_general_error, &uop->status); - return 0; - } - - if ( op.dom == DOMID_SELF ) - { - op.dom = current->domain->domain_id; - } - else if ( unlikely(!IS_PRIV(current->domain)) ) - { - (void)put_user(GNTST_permission_denied, &uop->status); - return 0; - } - - if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) ) - { - DPRINTK("Bad domid %d.\n", op.dom); - (void)put_user(GNTST_bad_domain, &uop->status); - return 0; - } - - if ( op.nr_frames <= NR_GRANT_FRAMES ) - { - ASSERT(d->grant_table != NULL); - (void)put_user(GNTST_okay, &uop->status); -#ifdef __ia64__ - if (d == dom0) { - for ( i = 0; i < op.nr_frames; i++ ) - (void)put_user( - (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i, - &uop->frame_list[i]); - } else { - /* IA64 hack - need to map it somewhere */ - addr = (1UL << 40); - map_domain_page(d, addr, virt_to_phys(d->grant_table->shared)); - (void)put_user(addr >> PAGE_SHIFT, &uop->frame_list[0]); - } -#else - for ( i = 0; i < op.nr_frames; i++ ) - (void)put_user( - (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i, - &uop->frame_list[i]); -#endif - } - - put_domain(d); - return 0; -} - -#if GRANT_DEBUG -static int -gnttab_dump_table(gnttab_dump_table_t *uop) -{ - grant_table_t *gt; - gnttab_dump_table_t op; - struct domain *d; - u32 shared_mfn; - active_grant_entry_t *act; - grant_entry_t sha_copy; - grant_mapping_t *maptrack; - int i; - - - if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) - { - DPRINTK("Fault while reading gnttab_dump_table_t.\n"); - return -EFAULT; - } - - if ( op.dom == DOMID_SELF ) - { - op.dom = current->domain->domain_id; - } - - if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) ) - { - DPRINTK("Bad domid %d.\n", op.dom); - (void)put_user(GNTST_bad_domain, &uop->status); - return 0; - 
} - - ASSERT(d->grant_table != NULL); - gt = d->grant_table; - (void)put_user(GNTST_okay, &uop->status); - - shared_mfn = virt_to_phys(d->grant_table->shared); - - DPRINTK("Grant table for dom (%hu) MFN (%x)\n", - op.dom, shared_mfn); - - ASSERT(d->grant_table->active != NULL); - ASSERT(d->grant_table->shared != NULL); - ASSERT(d->grant_table->maptrack != NULL); - - for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) - { - sha_copy = gt->shared[i]; - - if ( sha_copy.flags ) - { - DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) " - "dom:(%hu) frame:(%lx)\n", - op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame); - } - } - - spin_lock(>->lock); - - for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) - { - act = >->active[i]; - - if ( act->pin ) - { - DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) " - "dom:(%hu) frame:(%lx)\n", - op.dom, i, act->pin, act->domid, act->frame); - } - } - - for ( i = 0; i < gt->maptrack_limit; i++ ) - { - maptrack = >->maptrack[i]; - - if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK ) - { - DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) " - "dom:(%hu)\n", - op.dom, i, - maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT, - maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK, - maptrack->domid); - } - } - - spin_unlock(>->lock); - - put_domain(d); - return 0; -} -#endif - -long -do_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ - long rc; - - if ( count > 512 ) - return -EINVAL; - - LOCK_BIGLOCK(current->domain); - - rc = -EFAULT; - switch ( cmd ) - { - case GNTTABOP_map_grant_ref: - if ( unlikely(!array_access_ok( - uop, count, sizeof(gnttab_map_grant_ref_t))) ) - goto out; - rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count); - break; - case GNTTABOP_unmap_grant_ref: - if ( unlikely(!array_access_ok( - uop, count, sizeof(gnttab_unmap_grant_ref_t))) ) - goto out; - rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count); - break; - case GNTTABOP_setup_table: - rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count); - break; -#if GRANT_DEBUG - case GNTTABOP_dump_table: - rc = gnttab_dump_table((gnttab_dump_table_t *)uop); - break; -#endif - default: - rc = -ENOSYS; - break; - } - -out: - UNLOCK_BIGLOCK(current->domain); - - return rc; -} - -int -gnttab_check_unmap( - struct domain *rd, struct domain *ld, unsigned long frame, int readonly) -{ - /* Called when put_page is invoked on a page belonging to a foreign domain. - * Instead of decrementing the frame table ref count, locate the grant - * table entry, if any, and if found, decrement that count. - * Called a _lot_ at domain creation because pages mapped by priv domains - * also traverse this. - */ - - /* Note: If the same frame is mapped multiple times, and then one of - * the ptes is overwritten, which maptrack handle gets invalidated? - * Advice: Don't do it. Explicitly unmap. 
- */ - - unsigned int handle, ref, refcount; - grant_table_t *lgt, *rgt; - active_grant_entry_t *act; - grant_mapping_t *map; - int found = 0; - - lgt = ld->grant_table; - -#if GRANT_DEBUG_VERBOSE - if ( ld->domain_id != 0 ) - { - DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n", - rd->domain_id, ld->domain_id, frame, readonly); - } -#endif - - /* Fast exit if we're not mapping anything using grant tables */ - if ( lgt->map_count == 0 ) - return 0; - - if ( get_domain(rd) == 0 ) - { - DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n", - rd->domain_id); - return 0; - } - - rgt = rd->grant_table; - - for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) - { - map = &lgt->maptrack[handle]; - - if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) && - ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly)))) - { - ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT); - act = &rgt->active[ref]; - - spin_lock(&rgt->lock); - - if ( act->frame != frame ) - { - spin_unlock(&rgt->lock); - continue; - } - - refcount = act->pin & ( readonly ? GNTPIN_hstr_mask - : GNTPIN_hstw_mask ); - if ( refcount == 0 ) - { - spin_unlock(&rgt->lock); - continue; - } - - /* gotcha */ - DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n", - rd->domain_id, ld->domain_id, frame, readonly); - - if ( readonly ) - act->pin -= GNTPIN_hstr_inc; - else - { - act->pin -= GNTPIN_hstw_inc; - - /* any more granted writable mappings? */ - if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) - { - clear_bit(_GTF_writing, &rgt->shared[ref].flags); - put_page_type(&frame_table[frame]); - } - } - - if ( act->pin == 0 ) - { - clear_bit(_GTF_reading, &rgt->shared[ref].flags); - put_page(&frame_table[frame]); - } - spin_unlock(&rgt->lock); - - clear_bit(GNTMAP_host_map, &map->ref_and_flags); - - if ( !(map->ref_and_flags & GNTMAP_device_map) ) - put_maptrack_handle(lgt, handle); - - found = 1; - break; - } - } - put_domain(rd); - - return found; -} - -int -gnttab_prepare_for_transfer( - struct domain *rd, struct domain *ld, grant_ref_t ref) -{ - grant_table_t *rgt; - grant_entry_t *sha; - domid_t sdom; - u16 sflags; - u32 scombo, prev_scombo; - int retries = 0; - unsigned long target_pfn; - - DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n", - rd->domain_id, ld->domain_id, ref); - - if ( unlikely((rgt = rd->grant_table) == NULL) || - unlikely(ref >= NR_GRANT_ENTRIES) ) - { - DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", - rd->domain_id, ref); - return 0; - } - - spin_lock(&rgt->lock); - - sha = &rgt->shared[ref]; - - sflags = sha->flags; - sdom = sha->domid; - - for ( ; ; ) - { - target_pfn = sha->frame; - - if ( unlikely(target_pfn >= max_page ) ) - { - DPRINTK("Bad pfn (%lx)\n", target_pfn); - goto fail; - } - - if ( unlikely(sflags != GTF_accept_transfer) || - unlikely(sdom != ld->domain_id) ) - { - DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", - sflags, sdom, ld->domain_id); - goto fail; - } - - /* Merge two 16-bit values into a 32-bit combined update. */ - /* NB. Endianness! */ - prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; - - /* NB. prev_scombo is updated in place to seen value. */ - if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, - prev_scombo | GTF_transfer_committed)) ) - { - DPRINTK("Fault while modifying shared flags and domid.\n"); - goto fail; - } - - /* Did the combined update work (did we see what we expected?). 
*/ - if ( likely(prev_scombo == scombo) ) - break; - - if ( retries++ == 4 ) - { - DPRINTK("Shared grant entry is unstable.\n"); - goto fail; - } - - /* Didn't see what we expected. Split out the seen flags & dom. */ - /* NB. Endianness! */ - sflags = (u16)prev_scombo; - sdom = (u16)(prev_scombo >> 16); - } - - spin_unlock(&rgt->lock); - return 1; - - fail: - spin_unlock(&rgt->lock); - return 0; -} - -void -gnttab_notify_transfer( - struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame) -{ - grant_entry_t *sha; - unsigned long pfn; - - DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n", - rd->domain_id, ld->domain_id, ref); - - sha = &rd->grant_table->shared[ref]; - - spin_lock(&rd->grant_table->lock); - -#ifdef __ia64__ -// FIXME-ia64: any error checking need to be done here? -#else - pfn = sha->frame; - - if ( unlikely(pfn >= max_page ) ) - DPRINTK("Bad pfn (%lx)\n", pfn); - else - { - machine_to_phys_mapping[frame] = pfn; - - if ( unlikely(shadow_mode_log_dirty(ld))) - mark_dirty(ld, frame); - - if (shadow_mode_translate(ld)) - __phys_to_machine_mapping[pfn] = frame; - } -#endif - sha->frame = __mfn_to_gpfn(rd, frame); - sha->domid = rd->domain_id; - wmb(); - sha->flags = ( GTF_accept_transfer | GTF_transfer_completed ); - - spin_unlock(&rd->grant_table->lock); - - return; -} - -int -grant_table_create( - struct domain *d) -{ - grant_table_t *t; - int i; - - if ( (t = xmalloc(grant_table_t)) == NULL ) - goto no_mem; - - /* Simple stuff. */ - memset(t, 0, sizeof(*t)); - spin_lock_init(&t->lock); - - /* Active grant table. */ - if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES)) - == NULL ) - goto no_mem; - memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES); - - /* Tracking of mapped foreign frames table */ - if ( (t->maptrack = alloc_xenheap_page()) == NULL ) - goto no_mem; - t->maptrack_order = 0; - t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t); - memset(t->maptrack, 0, PAGE_SIZE); - for ( i = 0; i < t->maptrack_limit; i++ ) - t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; - - /* Shared grant table. */ - t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES); - if ( t->shared == NULL ) - goto no_mem; - memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE); - -#ifdef __ia64__ -// I don't think there's anything to do here on ia64?... -#else - for ( i = 0; i < NR_GRANT_FRAMES; i++ ) - { - SHARE_PFN_WITH_DOMAIN( - virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d); - machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] = - INVALID_M2P_ENTRY; - } -#endif - - /* Okay, install the structure. 
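grant_table_create() finishes just below by issuing wmb() and only then storing the pointer into d->grant_table, so lock-free readers can never observe a partially initialised table; every earlier allocation failure funnels into the single no_mem label that frees whatever had already been obtained. A compressed sketch of that allocate / clean-up / publish shape (standard allocators and a GCC barrier stand in for the Xen primitives, so this shows the idiom, not the real allocation path):

    #include <stdlib.h>

    struct table {
        void *active;
        void *maptrack;
        void *shared;
    };

    /* 'out' plays the role of d->grant_table: readers may look at it without
     * taking a lock, so it is published last, after a barrier. */
    static int table_create(struct table * volatile *out)
    {
        struct table *t = calloc(1, sizeof(*t));
        if (t == NULL)
            goto no_mem;
        if ((t->active = calloc(1, 4096)) == NULL)
            goto no_mem;
        if ((t->maptrack = calloc(1, 4096)) == NULL)
            goto no_mem;
        if ((t->shared = calloc(1, 4096)) == NULL)
            goto no_mem;

        __sync_synchronize();   /* wmb(): contents visible before the pointer */
        *out = t;
        return 0;

     no_mem:
        if (t != NULL) {
            free(t->shared);
            free(t->maptrack);
            free(t->active);
            free(t);
        }
        return -1;              /* -ENOMEM in the original */
    }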
*/ - wmb(); /* avoid races with lock-free access to d->grant_table */ - d->grant_table = t; - return 0; - - no_mem: - if ( t != NULL ) - { - xfree(t->active); - if ( t->maptrack != NULL ) - free_xenheap_page(t->maptrack); - xfree(t); - } - return -ENOMEM; -} - -void -gnttab_release_dev_mappings(grant_table_t *gt) -{ - grant_mapping_t *map; - domid_t dom; - grant_ref_t ref; - u16 handle; - struct domain *ld, *rd; - unsigned long frame; - active_grant_entry_t *act; - grant_entry_t *sha; - - ld = current->domain; - - for ( handle = 0; handle < gt->maptrack_limit; handle++ ) - { - map = >->maptrack[handle]; - - if ( map->ref_and_flags & GNTMAP_device_map ) - { - dom = map->domid; - ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; - - DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n", - handle, ref, - map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom); - - if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || - unlikely(ld == rd) ) - { - if ( rd != NULL ) - put_domain(rd); - - printk(KERN_WARNING "Grant release: No dom%d\n", dom); - continue; - } - - act = &rd->grant_table->active[ref]; - sha = &rd->grant_table->shared[ref]; - - spin_lock(&rd->grant_table->lock); - - if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) ) - { - frame = act->frame; - - if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) && - ( (act->pin & GNTPIN_devw_mask) > 0 ) ) - { - clear_bit(_GTF_writing, &sha->flags); - put_page_type(&frame_table[frame]); - } - - act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask); - - if ( act->pin == 0 ) - { - clear_bit(_GTF_reading, &sha->flags); - map->ref_and_flags = 0; - put_page(&frame_table[frame]); - } - else - map->ref_and_flags &= ~GNTMAP_device_map; - } - - spin_unlock(&rd->grant_table->lock); - - put_domain(rd); - } - } -} - - -void -grant_table_destroy( - struct domain *d) -{ - grant_table_t *t; - - if ( (t = d->grant_table) != NULL ) - { - /* Free memory relating to this grant table. */ - d->grant_table = NULL; - free_xenheap_pages(t->shared, ORDER_GRANT_FRAMES); - free_xenheap_page(t->maptrack); - xfree(t->active); - xfree(t); - } -} - -void -grant_table_init( - void) -{ - /* Nothing. */ -} -#endif - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 4 - * tab-width: 4 - * indent-tabs-mode: nil - * End: - */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/hpsimserial.c --- a/xen/arch/ia64/hpsimserial.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,23 +0,0 @@ -/* - * HP Ski simulator serial I/O - * - * Copyright (C) 2004 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@xxxxxx> - */ - -#include <linux/config.h> -#include <xen/sched.h> -#include <xen/serial.h> -#include "hpsim_ssc.h" - -static void hp_ski_putc(struct serial_port *port, char c) -{ - ia64_ssc(c,0,0,0,SSC_PUTCHAR); -} - -static struct uart_driver hp_ski = { .putc = hp_ski_putc }; - -void hpsim_serial_init(void) -{ - serial_register_uart(0, &hp_ski, 0); -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/hypercall.c --- a/xen/arch/ia64/hypercall.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,182 +0,0 @@ -/* - * Hypercall implementations - * - * Copyright (C) 2005 Hewlett-Packard Co. 
- * Dan Magenheimer (dan.magenheimer@xxxxxx) - * - */ - -#include <xen/config.h> -#include <xen/sched.h> - -#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */ -#include <asm/sal.h> /* FOR struct ia64_sal_retval */ - -#include <asm/vcpu.h> -#include <asm/dom_fw.h> - -extern unsigned long translate_domain_mpaddr(unsigned long); -extern struct ia64_pal_retval xen_pal_emulator(UINT64,UINT64,UINT64,UINT64); -extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64); - -unsigned long idle_when_pending = 0; -unsigned long pal_halt_light_count = 0; - -int -ia64_hypercall (struct pt_regs *regs) -{ - struct vcpu *v = (struct domain *) current; - struct ia64_sal_retval x; - struct ia64_pal_retval y; - unsigned long *tv, *tc; - int pi; - - switch (regs->r2) { - case FW_HYPERCALL_PAL_CALL: - //printf("*** PAL hypercall: index=%d\n",regs->r28); - //FIXME: This should call a C routine -#if 0 - // This is very conservative, but avoids a possible - // (and deadly) freeze in paravirtualized domains due - // to a yet-to-be-found bug where pending_interruption - // is zero when it shouldn't be. Since PAL is called - // in the idle loop, this should resolve it - VCPU(v,pending_interruption) = 1; -#endif - if (regs->r28 == PAL_HALT_LIGHT) { -#define SPURIOUS_VECTOR 15 - pi = vcpu_check_pending_interrupts(v); - if (pi != SPURIOUS_VECTOR) { - if (!VCPU(v,pending_interruption)) - idle_when_pending++; - vcpu_pend_unspecified_interrupt(v); -//printf("idle w/int#%d pending!\n",pi); -//this shouldn't happen, but it apparently does quite a bit! so don't -//allow it to happen... i.e. if a domain has an interrupt pending and -//it tries to halt itself because it thinks it is idle, just return here -//as deliver_pending_interrupt is called on the way out and will deliver it - } - else { - pal_halt_light_count++; - do_sched_op(SCHEDOP_yield); - } - //break; - } - else if (regs->r28 >= PAL_COPY_PAL) { /* FIXME */ - printf("stacked PAL hypercalls not supported\n"); - regs->r8 = -1; - break; - } - else y = xen_pal_emulator(regs->r28,regs->r29, - regs->r30,regs->r31); - regs->r8 = y.status; regs->r9 = y.v0; - regs->r10 = y.v1; regs->r11 = y.v2; - break; - case FW_HYPERCALL_SAL_CALL: - x = sal_emulator(vcpu_get_gr(v,32),vcpu_get_gr(v,33), - vcpu_get_gr(v,34),vcpu_get_gr(v,35), - vcpu_get_gr(v,36),vcpu_get_gr(v,37), - vcpu_get_gr(v,38),vcpu_get_gr(v,39)); - regs->r8 = x.status; regs->r9 = x.v0; - regs->r10 = x.v1; regs->r11 = x.v2; - break; - case FW_HYPERCALL_EFI_RESET_SYSTEM: - printf("efi.reset_system called "); - if (current->domain == dom0) { - printf("(by dom0)\n "); - (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL); - } -#ifdef DOMU_AUTO_RESTART - else { - reconstruct_domU(current); - return 0; // don't increment ip! - } -#else - printf("(not supported for non-0 domain)\n"); - regs->r8 = EFI_UNSUPPORTED; -#endif - break; - case FW_HYPERCALL_EFI_GET_TIME: - tv = vcpu_get_gr(v,32); - tc = vcpu_get_gr(v,33); - //printf("efi_get_time(%p,%p) called...",tv,tc); - tv = __va(translate_domain_mpaddr(tv)); - if (tc) tc = __va(translate_domain_mpaddr(tc)); - regs->r8 = (*efi.get_time)(tv,tc); - //printf("and returns %lx\n",regs->r8); - break; - case FW_HYPERCALL_EFI_SET_TIME: - case FW_HYPERCALL_EFI_GET_WAKEUP_TIME: - case FW_HYPERCALL_EFI_SET_WAKEUP_TIME: - // FIXME: need fixes in efi.h from 2.6.9 - case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP: - // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED - // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS - // POINTER ARGUMENTS WILL BE VIRTUAL!! 
- case FW_HYPERCALL_EFI_GET_VARIABLE: - // FIXME: need fixes in efi.h from 2.6.9 - case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE: - case FW_HYPERCALL_EFI_SET_VARIABLE: - case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT: - // FIXME: need fixes in efi.h from 2.6.9 - regs->r8 = EFI_UNSUPPORTED; - break; - case 0xffff: // test dummy hypercall - regs->r8 = dump_privop_counts_to_user( - vcpu_get_gr(v,32), - vcpu_get_gr(v,33)); - break; - case 0xfffe: // test dummy hypercall - regs->r8 = zero_privop_counts_to_user( - vcpu_get_gr(v,32), - vcpu_get_gr(v,33)); - break; - case 0xfffd: // test dummy hypercall - regs->r8 = launch_domainU( - vcpu_get_gr(v,32)); - break; - case 0xfffc: // test dummy hypercall - regs->r8 = domU_staging_write_32( - vcpu_get_gr(v,32), - vcpu_get_gr(v,33), - vcpu_get_gr(v,34), - vcpu_get_gr(v,35), - vcpu_get_gr(v,36)); - break; - case 0xfffb: // test dummy hypercall - regs->r8 = domU_staging_read_8(vcpu_get_gr(v,32)); - break; - - case __HYPERVISOR_dom0_op: - regs->r8 = do_dom0_op(regs->r14); - break; - - case __HYPERVISOR_dom_mem_op: -#ifdef CONFIG_VTI - regs->r8 = do_dom_mem_op(regs->r14, regs->r15, regs->r16, regs->r17, regs->r18); -#else - /* we don't handle reservations; just return success */ - regs->r8 = regs->r16; -#endif - break; - - case __HYPERVISOR_event_channel_op: - regs->r8 = do_event_channel_op(regs->r14); - break; - -#ifndef CONFIG_VTI - case __HYPERVISOR_grant_table_op: - regs->r8 = do_grant_table_op(regs->r14, regs->r15, regs->r16); - break; -#endif - - case __HYPERVISOR_console_io: - regs->r8 = do_console_io(regs->r14, regs->r15, regs->r16); - break; - - default: - printf("unknown hypercall %x\n", regs->r2); - regs->r8 = (unsigned long)-1; - } - return 1; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/hyperprivop.S --- a/xen/arch/ia64/hyperprivop.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1592 +0,0 @@ -/* - * arch/ia64/kernel/hyperprivop.S - * - * Copyright (C) 2005 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@xxxxxx> - */ - -#include <linux/config.h> - -#include <asm/asmmacro.h> -#include <asm/kregs.h> -#include <asm/offsets.h> -#include <asm/processor.h> -#include <asm/system.h> -#include <public/arch-ia64.h> - -#if 1 // change to 0 to turn off all fast paths -#define FAST_HYPERPRIVOPS -#define FAST_HYPERPRIVOP_CNT -#define FAST_REFLECT_CNT -//#define FAST_TICK -#define FAST_BREAK -#define FAST_ACCESS_REFLECT -#define FAST_RFI -#define FAST_SSM_I -#define FAST_PTC_GA -#undef RFI_TO_INTERRUPT // not working yet -#endif - -#ifdef CONFIG_SMP -#warning "FIXME: ptc.ga instruction requires spinlock for SMP" -#undef FAST_PTC_GA -#endif - -// FIXME: turn off for now... but NaTs may crash Xen so re-enable soon! -//#define HANDLE_AR_UNAT - -// FIXME: This is defined in include/asm-ia64/hw_irq.h but this -// doesn't appear to be include'able from assembly? -#define IA64_TIMER_VECTOR 0xef - -// Should be included from common header file (also in process.c) -// NO PSR_CLR IS DIFFERENT! (CPL) -#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT) -#define IA64_PSR_CPL0 (__IA64_UL(1) << IA64_PSR_CPL0_BIT) -// note IA64_PSR_PK removed from following, why is this necessary? 
-#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \ - IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \ - IA64_PSR_IT | IA64_PSR_BN) - -#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \ - IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \ - IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \ - IA64_PSR_MC | IA64_PSR_IS | \ - IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ - IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA) - -// Note: not hand-scheduled for now -// Registers at entry -// r16 == cr.isr -// r17 == cr.iim -// r18 == XSI_PSR_IC_OFS -// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits) -// r31 == pr -GLOBAL_ENTRY(fast_hyperprivop) -#ifndef FAST_HYPERPRIVOPS // see beginning of file - br.sptk.many dispatch_break_fault ;; -#endif - // HYPERPRIVOP_SSM_I? - // assumes domain interrupts pending, so just do it - cmp.eq p7,p6=XEN_HYPER_SSM_I,r17 -(p7) br.sptk.many hyper_ssm_i;; - - // FIXME. This algorithm gives up (goes to the slow path) if there - // are ANY interrupts pending, even if they are currently - // undeliverable. This should be improved later... - adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;; - ld4 r20=[r20] ;; - cmp.eq p7,p0=r0,r20 -(p7) br.cond.sptk.many 1f - movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r20=[r20];; - adds r21=IA64_VCPU_IRR0_OFFSET,r20; - adds r22=IA64_VCPU_IRR0_OFFSET+8,r20;; - ld8 r23=[r21],16; ld8 r24=[r22],16;; - ld8 r21=[r21]; ld8 r22=[r22];; - or r23=r23,r24; or r21=r21,r22;; - or r20=r23,r21;; -1: // when we get to here r20=~=interrupts pending - - // HYPERPRIVOP_RFI? - cmp.eq p7,p6=XEN_HYPER_RFI,r17 -(p7) br.sptk.many hyper_rfi;; - - // HYPERPRIVOP_GET_IVR? - cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17 -(p7) br.sptk.many hyper_get_ivr;; - - cmp.ne p7,p0=r20,r0 -(p7) br.spnt.many dispatch_break_fault ;; - - // HYPERPRIVOP_COVER? - cmp.eq p7,p6=XEN_HYPER_COVER,r17 -(p7) br.sptk.many hyper_cover;; - - // HYPERPRIVOP_SSM_DT? - cmp.eq p7,p6=XEN_HYPER_SSM_DT,r17 -(p7) br.sptk.many hyper_ssm_dt;; - - // HYPERPRIVOP_RSM_DT? - cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17 -(p7) br.sptk.many hyper_rsm_dt;; - - // HYPERPRIVOP_GET_TPR? - cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17 -(p7) br.sptk.many hyper_get_tpr;; - - // HYPERPRIVOP_SET_TPR? - cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17 -(p7) br.sptk.many hyper_set_tpr;; - - // HYPERPRIVOP_EOI? - cmp.eq p7,p6=XEN_HYPER_EOI,r17 -(p7) br.sptk.many hyper_eoi;; - - // HYPERPRIVOP_SET_ITM? - cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17 -(p7) br.sptk.many hyper_set_itm;; - - // HYPERPRIVOP_SET_RR? - cmp.eq p7,p6=XEN_HYPER_SET_RR,r17 -(p7) br.sptk.many hyper_set_rr;; - - // HYPERPRIVOP_GET_RR? - cmp.eq p7,p6=XEN_HYPER_GET_RR,r17 -(p7) br.sptk.many hyper_get_rr;; - - // HYPERPRIVOP_PTC_GA? - cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17 -(p7) br.sptk.many hyper_ptc_ga;; - - // HYPERPRIVOP_ITC_D? - cmp.eq p7,p6=XEN_HYPER_ITC_D,r17 -(p7) br.sptk.many hyper_itc_d;; - - // HYPERPRIVOP_ITC_I? - cmp.eq p7,p6=XEN_HYPER_ITC_I,r17 -(p7) br.sptk.many hyper_itc_i;; - - // HYPERPRIVOP_THASH? 
- cmp.eq p7,p6=XEN_HYPER_THASH,r17 -(p7) br.sptk.many hyper_thash;; - - // if not one of the above, give up for now and do it the slow way - br.sptk.many dispatch_break_fault ;; - - -// give up for now if: ipsr.be==1, ipsr.pp==1 -// from reflect_interruption, don't need to: -// - printf first extint (debug only) -// - check for interrupt collection enabled (routine will force on) -// - set ifa (not valid for extint) -// - set iha (not valid for extint) -// - set itir (not valid for extint) -// DO need to -// - increment the HYPER_SSM_I fast_hyperprivop counter -// - set shared_mem iip to instruction after HYPER_SSM_I -// - set cr.iip to guest iva+0x3000 -// - set shared_mem ipsr to [vcpu_get_ipsr_int_state] -// be = pp = bn = 0; dt = it = rt = 1; cpl = 3 or 0; -// i = shared_mem interrupt_delivery_enabled -// ic = shared_mem interrupt_collection_enabled -// ri = instruction after HYPER_SSM_I -// all other bits unchanged from real cr.ipsr -// - set cr.ipsr (DELIVER_PSR_SET/CLEAR, don't forget cpl!) -// - set shared_mem isr: isr.ei to instr following HYPER_SSM_I -// and isr.ri to cr.isr.ri (all other bits zero) -// - cover and set shared_mem precover_ifs to cr.ifs -// ^^^ MISSED THIS FOR fast_break?? -// - set shared_mem ifs and incomplete_regframe to 0 -// - set shared_mem interrupt_delivery_enabled to 0 -// - set shared_mem interrupt_collection_enabled to 0 -// - set r31 to SHAREDINFO_ADDR -// - virtual bank switch 0 -// maybe implement later -// - verify that there really IS a deliverable interrupt pending -// - set shared_mem iva -// needs to be done but not implemented (in reflect_interruption) -// - set shared_mem iipa -// don't know for sure -// - set shared_mem unat -// r16 == cr.isr -// r17 == cr.iim -// r18 == XSI_PSR_IC -// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits) -// r31 == pr -ENTRY(hyper_ssm_i) -#ifndef FAST_SSM_I - br.spnt.few dispatch_break_fault ;; -#endif - // give up for now if: ipsr.be==1, ipsr.pp==1 - mov r30=cr.ipsr;; - mov r29=cr.iip;; - extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; - cmp.ne p7,p0=r21,r0 -(p7) br.sptk.many dispatch_break_fault ;; - extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; - cmp.ne p7,p0=r21,r0 -(p7) br.sptk.many dispatch_break_fault ;; -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_I);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - // set shared_mem iip to instruction after HYPER_SSM_I - extr.u r20=r30,41,2 ;; - cmp.eq p6,p7=2,r20 ;; -(p6) mov r20=0 -(p6) adds r29=16,r29 -(p7) adds r20=1,r20 ;; - dep r30=r20,r30,41,2;; // adjust cr.ipsr.ri but don't save yet - adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r29 ;; - // set shared_mem isr - extr.u r16=r16,38,1;; // grab cr.isr.ir bit - dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero) - dep r16=r20,r16,41,2 ;; // deposit cr.isr.ri - adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r16 ;; - // set cr.ipsr - mov r29=r30 ;; - movl r28=DELIVER_PSR_SET;; - movl r27=~DELIVER_PSR_CLR;; - or r29=r29,r28;; - and r29=r29,r27;; - mov cr.ipsr=r29;; - // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set) - extr.u r29=r30,IA64_PSR_CPL0_BIT,2;; - cmp.eq p6,p7=3,r29;; -(p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2 -(p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2 - ;; - // FOR SSM_I ONLY, also turn on psr.i and psr.ic - movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT|IA64_PSR_I|IA64_PSR_IC);; - movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);; - or r30=r30,r28;; - and r30=r30,r27;; - adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r30 ;; - // 
set shared_mem interrupt_delivery_enabled to 0 - // set shared_mem interrupt_collection_enabled to 0 - st8 [r18]=r0;; - // cover and set shared_mem precover_ifs to cr.ifs - // set shared_mem ifs and incomplete_regframe to 0 - cover ;; - mov r20=cr.ifs;; - adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r21]=r0 ;; - adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r0 ;; - adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r20 ;; - // leave cr.ifs alone for later rfi - // set iip to go to domain IVA break instruction vector - movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r22=[r22];; - adds r22=IA64_VCPU_IVA_OFFSET,r22;; - ld8 r23=[r22];; - movl r24=0x3000;; - add r24=r24,r23;; - mov cr.iip=r24;; - // OK, now all set to go except for switch to virtual bank0 - mov r30=r2; mov r29=r3;; - adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; - adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; - bsw.1;; - // FIXME?: ar.unat is not really handled correctly, - // but may not matter if the OS is NaT-clean - .mem.offset 0,0; st8.spill [r2]=r16,16; - .mem.offset 8,0; st8.spill [r3]=r17,16 ;; - .mem.offset 0,0; st8.spill [r2]=r18,16; - .mem.offset 8,0; st8.spill [r3]=r19,16 ;; - .mem.offset 0,0; st8.spill [r2]=r20,16; - .mem.offset 8,0; st8.spill [r3]=r21,16 ;; - .mem.offset 0,0; st8.spill [r2]=r22,16; - .mem.offset 8,0; st8.spill [r3]=r23,16 ;; - .mem.offset 0,0; st8.spill [r2]=r24,16; - .mem.offset 8,0; st8.spill [r3]=r25,16 ;; - .mem.offset 0,0; st8.spill [r2]=r26,16; - .mem.offset 8,0; st8.spill [r3]=r27,16 ;; - .mem.offset 0,0; st8.spill [r2]=r28,16; - .mem.offset 8,0; st8.spill [r3]=r29,16 ;; - .mem.offset 0,0; st8.spill [r2]=r30,16; - .mem.offset 8,0; st8.spill [r3]=r31,16 ;; - movl r31=XSI_IPSR;; - bsw.0 ;; - mov r2=r30; mov r3=r29;; - adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r20]=r0 ;; - mov pr=r31,-1 ;; - rfi - ;; - -// reflect domain clock interrupt -// r31 == pr -// r30 == cr.ivr -// r29 == rp -GLOBAL_ENTRY(fast_tick_reflect) -#ifndef FAST_TICK // see beginning of file - br.cond.sptk.many rp;; -#endif - mov r28=IA64_TIMER_VECTOR;; - cmp.ne p6,p0=r28,r30 -(p6) br.cond.spnt.few rp;; - movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;; - ld8 r26=[r20];; - mov r27=ar.itc;; - adds r27=200,r27;; // safety margin - cmp.ltu p6,p0=r26,r27 -(p6) br.cond.spnt.few rp;; - mov r17=cr.ipsr;; - // slow path if: ipsr.be==1, ipsr.pp==1 - extr.u r21=r17,IA64_PSR_BE_BIT,1 ;; - cmp.ne p6,p0=r21,r0 -(p6) br.cond.spnt.few rp;; - extr.u r21=r17,IA64_PSR_PP_BIT,1 ;; - cmp.ne p6,p0=r21,r0 -(p6) br.cond.spnt.few rp;; - // definitely have a domain tick - mov cr.eoi=r0;; - mov rp=r29;; - mov cr.itm=r26;; // ensure next tick -#ifdef FAST_REFLECT_CNT - movl r20=fast_reflect_count+((0x3000>>8)*8);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - // vcpu_pend_timer(current) - movl r18=XSI_PSR_IC;; - adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r20=[r20];; - cmp.eq p6,p0=r20,r0 // if cr.itv==0 done -(p6) br.cond.spnt.few fast_tick_reflect_done;; - tbit.nz p6,p0=r20,16;; // check itv.m (discard) bit -(p6) br.cond.spnt.few fast_tick_reflect_done;; - extr.u r27=r20,0,6 // r27 has low 6 bits of itv.vector - extr.u r26=r20,6,2;; // r26 has irr index of itv.vector - movl r19=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r19=[r19];; - adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r19 - adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r19;; - ld8 r24=[r22];; - ld8 r23=[r23];; - cmp.eq p6,p0=r23,r24 // skip if this tick already delivered -(p6) br.cond.spnt.few 
fast_tick_reflect_done;; - // set irr bit - adds r21=IA64_VCPU_IRR0_OFFSET,r19; - shl r26=r26,3;; - add r21=r21,r26;; - mov r25=1;; - shl r22=r25,r27;; - ld8 r23=[r21];; - or r22=r22,r23;; - st8 [r21]=r22;; - // set PSCB(pending_interruption)! - adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r20]=r25;; - - // if interrupted at pl0, we're done - extr.u r16=r17,IA64_PSR_CPL0_BIT,2;; - cmp.eq p6,p0=r16,r0;; -(p6) br.cond.spnt.few fast_tick_reflect_done;; - // if guest vpsr.i is off, we're done - adds r21=XSI_PSR_I_OFS-XSI_PSR_IC_OFS,r18 ;; - ld4 r21=[r21];; - cmp.eq p6,p0=r21,r0 -(p6) br.cond.spnt.few fast_tick_reflect_done;; - - // OK, we have a clock tick to deliver to the active domain! - // so deliver to iva+0x3000 - // r17 == cr.ipsr - // r18 == XSI_PSR_IC - // r19 == IA64_KR(CURRENT) - // r31 == pr - mov r16=cr.isr;; - mov r29=cr.iip;; - adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r29 ;; - // set shared_mem isr - extr.u r16=r16,38,1;; // grab cr.isr.ir bit - dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero) - extr.u r20=r17,41,2 ;; // get ipsr.ri - dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei - adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r16 ;; - // set cr.ipsr (make sure cpl==2!) - mov r29=r17 ;; - movl r28=DELIVER_PSR_SET;; - movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);; - or r29=r29,r28;; - and r29=r29,r27;; - mov cr.ipsr=r29;; - // set shared_mem ipsr (from ipsr in r17 with ipsr.ri already set) - extr.u r29=r17,IA64_PSR_CPL0_BIT,2;; - cmp.eq p6,p7=3,r29;; -(p6) dep r17=-1,r17,IA64_PSR_CPL0_BIT,2 -(p7) dep r17=0,r17,IA64_PSR_CPL0_BIT,2 - ;; - movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);; - movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN|IA64_PSR_I|IA64_PSR_IC);; - dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;; - or r17=r17,r28;; - and r17=r17,r27;; - ld4 r16=[r18],4;; - cmp.ne p6,p0=r16,r0;; -(p6) dep r17=-1,r17,IA64_PSR_IC_BIT,1 ;; - ld4 r16=[r18],-4;; - cmp.ne p6,p0=r16,r0;; -(p6) dep r17=-1,r17,IA64_PSR_I_BIT,1 ;; - adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r17 ;; - // set shared_mem interrupt_delivery_enabled to 0 - // set shared_mem interrupt_collection_enabled to 0 - st8 [r18]=r0;; - // cover and set shared_mem precover_ifs to cr.ifs - // set shared_mem ifs and incomplete_regframe to 0 - cover ;; - mov r20=cr.ifs;; - adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r21]=r0 ;; - adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r0 ;; - adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r20 ;; - // leave cr.ifs alone for later rfi - // set iip to go to domain IVA break instruction vector - adds r22=IA64_VCPU_IVA_OFFSET,r19;; - ld8 r23=[r22];; - movl r24=0x3000;; - add r24=r24,r23;; - mov cr.iip=r24;; - // OK, now all set to go except for switch to virtual bank0 - mov r30=r2; mov r29=r3;; -#ifdef HANDLE_AR_UNAT - mov r28=ar.unat; -#endif - adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; - adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; - bsw.1;; - .mem.offset 0,0; st8.spill [r2]=r16,16; - .mem.offset 8,0; st8.spill [r3]=r17,16 ;; - .mem.offset 0,0; st8.spill [r2]=r18,16; - .mem.offset 8,0; st8.spill [r3]=r19,16 ;; - .mem.offset 0,0; st8.spill [r2]=r20,16; - .mem.offset 8,0; st8.spill [r3]=r21,16 ;; - .mem.offset 0,0; st8.spill [r2]=r22,16; - .mem.offset 8,0; st8.spill [r3]=r23,16 ;; - .mem.offset 0,0; st8.spill [r2]=r24,16; - .mem.offset 8,0; st8.spill [r3]=r25,16 ;; - .mem.offset 0,0; st8.spill [r2]=r26,16; - .mem.offset 8,0; st8.spill [r3]=r27,16 ;; - .mem.offset 0,0; st8.spill [r2]=r28,16; - .mem.offset 8,0; 
st8.spill [r3]=r29,16 ;; - .mem.offset 0,0; st8.spill [r2]=r30,16; - .mem.offset 8,0; st8.spill [r3]=r31,16 ;; -#ifdef HANDLE_AR_UNAT - // bank0 regs have no NaT bit, so ensure they are NaT clean - mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0; - mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0; - mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0; - mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;; -#endif - bsw.0 ;; - mov r2=r30; mov r3=r29;; -#ifdef HANDLE_AR_UNAT - mov ar.unat=r28; -#endif - adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r20]=r0 ;; -fast_tick_reflect_done: - mov pr=r31,-1 ;; - rfi -END(fast_tick_reflect) - -// reflect domain breaks directly to domain -// r16 == cr.isr -// r17 == cr.iim -// r18 == XSI_PSR_IC -// r19 == vpsr.ic (low 32 bits) | vpsr.i (high 32 bits) -// r31 == pr -GLOBAL_ENTRY(fast_break_reflect) -#ifndef FAST_BREAK // see beginning of file - br.sptk.many dispatch_break_fault ;; -#endif - mov r30=cr.ipsr;; - mov r29=cr.iip;; - extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; - cmp.ne p7,p0=r21,r0 ;; -(p7) br.spnt.few dispatch_break_fault ;; - extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; - cmp.ne p7,p0=r21,r0 ;; -(p7) br.spnt.few dispatch_break_fault ;; -#if 1 /* special handling in case running on simulator */ - movl r20=first_break;; - ld4 r23=[r20];; - movl r21=0x80001; - movl r22=0x80002;; - cmp.ne p7,p0=r23,r0;; -(p7) br.spnt.few dispatch_break_fault ;; - cmp.eq p7,p0=r21,r17; -(p7) br.spnt.few dispatch_break_fault ;; - cmp.eq p7,p0=r22,r17; -(p7) br.spnt.few dispatch_break_fault ;; -#endif - movl r20=0x2c00; - // save iim in shared_info - adds r21=XSI_IIM_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r17;; - // fall through - - -// reflect to domain ivt+r20 -// sets up isr,iip,ipsr,ifs (FIXME: do iipa too) -// r16 == cr.isr -// r18 == XSI_PSR_IC -// r20 == offset into ivt -// r29 == iip -// r30 == ipsr -// r31 == pr -ENTRY(fast_reflect) -#ifdef FAST_REFLECT_CNT - movl r22=fast_reflect_count; - shr r23=r20,5;; - add r22=r22,r23;; - ld8 r21=[r22];; - adds r21=1,r21;; - st8 [r22]=r21;; -#endif - // save iip in shared_info (DON'T POINT TO NEXT INSTRUCTION!) 
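The FAST_REFLECT_CNT block just above bumps a per-vector counter indexed straight off the IVT offset in r20: offsets are multiples of 0x100 and each counter is 8 bytes wide, so a single shift by 5 (offset/32) produces the same byte offset as (offset >> 8) * 8 would. Reduced to C (the array name mirrors the one above; its size here is only a guess):

    #include <stdint.h>

    static uint64_t fast_reflect_count[0x8000 >> 8];   /* one slot per 0x100 of IVT */

    static void count_reflection(unsigned long ivt_offset)   /* e.g. 0x2c00 */
    {
        /* Equivalent to the assembly's "shr r23=r20,5": the byte offset
         * ivt_offset >> 5 is exactly (ivt_offset >> 8) * sizeof(uint64_t). */
        fast_reflect_count[ivt_offset >> 8]++;
    }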
- adds r21=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r29;; - // set shared_mem isr - adds r21=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r16 ;; - // set cr.ipsr - mov r29=r30 ;; - movl r28=DELIVER_PSR_SET;; - movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);; - or r29=r29,r28;; - and r29=r29,r27;; - mov cr.ipsr=r29;; - // set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set) - extr.u r29=r30,IA64_PSR_CPL0_BIT,2;; - cmp.eq p6,p7=3,r29;; -(p6) dep r30=-1,r30,IA64_PSR_CPL0_BIT,2 -(p7) dep r30=0,r30,IA64_PSR_CPL0_BIT,2 - ;; - movl r28=(IA64_PSR_DT|IA64_PSR_IT|IA64_PSR_RT);; - movl r27=~(IA64_PSR_BE|IA64_PSR_PP|IA64_PSR_BN);; - or r30=r30,r28;; - and r30=r30,r27;; - // also set shared_mem ipsr.i and ipsr.ic appropriately - ld8 r24=[r18];; - extr.u r22=r24,32,32 - cmp4.eq p6,p7=r24,r0;; -(p6) dep r30=0,r30,IA64_PSR_IC_BIT,1 -(p7) dep r30=-1,r30,IA64_PSR_IC_BIT,1 ;; - cmp4.eq p6,p7=r22,r0;; -(p6) dep r30=0,r30,IA64_PSR_I_BIT,1 -(p7) dep r30=-1,r30,IA64_PSR_I_BIT,1 ;; - adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r30 ;; - // set shared_mem interrupt_delivery_enabled to 0 - // set shared_mem interrupt_collection_enabled to 0 - st8 [r18]=r0;; - // cover and set shared_mem precover_ifs to cr.ifs - // set shared_mem ifs and incomplete_regframe to 0 - cover ;; - mov r24=cr.ifs;; - adds r21=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r21]=r0 ;; - adds r21=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r0 ;; - adds r21=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r24 ;; - // vpsr.i = vpsr.ic = 0 on delivery of interruption - st8 [r18]=r0;; - // FIXME: need to save iipa and isr to be arch-compliant - // set iip to go to domain IVA break instruction vector - movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r22=[r22];; - adds r22=IA64_VCPU_IVA_OFFSET,r22;; - ld8 r23=[r22];; - add r20=r20,r23;; - mov cr.iip=r20;; - // OK, now all set to go except for switch to virtual bank0 - mov r30=r2; mov r29=r3;; -#ifdef HANDLE_AR_UNAT - mov r28=ar.unat; -#endif - adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; - adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; - bsw.1;; - .mem.offset 0,0; st8.spill [r2]=r16,16; - .mem.offset 8,0; st8.spill [r3]=r17,16 ;; - .mem.offset 0,0; st8.spill [r2]=r18,16; - .mem.offset 8,0; st8.spill [r3]=r19,16 ;; - .mem.offset 0,0; st8.spill [r2]=r20,16; - .mem.offset 8,0; st8.spill [r3]=r21,16 ;; - .mem.offset 0,0; st8.spill [r2]=r22,16; - .mem.offset 8,0; st8.spill [r3]=r23,16 ;; - .mem.offset 0,0; st8.spill [r2]=r24,16; - .mem.offset 8,0; st8.spill [r3]=r25,16 ;; - .mem.offset 0,0; st8.spill [r2]=r26,16; - .mem.offset 8,0; st8.spill [r3]=r27,16 ;; - .mem.offset 0,0; st8.spill [r2]=r28,16; - .mem.offset 8,0; st8.spill [r3]=r29,16 ;; - .mem.offset 0,0; st8.spill [r2]=r30,16; - .mem.offset 8,0; st8.spill [r3]=r31,16 ;; -#ifdef HANDLE_AR_UNAT - // bank0 regs have no NaT bit, so ensure they are NaT clean - mov r16=r0; mov r17=r0; mov r18=r0; mov r19=r0; - mov r20=r0; mov r21=r0; mov r22=r0; mov r23=r0; - mov r24=r0; mov r25=r0; mov r26=r0; mov r27=r0; - mov r28=r0; mov r29=r0; mov r30=r0; movl r31=XSI_IPSR;; -#endif - movl r31=XSI_IPSR;; - bsw.0 ;; - mov r2=r30; mov r3=r29;; -#ifdef HANDLE_AR_UNAT - mov ar.unat=r28; -#endif - adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r20]=r0 ;; - mov pr=r31,-1 ;; - rfi - ;; - -// reflect access faults (0x2400,0x2800,0x5300) directly to domain -// r16 == isr -// r17 == ifa -// r19 == reflect number (only pass-thru to dispatch_reflection) -// r20 == offset into ivt -// r31 == pr -GLOBAL_ENTRY(fast_access_reflect) 
-#ifndef FAST_ACCESS_REFLECT // see beginning of file - br.spnt.few dispatch_reflection ;; -#endif - mov r30=cr.ipsr;; - mov r29=cr.iip;; - extr.u r21=r30,IA64_PSR_BE_BIT,1 ;; - cmp.ne p7,p0=r21,r0 -(p7) br.spnt.few dispatch_reflection ;; - extr.u r21=r30,IA64_PSR_PP_BIT,1 ;; - cmp.ne p7,p0=r21,r0 -(p7) br.spnt.few dispatch_reflection ;; - extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;; - cmp.eq p7,p0=r21,r0 -(p7) br.spnt.few dispatch_reflection ;; - movl r18=XSI_PSR_IC;; - ld8 r21=[r18];; - cmp.eq p7,p0=r0,r21 -(p7) br.spnt.few dispatch_reflection ;; - // set shared_mem ifa, FIXME: should we validate it? - mov r17=cr.ifa;; - adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r21]=r17 ;; - // get rr[ifa] and save to itir in shared memory (extra bits ignored) - shr.u r22=r17,61 - adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 - adds r21=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; - shladd r22=r22,3,r21;; - ld8 r22=[r22];; - st8 [r23]=r22;; - br.cond.sptk.many fast_reflect;; - - -// ensure that, if giving up, registers at entry to fast_hyperprivop unchanged -ENTRY(hyper_rfi) -#ifndef FAST_RFI - br.spnt.few dispatch_break_fault ;; -#endif - // if no interrupts pending, proceed - mov r30=r0 - cmp.eq p7,p0=r20,r0 -(p7) br.sptk.many 1f - ;; - adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r21=[r20];; // r21 = vcr.ipsr - extr.u r22=r21,IA64_PSR_I_BIT,1 ;; - mov r30=r22 - // r30 determines whether we might deliver an immediate extint -1: - adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r21=[r20];; // r21 = vcr.ipsr - extr.u r22=r21,IA64_PSR_BE_BIT,1 ;; - // if turning on psr.be, give up for now and do it the slow way - cmp.ne p7,p0=r22,r0 -(p7) br.spnt.few dispatch_break_fault ;; - // if (!(vpsr.dt && vpsr.rt && vpsr.it)), do it the slow way - movl r20=(IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT);; - and r22=r20,r21 - ;; - cmp.ne p7,p0=r22,r20 -(p7) br.spnt.few dispatch_break_fault ;; - // if was in metaphys mode, do it the slow way (FIXME later?) - adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;; - ld4 r20=[r20];; - cmp.ne p7,p0=r20,r0 -(p7) br.spnt.few dispatch_break_fault ;; - // if domain hasn't already done virtual bank switch - // do it the slow way (FIXME later?) -#if 0 - adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; - ld4 r20=[r20];; - cmp.eq p7,p0=r20,r0 -(p7) br.spnt.few dispatch_break_fault ;; -#endif - // validate vcr.iip, if in Xen range, do it the slow way - adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r22=[r20];; - movl r23=XEN_VIRT_SPACE_LOW - movl r24=XEN_VIRT_SPACE_HIGH ;; - cmp.ltu p0,p7=r22,r23 ;; // if !(iip<low) && -(p7) cmp.geu p0,p7=r22,r24 ;; // !(iip>=high) -(p7) br.spnt.few dispatch_break_fault ;; -#ifndef RFI_TO_INTERRUPT // see beginning of file - cmp.ne p6,p0=r30,r0 -(p6) br.cond.spnt.few dispatch_break_fault ;; -#endif - -1: // OK now, let's do an rfi. -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);; - ld8 r23=[r20];; - adds r23=1,r23;; - st8 [r20]=r23;; -#endif -#ifdef RFI_TO_INTERRUPT - // maybe do an immediate interrupt delivery? 
- cmp.ne p6,p0=r30,r0 -(p6) br.cond.spnt.few rfi_check_extint;; -#endif - -just_do_rfi: - // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip - mov cr.iip=r22;; - adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r20]=r0 ;; - adds r20=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r20=[r20];; - dep r20=0,r20,38,25;; // ensure ifs has no reserved bits set - mov cr.ifs=r20 ;; - // ipsr.cpl == (vcr.ipsr.cpl == 0) 2 : 3; - dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;; - // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic - mov r19=r0 ;; - extr.u r23=r21,IA64_PSR_I_BIT,1 ;; - cmp.ne p7,p6=r23,r0 ;; - // not done yet -(p7) dep r19=-1,r19,32,1 - extr.u r23=r21,IA64_PSR_IC_BIT,1 ;; - cmp.ne p7,p6=r23,r0 ;; -(p7) dep r19=-1,r19,0,1 ;; - st8 [r18]=r19 ;; - // force on psr.ic, i, dt, rt, it, bn - movl r20=(IA64_PSR_I|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_IT|IA64_PSR_BN) - ;; - or r21=r21,r20 - ;; - mov cr.ipsr=r21 - adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; - ld4 r21=[r20];; - cmp.ne p7,p0=r21,r0 // domain already did "bank 1 switch?" -(p7) br.cond.spnt.few 1f; - // OK, now all set to go except for switch to virtual bank1 - mov r22=1;; st4 [r20]=r22; - mov r30=r2; mov r29=r3;; - adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; - adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; - bsw.1;; - // FIXME?: ar.unat is not really handled correctly, - // but may not matter if the OS is NaT-clean - .mem.offset 0,0; ld8.fill r16=[r2],16 ; - .mem.offset 8,0; ld8.fill r17=[r3],16 ;; - .mem.offset 0,0; ld8.fill r18=[r2],16 ; - .mem.offset 0,0; ld8.fill r19=[r3],16 ;; - .mem.offset 8,0; ld8.fill r20=[r2],16 ; - .mem.offset 8,0; ld8.fill r21=[r3],16 ;; - .mem.offset 8,0; ld8.fill r22=[r2],16 ; - .mem.offset 8,0; ld8.fill r23=[r3],16 ;; - .mem.offset 8,0; ld8.fill r24=[r2],16 ; - .mem.offset 8,0; ld8.fill r25=[r3],16 ;; - .mem.offset 8,0; ld8.fill r26=[r2],16 ; - .mem.offset 8,0; ld8.fill r27=[r3],16 ;; - .mem.offset 8,0; ld8.fill r28=[r2],16 ; - .mem.offset 8,0; ld8.fill r29=[r3],16 ;; - .mem.offset 8,0; ld8.fill r30=[r2],16 ; - .mem.offset 8,0; ld8.fill r31=[r3],16 ;; - bsw.0 ;; - mov r2=r30; mov r3=r29;; -1: mov pr=r31,-1 - ;; - rfi - ;; - -#ifdef RFI_TO_INTERRUPT -GLOBAL_ENTRY(rfi_check_extint) - //br.sptk.many dispatch_break_fault ;; - - // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip - // make sure none of these get trashed in case going to just_do_rfi - movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r30=[r30];; - adds r24=IA64_VCPU_INSVC3_OFFSET,r30;; - mov r25=192 - adds r16=IA64_VCPU_IRR3_OFFSET,r30;; - ld8 r23=[r16];; - cmp.eq p6,p0=r23,r0;; -(p6) adds r16=-8,r16;; -(p6) adds r24=-8,r24;; -(p6) adds r25=-64,r25;; -(p6) ld8 r23=[r16];; -(p6) cmp.eq p6,p0=r23,r0;; -(p6) adds r16=-8,r16;; -(p6) adds r24=-8,r24;; -(p6) adds r25=-64,r25;; -(p6) ld8 r23=[r16];; -(p6) cmp.eq p6,p0=r23,r0;; -(p6) adds r16=-8,r16;; -(p6) adds r24=-8,r24;; -(p6) adds r25=-64,r25;; -(p6) ld8 r23=[r16];; -(p6) cmp.eq p6,p0=r23,r0;; - cmp.eq p6,p0=r23,r0 -(p6) br.cond.spnt.few just_do_rfi; // this is actually an error - // r16 points to non-zero element of irr, r23 has value - // r24 points to corr element of insvc, r25 has elt*64 - ld8 r26=[r24];; - cmp.geu p6,p0=r26,r23 -(p6) br.cond.spnt.many just_do_rfi; - - // not masked by insvc, get vector number - shr.u r26=r23,1;; - or r26=r23,r26;; - shr.u r27=r26,2;; - or r26=r26,r27;; - shr.u r27=r26,4;; - or r26=r26,r27;; - shr.u r27=r26,8;; - or r26=r26,r27;; - shr.u r27=r26,16;; - or r26=r26,r27;; - shr.u r27=r26,32;; - or r26=r26,r27;; - andcm r26=0xffffffffffffffff,r26;; - popcnt r26=r26;; - 
sub r26=63,r26;; - // r26 now contains the bit index (mod 64) - mov r27=1;; - shl r27=r27,r26;; - // r27 now contains the (within the proper word) bit mask - add r26=r25,r26 - // r26 now contains the vector [0..255] - adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r20=[r20] ;; - extr.u r28=r20,16,1 - extr.u r29=r20,4,4 ;; - cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, just rfi -(p6) br.cond.spnt.few just_do_rfi;; - shl r29=r29,4;; - adds r29=15,r29;; - cmp.ge p6,p0=r29,r26 // if tpr masks interrupt, just rfi -(p6) br.cond.spnt.few just_do_rfi;; - -// this doesn't work yet (dies early after getting to user mode) -// but happens relatively infrequently, so fix it later. -// NOTE that these will be counted incorrectly for now (for privcnt output) -GLOBAL_ENTRY(rfi_with_interrupt) -#if 1 - br.sptk.many dispatch_break_fault ;; -#endif - - // OK, have an unmasked vector, so deliver extint to vcr.iva+0x3000 - // r18 == XSI_PSR_IC - // r21 == vipsr (ipsr in shared_mem) - // r30 == IA64_KR(CURRENT) - // r31 == pr - mov r17=cr.ipsr;; - mov r16=cr.isr;; - // set shared_mem isr - extr.u r16=r16,38,1;; // grab cr.isr.ir bit - dep r16=r16,r0,38,1 ;; // insert into cr.isr (rest of bits zero) - extr.u r20=r21,41,2 ;; // get v(!)psr.ri - dep r16=r20,r16,41,2 ;; // deposit cr.isr.ei - adds r22=XSI_ISR_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r22]=r16 ;; - // set cr.ipsr (make sure cpl==2!) - mov r29=r17 ;; - movl r28=DELIVER_PSR_SET;; - movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);; - or r29=r29,r28;; - and r29=r29,r27;; - mov cr.ipsr=r29;; - // v.ipsr and v.iip are already set (and v.iip validated) as rfi target - // set shared_mem interrupt_delivery_enabled to 0 - // set shared_mem interrupt_collection_enabled to 0 - st8 [r18]=r0;; - // cover and set shared_mem precover_ifs to cr.ifs - // set shared_mem ifs and incomplete_regframe to 0 -#if 0 - cover ;; - mov r20=cr.ifs;; - adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r22]=r0 ;; - adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r22]=r0 ;; - adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r22]=r20 ;; - // leave cr.ifs alone for later rfi -#else - adds r22=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r22]=r0 ;; - adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r20=[r22];; - st8 [r22]=r0 ;; - adds r22=XSI_PRECOVER_IFS_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r22]=r20 ;; -#endif - // set iip to go to domain IVA break instruction vector - adds r22=IA64_VCPU_IVA_OFFSET,r30;; - ld8 r23=[r22];; - movl r24=0x3000;; - add r24=r24,r23;; - mov cr.iip=r24;; -#if 0 - // OK, now all set to go except for switch to virtual bank0 - mov r30=r2; mov r29=r3;; - adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; - adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; - bsw.1;; - // FIXME: need to handle ar.unat! 
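The shr/or cascade ending in popcnt above (repeated later in hyper_get_ivr and hyper_eoi) is a branch-free highest-set-bit search: smear the most significant pending bit down through the word, complement, and count what is left. A C sketch of the same idea; highest_set_bit is an invented name and GCC's __builtin_popcountl stands in for the popcnt instruction:

    static inline int highest_set_bit(unsigned long x)    /* caller ensures x != 0 */
    {
            x |= x >> 1;  x |= x >> 2;  x |= x >> 4;       /* smear the MSB downward */
            x |= x >> 8;  x |= x >> 16; x |= x >> 32;
            return 63 - __builtin_popcountl(~x);           /* count the bits above it */
    }

    /* pending vector = 64 * irr_word_index + highest_set_bit(irr_word) */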
- .mem.offset 0,0; st8.spill [r2]=r16,16; - .mem.offset 8,0; st8.spill [r3]=r17,16 ;; - .mem.offset 0,0; st8.spill [r2]=r18,16; - .mem.offset 8,0; st8.spill [r3]=r19,16 ;; - .mem.offset 0,0; st8.spill [r2]=r20,16; - .mem.offset 8,0; st8.spill [r3]=r21,16 ;; - .mem.offset 0,0; st8.spill [r2]=r22,16; - .mem.offset 8,0; st8.spill [r3]=r23,16 ;; - .mem.offset 0,0; st8.spill [r2]=r24,16; - .mem.offset 8,0; st8.spill [r3]=r25,16 ;; - .mem.offset 0,0; st8.spill [r2]=r26,16; - .mem.offset 8,0; st8.spill [r3]=r27,16 ;; - .mem.offset 0,0; st8.spill [r2]=r28,16; - .mem.offset 8,0; st8.spill [r3]=r29,16 ;; - .mem.offset 0,0; st8.spill [r2]=r30,16; - .mem.offset 8,0; st8.spill [r3]=r31,16 ;; - movl r31=XSI_IPSR;; - bsw.0 ;; - mov r2=r30; mov r3=r29;; -#else - bsw.1;; - movl r31=XSI_IPSR;; - bsw.0 ;; -#endif - adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; - st4 [r20]=r0 ;; - mov pr=r31,-1 ;; - rfi -#endif // RFI_TO_INTERRUPT - -ENTRY(hyper_cover) -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - mov r24=cr.ipsr - mov r25=cr.iip;; - // skip test for vpsr.ic.. it's a prerequisite for hyperprivops - cover ;; - adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; - mov r30=cr.ifs;; - adds r22=XSI_IFS_OFS-XSI_PSR_IC_OFS,r18 - ld4 r21=[r20] ;; - cmp.eq p6,p7=r21,r0 ;; -(p6) st8 [r22]=r30;; -(p7) st4 [r20]=r0;; - mov cr.ifs=r0;; - // adjust return address to skip over break instruction - extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; - -// return from metaphysical mode (meta=1) to virtual mode (meta=0) -ENTRY(hyper_ssm_dt) -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SSM_DT);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - mov r24=cr.ipsr - mov r25=cr.iip;; - adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;; - ld4 r21=[r20];; - cmp.eq p7,p0=r21,r0 // meta==0? -(p7) br.spnt.many 1f ;; // already in virtual mode - movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r22=[r22];; - adds r22=IA64_VCPU_META_SAVED_RR0_OFFSET,r22;; - ld4 r23=[r22];; - mov rr[r0]=r23;; - srlz.i;; - st4 [r20]=r0 ;; - // adjust return address to skip over break instruction -1: extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; - -// go to metaphysical mode (meta=1) from virtual mode (meta=0) -ENTRY(hyper_rsm_dt) -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RSM_DT);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - mov r24=cr.ipsr - mov r25=cr.iip;; - adds r20=XSI_METAPHYS_OFS-XSI_PSR_IC_OFS,r18 ;; - ld4 r21=[r20];; - cmp.ne p7,p0=r21,r0 // meta==0? 
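Each fast hyperprivop above returns to the guest by stepping cr.ipsr/cr.iip past the break that raised it: psr.ri (bits 41-42) selects the slot within the current bundle, so the epilogue either bumps ri or, from slot 2, clears it and advances iip by one 16-byte bundle. A minimal C sketch of that shared epilogue; skip_over_break is an invented name:

    static inline void skip_over_break(unsigned long *ipsr, unsigned long *iip)
    {
            unsigned long ri = (*ipsr >> 41) & 3;          /* psr.ri, bits 41-42 */

            if (ri == 2) {                                 /* last slot: next bundle */
                    ri = 0;
                    *iip += 16;
            } else {
                    ri++;
            }
            *ipsr = (*ipsr & ~(3UL << 41)) | (ri << 41);
    }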
-(p7) br.spnt.many 1f ;; // already in metaphysical mode - movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r22=[r22];; - adds r22=IA64_VCPU_META_RR0_OFFSET,r22;; - ld4 r23=[r22];; - mov rr[r0]=r23;; - srlz.i;; - adds r21=1,r0 ;; - st4 [r20]=r21 ;; - // adjust return address to skip over break instruction -1: extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; - -ENTRY(hyper_get_tpr) -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - mov r24=cr.ipsr - mov r25=cr.iip;; - adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r8=[r20];; - extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; -END(hyper_get_tpr) - -// if we get to here, there are no interrupts pending so we -// can change virtual tpr to any value without fear of provoking -// (or accidentally missing) delivering an interrupt -ENTRY(hyper_set_tpr) -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - mov r24=cr.ipsr - mov r25=cr.iip;; - movl r27=0xff00;; - adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; - andcm r8=r8,r27;; - st8 [r20]=r8;; - extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; -END(hyper_set_tpr) - -ENTRY(hyper_get_ivr) -#ifdef FAST_HYPERPRIVOP_CNT - movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);; - ld8 r21=[r22];; - adds r21=1,r21;; - st8 [r22]=r21;; -#endif - mov r8=15;; - // when we get to here r20=~=interrupts pending - cmp.eq p7,p0=r20,r0;; -(p7) adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;; -(p7) st4 [r20]=r0;; -(p7) br.spnt.many 1f ;; - movl r30=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r30=[r30];; - adds r24=IA64_VCPU_INSVC3_OFFSET,r30;; - mov r25=192 - adds r22=IA64_VCPU_IRR3_OFFSET,r30;; - ld8 r23=[r22];; - cmp.eq p6,p0=r23,r0;; -(p6) adds r22=-8,r22;; -(p6) adds r24=-8,r24;; -(p6) adds r25=-64,r25;; -(p6) ld8 r23=[r22];; -(p6) cmp.eq p6,p0=r23,r0;; -(p6) adds r22=-8,r22;; -(p6) adds r24=-8,r24;; -(p6) adds r25=-64,r25;; -(p6) ld8 r23=[r22];; -(p6) cmp.eq p6,p0=r23,r0;; -(p6) adds r22=-8,r22;; -(p6) adds r24=-8,r24;; -(p6) adds r25=-64,r25;; -(p6) ld8 r23=[r22];; -(p6) cmp.eq p6,p0=r23,r0;; - cmp.eq p6,p0=r23,r0 -(p6) br.cond.spnt.few 1f; // this is actually an error - // r22 points to non-zero element of irr, r23 has value - // r24 points to corr element of insvc, r25 has elt*64 - ld8 r26=[r24];; - cmp.geu p6,p0=r26,r23 -(p6) br.cond.spnt.many 1f; - // not masked by insvc, get vector number - shr.u r26=r23,1;; - or r26=r23,r26;; - shr.u r27=r26,2;; - or r26=r26,r27;; - shr.u r27=r26,4;; - or r26=r26,r27;; - shr.u r27=r26,8;; - or r26=r26,r27;; - shr.u r27=r26,16;; - or r26=r26,r27;; - shr.u r27=r26,32;; - or r26=r26,r27;; - andcm r26=0xffffffffffffffff,r26;; - popcnt r26=r26;; - sub r26=63,r26;; - // r26 now contains the bit index (mod 64) - mov r27=1;; - shl r27=r27,r26;; - // r27 now contains the (within the proper word) bit mask - add r26=r25,r26 - // r26 now contains the vector [0..255] - adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r20=[r20] ;; - extr.u 
r28=r20,16,1 - extr.u r29=r20,4,4 ;; - cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, return SPURIOUS -(p6) br.cond.spnt.few 1f; - shl r29=r29,4;; - adds r29=15,r29;; - cmp.ge p6,p0=r29,r26 -(p6) br.cond.spnt.few 1f; - // OK, have an unmasked vector to process/return - ld8 r25=[r24];; - or r25=r25,r27;; - st8 [r24]=r25;; - ld8 r25=[r22];; - andcm r25=r25,r27;; - st8 [r22]=r25;; - mov r8=r26;; - // if its a clock tick, remember itm to avoid delivering it twice - adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;; - ld8 r20=[r20];; - extr.u r20=r20,0,8;; - cmp.eq p6,p0=r20,r8 - adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r30 - adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r30;; - ld8 r23=[r23];; -(p6) st8 [r22]=r23;; - // all done -1: mov r24=cr.ipsr - mov r25=cr.iip;; - extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; -END(hyper_get_ivr) - -ENTRY(hyper_eoi) - // when we get to here r20=~=interrupts pending - cmp.ne p7,p0=r20,r0 -(p7) br.spnt.many dispatch_break_fault ;; -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r22=[r22];; - adds r22=IA64_VCPU_INSVC3_OFFSET,r22;; - ld8 r23=[r22];; - cmp.eq p6,p0=r23,r0;; -(p6) adds r22=-8,r22;; -(p6) ld8 r23=[r22];; -(p6) cmp.eq p6,p0=r23,r0;; -(p6) adds r22=-8,r22;; -(p6) ld8 r23=[r22];; -(p6) cmp.eq p6,p0=r23,r0;; -(p6) adds r22=-8,r22;; -(p6) ld8 r23=[r22];; -(p6) cmp.eq p6,p0=r23,r0;; - cmp.eq p6,p0=r23,r0 -(p6) br.cond.spnt.few 1f; // this is actually an error - // r22 points to non-zero element of insvc, r23 has value - shr.u r24=r23,1;; - or r24=r23,r24;; - shr.u r25=r24,2;; - or r24=r24,r25;; - shr.u r25=r24,4;; - or r24=r24,r25;; - shr.u r25=r24,8;; - or r24=r24,r25;; - shr.u r25=r24,16;; - or r24=r24,r25;; - shr.u r25=r24,32;; - or r24=r24,r25;; - andcm r24=0xffffffffffffffff,r24;; - popcnt r24=r24;; - sub r24=63,r24;; - // r24 now contains the bit index - mov r25=1;; - shl r25=r25,r24;; - andcm r23=r23,r25;; - st8 [r22]=r23;; -1: mov r24=cr.ipsr - mov r25=cr.iip;; - extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; -END(hyper_eoi) - -ENTRY(hyper_set_itm) - // when we get to here r20=~=interrupts pending - cmp.ne p7,p0=r20,r0 -(p7) br.spnt.many dispatch_break_fault ;; -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - movl r20=THIS_CPU(cpu_info)+IA64_CPUINFO_ITM_NEXT_OFFSET;; - ld8 r21=[r20];; - movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r20=[r20];; - adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;; - st8 [r20]=r8;; - cmp.geu p6,p0=r21,r8;; -(p6) mov r21=r8;; - // now "safe set" cr.itm=r21 - mov r23=100;; -2: mov cr.itm=r21;; - srlz.d;; - mov r22=ar.itc ;; - cmp.leu p6,p0=r21,r22;; - add r21=r21,r23;; - shl r23=r23,1;; -(p6) br.cond.spnt.few 2b;; -1: mov r24=cr.ipsr - mov r25=cr.iip;; - extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; -END(hyper_set_itm) - -ENTRY(hyper_get_rr) -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);; 
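The retry loop in hyper_set_itm above guards against programming a match value that has already passed: after each write of cr.itm it rereads ar.itc, and if the target is not in the future it pushes the target forward by an exponentially growing margin and tries again. A C sketch under the assumption of simple accessors; safe_set_itm, write_cr_itm and read_ar_itc are invented names:

    extern void write_cr_itm(unsigned long val);           /* invented: "mov cr.itm=" */
    extern unsigned long read_ar_itc(void);                /* invented: "mov =ar.itc" */

    static void safe_set_itm(unsigned long target)
    {
            unsigned long margin = 100;                    /* initial slack, doubled on retry */

            for (;;) {
                    write_cr_itm(target);
                    if (target > read_ar_itc())            /* match safely in the future? */
                            break;
                    target += margin;
                    margin <<= 1;
            }
    }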
- ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - extr.u r25=r8,61,3;; - adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; - shl r25=r25,3;; - add r20=r20,r25;; - ld8 r8=[r20];; -1: mov r24=cr.ipsr - mov r25=cr.iip;; - extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; -END(hyper_get_rr) - -ENTRY(hyper_set_rr) - extr.u r25=r8,61,3;; - cmp.leu p7,p0=7,r25 // punt on setting rr7 -(p7) br.spnt.many dispatch_break_fault ;; -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - extr.u r26=r9,8,24 // r26 = r9.rid - movl r20=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r20=[r20];; - adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;; - ld4 r22=[r21];; - adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;; - ld4 r23=[r21];; - adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;; - add r22=r26,r22;; - cmp.geu p6,p0=r22,r23 // if r9.rid + starting_rid >= ending_rid -(p6) br.cond.spnt.few 1f; // this is an error, but just ignore/return - // r21=starting_rid - adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; - shl r25=r25,3;; - add r20=r20,r25;; - st8 [r20]=r9;; // store away exactly what was passed - // but adjust value actually placed in rr[r8] - // r22 contains adjusted rid, "mangle" it (see regionreg.c) - // and set ps to PAGE_SHIFT and ve to 1 - extr.u r27=r22,0,8 - extr.u r28=r22,8,8 - extr.u r29=r22,16,8;; - dep.z r23=PAGE_SHIFT,2,6;; - dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3 - dep r23=r27,r23,24,8;; - dep r23=r28,r23,16,8;; - dep r23=r29,r23,8,8 - cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical -(p6) st4 [r24]=r23 - mov rr[r8]=r23;; - // done, mosey on back -1: mov r24=cr.ipsr - mov r25=cr.iip;; - extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; -END(hyper_set_rr) - -// this routine was derived from optimized assembly output from -// vcpu_thash so it is dense and difficult to read but it works -// On entry: -// r18 == XSI_PSR_IC -// r31 == pr -GLOBAL_ENTRY(hyper_thash) -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_THASH);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - shr.u r20 = r8, 61 - addl r25 = 1, r0 - movl r17 = 0xe000000000000000 - ;; - and r21 = r17, r8 // VHPT_Addr1 - ;; - shladd r28 = r20, 3, r18 - adds r19 = XSI_PTA_OFS-XSI_PSR_IC_OFS, r18 - ;; - adds r27 = XSI_RR0_OFS-XSI_PSR_IC_OFS, r28 - addl r28 = 32767, r0 - ld8 r24 = [r19] // pta - ;; - ld8 r23 = [r27] // rrs[vadr>>61] - extr.u r26 = r24, 2, 6 - ;; - extr.u r22 = r23, 2, 6 - shl r30 = r25, r26 - ;; - shr.u r19 = r8, r22 - shr.u r29 = r24, 15 - ;; - adds r17 = -1, r30 - ;; - shladd r27 = r19, 3, r0 - extr.u r26 = r17, 15, 46 - ;; - andcm r24 = r29, r26 - and r19 = r28, r27 - shr.u r25 = r27, 15 - ;; - and r23 = r26, r25 - ;; - or r22 = r24, r23 - ;; - dep.z r20 = r22, 15, 46 - ;; - or r16 = r20, r21 - ;; - or r8 = r19, r16 - // done, update iip/ipsr to next instruction - mov r24=cr.ipsr - mov r25=cr.iip;; - extr.u r26=r24,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r25=16,r25 -(p7) adds r26=1,r26 - ;; - dep r24=r26,r24,41,2 - ;; - mov cr.ipsr=r24 - mov cr.iip=r25 - mov pr=r31,-1 ;; - rfi - ;; -END(hyper_thash) - -ENTRY(hyper_ptc_ga) -#ifndef FAST_PTC_GA - br.spnt.few 
dispatch_break_fault ;; -#endif - // FIXME: validate not flushing Xen addresses -#ifdef FAST_HYPERPRIVOP_CNT - movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_PTC_GA);; - ld8 r21=[r20];; - adds r21=1,r21;; - st8 [r20]=r21;; -#endif - mov r28=r8 - extr.u r19=r9,2,6 // addr_range=1<<((r9&0xfc)>>2) - mov r20=1 - shr.u r24=r8,61 - addl r27=56,r0 // PAGE_SHIFT<<2 (for ptc.ga) - movl r26=0x8000000000000000 // INVALID_TI_TAG - mov r30=ar.lc - ;; - shl r19=r20,r19 - cmp.eq p7,p0=7,r24 -(p7) br.spnt.many dispatch_break_fault ;; // slow way for rr7 - ;; - cmp.le p7,p0=r19,r0 // skip flush if size<=0 -(p7) br.cond.dpnt 2f ;; - extr.u r24=r19,0,PAGE_SHIFT - shr.u r23=r19,PAGE_SHIFT ;; // repeat loop for n pages - cmp.ne p7,p0=r24,r0 ;; -(p7) adds r23=1,r23 ;; // n_pages<size<n_pages+1? extra iter - mov ar.lc=r23 - movl r29=PAGE_SIZE;; -1: - thash r25=r28 ;; - adds r25=16,r25 ;; - ld8 r24=[r25] ;; - // FIXME: should check if tag matches, not just blow it away - or r24=r26,r24 ;; // vhpt_entry->ti_tag = 1 - st8 [r25]=r24 - ptc.ga r28,r27 ;; - srlz.i ;; - add r28=r29,r28 - br.cloop.sptk.few 1b - ;; -2: - mov ar.lc=r30 ;; - mov r29=cr.ipsr - mov r30=cr.iip;; - movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r27=[r27];; - adds r25=IA64_VCPU_DTLB_OFFSET,r27 - adds r26=IA64_VCPU_ITLB_OFFSET,r27;; - ld8 r24=[r25] - ld8 r27=[r26] ;; - and r24=-2,r24 - and r27=-2,r27 ;; - st8 [r25]=r24 // set 1-entry i/dtlb as not present - st8 [r26]=r27 ;; - // increment to point to next instruction - extr.u r26=r29,41,2 ;; - cmp.eq p6,p7=2,r26 ;; -(p6) mov r26=0 -(p6) adds r30=16,r30 -(p7) adds r26=1,r26 - ;; - dep r29=r26,r29,41,2 - ;; - mov cr.ipsr=r29 - mov cr.iip=r30 - mov pr=r31,-1 ;; - rfi - ;; -END(hyper_ptc_ga) - -ENTRY(hyper_itc_d) - br.spnt.many dispatch_break_fault ;; -END(hyper_itc_d) - -ENTRY(hyper_itc_i) - br.spnt.many dispatch_break_fault ;; -END(hyper_itc_i) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/idle0_task.c --- a/xen/arch/ia64/idle0_task.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,58 +0,0 @@ -#include <xen/config.h> -#include <xen/sched.h> -#include <asm/desc.h> - -#define INIT_MM(name) \ -{ \ - .pgd = swapper_pg_dir, \ - .mm_users = ATOMIC_INIT(2), \ - .mm_count = ATOMIC_INIT(1), \ - .page_table_lock = SPIN_LOCK_UNLOCKED, \ - .mmlist = LIST_HEAD_INIT(name.mmlist), \ -} - -#define IDLE0_EXEC_DOMAIN(_ed,_d) \ -{ \ - processor: 0, \ - mm: 0, \ - thread: INIT_THREAD, \ - domain: (_d) \ -} - -#define IDLE0_DOMAIN(_t) \ -{ \ - domain_id: IDLE_DOMAIN_ID, \ - domain_flags:DOMF_idle_domain, \ - refcnt: ATOMIC_INIT(1) \ -} - -struct mm_struct init_mm = INIT_MM(init_mm); -EXPORT_SYMBOL(init_mm); - -struct domain idle0_domain = IDLE0_DOMAIN(idle0_domain); -#if 0 -struct vcpu idle0_vcpu = IDLE0_EXEC_DOMAIN(idle0_vcpu, - &idle0_domain); -#endif - - -/* - * Initial task structure. - * - * We need to make sure that this is properly aligned due to the way process stacks are - * handled. This is done by having a special ".data.init_task" section... 
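Returning to hyper_set_rr above: the dense extr/dep sequence builds the value actually written to rr[r8] by offsetting the guest rid with starting_rid, swapping its low and high bytes (the "mangling" referred to in regionreg.c), and forcing ps=PAGE_SHIFT and ve=1. A C rendering of just that construction, sketch only; build_rr is an invented name and PAGE_SHIFT is whatever the build defines:

    static unsigned long build_rr(unsigned long rid)       /* rid already offset by starting_rid */
    {
            unsigned long b0 = rid & 0xff;
            unsigned long b1 = (rid >> 8) & 0xff;
            unsigned long b2 = (rid >> 16) & 0xff;

            return (b0 << 24) | (b1 << 16) | (b2 << 8)     /* byte-swapped rid in bits 8-31 */
                 | (PAGE_SHIFT << 2)                       /* rr.ps */
                 | 1;                                      /* rr.ve */
    }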
- */ -union { - struct { - struct domain task; - } s; - unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; -} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))); -// = {{ - ; -//.task = IDLE0_EXEC_DOMAIN(init_task_mem.s.task,&idle0_domain), -//}; -//}; - -EXPORT_SYMBOL(init_task); - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/irq.c --- a/xen/arch/ia64/irq.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1503 +0,0 @@ -/* - * linux/arch/ia64/kernel/irq.c - * - * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar - * - * This file contains the code used by various IRQ handling routines: - * asking for different IRQ's should be done through these routines - * instead of just grabbing them. Thus setups with different IRQ numbers - * shouldn't result in any weird surprises, and installing new handlers - * should be easier. - * - * Copyright (C) Ashok Raj<ashok.raj@xxxxxxxxx>, Intel Corporation 2004 - * - * 4/14/2004: Added code to handle cpu migration and do safe irq - * migration without lossing interrupts for iosapic - * architecture. - */ - -/* - * (mostly architecture independent, will move to kernel/irq.c in 2.5.) - * - * IRQs are in fact implemented a bit like signal handlers for the kernel. - * Naturally it's not a 1:1 relation, but there are similarities. - */ - -#include <linux/config.h> -#include <linux/errno.h> -#include <linux/module.h> -#ifndef XEN -#include <linux/signal.h> -#endif -#include <linux/sched.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/timex.h> -#include <linux/slab.h> -#ifndef XEN -#include <linux/random.h> -#include <linux/cpu.h> -#endif -#include <linux/ctype.h> -#ifndef XEN -#include <linux/smp_lock.h> -#endif -#include <linux/init.h> -#ifndef XEN -#include <linux/kernel_stat.h> -#endif -#include <linux/irq.h> -#ifndef XEN -#include <linux/proc_fs.h> -#endif -#include <linux/seq_file.h> -#ifndef XEN -#include <linux/kallsyms.h> -#include <linux/notifier.h> -#endif - -#include <asm/atomic.h> -#ifndef XEN -#include <asm/cpu.h> -#endif -#include <asm/io.h> -#include <asm/smp.h> -#include <asm/system.h> -#include <asm/bitops.h> -#include <asm/uaccess.h> -#include <asm/pgalloc.h> -#ifndef XEN -#include <asm/tlbflush.h> -#endif -#include <asm/delay.h> -#include <asm/irq.h> - -#ifdef XEN -#include <xen/event.h> -#define _irq_desc irq_desc -#define irq_descp(irq) &irq_desc[irq] -#define apicid_to_phys_cpu_present(x) 1 -#endif - - -/* - * Linux has a controller-independent x86 interrupt architecture. - * every controller has a 'controller-template', that is used - * by the main code to do the right thing. Each driver-visible - * interrupt source is transparently wired to the appropriate - * controller. Thus drivers need not be aware of the - * interrupt-controller. - * - * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC, - * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC. - * (IO-APICs assumed to be messaging to Pentium local-APICs) - * - * the code is designed to be easily extended with new/different - * interrupt controllers, without having to do assembly magic. - */ - -/* - * Controller mappings for all interrupt sources: - */ -irq_desc_t _irq_desc[NR_IRQS] __cacheline_aligned = { - [0 ... 
NR_IRQS-1] = { - .status = IRQ_DISABLED, - .handler = &no_irq_type, - .lock = SPIN_LOCK_UNLOCKED - } -}; - -/* - * This is updated when the user sets irq affinity via /proc - */ -cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; - -#ifdef CONFIG_IA64_GENERIC -irq_desc_t * __ia64_irq_desc (unsigned int irq) -{ - return _irq_desc + irq; -} - -ia64_vector __ia64_irq_to_vector (unsigned int irq) -{ - return (ia64_vector) irq; -} - -unsigned int __ia64_local_vector_to_irq (ia64_vector vec) -{ - return (unsigned int) vec; -} -#endif - -static void register_irq_proc (unsigned int irq); - -/* - * Special irq handlers. - */ - -#ifdef XEN -void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } -#else -irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs) -{ return IRQ_NONE; } -#endif - -/* - * Generic no controller code - */ - -static void enable_none(unsigned int irq) { } -static unsigned int startup_none(unsigned int irq) { return 0; } -static void disable_none(unsigned int irq) { } -static void ack_none(unsigned int irq) -{ -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves, it doesn't deserve - * a generic callback i think. - */ -#ifdef CONFIG_X86 - printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - */ - ack_APIC_irq(); -#endif -#endif -#ifdef CONFIG_IA64 - printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); -#endif -} - -/* startup is the same as "enable", shutdown is same as "disable" */ -#define shutdown_none disable_none -#define end_none enable_none - -struct hw_interrupt_type no_irq_type = { - "none", - startup_none, - shutdown_none, - enable_none, - disable_none, - ack_none, - end_none -}; - -atomic_t irq_err_count; -#ifdef CONFIG_X86_IO_APIC -#ifdef APIC_MISMATCH_DEBUG -atomic_t irq_mis_count; -#endif -#endif - -/* - * Generic, controller-independent functions: - */ - -#ifndef XEN -int show_interrupts(struct seq_file *p, void *v) -{ - int j, i = *(loff_t *) v; - struct irqaction * action; - irq_desc_t *idesc; - unsigned long flags; - - if (i == 0) { - seq_puts(p, " "); - for (j=0; j<NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "CPU%d ",j); - seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - idesc = irq_descp(i); - spin_lock_irqsave(&idesc->lock, flags); - action = idesc->action; - if (!action) - goto skip; - seq_printf(p, "%3d: ",i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); -#endif - seq_printf(p, " %14s", idesc->handler->typename); - seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&idesc->lock, flags); - } else if (i == NR_IRQS) { - seq_puts(p, "NMI: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", nmi_count(j)); - seq_putc(p, '\n'); -#ifdef CONFIG_X86_LOCAL_APIC - seq_puts(p, "LOC: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", 
irq_stat[j].apic_timer_irqs); - seq_putc(p, '\n'); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#ifdef CONFIG_X86_IO_APIC -#ifdef APIC_MISMATCH_DEBUG - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif -#endif - } - return 0; -} -#endif - -#ifdef CONFIG_SMP -inline void synchronize_irq(unsigned int irq) -{ -#ifndef XEN - struct irq_desc *desc = irq_desc + irq; - - while (desc->status & IRQ_INPROGRESS) - cpu_relax(); -#endif -} -EXPORT_SYMBOL(synchronize_irq); -#endif - -/* - * This should really return information about whether - * we should do bottom half handling etc. Right now we - * end up _always_ checking the bottom half, which is a - * waste of time and is not what some drivers would - * prefer. - */ -int handle_IRQ_event(unsigned int irq, - struct pt_regs *regs, struct irqaction *action) -{ - int status = 1; /* Force the "do bottom halves" bit */ - int retval = 0; - -#ifndef XEN - if (!(action->flags & SA_INTERRUPT)) -#endif - local_irq_enable(); - -#ifdef XEN - action->handler(irq, action->dev_id, regs); -#else - do { - status |= action->flags; - retval |= action->handler(irq, action->dev_id, regs); - action = action->next; - } while (action); - if (status & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); -#endif - local_irq_disable(); - return retval; -} - -#ifndef XEN -static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret) -{ - struct irqaction *action; - - if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) { - printk(KERN_ERR "irq event %d: bogus return value %x\n", - irq, action_ret); - } else { - printk(KERN_ERR "irq %d: nobody cared!\n", irq); - } - dump_stack(); - printk(KERN_ERR "handlers:\n"); - action = desc->action; - do { - printk(KERN_ERR "[<%p>]", action->handler); - print_symbol(" (%s)", - (unsigned long)action->handler); - printk("\n"); - action = action->next; - } while (action); -} - -static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret) -{ - static int count = 100; - - if (count) { - count--; - __report_bad_irq(irq, desc, action_ret); - } -} -#endif - -static int noirqdebug; - -static int __init noirqdebug_setup(char *str) -{ - noirqdebug = 1; - printk("IRQ lockup detection disabled\n"); - return 1; -} - -__setup("noirqdebug", noirqdebug_setup); - -/* - * If 99,900 of the previous 100,000 interrupts have not been handled then - * assume that the IRQ is stuck in some manner. Drop a diagnostic and try to - * turn the IRQ off. - * - * (The other 100-of-100,000 interrupts may have been a correctly-functioning - * device sharing an IRQ with the failing one) - * - * Called under desc->lock - */ -#ifndef XEN -static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret) -{ - if (action_ret != IRQ_HANDLED) { - desc->irqs_unhandled++; - if (action_ret != IRQ_NONE) - report_bad_irq(irq, desc, action_ret); - } - - desc->irq_count++; - if (desc->irq_count < 100000) - return; - - desc->irq_count = 0; - if (desc->irqs_unhandled > 99900) { - /* - * The interrupt is stuck - */ - __report_bad_irq(irq, desc, action_ret); - /* - * Now kill the IRQ - */ - printk(KERN_EMERG "Disabling IRQ #%d\n", irq); - desc->status |= IRQ_DISABLED; - desc->handler->disable(irq); - } - desc->irqs_unhandled = 0; -} -#endif - -/* - * Generic enable/disable code: this just calls - * down into the PIC-specific version for the actual - * hardware disable after having gotten the irq - * controller lock. 
- */ - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Disables and Enables are - * nested. - * Unlike disable_irq(), this function does not ensure existing - * instances of the IRQ handler have completed before returning. - * - * This function may be called from IRQ context. - */ - -inline void disable_irq_nosync(unsigned int irq) -{ - irq_desc_t *desc = irq_descp(irq); - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->depth++) { - desc->status |= IRQ_DISABLED; - desc->handler->disable(irq); - } - spin_unlock_irqrestore(&desc->lock, flags); -} -EXPORT_SYMBOL(disable_irq_nosync); - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and Disables are - * nested. - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ - -void disable_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_descp(irq); - - disable_irq_nosync(irq); - if (desc->action) - synchronize_irq(irq); -} -EXPORT_SYMBOL(disable_irq); - -/** - * enable_irq - enable handling of an irq - * @irq: Interrupt to enable - * - * Undoes the effect of one call to disable_irq(). If this - * matches the last disable, processing of interrupts on this - * IRQ line is re-enabled. - * - * This function may be called from IRQ context. - */ - -void enable_irq(unsigned int irq) -{ - irq_desc_t *desc = irq_descp(irq); - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - switch (desc->depth) { - case 1: { - unsigned int status = desc->status & ~IRQ_DISABLED; - desc->status = status; -#ifndef XEN - if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - desc->status = status | IRQ_REPLAY; - hw_resend_irq(desc->handler,irq); - } -#endif - desc->handler->enable(irq); - /* fall-through */ - } - default: - desc->depth--; - break; - case 0: - printk(KERN_ERR "enable_irq(%u) unbalanced from %p\n", - irq, (void *) __builtin_return_address(0)); - } - spin_unlock_irqrestore(&desc->lock, flags); -} -EXPORT_SYMBOL(enable_irq); - -/* - * do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - */ -fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs) -{ - irq_desc_t *desc = irq_desc + irq; - struct irqaction * action; - unsigned int status; - -#ifndef XEN - kstat_this_cpu.irqs[irq]++; -#endif - if (desc->status & IRQ_PER_CPU) { - irqreturn_t action_ret; - - /* - * No locking required for CPU-local interrupts: - */ - desc->handler->ack(irq); - action_ret = handle_IRQ_event(irq, regs, desc->action); -#ifndef XEN - if (!noirqdebug) - note_interrupt(irq, desc, action_ret); -#endif - desc->handler->end(irq); - return 1; - } - - spin_lock(&desc->lock); - desc->handler->ack(irq); - /* - * REPLAY is when Linux resends an IRQ that was dropped earlier - * WAITING is used by probe to mark irqs that are being tested - */ -#ifdef XEN - status = desc->status & ~IRQ_REPLAY; -#else - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); -#endif - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* - * If the IRQ is disabled for whatever reason, we cannot - * use the action we have. 
- */ - action = NULL; - if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { - action = desc->action; - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. - * Since we set PENDING, if another processor is handling - * a different instance of this same irq, the other processor - * will take care of it. - */ - if (unlikely(!action)) - goto out; - - /* - * Edge triggered interrupts need to remember - * pending events. - * This applies to any hw interrupts that allow a second - * instance of the same irq to arrive while we are in do_IRQ - * or in the handler. But the code here only handles the _second_ - * instance of the irq, not the third or fourth. So it is mostly - * useful for irq hardware that does not mask cleanly in an - * SMP environment. - */ - for (;;) { - irqreturn_t action_ret; - - spin_unlock(&desc->lock); - - action_ret = handle_IRQ_event(irq, regs, action); - - spin_lock(&desc->lock); -#ifndef XEN - if (!noirqdebug) - note_interrupt(irq, desc, action_ret); -#endif - if (likely(!(desc->status & IRQ_PENDING))) - break; - desc->status &= ~IRQ_PENDING; - } - desc->status &= ~IRQ_INPROGRESS; - -out: - /* - * The ->end() handler has to deal with interrupts which got - * disabled while the handler was running. - */ - desc->handler->end(irq); - spin_unlock(&desc->lock); - - return 1; -} - -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - * - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ - -int request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -{ - int retval; - struct irqaction * action; - -#if 1 - /* - * Sanity-check: shared interrupts should REALLY pass in - * a real dev-ID, otherwise we'll have trouble later trying - * to figure out which interrupt is which (messes up the - * interrupt freeing logic etc). 
- */ - if (irqflags & SA_SHIRQ) { - if (!dev_id) - printk(KERN_ERR "Bad boy: %s called us without a dev_id!\n", devname); - } -#endif - - if (irq >= NR_IRQS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = xmalloc(struct irqaction); - if (!action) - return -ENOMEM; - - action->handler = handler; -#ifndef XEN - action->flags = irqflags; - action->mask = 0; -#endif - action->name = devname; -#ifndef XEN - action->next = NULL; -#endif - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - if (retval) - xfree(action); - return retval; -} - -EXPORT_SYMBOL(request_irq); - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function must not be called from interrupt context. - */ - -#ifdef XEN -void free_irq(unsigned int irq) -#else -void free_irq(unsigned int irq, void *dev_id) -#endif -{ - irq_desc_t *desc; - struct irqaction **p; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - desc = irq_descp(irq); - spin_lock_irqsave(&desc->lock,flags); -#ifdef XEN - if (desc->action) { - struct irqaction * action = desc->action; - desc->action = NULL; -#else - p = &desc->action; - for (;;) { - struct irqaction * action = *p; - if (action) { - struct irqaction **pp = p; - p = &action->next; - if (action->dev_id != dev_id) - continue; - - /* Found it - now remove it from the list of entries */ - *pp = action->next; - if (!desc->action) { -#endif - desc->status |= IRQ_DISABLED; - desc->handler->shutdown(irq); -#ifndef XEN - } -#endif - spin_unlock_irqrestore(&desc->lock,flags); - - /* Wait to make sure it's not being used on another CPU */ - synchronize_irq(irq); - xfree(action); - return; - } - printk(KERN_ERR "Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); -#ifndef XEN - return; - } -#endif -} - -EXPORT_SYMBOL(free_irq); - -/* - * IRQ autodetection code.. - * - * This depends on the fact that any interrupt that - * comes in on to an unassigned handler will get stuck - * with "IRQ_WAITING" cleared and the interrupt - * disabled. - */ - -static DECLARE_MUTEX(probe_sem); - -/** - * probe_irq_on - begin an interrupt autodetect - * - * Commence probing for an interrupt. The interrupts are scanned - * and a mask of potential interrupt lines is returned. - * - */ - -#ifndef XEN -unsigned long probe_irq_on(void) -{ - unsigned int i; - irq_desc_t *desc; - unsigned long val; - unsigned long delay; - - down(&probe_sem); - /* - * something may have generated an irq long ago and we want to - * flush such a longstanding irq before considering it as spurious. - */ - for (i = NR_IRQS-1; i > 0; i--) { - desc = irq_descp(i); - - spin_lock_irq(&desc->lock); - if (!desc->action) - desc->handler->startup(i); - spin_unlock_irq(&desc->lock); - } - - /* Wait for longstanding interrupts to trigger. 
*/ - for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) - /* about 20ms delay */ barrier(); - - /* - * enable any unassigned irqs - * (we must startup again here because if a longstanding irq - * happened in the previous stage, it may have masked itself) - */ - for (i = NR_IRQS-1; i > 0; i--) { - desc = irq_descp(i); - - spin_lock_irq(&desc->lock); - if (!desc->action) { - desc->status |= IRQ_AUTODETECT | IRQ_WAITING; - if (desc->handler->startup(i)) - desc->status |= IRQ_PENDING; - } - spin_unlock_irq(&desc->lock); - } - - /* - * Wait for spurious interrupts to trigger - */ - for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) - /* about 100ms delay */ barrier(); - - /* - * Now filter out any obviously spurious interrupts - */ - val = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_descp(i); - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - /* It triggered already - consider it spurious. */ - if (!(status & IRQ_WAITING)) { - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } else - if (i < 32) - val |= 1 << i; - } - spin_unlock_irq(&desc->lock); - } - - return val; -} - -EXPORT_SYMBOL(probe_irq_on); - -/** - * probe_irq_mask - scan a bitmap of interrupt lines - * @val: mask of interrupts to consider - * - * Scan the ISA bus interrupt lines and return a bitmap of - * active interrupts. The interrupt probe logic state is then - * returned to its previous value. - * - * Note: we need to scan all the irq's even though we will - * only return ISA irq numbers - just so that we reset them - * all to a known state. - */ -unsigned int probe_irq_mask(unsigned long val) -{ - int i; - unsigned int mask; - - mask = 0; - for (i = 0; i < 16; i++) { - irq_desc_t *desc = irq_descp(i); - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (!(status & IRQ_WAITING)) - mask |= 1 << i; - - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - return mask & val; -} -EXPORT_SYMBOL(probe_irq_mask); - -/** - * probe_irq_off - end an interrupt autodetect - * @val: mask of potential interrupts (unused) - * - * Scans the unused interrupt lines and returns the line which - * appears to have triggered the interrupt. If no interrupt was - * found then zero is returned. If more than one interrupt is - * found then minus the first candidate is returned to indicate - * their is doubt. - * - * The interrupt probe logic state is returned to its previous - * value. - * - * BUGS: When used in a module (which arguably shouldn't happen) - * nothing prevents two IRQ probe callers from overlapping. The - * results of this are non-optimal. 
- */ - -int probe_irq_off(unsigned long val) -{ - int i, irq_found, nr_irqs; - - nr_irqs = 0; - irq_found = 0; - for (i = 0; i < NR_IRQS; i++) { - irq_desc_t *desc = irq_descp(i); - unsigned int status; - - spin_lock_irq(&desc->lock); - status = desc->status; - - if (status & IRQ_AUTODETECT) { - if (!(status & IRQ_WAITING)) { - if (!nr_irqs) - irq_found = i; - nr_irqs++; - } - desc->status = status & ~IRQ_AUTODETECT; - desc->handler->shutdown(i); - } - spin_unlock_irq(&desc->lock); - } - up(&probe_sem); - - if (nr_irqs > 1) - irq_found = -irq_found; - return irq_found; -} - -EXPORT_SYMBOL(probe_irq_off); -#endif - -int setup_irq(unsigned int irq, struct irqaction * new) -{ - int shared = 0; - unsigned long flags; - struct irqaction *old, **p; - irq_desc_t *desc = irq_descp(irq); - -#ifndef XEN - if (desc->handler == &no_irq_type) - return -ENOSYS; - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & SA_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. - * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } - - if (new->flags & SA_PERCPU_IRQ) { - desc->status |= IRQ_PER_CPU; - desc->handler = &irq_type_ia64_lsapic; - } -#endif - - /* - * The following block of code has to be executed atomically - */ - spin_lock_irqsave(&desc->lock,flags); - p = &desc->action; - if ((old = *p) != NULL) { -#ifdef XEN - if (1) { - /* Can't share interrupts unless both agree to */ -#else - if (!(old->flags & new->flags & SA_SHIRQ)) { -#endif - spin_unlock_irqrestore(&desc->lock,flags); - return -EBUSY; - } - -#ifndef XEN - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; -#endif - } - - *p = new; - -#ifndef XEN - if (!shared) { -#else - { -#endif - desc->depth = 0; -#ifdef XEN - desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS); -#else - desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS); -#endif - desc->handler->startup(irq); - } - spin_unlock_irqrestore(&desc->lock,flags); - -#ifndef XEN - register_irq_proc(irq); -#endif - return 0; -} - -#ifndef XEN - -static struct proc_dir_entry * root_irq_dir; -static struct proc_dir_entry * irq_dir [NR_IRQS]; - -#ifdef CONFIG_SMP - -static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; - -static cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; - -static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; - -void set_irq_affinity_info (unsigned int irq, int hwid, int redir) -{ - cpumask_t mask = CPU_MASK_NONE; - - cpu_set(cpu_logical_id(hwid), mask); - - if (irq < NR_IRQS) { - irq_affinity[irq] = mask; - irq_redir[irq] = (char) (redir & 0xff); - } -} - -static int irq_affinity_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = sprintf(page, "%s", irq_redir[(long)data] ? 
"r " : ""); - - len += cpumask_scnprintf(page+len, count, irq_affinity[(long)data]); - if (count - len < 2) - return -EINVAL; - len += sprintf(page + len, "\n"); - return len; -} - -static int irq_affinity_write_proc (struct file *file, const char *buffer, - unsigned long count, void *data) -{ - unsigned int irq = (unsigned long) data; - int full_count = count, err; - cpumask_t new_value, tmp; -# define R_PREFIX_LEN 16 - char rbuf[R_PREFIX_LEN]; - int rlen; - int prelen; - irq_desc_t *desc = irq_descp(irq); - unsigned long flags; - - if (!desc->handler->set_affinity) - return -EIO; - - /* - * If string being written starts with a prefix of 'r' or 'R' - * and some limited number of spaces, set IA64_IRQ_REDIRECTED. - * If more than (R_PREFIX_LEN - 2) spaces are passed, they won't - * all be trimmed as part of prelen, the untrimmed spaces will - * cause the hex parsing to fail, and this write() syscall will - * fail with EINVAL. - */ - - if (!count) - return -EINVAL; - rlen = min(sizeof(rbuf)-1, count); - if (copy_from_user(rbuf, buffer, rlen)) - return -EFAULT; - rbuf[rlen] = 0; - prelen = 0; - if (tolower(*rbuf) == 'r') { - prelen = strspn(rbuf, "Rr "); - irq |= IA64_IRQ_REDIRECTED; - } - - err = cpumask_parse(buffer+prelen, count-prelen, new_value); - if (err) - return err; - - /* - * Do not allow disabling IRQs completely - it's a too easy - * way to make the system unusable accidentally :-) At least - * one online CPU still has to be targeted. - */ - cpus_and(tmp, new_value, cpu_online_map); - if (cpus_empty(tmp)) - return -EINVAL; - - spin_lock_irqsave(&desc->lock, flags); - pending_irq_cpumask[irq] = new_value; - spin_unlock_irqrestore(&desc->lock, flags); - - return full_count; -} - -void move_irq(int irq) -{ - /* note - we hold desc->lock */ - cpumask_t tmp; - irq_desc_t *desc = irq_descp(irq); - - if (!cpus_empty(pending_irq_cpumask[irq])) { - cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); - if (unlikely(!cpus_empty(tmp))) { - desc->handler->set_affinity(irq, pending_irq_cpumask[irq]); - } - cpus_clear(pending_irq_cpumask[irq]); - } -} - - -#endif /* CONFIG_SMP */ -#endif - -#ifdef CONFIG_HOTPLUG_CPU -unsigned int vectors_in_migration[NR_IRQS]; - -/* - * Since cpu_online_map is already updated, we just need to check for - * affinity that has zeros - */ -static void migrate_irqs(void) -{ - cpumask_t mask; - irq_desc_t *desc; - int irq, new_cpu; - - for (irq=0; irq < NR_IRQS; irq++) { - desc = irq_descp(irq); - - /* - * No handling for now. - * TBD: Implement a disable function so we can now - * tell CPU not to respond to these local intr sources. - * such as ITV,CPEI,MCA etc. - */ - if (desc->status == IRQ_PER_CPU) - continue; - - cpus_and(mask, irq_affinity[irq], cpu_online_map); - if (any_online_cpu(mask) == NR_CPUS) { - /* - * Save it for phase 2 processing - */ - vectors_in_migration[irq] = irq; - - new_cpu = any_online_cpu(cpu_online_map); - mask = cpumask_of_cpu(new_cpu); - - /* - * Al three are essential, currently WARN_ON.. maybe panic? 
- */ - if (desc->handler && desc->handler->disable && - desc->handler->enable && desc->handler->set_affinity) { - desc->handler->disable(irq); - desc->handler->set_affinity(irq, mask); - desc->handler->enable(irq); - } else { - WARN_ON((!(desc->handler) || !(desc->handler->disable) || - !(desc->handler->enable) || - !(desc->handler->set_affinity))); - } - } - } -} - -void fixup_irqs(void) -{ - unsigned int irq; - extern void ia64_process_pending_intr(void); - - ia64_set_itv(1<<16); - /* - * Phase 1: Locate irq's bound to this cpu and - * relocate them for cpu removal. - */ - migrate_irqs(); - - /* - * Phase 2: Perform interrupt processing for all entries reported in - * local APIC. - */ - ia64_process_pending_intr(); - - /* - * Phase 3: Now handle any interrupts not captured in local APIC. - * This is to account for cases that device interrupted during the time the - * rte was being disabled and re-programmed. - */ - for (irq=0; irq < NR_IRQS; irq++) { - if (vectors_in_migration[irq]) { - vectors_in_migration[irq]=0; - do_IRQ(irq, NULL); - } - } - - /* - * Now let processor die. We do irq disable and max_xtp() to - * ensure there is no more interrupts routed to this processor. - * But the local timer interrupt can have 1 pending which we - * take care in timer_interrupt(). - */ - max_xtp(); - local_irq_disable(); -} -#endif - -#ifndef XEN -static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); - if (count - len < 2) - return -EINVAL; - len += sprintf(page + len, "\n"); - return len; -} - -static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, - unsigned long count, void *data) -{ - cpumask_t *mask = (cpumask_t *)data; - unsigned long full_count = count, err; - cpumask_t new_value; - - err = cpumask_parse(buffer, count, new_value); - if (err) - return err; - - *mask = new_value; - return full_count; -} - -#define MAX_NAMELEN 10 - -static void register_irq_proc (unsigned int irq) -{ - char name [MAX_NAMELEN]; - - if (!root_irq_dir || (irq_descp(irq)->handler == &no_irq_type) || irq_dir[irq]) - return; - - memset(name, 0, MAX_NAMELEN); - sprintf(name, "%d", irq); - - /* create /proc/irq/1234 */ - irq_dir[irq] = proc_mkdir(name, root_irq_dir); - -#ifdef CONFIG_SMP - { - struct proc_dir_entry *entry; - - /* create /proc/irq/1234/smp_affinity */ - entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); - - if (entry) { - entry->nlink = 1; - entry->data = (void *)(long)irq; - entry->read_proc = irq_affinity_read_proc; - entry->write_proc = irq_affinity_write_proc; - } - - smp_affinity_entry[irq] = entry; - } -#endif -} - -cpumask_t prof_cpu_mask = CPU_MASK_ALL; - -void init_irq_proc (void) -{ - struct proc_dir_entry *entry; - int i; - - /* create /proc/irq */ - root_irq_dir = proc_mkdir("irq", 0); - - /* create /proc/irq/prof_cpu_mask */ - entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); - - if (!entry) - return; - - entry->nlink = 1; - entry->data = (void *)&prof_cpu_mask; - entry->read_proc = prof_cpu_mask_read_proc; - entry->write_proc = prof_cpu_mask_write_proc; - - /* - * Create entries for all existing IRQs. 
- */ - for (i = 0; i < NR_IRQS; i++) { - if (irq_descp(i)->handler == &no_irq_type) - continue; - register_irq_proc(i); - } -} -#endif - - -#ifdef XEN -/* - * HANDLING OF GUEST-BOUND PHYSICAL IRQS - */ - -#define IRQ_MAX_GUESTS 7 -typedef struct { - u8 nr_guests; - u8 in_flight; - u8 shareable; - struct domain *guest[IRQ_MAX_GUESTS]; -} irq_guest_action_t; - -static void __do_IRQ_guest(int irq) -{ - irq_desc_t *desc = &irq_desc[irq]; - irq_guest_action_t *action = (irq_guest_action_t *)desc->action; - struct domain *d; - int i; - - for ( i = 0; i < action->nr_guests; i++ ) - { - d = action->guest[i]; - if ( !test_and_set_bit(irq, &d->pirq_mask) ) - action->in_flight++; - send_guest_pirq(d, irq); - } -} - -int pirq_guest_unmask(struct domain *d) -{ - irq_desc_t *desc; - int i, j, pirq; - u32 m; - shared_info_t *s = d->shared_info; - - for ( i = 0; i < ARRAY_SIZE(d->pirq_mask); i++ ) - { - m = d->pirq_mask[i]; - while ( (j = ffs(m)) != 0 ) - { - m &= ~(1 << --j); - pirq = (i << 5) + j; - desc = &irq_desc[pirq]; - spin_lock_irq(&desc->lock); - if ( !test_bit(d->pirq_to_evtchn[pirq], &s->evtchn_mask[0]) && - test_and_clear_bit(pirq, &d->pirq_mask) && - (--((irq_guest_action_t *)desc->action)->in_flight == 0) ) - desc->handler->end(pirq); - spin_unlock_irq(&desc->lock); - } - } - - return 0; -} - -int pirq_guest_bind(struct vcpu *d, int irq, int will_share) -{ - irq_desc_t *desc = &irq_desc[irq]; - irq_guest_action_t *action; - unsigned long flags; - int rc = 0; - - if ( !IS_CAPABLE_PHYSDEV(d->domain) ) - return -EPERM; - - spin_lock_irqsave(&desc->lock, flags); - - action = (irq_guest_action_t *)desc->action; - - if ( !(desc->status & IRQ_GUEST) ) - { - if ( desc->action != NULL ) - { - DPRINTK("Cannot bind IRQ %d to guest. In use by '%s'.\n", - irq, desc->action->name); - rc = -EBUSY; - goto out; - } - - action = xmalloc(irq_guest_action_t); - if ( (desc->action = (struct irqaction *)action) == NULL ) - { - DPRINTK("Cannot bind IRQ %d to guest. Out of memory.\n", irq); - rc = -ENOMEM; - goto out; - } - - action->nr_guests = 0; - action->in_flight = 0; - action->shareable = will_share; - - desc->depth = 0; - desc->status |= IRQ_GUEST; - desc->status &= ~IRQ_DISABLED; - desc->handler->startup(irq); - - /* Attempt to bind the interrupt target to the correct CPU. */ -#if 0 /* FIXME CONFIG_SMP ??? */ - if ( desc->handler->set_affinity != NULL ) - desc->handler->set_affinity( - irq, apicid_to_phys_cpu_present(d->processor)); -#endif - } - else if ( !will_share || !action->shareable ) - { - DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n", - irq); - rc = -EBUSY; - goto out; - } - - if ( action->nr_guests == IRQ_MAX_GUESTS ) - { - DPRINTK("Cannot bind IRQ %d to guest. 
Already at max share.\n", irq); - rc = -EBUSY; - goto out; - } - - action->guest[action->nr_guests++] = d; - - out: - spin_unlock_irqrestore(&desc->lock, flags); - return rc; -} - -int pirq_guest_unbind(struct domain *d, int irq) -{ - irq_desc_t *desc = &irq_desc[irq]; - irq_guest_action_t *action; - unsigned long flags; - int i; - - spin_lock_irqsave(&desc->lock, flags); - - action = (irq_guest_action_t *)desc->action; - - if ( test_and_clear_bit(irq, &d->pirq_mask) && - (--action->in_flight == 0) ) - desc->handler->end(irq); - - if ( action->nr_guests == 1 ) - { - desc->action = NULL; - xfree(action); - desc->depth = 1; - desc->status |= IRQ_DISABLED; - desc->status &= ~IRQ_GUEST; - desc->handler->shutdown(irq); - } - else - { - i = 0; - while ( action->guest[i] != d ) - i++; - memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); - action->nr_guests--; - } - - spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - -#endif - -#ifdef XEN -#ifdef IA64 -// this is a temporary hack until real console input is implemented -irqreturn_t guest_forward_keyboard_input(int irq, void *nada, struct pt_regs *regs) -{ - domain_pend_keyboard_interrupt(irq); -} - -void serial_input_init(void) -{ - int retval; - int irq = 0x30; // FIXME - - retval = request_irq(irq,guest_forward_keyboard_input,SA_INTERRUPT,"siminput",NULL); - if (retval) { - printk("serial_input_init: broken request_irq call\n"); - while(1); - } -} -#endif -#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/ivt.S --- a/xen/arch/ia64/ivt.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1975 +0,0 @@ - -#ifdef XEN -//#define CONFIG_DISABLE_VHPT // FIXME: change when VHPT is enabled?? -// these are all hacked out for now as the entire IVT -// will eventually be replaced... just want to use it -// for startup code to handle TLB misses -//#define ia64_leave_kernel 0 -//#define ia64_ret_from_syscall 0 -//#define ia64_handle_irq 0 -//#define ia64_fault 0 -#define ia64_illegal_op_fault 0 -#define ia64_prepare_handle_unaligned 0 -#define ia64_bad_break 0 -#define ia64_trace_syscall 0 -#define sys_call_table 0 -#define sys_ni_syscall 0 -#include <asm/vhpt.h> -#endif -/* - * arch/ia64/kernel/ivt.S - * - * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co - * Stephane Eranian <eranian@xxxxxxxxxx> - * David Mosberger <davidm@xxxxxxxxxx> - * Copyright (C) 2000, 2002-2003 Intel Co - * Asit Mallick <asit.k.mallick@xxxxxxxxx> - * Suresh Siddha <suresh.b.siddha@xxxxxxxxx> - * Kenneth Chen <kenneth.w.chen@xxxxxxxxx> - * Fenghua Yu <fenghua.yu@xxxxxxxxx> - * - * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP - * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now uses virtual PT. - */ -/* - * This file defines the interruption vector table used by the CPU. - * It does not include one entry per possible cause of interruption. - * - * The first 20 entries of the table contain 64 bundles each while the - * remaining 48 entries contain only 16 bundles each. - * - * The 64 bundles are used to allow inlining the whole handler for critical - * interruptions like TLB misses. 
- * - * For each entry, the comment is as follows: - * - * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) - * entry offset ----/ / / / / - * entry number ---------/ / / / - * size of the entry -------------/ / / - * vector name -------------------------------------/ / - * interruptions triggering this vector ----------------------/ - * - * The table is 32KB in size and must be aligned on 32KB boundary. - * (The CPU ignores the 15 lower bits of the address) - * - * Table is based upon EAS2.6 (Oct 1999) - */ - -#include <linux/config.h> - -#include <asm/asmmacro.h> -#include <asm/break.h> -#include <asm/ia32.h> -#include <asm/kregs.h> -#include <asm/offsets.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/system.h> -#include <asm/thread_info.h> -#include <asm/unistd.h> -#include <asm/errno.h> - -#if 1 -# define PSR_DEFAULT_BITS psr.ac -#else -# define PSR_DEFAULT_BITS 0 -#endif - -#if 0 - /* - * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't - * needed for something else before enabling this... - */ -# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 -#else -# define DBG_FAULT(i) -#endif - -#define MINSTATE_VIRT /* needed by minstate.h */ -#include "minstate.h" - -#define FAULT(n) \ - mov r31=pr; \ - mov r19=n;; /* prepare to save predicates */ \ - br.sptk.many dispatch_to_fault_handler - -#ifdef XEN -#define REFLECT(n) \ - mov r31=pr; \ - mov r19=n;; /* prepare to save predicates */ \ - br.sptk.many dispatch_reflection -#endif - - .section .text.ivt,"ax" - - .align 32768 // align on 32KB boundary - .global ia64_ivt -ia64_ivt: -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) -ENTRY(vhpt_miss) - DBG_FAULT(0) - /* - * The VHPT vector is invoked when the TLB entry for the virtual page table - * is missing. This happens only as a result of a previous - * (the "original") TLB miss, which may either be caused by an instruction - * fetch or a data access (or non-access). - * - * What we do here is normal TLB miss handing for the _original_ miss, followed - * by inserting the TLB entry for the virtual page table page that the VHPT - * walker was attempting to access. The latter gets inserted as long - * as both L1 and L2 have valid mappings for the faulting address. - * The TLB entry for the original miss gets inserted only if - * the L3 entry indicates that the page is present. - * - * do_page_fault gets invoked in the following cases: - * - the faulting virtual address uses unimplemented address bits - * - the faulting virtual address has no L1, L2, or L3 mapping - */ - mov r16=cr.ifa // get address that caused the TLB miss -#ifdef CONFIG_HUGETLB_PAGE - movl r18=PAGE_SHIFT - mov r25=cr.itir -#endif - ;; - rsm psr.dt // use physical addressing for data - mov r31=pr // save the predicate registers -#ifdef XEN - movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;; -#else - mov r19=IA64_KR(PT_BASE) // get page table base address -#endif - shl r21=r16,3 // shift bit 60 into sign bit - shr.u r17=r16,61 // get the region number into r17 - ;; - shr r22=r21,3 -#ifdef CONFIG_HUGETLB_PAGE - extr.u r26=r25,2,6 - ;; - cmp.ne p8,p0=r18,r26 - sub r27=r26,r18 - ;; -(p8) dep r25=r18,r25,2,6 -(p8) shr r22=r22,r27 -#endif - ;; - cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? 
- shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address - ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r22,PMD_SHIFT // shift L2 index into position - ;; - ld8 r17=[r17] // fetch the L1 entry (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry - ;; -(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift L3 index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL? - dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry - ;; -(p7) ld8 r18=[r21] // read the L3 PTE - mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss - ;; -(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? - mov r22=cr.iha // get the VHPT address that caused the TLB miss - ;; // avoid RAW on p7 -(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? - dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address - ;; -(p10) itc.i r18 // insert the instruction TLB entry -(p11) itc.d r18 // insert the data TLB entry -(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) - mov cr.ifa=r22 - -#ifdef CONFIG_HUGETLB_PAGE -(p8) mov cr.itir=r25 // change to default page-size for VHPT -#endif - - /* - * Now compute and insert the TLB entry for the virtual page table. We never - * execute in a page table page so there is no need to set the exception deferral - * bit. - */ - adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 - ;; -(p7) itc.d r24 - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - /* - * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g - * between reading the pagetable and the "itc". If so, flush the entry we - * inserted and retry. - */ - ld8 r25=[r21] // read L3 PTE again - ld8 r26=[r17] // read L2 entry again - ;; - cmp.ne p6,p7=r26,r20 // did L2 entry change - mov r27=PAGE_SHIFT<<2 - ;; -(p6) ptc.l r22,r27 // purge PTE page translation -(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change - ;; -(p6) ptc.l r16,r27 // purge translation -#endif - - mov pr=r31,-1 // restore predicate registers - rfi -END(vhpt_miss) - - .org ia64_ivt+0x400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0400 Entry 1 (size 64 bundles) ITLB (21) -ENTRY(itlb_miss) - DBG_FAULT(1) -#ifdef XEN - VHPT_CCHAIN_LOOKUP(itlb_miss,i) -#ifdef VHPT_GLOBAL - br.cond.sptk page_fault - ;; -#endif -#endif - /* - * The ITLB handler accesses the L3 PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the L3 PTE read - * and go on normally after that. 
- */ - mov r16=cr.ifa // get virtual address - mov r29=b0 // save b0 - mov r31=pr // save predicates -.itlb_fault: - mov r17=cr.iha // get virtual address of L3 PTE - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read L3 PTE - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? -(p6) br.cond.spnt page_fault - ;; - itc.i r18 - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read L3 PTE again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 - rfi -END(itlb_miss) - - .org ia64_ivt+0x0800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) -ENTRY(dtlb_miss) - DBG_FAULT(2) -#ifdef XEN - VHPT_CCHAIN_LOOKUP(dtlb_miss,d) -#ifdef VHPT_GLOBAL - br.cond.sptk page_fault - ;; -#endif -#endif - /* - * The DTLB handler accesses the L3 PTE via the virtually mapped linear - * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the L3 PTE read - * and go on normally after that. - */ - mov r16=cr.ifa // get virtual address - mov r29=b0 // save b0 - mov r31=pr // save predicates -dtlb_fault: - mov r17=cr.iha // get virtual address of L3 PTE - movl r30=1f // load nested fault continuation point - ;; -1: ld8 r18=[r17] // read L3 PTE - ;; - mov b0=r29 - tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? -(p6) br.cond.spnt page_fault - ;; - itc.d r18 - ;; -#ifdef CONFIG_SMP - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r19=[r17] // read L3 PTE again and see if same - mov r20=PAGE_SHIFT<<2 // setup page size for purge - ;; - cmp.ne p7,p0=r18,r19 - ;; -(p7) ptc.l r16,r20 -#endif - mov pr=r31,-1 - rfi -END(dtlb_miss) - - .org ia64_ivt+0x0c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) -ENTRY(alt_itlb_miss) - DBG_FAULT(3) -#ifdef XEN -//#ifdef VHPT_GLOBAL -// VHPT_CCHAIN_LOOKUP(alt_itlb_miss,i) -// br.cond.sptk page_fault -// ;; -//#endif -#endif -#ifdef XEN - mov r31=pr - mov r16=cr.ifa // get address that caused the TLB miss - ;; -late_alt_itlb_miss: - movl r17=PAGE_KERNEL - mov r21=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; -#else - mov r16=cr.ifa // get address that caused the TLB miss - movl r17=PAGE_KERNEL - mov r21=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r31=pr - ;; -#endif -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // user mode - ;; -(p8) thash r17=r16 - ;; -(p8) mov cr.iha=r17 -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk .itlb_fault -#endif - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits -#ifdef XEN - shr.u r18=r16,55 // move address bit 59 to bit 4 - ;; - and r18=0x10,r18 // bit 4=address-bit(59) -#else - shr.u r18=r16,57 // move address bit 61 to bit 4 - ;; - andcm r18=0x10,r18 // bit 4=~address-bit(61) -#endif - cmp.ne p8,p0=r0,r23 // psr.cpl != 0? 
- or r19=r17,r19 // insert PTE control bits into r19 - ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 -(p8) br.cond.spnt page_fault - ;; - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi -END(alt_itlb_miss) - - .org ia64_ivt+0x1000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) -ENTRY(alt_dtlb_miss) - DBG_FAULT(4) -#ifdef XEN -//#ifdef VHPT_GLOBAL -// VHPT_CCHAIN_LOOKUP(alt_dtlb_miss,d) -// br.cond.sptk page_fault -// ;; -//#endif -#endif -#ifdef XEN - mov r31=pr - mov r16=cr.ifa // get address that caused the TLB miss - ;; -late_alt_dtlb_miss: - movl r17=PAGE_KERNEL - mov r20=cr.isr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r21=cr.ipsr - ;; -#else -#endif -#ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 - ;; - cmp.gt p8,p0=6,r22 // access to region 0-5 - ;; -(p8) thash r17=r16 - ;; -(p8) mov cr.iha=r17 -(p8) mov r29=b0 // save b0 -(p8) br.cond.dptk dtlb_fault -#endif - extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl - and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field - tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? -#ifdef XEN - shr.u r18=r16,55 // move address bit 59 to bit 4 - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? - ;; - and r18=0x10,r18 // bit 4=address-bit(59) -#else - shr.u r18=r16,57 // move address bit 61 to bit 4 - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? - ;; - andcm r18=0x10,r18 // bit 4=~address-bit(61) -#endif - cmp.ne p8,p0=r0,r23 -(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field -(p8) br.cond.spnt page_fault -#ifdef XEN - ;; - // Test for Xen address, if not handle via page_fault - // note that 0xf000 (cached) and 0xe800 (uncached) addresses - // should be OK. - extr.u r22=r16,59,5;; - cmp.eq p8,p0=0x1e,r22 -(p8) br.cond.spnt 1f;; - cmp.ne p8,p0=0x1d,r22 -(p8) br.cond.sptk page_fault ;; -1: -#endif - - dep r21=-1,r21,IA64_PSR_ED_BIT,1 - or r19=r19,r17 // insert PTE control bits into r19 - ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 -(p6) mov cr.ipsr=r21 - ;; -(p7) itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi -END(alt_dtlb_miss) - - .org ia64_ivt+0x1400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) -ENTRY(nested_dtlb_miss) - /* - * In the absence of kernel bugs, we get here when the virtually mapped linear - * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction - * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page - * table is missing, a nested TLB miss fault is triggered and control is - * transferred to this point. When this happens, we lookup the pte for the - * faulting address by walking the page table in physical mode and return to the - * continuation point passed in register r30 (or call page_fault if the address is - * not mapped). 
- * - * Input: r16: faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Output: r17: physical address of L3 PTE of faulting address - * r29: saved b0 - * r30: continuation address - * r31: saved pr - * - * Clobbered: b0, r18, r19, r21, psr.dt (cleared) - */ - rsm psr.dt // switch to using physical data addressing -#ifdef XEN - movl r19=THIS_CPU(cpu_kr)+IA64_KR_PT_BASE_OFFSET;; -#else - mov r19=IA64_KR(PT_BASE) // get the page table base address -#endif - shl r21=r16,3 // shift bit 60 into sign bit - ;; - shr.u r17=r16,61 // get the region number into r17 - ;; - cmp.eq p6,p7=5,r17 // is faulting address in region 5? - shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address - ;; -(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place - - srlz.d - LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir - - .pred.rel "mutex", p6, p7 -(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT -(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 - ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) - cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r16,PMD_SHIFT // shift L2 index into position - ;; - ld8 r17=[r17] // fetch the L1 entry (may be 0) - ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry - ;; -(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r16,PAGE_SHIFT // shift L3 index into position - ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? - dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry -(p6) br.cond.spnt page_fault - mov b0=r30 - br.sptk.many b0 // return to continuation point -END(nested_dtlb_miss) - - .org ia64_ivt+0x1800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) -ENTRY(ikey_miss) -#ifdef XEN - REFLECT(6) -#endif - DBG_FAULT(6) - FAULT(6) -END(ikey_miss) - - //----------------------------------------------------------------------------------- - // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) -ENTRY(page_fault) - ssm psr.dt - ;; - srlz.i - ;; - SAVE_MIN_WITH_COVER -#ifdef XEN - alloc r15=ar.pfs,0,0,4,0 - mov out0=cr.ifa - mov out1=cr.isr - mov out3=cr.itir -#else - alloc r15=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=cr.isr -#endif - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collectin is on - ;; -(p15) ssm psr.i // restore psr.i - movl r14=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 // out2 = pointer to pt_regs - br.call.sptk.many b6=ia64_do_page_fault // ignore return address -END(page_fault) - - .org ia64_ivt+0x1c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) -ENTRY(dkey_miss) -#ifdef XEN - REFLECT(7) -#endif - DBG_FAULT(7) - FAULT(7) -END(dkey_miss) - - .org ia64_ivt+0x2000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) -ENTRY(dirty_bit) -#ifdef XEN - REFLECT(8) -#endif - DBG_FAULT(8) - /* - * What we do here is to simply turn on the dirty bit in the PTE. 
We need to - * update both the page-table and the TLB entry. To efficiently access the PTE, - * we address it through the virtual page table. Most likely, the TLB entry for - * the relevant virtual page table page is still present in the TLB so we can - * normally do this without additional TLB misses. In case the necessary virtual - * page table TLB entry isn't present, we take a nested TLB miss hit where we look - * up the physical address of the L3 PTE and then continue at label 1 below. - */ - mov r16=cr.ifa // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - thash r17=r16 // compute virtual address of L3 PTE - mov r29=b0 // save b0 in case of nested fault - mov r31=pr // save pr -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - ;; - cmpxchg8.acq r26=[r17],r25,ar.ccv - mov r24=PAGE_SHIFT<<2 - ;; - cmp.eq p6,p7=r26,r18 - ;; -(p6) itc.d r25 // install updated PTE - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE -#endif - mov pr=r31,-1 // restore pr - rfi -END(dirty_bit) - - .org ia64_ivt+0x2400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) -ENTRY(iaccess_bit) -#ifdef XEN - mov r31=pr; - mov r16=cr.isr - mov r17=cr.ifa - mov r19=9 - movl r20=0x2400 - br.sptk.many fast_access_reflect;; -#endif - DBG_FAULT(9) - // Like Entry 8, except for instruction access - mov r16=cr.ifa // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - mov r31=pr // save predicates -#ifdef CONFIG_ITANIUM - /* - * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. - */ - mov r17=cr.ipsr - ;; - mov r18=cr.iip - tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? 
- ;; -(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa -#endif /* CONFIG_ITANIUM */ - ;; - thash r17=r16 // compute virtual address of L3 PTE - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the accessed bit - ;; - cmpxchg8.acq r26=[r17],r25,ar.ccv - mov r24=PAGE_SHIFT<<2 - ;; - cmp.eq p6,p7=r26,r18 - ;; -(p6) itc.i r25 // install updated PTE - ;; - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov b0=r29 // restore b0 - mov ar.ccv=r28 -#else /* !CONFIG_SMP */ - ;; -1: ld8 r18=[r17] - ;; - or r18=_PAGE_A,r18 // set the accessed bit - mov b0=r29 // restore b0 - ;; - st8 [r17]=r18 // store back updated PTE - itc.i r18 // install updated PTE -#endif /* !CONFIG_SMP */ - mov pr=r31,-1 - rfi -END(iaccess_bit) - - .org ia64_ivt+0x2800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) -ENTRY(daccess_bit) -#ifdef XEN - mov r31=pr; - mov r16=cr.isr - mov r17=cr.ifa - mov r19=10 - movl r20=0x2800 - br.sptk.many fast_access_reflect;; -#endif - DBG_FAULT(10) - // Like Entry 8, except for data access - mov r16=cr.ifa // get the address that caused the fault - movl r30=1f // load continuation point in case of nested fault - ;; - thash r17=r16 // compute virtual address of L3 PTE - mov r31=pr - mov r29=b0 // save b0 in case of nested fault) -#ifdef CONFIG_SMP - mov r28=ar.ccv // save ar.ccv - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - mov ar.ccv=r18 // set compare value for cmpxchg - or r25=_PAGE_A,r18 // set the dirty bit - ;; - cmpxchg8.acq r26=[r17],r25,ar.ccv - mov r24=PAGE_SHIFT<<2 - ;; - cmp.eq p6,p7=r26,r18 - ;; -(p6) itc.d r25 // install updated PTE - /* - * Tell the assemblers dependency-violation checker that the above "itc" instructions - * cannot possibly affect the following loads: - */ - dv_serialize_data - ;; - ld8 r18=[r17] // read PTE again - ;; - cmp.eq p6,p7=r18,r25 // is it same as the newly installed - ;; -(p7) ptc.l r16,r24 - mov ar.ccv=r28 -#else - ;; -1: ld8 r18=[r17] - ;; // avoid RAW on r18 - or r18=_PAGE_A,r18 // set the accessed bit - ;; - st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE -#endif - mov b0=r29 // restore b0 - mov pr=r31,-1 - rfi -END(daccess_bit) - - .org ia64_ivt+0x2c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) -ENTRY(break_fault) - /* - * The streamlined system call entry/exit paths only save/restore the initial part - * of pt_regs. This implies that the callers of system-calls must adhere to the - * normal procedure calling conventions. - * - * Registers to be saved & restored: - * CR registers: cr.ipsr, cr.iip, cr.ifs - * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr - * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 - * Registers to be restored only: - * r8-r11: output value from the system call. - * - * During system call exit, scratch registers (including r15) are modified/cleared - * to prevent leaking bits from kernel to user level. 
- */ - DBG_FAULT(11) -#ifdef XEN - mov r16=cr.isr - mov r17=cr.iim - mov r31=pr - ;; - movl r18=XSI_PSR_IC - ;; - ld8 r19=[r18] - ;; - cmp.eq p7,p0=r0,r17 // is this a psuedo-cover? -(p7) br.spnt.many dispatch_privop_fault - ;; - // if vpsr.ic is off, we have a hyperprivop - // A hyperprivop is hand-coded assembly with psr.ic off - // which means no calls, no use of r1-r15 and no memory accesses - // except to pinned addresses! - cmp4.eq p7,p0=r0,r19 -(p7) br.sptk.many fast_hyperprivop - ;; - movl r22=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r22 = [r22] - ;; - adds r22=IA64_VCPU_BREAKIMM_OFFSET,r22;; - ld4 r23=[r22];; - cmp4.eq p6,p7=r23,r17 // Xen-reserved breakimm? -(p6) br.spnt.many dispatch_break_fault - ;; - br.sptk.many fast_break_reflect - ;; -#endif - movl r16=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; - ld8 r16=[r16] - mov r17=cr.iim - mov r18=__IA64_BREAK_SYSCALL - mov r21=ar.fpsr - mov r29=cr.ipsr - mov r19=b6 - mov r25=ar.unat - mov r27=ar.rsc - mov r26=ar.pfs - mov r28=cr.iip -#ifndef XEN - mov r31=pr // prepare to save predicates -#endif - mov r20=r1 - ;; - adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 - cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so) -(p7) br.cond.spnt non_syscall - ;; - ld1 r17=[r16] // load current->thread.on_ustack flag - st1 [r16]=r0 // clear current->thread.on_ustack flag - add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT - ;; - invala - - /* adjust return address so we skip over the break instruction: */ - - extr.u r8=r29,41,2 // extract ei field from cr.ipsr - ;; - cmp.eq p6,p7=2,r8 // isr.ei==2? - mov r2=r1 // setup r2 for ia64_syscall_setup - ;; -(p6) mov r8=0 // clear ei to 0 -(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped -(p7) adds r8=1,r8 // increment ei to next slot - ;; - cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already? - dep r29=r8,r29,41,2 // insert new ei into cr.ipsr - ;; - - // switch from user to kernel RBS: - MINSTATE_START_SAVE_MIN_VIRT - br.call.sptk.many b7=ia64_syscall_setup - ;; - MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1 - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - mov r3=NR_syscalls - 1 - ;; -(p15) ssm psr.i // restore psr.i - // p10==true means out registers are more than 8 or r15's Nat is true -(p10) br.cond.spnt.many ia64_ret_from_syscall - ;; - movl r16=sys_call_table - - adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 - movl r2=ia64_ret_from_syscall - ;; - shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) - cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ? - mov rp=r2 // set the real return addr - ;; -(p6) ld8 r20=[r20] // load address of syscall entry point -(p7) movl r20=sys_ni_syscall - - add r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 r2=[r2] // r2 = current_thread_info()->flags - ;; - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit - ;; - cmp.eq p8,p0=r2,r0 - mov b6=r20 - ;; -(p8) br.call.sptk.many b6=b6 // ignore this return addr - br.cond.sptk ia64_trace_syscall - // NOT REACHED -END(break_fault) - - .org ia64_ivt+0x3000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) -ENTRY(interrupt) - DBG_FAULT(12) - mov r31=pr // prepare to save predicates - ;; -#ifdef XEN - mov r30=cr.ivr // pass cr.ivr as first arg - // FIXME: this is a hack... 
use cpuinfo.ksoftirqd because its - // not used anywhere else and we need a place to stash ivr and - // there's no registers available unused by SAVE_MIN/REST - movl r29=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;; - st8 [r29]=r30;; - movl r28=slow_interrupt;; - mov r29=rp;; - mov rp=r28;; - br.cond.sptk.many fast_tick_reflect - ;; -slow_interrupt: - mov rp=r29;; -#endif - SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 - ssm psr.ic | PSR_DEFAULT_BITS - ;; - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; - SAVE_REST - ;; - alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group -#ifdef XEN - movl out0=THIS_CPU(cpu_info)+IA64_CPUINFO_KSOFTIRQD_OFFSET;; - ld8 out0=[out0];; -#else - mov out0=cr.ivr // pass cr.ivr as first arg -#endif - add out1=16,sp // pass pointer to pt_regs as second arg - ;; - srlz.d // make sure we see the effect of cr.ivr - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_handle_irq -END(interrupt) - - .org ia64_ivt+0x3400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3400 Entry 13 (size 64 bundles) Reserved - DBG_FAULT(13) - FAULT(13) - -#ifdef XEN - // There is no particular reason for this code to be here, other than that - // there happens to be space here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... - -GLOBAL_ENTRY(dispatch_break_fault) - SAVE_MIN_WITH_COVER - ;; -dispatch_break_fault_post_save: - alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) - mov out0=cr.ifa - adds out1=16,sp - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.iim // FIXME: pity to make this slow access twice - - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_break -END(dispatch_break_fault) -#endif - - .org ia64_ivt+0x3800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3800 Entry 14 (size 64 bundles) Reserved - DBG_FAULT(14) - FAULT(14) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - * - * ia64_syscall_setup() is a separate subroutine so that it can - * allocate stacked registers so it can safely demine any - * potential NaT values from the input registers. 
- * - * On entry: - * - executing on bank 0 or bank 1 register set (doesn't matter) - * - r1: stack pointer - * - r2: current task pointer - * - r3: preserved - * - r11: original contents (saved ar.pfs to be saved) - * - r12: original contents (sp to be saved) - * - r13: original contents (tp to be saved) - * - r15: original contents (syscall # to be saved) - * - r18: saved bsp (after switching to kernel stack) - * - r19: saved b6 - * - r20: saved r1 (gp) - * - r21: saved ar.fpsr - * - r22: kernel's register backing store base (krbs_base) - * - r23: saved ar.bspstore - * - r24: saved ar.rnat - * - r25: saved ar.unat - * - r26: saved ar.pfs - * - r27: saved ar.rsc - * - r28: saved cr.iip - * - r29: saved cr.ipsr - * - r31: saved pr - * - b0: original contents (to be saved) - * On exit: - * - executing on bank 1 registers - * - psr.ic enabled, interrupts restored - * - p10: TRUE if syscall is invoked with more than 8 out - * registers or r15's Nat is true - * - r1: kernel's gp - * - r3: preserved (same as on entry) - * - r8: -EINVAL if p10 is true - * - r12: points to kernel stack - * - r13: points to current task - * - p15: TRUE if interrupts need to be re-enabled - * - ar.fpsr: set to kernel settings - */ -GLOBAL_ENTRY(ia64_syscall_setup) -#ifndef XEN -#if PT(B6) != 0 -# error This code assumes that b6 is the first field in pt_regs. -#endif -#endif - st8 [r1]=r19 // save b6 - add r16=PT(CR_IPSR),r1 // initialize first base pointer - add r17=PT(R11),r1 // initialize second base pointer - ;; - alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable - st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr - tnat.nz p8,p0=in0 - - st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 - tnat.nz p9,p0=in1 -(pKStk) mov r18=r0 // make sure r18 isn't NaT - ;; - - st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs - st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip - mov r28=b0 // save b0 (2 cyc) - ;; - - st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat - dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] -(p8) mov in0=-1 - ;; - - st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs - extr.u r11=r19,7,7 // I0 // get sol of ar.pfs - and r8=0x7f,r19 // A // get sof of ar.pfs - - st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc - tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 -(p9) mov in1=-1 - ;; - -(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 - tnat.nz p10,p0=in2 - add r11=8,r11 - ;; -(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field -(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field - tnat.nz p11,p0=in3 - ;; -(p10) mov in2=-1 - tnat.nz p12,p0=in4 // [I0] -(p11) mov in3=-1 - ;; -(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat -(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore - shl r18=r18,16 // compute ar.rsc to be used for "loadrs" - ;; - st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates - st8 [r17]=r28,PT(R1)-PT(B0) // save b0 - tnat.nz p13,p0=in5 // [I0] - ;; - st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" - st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 -(p12) mov in4=-1 - ;; - -.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 -.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 -(p13) mov in5=-1 - ;; - st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr - tnat.nz p14,p0=in6 - cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 - ;; - stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) -(p9) tnat.nz p10,p0=r15 - adds r12=-16,r1 // 
switch to kernel memory stack (with 16 bytes of scratch) - - st8.spill [r17]=r15 // save r15 - tnat.nz p8,p0=in7 - nop.i 0 - - mov r13=r2 // establish `current' - movl r1=__gp // establish kernel global pointer - ;; -(p14) mov in6=-1 -(p8) mov in7=-1 - nop.i 0 - - cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 - movl r17=FPSR_DEFAULT - ;; - mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value -(p10) mov r8=-EINVAL - br.ret.sptk.many b7 -END(ia64_syscall_setup) - - .org ia64_ivt+0x3c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3c00 Entry 15 (size 64 bundles) Reserved - DBG_FAULT(15) - FAULT(15) - - /* - * Squatting in this space ... - * - * This special case dispatcher for illegal operation faults allows preserved - * registers to be modified through a callback function (asm only) that is handed - * back from the fault handler in r8. Up to three arguments can be passed to the - * callback function by returning an aggregate with the callback as its first - * element, followed by the arguments. - */ -ENTRY(dispatch_illegal_op_fault) - SAVE_MIN_WITH_COVER - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in insn group - mov out0=ar.ec - ;; - SAVE_REST - ;; - br.call.sptk.many rp=ia64_illegal_op_fault -.ret0: ;; - alloc r14=ar.pfs,0,0,3,0 // must be first in insn group - mov out0=r9 - mov out1=r10 - mov out2=r11 - movl r15=ia64_leave_kernel - ;; - mov rp=r15 - mov b6=r8 - ;; - cmp.ne p6,p0=0,r8 -(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel - br.sptk.many ia64_leave_kernel -END(dispatch_illegal_op_fault) - - .org ia64_ivt+0x4000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - DBG_FAULT(16) - FAULT(16) - -#ifdef XEN - // There is no particular reason for this code to be here, other than that - // there happens to be space here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... - -ENTRY(dispatch_privop_fault) - SAVE_MIN_WITH_COVER - ;; - alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) - mov out0=cr.ifa - adds out1=16,sp - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.itir - - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_privop -END(dispatch_privop_fault) -#endif - - - .org ia64_ivt+0x4400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4400 Entry 17 (size 64 bundles) Reserved - DBG_FAULT(17) - FAULT(17) - -ENTRY(non_syscall) - SAVE_MIN_WITH_COVER - - // There is no particular reason for this code to be here, other than that - // there happens to be space here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... 
- - alloc r14=ar.pfs,0,0,2,0 - mov out0=cr.iim - add out1=16,sp - adds r3=8,r2 // set up second base pointer for SAVE_REST - - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - movl r15=ia64_leave_kernel - ;; - SAVE_REST - mov rp=r15 - ;; - br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr -END(non_syscall) - - .org ia64_ivt+0x4800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4800 Entry 18 (size 64 bundles) Reserved - DBG_FAULT(18) - FAULT(18) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_unaligned_handler) - SAVE_MIN_WITH_COVER - ;; - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) - mov out0=cr.ifa - adds out1=16,sp - - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_unaligned -END(dispatch_unaligned_handler) - - .org ia64_ivt+0x4c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4c00 Entry 19 (size 64 bundles) Reserved - DBG_FAULT(19) - FAULT(19) - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - -ENTRY(dispatch_to_fault_handler) - /* - * Input: - * psr.ic: off - * r19: fault vector number (e.g., 24 for General Exception) - * r31: contains saved predicates (pr) - */ - SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out0=r15 - mov out1=cr.isr - mov out2=cr.ifa - mov out3=cr.iim - mov out4=cr.itir - ;; - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.call.sptk.many b6=ia64_fault -END(dispatch_to_fault_handler) - -// -// --- End of long entries, Beginning of short entries -// - - .org ia64_ivt+0x5000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) -ENTRY(page_not_present) -#ifdef XEN - REFLECT(20) -#endif - DBG_FAULT(20) - mov r16=cr.ifa - rsm psr.dt - /* - * The Linux page fault handler doesn't expect non-present pages to be in - * the TLB. Flush the existing entry now, so we meet that expectation. 
- */ - mov r17=PAGE_SHIFT<<2 - ;; - ptc.l r16,r17 - ;; - mov r31=pr - srlz.d - br.sptk.many page_fault -END(page_not_present) - - .org ia64_ivt+0x5100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) -ENTRY(key_permission) -#ifdef XEN - REFLECT(21) -#endif - DBG_FAULT(21) - mov r16=cr.ifa - rsm psr.dt - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(key_permission) - - .org ia64_ivt+0x5200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) -ENTRY(iaccess_rights) -#ifdef XEN - REFLECT(22) -#endif - DBG_FAULT(22) - mov r16=cr.ifa - rsm psr.dt - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(iaccess_rights) - - .org ia64_ivt+0x5300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) -ENTRY(daccess_rights) -#ifdef XEN - mov r31=pr; - mov r16=cr.isr - mov r17=cr.ifa - mov r19=23 - movl r20=0x5300 - br.sptk.many fast_access_reflect;; -#endif - DBG_FAULT(23) - mov r16=cr.ifa - rsm psr.dt - mov r31=pr - ;; - srlz.d - br.sptk.many page_fault -END(daccess_rights) - - .org ia64_ivt+0x5400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) -ENTRY(general_exception) - DBG_FAULT(24) - mov r16=cr.isr - mov r31=pr - ;; -#ifdef XEN - cmp4.ge p6,p0=0x20,r16 -(p6) br.sptk.many dispatch_privop_fault -#else - cmp4.eq p6,p0=0,r16 -(p6) br.sptk.many dispatch_illegal_op_fault -#endif - ;; - mov r19=24 // fault number - br.sptk.many dispatch_to_fault_handler -END(general_exception) - - .org ia64_ivt+0x5500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) -ENTRY(disabled_fp_reg) -#ifdef XEN - REFLECT(25) -#endif - DBG_FAULT(25) - rsm psr.dfh // ensure we can access fph - ;; - srlz.d - mov r31=pr - mov r19=25 - br.sptk.many dispatch_to_fault_handler -END(disabled_fp_reg) - - .org ia64_ivt+0x5600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) -ENTRY(nat_consumption) -#ifdef XEN - REFLECT(26) -#endif - DBG_FAULT(26) - FAULT(26) -END(nat_consumption) - - .org ia64_ivt+0x5700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5700 Entry 27 (size 16 bundles) Speculation (40) -ENTRY(speculation_vector) -#ifdef XEN - // this probably need not reflect... - REFLECT(27) -#endif - DBG_FAULT(27) - /* - * A [f]chk.[as] instruction needs to take the branch to the recovery code but - * this part of the architecture is not implemented in hardware on some CPUs, such - * as Itanium. Thus, in general we need to emulate the behavior. IIM contains - * the relative target (not yet sign extended). So after sign extending it we - * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, - * i.e., the slot to restart into. 
- * - * cr.imm contains zero_ext(imm21) - */ - mov r18=cr.iim - ;; - mov r17=cr.iip - shl r18=r18,43 // put sign bit in position (43=64-21) - ;; - - mov r16=cr.ipsr - shr r18=r18,39 // sign extend (39=43-4) - ;; - - add r17=r17,r18 // now add the offset - ;; - mov cr.iip=r17 - dep r16=0,r16,41,2 // clear EI - ;; - - mov cr.ipsr=r16 - ;; - - rfi // and go back -END(speculation_vector) - - .org ia64_ivt+0x5800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5800 Entry 28 (size 16 bundles) Reserved - DBG_FAULT(28) - FAULT(28) - - .org ia64_ivt+0x5900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) -ENTRY(debug_vector) -#ifdef XEN - REFLECT(29) -#endif - DBG_FAULT(29) - FAULT(29) -END(debug_vector) - - .org ia64_ivt+0x5a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) -ENTRY(unaligned_access) -#ifdef XEN - REFLECT(30) -#endif - DBG_FAULT(30) - mov r16=cr.ipsr - mov r31=pr // prepare to save predicates - ;; - br.sptk.many dispatch_unaligned_handler -END(unaligned_access) - - .org ia64_ivt+0x5b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -ENTRY(unsupported_data_reference) -#ifdef XEN - REFLECT(31) -#endif - DBG_FAULT(31) - FAULT(31) -END(unsupported_data_reference) - - .org ia64_ivt+0x5c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) -ENTRY(floating_point_fault) -#ifdef XEN - REFLECT(32) -#endif - DBG_FAULT(32) - FAULT(32) -END(floating_point_fault) - - .org ia64_ivt+0x5d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) -ENTRY(floating_point_trap) -#ifdef XEN - REFLECT(33) -#endif - DBG_FAULT(33) - FAULT(33) -END(floating_point_trap) - - .org ia64_ivt+0x5e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -ENTRY(lower_privilege_trap) -#ifdef XEN - REFLECT(34) -#endif - DBG_FAULT(34) - FAULT(34) -END(lower_privilege_trap) - - .org ia64_ivt+0x5f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) -ENTRY(taken_branch_trap) -#ifdef XEN - REFLECT(35) -#endif - DBG_FAULT(35) - FAULT(35) -END(taken_branch_trap) - - .org ia64_ivt+0x6000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) -ENTRY(single_step_trap) -#ifdef XEN - REFLECT(36) -#endif - DBG_FAULT(36) - FAULT(36) -END(single_step_trap) - - .org ia64_ivt+0x6100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6100 Entry 37 (size 16 bundles) Reserved - DBG_FAULT(37) - FAULT(37) - - .org ia64_ivt+0x6200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6200 Entry 38 (size 16 bundles) Reserved - DBG_FAULT(38) - FAULT(38) - - .org ia64_ivt+0x6300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6300 Entry 39 (size 16 
bundles) Reserved - DBG_FAULT(39) - FAULT(39) - - .org ia64_ivt+0x6400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6400 Entry 40 (size 16 bundles) Reserved - DBG_FAULT(40) - FAULT(40) - - .org ia64_ivt+0x6500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6500 Entry 41 (size 16 bundles) Reserved - DBG_FAULT(41) - FAULT(41) - - .org ia64_ivt+0x6600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6600 Entry 42 (size 16 bundles) Reserved - DBG_FAULT(42) - FAULT(42) - - .org ia64_ivt+0x6700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6700 Entry 43 (size 16 bundles) Reserved - DBG_FAULT(43) - FAULT(43) - - .org ia64_ivt+0x6800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6800 Entry 44 (size 16 bundles) Reserved - DBG_FAULT(44) - FAULT(44) - - .org ia64_ivt+0x6900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) -ENTRY(ia32_exception) -#ifdef XEN - REFLECT(45) -#endif - DBG_FAULT(45) - FAULT(45) -END(ia32_exception) - - .org ia64_ivt+0x6a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) -ENTRY(ia32_intercept) -#ifdef XEN - REFLECT(46) -#endif - DBG_FAULT(46) -#ifdef CONFIG_IA32_SUPPORT - mov r31=pr - mov r16=cr.isr - ;; - extr.u r17=r16,16,8 // get ISR.code - mov r18=ar.eflag - mov r19=cr.iim // old eflag value - ;; - cmp.ne p6,p0=2,r17 -(p6) br.cond.spnt 1f // not a system flag fault - xor r16=r18,r19 - ;; - extr.u r17=r16,18,1 // get the eflags.ac bit - ;; - cmp.eq p6,p0=0,r17 -(p6) br.cond.spnt 1f // eflags.ac bit didn't change - ;; - mov pr=r31,-1 // restore predicate registers - rfi - -1: -#endif // CONFIG_IA32_SUPPORT - FAULT(46) -END(ia32_intercept) - - .org ia64_ivt+0x6b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) -ENTRY(ia32_interrupt) -#ifdef XEN - REFLECT(47) -#endif - DBG_FAULT(47) -#ifdef CONFIG_IA32_SUPPORT - mov r31=pr - br.sptk.many dispatch_to_ia32_handler -#else - FAULT(47) -#endif -END(ia32_interrupt) - - .org ia64_ivt+0x6c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6c00 Entry 48 (size 16 bundles) Reserved - DBG_FAULT(48) - FAULT(48) - - .org ia64_ivt+0x6d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6d00 Entry 49 (size 16 bundles) Reserved - DBG_FAULT(49) - FAULT(49) - - .org ia64_ivt+0x6e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6e00 Entry 50 (size 16 bundles) Reserved - DBG_FAULT(50) - FAULT(50) - - .org ia64_ivt+0x6f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6f00 Entry 51 (size 16 bundles) Reserved - DBG_FAULT(51) - FAULT(51) - - .org ia64_ivt+0x7000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7000 Entry 52 (size 16 bundles) Reserved - DBG_FAULT(52) - FAULT(52) - - .org ia64_ivt+0x7100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7100 
Entry 53 (size 16 bundles) Reserved - DBG_FAULT(53) - FAULT(53) - - .org ia64_ivt+0x7200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7200 Entry 54 (size 16 bundles) Reserved - DBG_FAULT(54) - FAULT(54) - - .org ia64_ivt+0x7300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7300 Entry 55 (size 16 bundles) Reserved - DBG_FAULT(55) - FAULT(55) - - .org ia64_ivt+0x7400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7400 Entry 56 (size 16 bundles) Reserved - DBG_FAULT(56) - FAULT(56) - - .org ia64_ivt+0x7500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7500 Entry 57 (size 16 bundles) Reserved - DBG_FAULT(57) - FAULT(57) - - .org ia64_ivt+0x7600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7600 Entry 58 (size 16 bundles) Reserved - DBG_FAULT(58) - FAULT(58) - - .org ia64_ivt+0x7700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7700 Entry 59 (size 16 bundles) Reserved - DBG_FAULT(59) - FAULT(59) - - .org ia64_ivt+0x7800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7800 Entry 60 (size 16 bundles) Reserved - DBG_FAULT(60) - FAULT(60) - - .org ia64_ivt+0x7900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7900 Entry 61 (size 16 bundles) Reserved - DBG_FAULT(61) - FAULT(61) - - .org ia64_ivt+0x7a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7a00 Entry 62 (size 16 bundles) Reserved - DBG_FAULT(62) - FAULT(62) - - .org ia64_ivt+0x7b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7b00 Entry 63 (size 16 bundles) Reserved - DBG_FAULT(63) - FAULT(63) - - .org ia64_ivt+0x7c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7c00 Entry 64 (size 16 bundles) Reserved - DBG_FAULT(64) - FAULT(64) - - .org ia64_ivt+0x7d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7d00 Entry 65 (size 16 bundles) Reserved - DBG_FAULT(65) - FAULT(65) - - .org ia64_ivt+0x7e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7e00 Entry 66 (size 16 bundles) Reserved - DBG_FAULT(66) - FAULT(66) - - .org ia64_ivt+0x7f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7f00 Entry 67 (size 16 bundles) Reserved - DBG_FAULT(67) - FAULT(67) - -#ifdef XEN - .org ia64_ivt+0x8000 -GLOBAL_ENTRY(dispatch_reflection) - /* - * Input: - * psr.ic: off - * r19: intr type (offset into ivt, see ia64_int.h) - * r31: contains saved predicates (pr) - */ - SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out4=r15 - mov out0=cr.ifa - adds out1=16,sp - mov out2=cr.isr - mov out3=cr.iim -// mov out3=cr.itir - - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - SAVE_REST - movl r14=ia64_leave_kernel - ;; - mov rp=r14 - br.sptk.many ia64_prepare_handle_reflection -END(dispatch_reflection) - -#define SAVE_MIN_COVER_DONE DO_SAVE_MIN(,mov r30=cr.ifs,) - -// same as dispatch_break_fault except cover has already been done 
-GLOBAL_ENTRY(dispatch_slow_hyperprivop) - SAVE_MIN_COVER_DONE - ;; - br.sptk.many dispatch_break_fault_post_save -END(dispatch_slow_hyperprivop) -#endif - -#ifdef CONFIG_IA32_SUPPORT - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... - */ - - // IA32 interrupt entry point - -ENTRY(dispatch_to_ia32_handler) - SAVE_MIN - ;; - mov r14=cr.isr - ssm psr.ic | PSR_DEFAULT_BITS - ;; - srlz.i // guarantee that interruption collection is on - ;; -(p15) ssm psr.i - adds r3=8,r2 // Base pointer for SAVE_REST - ;; - SAVE_REST - ;; - mov r15=0x80 - shr r14=r14,16 // Get interrupt number - ;; - cmp.ne p6,p0=r14,r15 -(p6) br.call.dpnt.many b6=non_ia32_syscall - - adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions - adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp - ;; - cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 - ld8 r8=[r14] // get r8 - ;; - st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP) - ;; - alloc r15=ar.pfs,0,0,6,0 // must first in an insn group - ;; - ld4 r8=[r14],8 // r8 == eax (syscall number) - mov r15=IA32_NR_syscalls - ;; - cmp.ltu.unc p6,p7=r8,r15 - ld4 out1=[r14],8 // r9 == ecx - ;; - ld4 out2=[r14],8 // r10 == edx - ;; - ld4 out0=[r14] // r11 == ebx - adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp - ;; - ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp - ;; - ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi - adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 out4=[r14] // r15 == edi - movl r16=ia32_syscall_table - ;; -(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number - ld4 r2=[r2] // r2 = current_thread_info()->flags - ;; - ld8 r16=[r16] - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit - ;; - mov b6=r16 - movl r15=ia32_ret_from_syscall - cmp.eq p8,p0=r2,r0 - ;; - mov rp=r15 -(p8) br.call.sptk.many b6=b6 - br.cond.sptk ia32_trace_syscall - -non_ia32_syscall: - alloc r15=ar.pfs,0,0,2,0 - mov out0=r14 // interrupt # - add out1=16,sp // pointer to pt_regs - ;; // avoid WAW on CFM - br.call.sptk.many rp=ia32_bad_interrupt -.ret1: movl r15=ia64_leave_kernel - ;; - mov rp=r15 - br.ret.sptk.many rp -END(dispatch_to_ia32_handler) - -#endif /* CONFIG_IA32_SUPPORT */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/bitop.c --- a/xen/arch/ia64/linux/lib/bitop.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,88 +0,0 @@ -#include <linux/compiler.h> -#include <linux/types.h> -#include <asm/intrinsics.h> -#include <linux/module.h> -#include <linux/bitops.h> - -/* - * Find next zero bit in a bitmap reasonably efficiently.. - */ - -int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset) -{ - unsigned long *p = ((unsigned long *) addr) + (offset >> 6); - unsigned long result = offset & ~63UL; - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 63UL; - if (offset) { - tmp = *(p++); - tmp |= ~0UL >> (64-offset); - if (size < 64) - goto found_first; - if (~tmp) - goto found_middle; - size -= 64; - result += 64; - } - while (size & ~63UL) { - if (~(tmp = *(p++))) - goto found_middle; - result += 64; - size -= 64; - } - if (!size) - return result; - tmp = *p; -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* any bits zero? 
*/ - return result + size; /* nope */ -found_middle: - return result + ffz(tmp); -} -EXPORT_SYMBOL(__find_next_zero_bit); - -/* - * Find next bit in a bitmap reasonably efficiently.. - */ -int __find_next_bit(const void *addr, unsigned long size, unsigned long offset) -{ - unsigned long *p = ((unsigned long *) addr) + (offset >> 6); - unsigned long result = offset & ~63UL; - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset &= 63UL; - if (offset) { - tmp = *(p++); - tmp &= ~0UL << offset; - if (size < 64) - goto found_first; - if (tmp) - goto found_middle; - size -= 64; - result += 64; - } - while (size & ~63UL) { - if ((tmp = *(p++))) - goto found_middle; - result += 64; - size -= 64; - } - if (!size) - return result; - tmp = *p; - found_first: - tmp &= ~0UL >> (64-size); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ - found_middle: - return result + __ffs(tmp); -} -EXPORT_SYMBOL(__find_next_bit); diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/clear_page.S --- a/xen/arch/ia64/linux/lib/clear_page.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,77 +0,0 @@ -/* - * Copyright (C) 1999-2002 Hewlett-Packard Co - * Stephane Eranian <eranian@xxxxxxxxxx> - * David Mosberger-Tang <davidm@xxxxxxxxxx> - * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx> - * - * 1/06/01 davidm Tuned for Itanium. - * 2/12/02 kchen Tuned for both Itanium and McKinley - * 3/08/02 davidm Some more tweaking - */ -#include <linux/config.h> - -#include <asm/asmmacro.h> -#include <asm/page.h> - -#ifdef CONFIG_ITANIUM -# define L3_LINE_SIZE 64 // Itanium L3 line size -# define PREFETCH_LINES 9 // magic number -#else -# define L3_LINE_SIZE 128 // McKinley L3 line size -# define PREFETCH_LINES 12 // magic number -#endif - -#define saved_lc r2 -#define dst_fetch r3 -#define dst1 r8 -#define dst2 r9 -#define dst3 r10 -#define dst4 r11 - -#define dst_last r31 - -GLOBAL_ENTRY(clear_page) - .prologue - .regstk 1,0,0,0 - mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until - .save ar.lc, saved_lc - mov saved_lc = ar.lc - - .body - mov ar.lc = (PREFETCH_LINES - 1) - mov dst_fetch = in0 - adds dst1 = 16, in0 - adds dst2 = 32, in0 - ;; -.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE - adds dst3 = 48, in0 // executing this multiple times is harmless - br.cloop.sptk.few .fetch - ;; - addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch - mov ar.lc = r16 // one L3 line per iteration - adds dst4 = 64, in0 - ;; -#ifdef CONFIG_ITANIUM - // Optimized for Itanium -1: stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 - cmp.lt p8,p0=dst_fetch, dst_last - ;; -#else - // Optimized for McKinley -1: stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 - stf.spill.nta [dst3] = f0, 64 - stf.spill.nta [dst4] = f0, 128 - cmp.lt p8,p0=dst_fetch, dst_last - ;; - stf.spill.nta [dst1] = f0, 64 - stf.spill.nta [dst2] = f0, 64 -#endif - stf.spill.nta [dst3] = f0, 64 -(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE - br.cloop.sptk.few 1b - ;; - mov ar.lc = saved_lc // restore lc - br.ret.sptk.many rp -END(clear_page) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/copy_page_mck.S --- a/xen/arch/ia64/linux/lib/copy_page_mck.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,185 +0,0 @@ -/* - * McKinley-optimized version of copy_page(). 
- * - * Copyright (C) 2002 Hewlett-Packard Co - * David Mosberger <davidm@xxxxxxxxxx> - * - * Inputs: - * in0: address of target page - * in1: address of source page - * Output: - * no return value - * - * General idea: - * - use regular loads and stores to prefetch data to avoid consuming M-slot just for - * lfetches => good for in-cache performance - * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single - * cycle - * - * Principle of operation: - * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes. - * To avoid secondary misses in L2, we prefetch both source and destination with a line-size - * of 128 bytes. When both of these lines are in the L2 and the first half of the - * source line is in L1, we start copying the remaining words. The second half of the - * source line is prefetched in an earlier iteration, so that by the time we start - * accessing it, it's also present in the L1. - * - * We use a software-pipelined loop to control the overall operation. The pipeline - * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching - * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination - * cache-lines, the last K stages are used to copy the cache-line words not copied by - * the prefetches. The four relevant points in the pipelined are called A, B, C, D: - * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line - * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought - * into L1D and p[D] is TRUE if a cacheline needs to be copied. - * - * This all sounds very complicated, but thanks to the modulo-scheduled loop support, - * the resulting code is very regular and quite easy to follow (once you get the idea). - * - * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented - * as the separate .prefetch_loop. Logically, this loop performs exactly like the - * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed, - * so that each loop iteration is faster (again, good for cached case). - * - * When reading the code, it helps to keep the following picture in mind: - * - * word 0 word 1 - * +------+------+--- - * | v[x] | t1 | ^ - * | t2 | t3 | | - * | t4 | t5 | | - * | t6 | t7 | | 128 bytes - * | n[y] | t9 | | (L2 cache line) - * | t10 | t11 | | - * | t12 | t13 | | - * | t14 | t15 | v - * +------+------+--- - * - * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C] - * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in - * an order that avoids bank conflicts. - */ -#include <asm/asmmacro.h> -#include <asm/page.h> - -#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st) - -#define src0 r2 -#define src1 r3 -#define dst0 r9 -#define dst1 r10 -#define src_pre_mem r11 -#define dst_pre_mem r14 -#define src_pre_l2 r15 -#define dst_pre_l2 r16 -#define t1 r17 -#define t2 r18 -#define t3 r19 -#define t4 r20 -#define t5 t1 // alias! -#define t6 t2 // alias! -#define t7 t3 // alias! -#define t9 t5 // alias! -#define t10 t4 // alias! -#define t11 t7 // alias! -#define t12 t6 // alias! -#define t14 t10 // alias! 
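[Editorial aside, not part of this changeset: stripped of the rotating registers and modulo scheduling, the prefetch idea described in the block comment above amounts to keeping a prefetch stream a fixed number of cache lines ahead of the copy stream. A minimal C sketch, assuming the same 128-byte L2 line and PREFETCH_DIST of 8, with __builtin_prefetch standing in for the explicit ld8/st8 prefetches:]

#include <stddef.h>
#include <string.h>

/* Illustrative only.  LINE and PREFETCH_DIST mirror the values used in
 * the hunk above; __builtin_prefetch (GCC/Clang) stands in for the
 * explicit prefetch streams and the modulo-scheduled loop. */
#define LINE          128   /* L2 line size assumed above            */
#define PREFETCH_DIST 8     /* lines kept ahead of the copy stream   */

static void copy_page_pipelined(void *dst, const void *src, size_t page_size)
{
    const char *s = src;
    char *d = dst;
    size_t lines = page_size / LINE;

    for (size_t i = 0; i < lines; i++) {
        if (i + PREFETCH_DIST < lines) {
            __builtin_prefetch(s + (i + PREFETCH_DIST) * LINE, 0);  /* read  */
            __builtin_prefetch(d + (i + PREFETCH_DIST) * LINE, 1);  /* write */
        }
        memcpy(d + i * LINE, s + i * LINE, LINE);  /* copy one line */
    }
}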
-#define t13 r21 -#define t15 r22 - -#define saved_lc r23 -#define saved_pr r24 - -#define A 0 -#define B (PREFETCH_DIST) -#define C (B + PREFETCH_DIST) -#define D (C + 3) -#define N (D + 1) -#define Nrot ((N + 7) & ~7) - -GLOBAL_ENTRY(copy_page) - .prologue - alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot - - .rotr v[2*PREFETCH_DIST], n[D-C+1] - .rotp p[N] - - .save ar.lc, saved_lc - mov saved_lc = ar.lc - .save pr, saved_pr - mov saved_pr = pr - .body - - mov src_pre_mem = in1 - mov pr.rot = 0x10000 - mov ar.ec = 1 // special unrolled loop - - mov dst_pre_mem = in0 - mov ar.lc = 2*PREFETCH_DIST - 1 - - add src_pre_l2 = 8*8, in1 - add dst_pre_l2 = 8*8, in0 - add src0 = 8, in1 // first t1 src - add src1 = 3*8, in1 // first t3 src - add dst0 = 8, in0 // first t1 dst - add dst1 = 3*8, in0 // first t3 dst - mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1 - nop.m 0 - nop.i 0 - ;; - // same as .line_copy loop, but with all predicated-off instructions removed: -.prefetch_loop: -(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 -(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 - br.ctop.sptk .prefetch_loop - ;; - cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero) - mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits! - mov ar.ec = N // # of stages in pipeline - ;; -.line_copy: -(p[D]) ld8 t2 = [src0], 3*8 // M0 -(p[D]) ld8 t4 = [src1], 3*8 // M1 -(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory -(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2 - ;; -(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory -(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2 -(p[D]) st8 [dst0] = t1, 8 // M2 -(p[D]) st8 [dst1] = t3, 8 // M3 - ;; -(p[D]) ld8 t5 = [src0], 8 -(p[D]) ld8 t7 = [src1], 3*8 -(p[D]) st8 [dst0] = t2, 3*8 -(p[D]) st8 [dst1] = t4, 3*8 - ;; -(p[D]) ld8 t6 = [src0], 3*8 -(p[D]) ld8 t10 = [src1], 8 -(p[D]) st8 [dst0] = t5, 8 -(p[D]) st8 [dst1] = t7, 3*8 - ;; -(p[D]) ld8 t9 = [src0], 3*8 -(p[D]) ld8 t11 = [src1], 3*8 -(p[D]) st8 [dst0] = t6, 3*8 -(p[D]) st8 [dst1] = t10, 8 - ;; -(p[D]) ld8 t12 = [src0], 8 -(p[D]) ld8 t14 = [src1], 8 -(p[D]) st8 [dst0] = t9, 3*8 -(p[D]) st8 [dst1] = t11, 3*8 - ;; -(p[D]) ld8 t13 = [src0], 4*8 -(p[D]) ld8 t15 = [src1], 4*8 -(p[D]) st8 [dst0] = t12, 8 -(p[D]) st8 [dst1] = t14, 8 - ;; -(p[D-1])ld8 t1 = [src0], 8 -(p[D-1])ld8 t3 = [src1], 8 -(p[D]) st8 [dst0] = t13, 4*8 -(p[D]) st8 [dst1] = t15, 4*8 - br.ctop.sptk .line_copy - ;; - mov ar.lc = saved_lc - mov pr = saved_pr, -1 - br.ret.sptk.many rp -END(copy_page) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/flush.S --- a/xen/arch/ia64/linux/lib/flush.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,61 +0,0 @@ -/* - * Cache flushing routines. - * - * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co - * David Mosberger-Tang <davidm@xxxxxxxxxx> - * - * 05/28/05 Zoltan Menyhart Dynamic stride size - */ - -#include <asm/asmmacro.h> - - - /* - * flush_icache_range(start,end) - * - * Make i-cache(s) coherent with d-caches. - * - * Must deal with range from start to end-1 but nothing else (need to - * be careful not to touch addresses that may be unmapped). - * - * Note: "in0" and "in1" are preserved for debugging purposes. 
- */ -GLOBAL_ENTRY(flush_icache_range) - - .prologue - alloc r2=ar.pfs,2,0,0,0 - movl r3=ia64_i_cache_stride_shift - mov r21=1 - ;; - ld8 r20=[r3] // r20: stride shift - sub r22=in1,r0,1 // last byte address - ;; - shr.u r23=in0,r20 // start / (stride size) - shr.u r22=r22,r20 // (last byte address) / (stride size) - shl r21=r21,r20 // r21: stride size of the i-cache(s) - ;; - sub r8=r22,r23 // number of strides - 1 - shl r24=r23,r20 // r24: addresses for "fc.i" = - // "start" rounded down to stride boundary - .save ar.lc,r3 - mov r3=ar.lc // save ar.lc - ;; - - .body - mov ar.lc=r8 - ;; - /* - * 32 byte aligned loop, even number of (actually 2) bundles - */ -.Loop: fc.i r24 // issuable on M0 only - add r24=r21,r24 // we flush "stride size" bytes per iteration - nop.i 0 - br.cloop.sptk.few .Loop - ;; - sync.i - ;; - srlz.i - ;; - mov ar.lc=r3 // restore ar.lc - br.ret.sptk.many rp -END(flush_icache_range) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/idiv32.S --- a/xen/arch/ia64/linux/lib/idiv32.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,83 +0,0 @@ -/* - * Copyright (C) 2000 Hewlett-Packard Co - * Copyright (C) 2000 David Mosberger-Tang <davidm@xxxxxxxxxx> - * - * 32-bit integer division. - * - * This code is based on the application note entitled "Divide, Square Root - * and Remainder Algorithms for the IA-64 Architecture". This document - * is available as Intel document number 248725-002 or via the web at - * http://developer.intel.com/software/opensource/numerics/ - * - * For more details on the theory behind these algorithms, see "IA-64 - * and Elementary Functions" by Peter Markstein; HP Professional Books - * (http://www.hp.com/go/retailbooks/) - */ - -#include <asm/asmmacro.h> - -#ifdef MODULO -# define OP mod -#else -# define OP div -#endif - -#ifdef UNSIGNED -# define SGN u -# define EXTEND zxt4 -# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b -# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b -#else -# define SGN -# define EXTEND sxt4 -# define INT_TO_FP(a,b) fcvt.xf a=b -# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b -#endif - -#define PASTE1(a,b) a##b -#define PASTE(a,b) PASTE1(a,b) -#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3)) - -GLOBAL_ENTRY(NAME) - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias) - EXTEND in0 = in0 // in0 = a - EXTEND in1 = in1 // in1 = b - ;; - setf.sig f8 = in0 - setf.sig f9 = in1 -#ifdef MODULO - sub in1 = r0, in1 // in1 = -b -#endif - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. 
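[Editorial aside, not part of this changeset: the frcpa-plus-Newton-Raphson sequence that idiv32.S implements (the q0/e0/q1/e1/q2 steps annotated just below) can be sketched in portable C for the unsigned 32-bit case. This is illustrative only — it uses ordinary doubles where the real code relies on the 82-bit register format — but it keeps the 2^-34 correction term, which biases the quotient upward so that truncation lands on the correct integer for 32-bit operands:]

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: same refinement steps as below, with 1.0/fb in
 * place of frcpa and doubles in place of the 82-bit registers. */
static uint32_t udiv_nr(uint32_t a, uint32_t b)   /* requires b != 0 */
{
    double fa = a, fb = b;
    double y = 1.0 / fb;            /* stands in for frcpa: y0 ~= 1/b */
    double q = fa * y;              /* q0 = a*y0                      */
    double e = 1.0 - fb * y;        /* e0 = -b*y0 + 1                 */
    q = q + e * q;                  /* q1 = e0*q0 + q0                */
    e = e * e + 0x1p-34;            /* e1 = e0*e0 + 2^-34             */
    q = q + e * q;                  /* q2 = e1*q1 + q1                */
    return (uint32_t)q;             /* q  = trunc(q2)                 */
}

static uint32_t umod_nr(uint32_t a, uint32_t b)
{
    return a - udiv_nr(a, b) * b;   /* MODULO build: r = q*(-b) + a   */
}

int main(void)
{
    printf("%u %u\n", udiv_nr(1000000007u, 97u), umod_nr(1000000007u, 97u));
    return 0;                       /* prints: 10309278 41 */
}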
- INT_TO_FP(f8, f8) - INT_TO_FP(f9, f9) - ;; - setf.exp f7 = r2 // f7 = 2^-34 - frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b) - ;; -(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0 -(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1 - ;; -#ifdef MODULO - setf.sig f9 = in1 // f9 = -b -#endif -(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0 -(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34 - ;; -#ifdef MODULO - setf.sig f7 = in0 -#endif -(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1 - ;; - FP_TO_INT(f6, f6) // q = trunc(q2) - ;; -#ifdef MODULO - xma.l f6 = f6, f9, f7 // r = q*(-b) + a - ;; -#endif - getf.sig r8 = f6 // transfer result to result register - br.ret.sptk.many rp -END(NAME) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/idiv64.S --- a/xen/arch/ia64/linux/lib/idiv64.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,80 +0,0 @@ -/* - * Copyright (C) 1999-2000 Hewlett-Packard Co - * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@xxxxxxxxxx> - * - * 64-bit integer division. - * - * This code is based on the application note entitled "Divide, Square Root - * and Remainder Algorithms for the IA-64 Architecture". This document - * is available as Intel document number 248725-002 or via the web at - * http://developer.intel.com/software/opensource/numerics/ - * - * For more details on the theory behind these algorithms, see "IA-64 - * and Elementary Functions" by Peter Markstein; HP Professional Books - * (http://www.hp.com/go/retailbooks/) - */ - -#include <asm/asmmacro.h> - -#ifdef MODULO -# define OP mod -#else -# define OP div -#endif - -#ifdef UNSIGNED -# define SGN u -# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b -# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b -#else -# define SGN -# define INT_TO_FP(a,b) fcvt.xf a=b -# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b -#endif - -#define PASTE1(a,b) a##b -#define PASTE(a,b) PASTE1(a,b) -#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3)) - -GLOBAL_ENTRY(NAME) - .regstk 2,0,0,0 - // Transfer inputs to FP registers. - setf.sig f8 = in0 - setf.sig f9 = in1 - ;; - // Convert the inputs to FP, to avoid FP software-assist faults. - INT_TO_FP(f8, f8) - INT_TO_FP(f9, f9) - ;; - frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b) - ;; -(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0 -(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1 - ;; -(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0 -(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0 - ;; -#ifdef MODULO - sub in1 = r0, in1 // in1 = -b -#endif -(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1 -(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0 - ;; -(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1 -(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a - ;; -#ifdef MODULO - setf.sig f8 = in0 // f8 = a - setf.sig f9 = in1 // f9 = -b -#endif -(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2 - ;; - FP_TO_INT(f11, f11) // q = trunc(q3) - ;; -#ifdef MODULO - xma.l f11 = f11, f9, f8 // r = q*(-b) + a - ;; -#endif - getf.sig r8 = f11 // transfer result to result register - br.ret.sptk.many rp -END(NAME) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/memcpy_mck.S --- a/xen/arch/ia64/linux/lib/memcpy_mck.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,661 +0,0 @@ -/* - * Itanium 2-optimized version of memcpy and copy_user function - * - * Inputs: - * in0: destination address - * in1: source address - * in2: number of bytes to copy - * Output: - * 0 if success, or number of byte NOT copied if error occurred. - * - * Copyright (C) 2002 Intel Corp. 
- * Copyright (C) 2002 Ken Chen <kenneth.w.chen@xxxxxxxxx> - */ -#include <linux/config.h> -#include <asm/asmmacro.h> -#include <asm/page.h> - -#define EK(y...) EX(y) - -/* McKinley specific optimization */ - -#define retval r8 -#define saved_pfs r31 -#define saved_lc r10 -#define saved_pr r11 -#define saved_in0 r14 -#define saved_in1 r15 -#define saved_in2 r16 - -#define src0 r2 -#define src1 r3 -#define dst0 r17 -#define dst1 r18 -#define cnt r9 - -/* r19-r30 are temp for each code section */ -#define PREFETCH_DIST 8 -#define src_pre_mem r19 -#define dst_pre_mem r20 -#define src_pre_l2 r21 -#define dst_pre_l2 r22 -#define t1 r23 -#define t2 r24 -#define t3 r25 -#define t4 r26 -#define t5 t1 // alias! -#define t6 t2 // alias! -#define t7 t3 // alias! -#define n8 r27 -#define t9 t5 // alias! -#define t10 t4 // alias! -#define t11 t7 // alias! -#define t12 t6 // alias! -#define t14 t10 // alias! -#define t13 r28 -#define t15 r29 -#define tmp r30 - -/* defines for long_copy block */ -#define A 0 -#define B (PREFETCH_DIST) -#define C (B + PREFETCH_DIST) -#define D (C + 1) -#define N (D + 1) -#define Nrot ((N + 7) & ~7) - -/* alias */ -#define in0 r32 -#define in1 r33 -#define in2 r34 - -GLOBAL_ENTRY(memcpy) - and r28=0x7,in0 - and r29=0x7,in1 - mov f6=f0 - br.cond.sptk .common_code - ;; -END(memcpy) -GLOBAL_ENTRY(__copy_user) - .prologue -// check dest alignment - and r28=0x7,in0 - and r29=0x7,in1 - mov f6=f1 - mov saved_in0=in0 // save dest pointer - mov saved_in1=in1 // save src pointer - mov saved_in2=in2 // save len - ;; -.common_code: - cmp.gt p15,p0=8,in2 // check for small size - cmp.ne p13,p0=0,r28 // check dest alignment - cmp.ne p14,p0=0,r29 // check src alignment - add src0=0,in1 - sub r30=8,r28 // for .align_dest - mov retval=r0 // initialize return value - ;; - add dst0=0,in0 - add dst1=1,in0 // dest odd index - cmp.le p6,p0 = 1,r30 // for .align_dest -(p15) br.cond.dpnt .memcpy_short -(p13) br.cond.dpnt .align_dest -(p14) br.cond.dpnt .unaligned_src - ;; - -// both dest and src are aligned on 8-byte boundary -.aligned_src: - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot - .save pr, saved_pr - mov saved_pr=pr - - shr.u cnt=in2,7 // this much cache line - ;; - cmp.lt p6,p0=2*PREFETCH_DIST,cnt - cmp.lt p7,p8=1,cnt - .save ar.lc, saved_lc - mov saved_lc=ar.lc - .body - add cnt=-1,cnt - add src_pre_mem=0,in1 // prefetch src pointer - add dst_pre_mem=0,in0 // prefetch dest pointer - ;; -(p7) mov ar.lc=cnt // prefetch count -(p8) mov ar.lc=r0 -(p6) br.cond.dpnt .long_copy - ;; - -.prefetch: - lfetch.fault [src_pre_mem], 128 - lfetch.fault.excl [dst_pre_mem], 128 - br.cloop.dptk.few .prefetch - ;; - -.medium_copy: - and tmp=31,in2 // copy length after iteration - shr.u r29=in2,5 // number of 32-byte iteration - add dst1=8,dst0 // 2nd dest pointer - ;; - add cnt=-1,r29 // ctop iteration adjustment - cmp.eq p10,p0=r29,r0 // do we really need to loop? 
- add src1=8,src0 // 2nd src pointer - cmp.le p6,p0=8,tmp - ;; - cmp.le p7,p0=16,tmp - mov ar.lc=cnt // loop setup - cmp.eq p16,p17 = r0,r0 - mov ar.ec=2 -(p10) br.dpnt.few .aligned_src_tail - ;; - TEXT_ALIGN(32) -1: -EX(.ex_handler, (p16) ld8 r34=[src0],16) -EK(.ex_handler, (p16) ld8 r38=[src1],16) -EX(.ex_handler, (p17) st8 [dst0]=r33,16) -EK(.ex_handler, (p17) st8 [dst1]=r37,16) - ;; -EX(.ex_handler, (p16) ld8 r32=[src0],16) -EK(.ex_handler, (p16) ld8 r36=[src1],16) -EX(.ex_handler, (p16) st8 [dst0]=r34,16) -EK(.ex_handler, (p16) st8 [dst1]=r38,16) - br.ctop.dptk.few 1b - ;; - -.aligned_src_tail: -EX(.ex_handler, (p6) ld8 t1=[src0]) - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs -EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8) - cmp.le p8,p0=24,tmp - and r21=-8,tmp - ;; -EX(.ex_hndlr_s, (p8) ld8 t3=[src1]) -EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1 - and in2=7,tmp // remaining length -EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2 - add src0=src0,r21 // setting up src pointer - add dst0=dst0,r21 // setting up dest pointer - ;; -EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3 - mov pr=saved_pr,-1 - br.dptk.many .memcpy_short - ;; - -/* code taken from copy_page_mck */ -.long_copy: - .rotr v[2*PREFETCH_DIST] - .rotp p[N] - - mov src_pre_mem = src0 - mov pr.rot = 0x10000 - mov ar.ec = 1 // special unrolled loop - - mov dst_pre_mem = dst0 - - add src_pre_l2 = 8*8, src0 - add dst_pre_l2 = 8*8, dst0 - ;; - add src0 = 8, src_pre_mem // first t1 src - mov ar.lc = 2*PREFETCH_DIST - 1 - shr.u cnt=in2,7 // number of lines - add src1 = 3*8, src_pre_mem // first t3 src - add dst0 = 8, dst_pre_mem // first t1 dst - add dst1 = 3*8, dst_pre_mem // first t3 dst - ;; - and tmp=127,in2 // remaining bytes after this block - add cnt = -(2*PREFETCH_DIST) - 1, cnt - // same as .line_copy loop, but with all predicated-off instructions removed: -.prefetch_loop: -EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 -EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 - br.ctop.sptk .prefetch_loop - ;; - cmp.eq p16, p0 = r0, r0 // reset p16 to 1 - mov ar.lc = cnt - mov ar.ec = N // # of stages in pipeline - ;; -.line_copy: -EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0 -EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1 -EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory -EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2 - ;; -EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory -EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2 -EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2 -EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3 - ;; -EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8) -EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8) -EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8) -EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8) -EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8) - ;; -EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8) -EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8) - ;; -EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8) -EK(.ex_handler, (p[D]) 
ld8 t15 = [src1], 4*8) -EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8) -EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8) - ;; -EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8) -EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8) -EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8) -EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) - br.ctop.sptk .line_copy - ;; - - add dst0=-8,dst0 - add src0=-8,src0 - mov in2=tmp - .restore sp - br.sptk.many .medium_copy - ;; - -#define BLOCK_SIZE 128*32 -#define blocksize r23 -#define curlen r24 - -// dest is on 8-byte boundary, src is not. We need to do -// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle. -.unaligned_src: - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,3,5,0,8 - .save ar.lc, saved_lc - mov saved_lc=ar.lc - .save pr, saved_pr - mov saved_pr=pr - .body -.4k_block: - mov saved_in0=dst0 // need to save all input arguments - mov saved_in2=in2 - mov blocksize=BLOCK_SIZE - ;; - cmp.lt p6,p7=blocksize,in2 - mov saved_in1=src0 - ;; -(p6) mov in2=blocksize - ;; - shr.u r21=in2,7 // this much cache line - shr.u r22=in2,4 // number of 16-byte iteration - and curlen=15,in2 // copy length after iteration - and r30=7,src0 // source alignment - ;; - cmp.lt p7,p8=1,r21 - add cnt=-1,r21 - ;; - - add src_pre_mem=0,src0 // prefetch src pointer - add dst_pre_mem=0,dst0 // prefetch dest pointer - and src0=-8,src0 // 1st src pointer -(p7) mov ar.lc = cnt -(p8) mov ar.lc = r0 - ;; - TEXT_ALIGN(32) -1: lfetch.fault [src_pre_mem], 128 - lfetch.fault.excl [dst_pre_mem], 128 - br.cloop.dptk.few 1b - ;; - - shladd dst1=r22,3,dst0 // 2nd dest pointer - shladd src1=r22,3,src0 // 2nd src pointer - cmp.eq p8,p9=r22,r0 // do we really need to loop? - cmp.le p6,p7=8,curlen; // have at least 8 byte remaining? - add cnt=-1,r22 // ctop iteration adjustment - ;; -EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer -EK(.ex_handler, (p9) ld8 r37=[src1],8) -(p8) br.dpnt.few .noloop - ;; - -// The jump address is calculated based on src alignment. The COPYU -// macro below need to confine its size to power of two, so an entry -// can be caulated using shl instead of an expensive multiply. The -// size is then hard coded by the following #define to match the -// actual size. This make it somewhat tedious when COPYU macro gets -// changed and this need to be adjusted to match. -#define LOOP_SIZE 6 -1: - mov r29=ip // jmp_table thread - mov ar.lc=cnt - ;; - add r29=.jump_table - 1b - (.jmp1-.jump_table), r29 - shl r28=r30, LOOP_SIZE // jmp_table thread - mov ar.ec=2 // loop setup - ;; - add r29=r29,r28 // jmp_table thread - cmp.eq p16,p17=r0,r0 - ;; - mov b6=r29 // jmp_table thread - ;; - br.cond.sptk.few b6 - -// for 8-15 byte case -// We will skip the loop, but need to replicate the side effect -// that the loop produces. -.noloop: -EX(.ex_handler, (p6) ld8 r37=[src1],8) - add src0=8,src0 -(p6) shl r25=r30,3 - ;; -EX(.ex_handler, (p6) ld8 r27=[src1]) -(p6) shr.u r28=r37,r25 -(p6) sub r26=64,r25 - ;; -(p6) shl r27=r27,r26 - ;; -(p6) or r21=r28,r27 - -.unaligned_src_tail: -/* check if we have more than blocksize to copy, if so go back */ - cmp.gt p8,p0=saved_in2,blocksize - ;; -(p8) add dst0=saved_in0,blocksize -(p8) add src0=saved_in1,blocksize -(p8) sub in2=saved_in2,blocksize -(p8) br.dpnt .4k_block - ;; - -/* we have up to 15 byte to copy in the tail. - * part of work is already done in the jump table code - * we are at the following state. 
- * src side: - * - * xxxxxx xx <----- r21 has xxxxxxxx already - * -------- -------- -------- - * 0 8 16 - * ^ - * | - * src1 - * - * dst - * -------- -------- -------- - * ^ - * | - * dst1 - */ -EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy -(p6) add curlen=-8,curlen // update length - mov ar.pfs=saved_pfs - ;; - mov ar.lc=saved_lc - mov pr=saved_pr,-1 - mov in2=curlen // remaining length - mov dst0=dst1 // dest pointer - add src0=src1,r30 // forward by src alignment - ;; - -// 7 byte or smaller. -.memcpy_short: - cmp.le p8,p9 = 1,in2 - cmp.le p10,p11 = 2,in2 - cmp.le p12,p13 = 3,in2 - cmp.le p14,p15 = 4,in2 - add src1=1,src0 // second src pointer - add dst1=1,dst0 // second dest pointer - ;; - -EX(.ex_handler_short, (p8) ld1 t1=[src0],2) -EK(.ex_handler_short, (p10) ld1 t2=[src1],2) -(p9) br.ret.dpnt rp // 0 byte copy - ;; - -EX(.ex_handler_short, (p8) st1 [dst0]=t1,2) -EK(.ex_handler_short, (p10) st1 [dst1]=t2,2) -(p11) br.ret.dpnt rp // 1 byte copy - -EX(.ex_handler_short, (p12) ld1 t3=[src0],2) -EK(.ex_handler_short, (p14) ld1 t4=[src1],2) -(p13) br.ret.dpnt rp // 2 byte copy - ;; - - cmp.le p6,p7 = 5,in2 - cmp.le p8,p9 = 6,in2 - cmp.le p10,p11 = 7,in2 - -EX(.ex_handler_short, (p12) st1 [dst0]=t3,2) -EK(.ex_handler_short, (p14) st1 [dst1]=t4,2) -(p15) br.ret.dpnt rp // 3 byte copy - ;; - -EX(.ex_handler_short, (p6) ld1 t5=[src0],2) -EK(.ex_handler_short, (p8) ld1 t6=[src1],2) -(p7) br.ret.dpnt rp // 4 byte copy - ;; - -EX(.ex_handler_short, (p6) st1 [dst0]=t5,2) -EK(.ex_handler_short, (p8) st1 [dst1]=t6,2) -(p9) br.ret.dptk rp // 5 byte copy - -EX(.ex_handler_short, (p10) ld1 t7=[src0],2) -(p11) br.ret.dptk rp // 6 byte copy - ;; - -EX(.ex_handler_short, (p10) st1 [dst0]=t7,2) - br.ret.dptk rp // done all cases - - -/* Align dest to nearest 8-byte boundary. We know we have at - * least 7 bytes to copy, enough to crawl to 8-byte boundary. - * Actual number of byte to crawl depend on the dest alignment. 
- * 7 byte or less is taken care at .memcpy_short - - * src0 - source even index - * src1 - source odd index - * dst0 - dest even index - * dst1 - dest odd index - * r30 - distance to 8-byte boundary - */ - -.align_dest: - add src1=1,in1 // source odd index - cmp.le p7,p0 = 2,r30 // for .align_dest - cmp.le p8,p0 = 3,r30 // for .align_dest -EX(.ex_handler_short, (p6) ld1 t1=[src0],2) - cmp.le p9,p0 = 4,r30 // for .align_dest - cmp.le p10,p0 = 5,r30 - ;; -EX(.ex_handler_short, (p7) ld1 t2=[src1],2) -EK(.ex_handler_short, (p8) ld1 t3=[src0],2) - cmp.le p11,p0 = 6,r30 -EX(.ex_handler_short, (p6) st1 [dst0] = t1,2) - cmp.le p12,p0 = 7,r30 - ;; -EX(.ex_handler_short, (p9) ld1 t4=[src1],2) -EK(.ex_handler_short, (p10) ld1 t5=[src0],2) -EX(.ex_handler_short, (p7) st1 [dst1] = t2,2) -EK(.ex_handler_short, (p8) st1 [dst0] = t3,2) - ;; -EX(.ex_handler_short, (p11) ld1 t6=[src1],2) -EK(.ex_handler_short, (p12) ld1 t7=[src0],2) - cmp.eq p6,p7=r28,r29 -EX(.ex_handler_short, (p9) st1 [dst1] = t4,2) -EK(.ex_handler_short, (p10) st1 [dst0] = t5,2) - sub in2=in2,r30 - ;; -EX(.ex_handler_short, (p11) st1 [dst1] = t6,2) -EK(.ex_handler_short, (p12) st1 [dst0] = t7) - add dst0=in0,r30 // setup arguments - add src0=in1,r30 -(p6) br.cond.dptk .aligned_src -(p7) br.cond.dpnt .unaligned_src - ;; - -/* main loop body in jump table format */ -#define COPYU(shift) \ -1: \ -EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \ -EK(.ex_handler, (p16) ld8 r36=[src1],8); \ - (p17) shrp r35=r33,r34,shift;; /* 1 */ \ -EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \ - nop.m 0; \ - (p16) shrp r38=r36,r37,shift; \ -EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \ -EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ - br.ctop.dptk.few 1b;; \ - (p7) add src1=-8,src1; /* back out for <8 byte case */ \ - shrp r21=r22,r38,shift; /* speculative work */ \ - br.sptk.few .unaligned_src_tail /* branch out of jump table */ \ - ;; - TEXT_ALIGN(32) -.jump_table: - COPYU(8) // unaligned cases -.jmp1: - COPYU(16) - COPYU(24) - COPYU(32) - COPYU(40) - COPYU(48) - COPYU(56) - -#undef A -#undef B -#undef C -#undef D - -/* - * Due to lack of local tag support in gcc 2.x assembler, it is not clear which - * instruction failed in the bundle. The exception algorithm is that we - * first figure out the faulting address, then detect if there is any - * progress made on the copy, if so, redo the copy from last known copied - * location up to the faulting address (exclusive). In the copy_from_user - * case, remaining byte in kernel buffer will be zeroed. - * - * Take copy_from_user as an example, in the code there are multiple loads - * in a bundle and those multiple loads could span over two pages, the - * faulting address is calculated as page_round_down(max(src0, src1)). - * This is based on knowledge that if we can access one byte in a page, we - * can access any byte in that page. 
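[Editorial aside, not part of this changeset: the recovery bookkeeping described here, and the A/B/C/D lengths defined a little further down, reduce to plain pointer arithmetic. A simplified C sketch, collapsing the max(src0,src1)/min(dst0,dst1) selection to a single current pointer per side and assuming 16KB pages (PAGE_SHIFT 14):]

#include <stddef.h>
#include <stdint.h>

/* Illustrative only; PAGE_SHIFT 14 is an assumption, and the real code
 * additionally bails out early when no progress was made at all. */
#define PAGE_SHIFT 14

struct copyuser_fixup {
    size_t copied;    /* B: bytes already copied before the fault        */
    size_t to_copy;   /* C: bytes still copyable up to the fault page    */
    size_t to_zero;   /* D: kernel-buffer bytes left to zero (from-user) */
};

static struct copyuser_fixup
fixup_lengths(uintptr_t orig_src, uintptr_t orig_dst, size_t orig_len,
              uintptr_t cur_src, uintptr_t cur_dst, int fault_on_load)
{
    /* F: faulting pointer rounded down to a page boundary -- if one
     * byte of a page is accessible, every byte of that page is. */
    uintptr_t f = (fault_on_load ? cur_src : cur_dst)
                  & ~(((uintptr_t)1 << PAGE_SHIFT) - 1);
    size_t a = f - (fault_on_load ? orig_src : orig_dst);  /* A */
    size_t b = cur_dst - orig_dst;                         /* B */
    struct copyuser_fixup fx = {
        .copied  = b,
        .to_copy = (a > b) ? a - b : 0,  /* C = A - B        */
        .to_zero = orig_len - a,         /* D = orig_len - A */
    };
    return fx;
}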
- * - * predicate used in the exception handler: - * p6-p7: direction - * p10-p11: src faulting addr calculation - * p12-p13: dst faulting addr calculation - */ - -#define A r19 -#define B r20 -#define C r21 -#define D r22 -#define F r28 - -#define memset_arg0 r32 -#define memset_arg2 r33 - -#define saved_retval loc0 -#define saved_rtlink loc1 -#define saved_pfs_stack loc2 - -.ex_hndlr_s: - add src0=8,src0 - br.sptk .ex_handler - ;; -.ex_hndlr_d: - add dst0=8,dst0 - br.sptk .ex_handler - ;; -.ex_hndlr_lcpy_1: - mov src1=src_pre_mem - mov dst1=dst_pre_mem - cmp.gtu p10,p11=src_pre_mem,saved_in1 - cmp.gtu p12,p13=dst_pre_mem,saved_in0 - ;; -(p10) add src0=8,saved_in1 -(p11) mov src0=saved_in1 -(p12) add dst0=8,saved_in0 -(p13) mov dst0=saved_in0 - br.sptk .ex_handler -.ex_handler_lcpy: - // in line_copy block, the preload addresses should always ahead - // of the other two src/dst pointers. Furthermore, src1/dst1 should - // always ahead of src0/dst0. - mov src1=src_pre_mem - mov dst1=dst_pre_mem -.ex_handler: - mov pr=saved_pr,-1 // first restore pr, lc, and pfs - mov ar.lc=saved_lc - mov ar.pfs=saved_pfs - ;; -.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs - cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction - cmp.ltu p10,p11=src0,src1 - cmp.ltu p12,p13=dst0,dst1 - fcmp.eq p8,p0=f6,f0 // is it memcpy? - mov tmp = dst0 - ;; -(p11) mov src1 = src0 // pick the larger of the two -(p13) mov dst0 = dst1 // make dst0 the smaller one -(p13) mov dst1 = tmp // and dst1 the larger one - ;; -(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary -(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary - ;; -(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store -(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load - mov retval=saved_in2 -(p8) ld1 tmp=[src1] // force an oops for memcpy call -(p8) st1 [dst1]=r0 // force an oops for memcpy call -(p14) br.ret.sptk.many rp - -/* - * The remaining byte to copy is calculated as: - * - * A = (faulting_addr - orig_src) -> len to faulting ld address - * or - * (faulting_addr - orig_dst) -> len to faulting st address - * B = (cur_dst - orig_dst) -> len copied so far - * C = A - B -> len need to be copied - * D = orig_len - A -> len need to be zeroed - */ -(p6) sub A = F, saved_in0 -(p7) sub A = F, saved_in1 - clrrrb - ;; - alloc saved_pfs_stack=ar.pfs,3,3,3,0 - sub B = dst0, saved_in0 // how many byte copied so far - ;; - sub C = A, B - sub D = saved_in2, A - ;; - cmp.gt p8,p0=C,r0 // more than 1 byte? - add memset_arg0=saved_in0, A -(p6) mov memset_arg2=0 // copy_to_user should not call memset -(p7) mov memset_arg2=D // copy_from_user need to have kbuf zeroed - mov r8=0 - mov saved_retval = D - mov saved_rtlink = b0 - - add out0=saved_in0, B - add out1=saved_in1, B - mov out2=C -(p8) br.call.sptk.few b0=__copy_user // recursive call - ;; - - add saved_retval=saved_retval,r8 // above might return non-zero value - cmp.gt p8,p0=memset_arg2,r0 // more than 1 byte? 
- mov out0=memset_arg0 // *s - mov out1=r0 // c - mov out2=memset_arg2 // n -(p8) br.call.sptk.few b0=memset - ;; - - mov retval=saved_retval - mov ar.pfs=saved_pfs_stack - mov b0=saved_rtlink - br.ret.sptk.many rp - -/* end of McKinley specific optimization */ -END(__copy_user) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/memset.S --- a/xen/arch/ia64/linux/lib/memset.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,362 +0,0 @@ -/* Optimized version of the standard memset() function. - - Copyright (c) 2002 Hewlett-Packard Co/CERN - Sverre Jarp <Sverre.Jarp@xxxxxxx> - - Return: dest - - Inputs: - in0: dest - in1: value - in2: count - - The algorithm is fairly straightforward: set byte by byte until we - we get to a 16B-aligned address, then loop on 128 B chunks using an - early store as prefetching, then loop on 32B chucks, then clear remaining - words, finally clear remaining bytes. - Since a stf.spill f0 can store 16B in one go, we use this instruction - to get peak speed when value = 0. */ - -#include <asm/asmmacro.h> -#undef ret - -#define dest in0 -#define value in1 -#define cnt in2 - -#define tmp r31 -#define save_lc r30 -#define ptr0 r29 -#define ptr1 r28 -#define ptr2 r27 -#define ptr3 r26 -#define ptr9 r24 -#define loopcnt r23 -#define linecnt r22 -#define bytecnt r21 - -#define fvalue f6 - -// This routine uses only scratch predicate registers (p6 - p15) -#define p_scr p6 // default register for same-cycle branches -#define p_nz p7 -#define p_zr p8 -#define p_unalgn p9 -#define p_y p11 -#define p_n p12 -#define p_yy p13 -#define p_nn p14 - -#define MIN1 15 -#define MIN1P1HALF 8 -#define LINE_SIZE 128 -#define LSIZE_SH 7 // shift amount -#define PREF_AHEAD 8 - -GLOBAL_ENTRY(memset) -{ .mmi - .prologue - alloc tmp = ar.pfs, 3, 0, 0, 0 - lfetch.nt1 [dest] // - .save ar.lc, save_lc - mov.i save_lc = ar.lc - .body -} { .mmi - mov ret0 = dest // return value - cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero - cmp.eq p_scr, p0 = cnt, r0 -;; } -{ .mmi - and ptr2 = -(MIN1+1), dest // aligned address - and tmp = MIN1, dest // prepare to check for correct alignment - tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) -} { .mib - mov ptr1 = dest - mux1 value = value, @brcst // create 8 identical bytes in word -(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 -;; } -{ .mib - cmp.ne p_unalgn, p0 = tmp, r0 // -} { .mib - sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt - cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? -(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) -;; } -{ .mmi -(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment -(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? -;; } -{ .mib -(p_y) add cnt = -8, cnt // -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? -} { .mib -(p_y) st8 [ptr2] = value,-4 // -(p_n) add ptr2 = 4, ptr2 // -;; } -{ .mib -(p_yy) add cnt = -4, cnt // -(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? -} { .mib -(p_yy) st4 [ptr2] = value,-2 // -(p_nn) add ptr2 = 2, ptr2 // -;; } -{ .mmi - mov tmp = LINE_SIZE+1 // for compare -(p_y) add cnt = -2, cnt // -(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? 
-} { .mmi - setf.sig fvalue=value // transfer value to FLP side -(p_y) st2 [ptr2] = value,-1 // -(p_n) add ptr2 = 1, ptr2 // -;; } - -{ .mmi -(p_yy) st1 [ptr2] = value // - cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? -} { .mbb -(p_yy) add cnt = -1, cnt // -(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few -;; } - -{ .mib - nop.m 0 - shr.u linecnt = cnt, LSIZE_SH -(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill -;; } - - TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later -{ .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder -} { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value -;; } -{ .mmi -(p_scr) add loopcnt = -1, linecnt // - add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range -;; } -{ .mmi - add tmp = -1, linecnt // next loop count - mov.i ar.lc = loopcnt // -;; } -.pref_l1a: -{ .mib - stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart - nop.i 0 - br.cloop.dptk.few .pref_l1a -;; } -{ .mmi - add ptr0 = 16, ptr2 // Two stores in parallel - mov.i ar.lc = tmp // -;; } -.l1ax: - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 8 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 24 - stf8 [ptr0] = fvalue, 24 - ;; } - { .mmi - stf8 [ptr2] = fvalue, 8 - stf8 [ptr0] = fvalue, 32 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? - ;; } -{ .mmb - stf8 [ptr2] = fvalue, 24 -(p_scr) stf8 [ptr9] = fvalue, 128 - br.cloop.dptk.few .l1ax -;; } -{ .mbb - cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 - br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 -;; } - - TEXT_ALIGN(32) -.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later -{ .mmi - and tmp = -(LINE_SIZE), cnt // compute end of range - mov ptr9 = ptr1 // used for prefetching - and cnt = (LINE_SIZE-1), cnt // remainder -} { .mmi - mov loopcnt = PREF_AHEAD-1 // default prefetch loop - cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value -;; } -{ .mmi -(p_scr) add loopcnt = -1, linecnt - add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) - add ptr1 = tmp, ptr1 // first address beyond total range -;; } -{ .mmi - add tmp = -1, linecnt // next loop count - mov.i ar.lc = loopcnt -;; } -.pref_l1b: -{ .mib - stf.spill [ptr9] = f0, 128 // Do stores one cache line apart - nop.i 0 - br.cloop.dptk.few .pref_l1b -;; } -{ .mmi - add ptr0 = 16, ptr2 // Two stores in parallel - mov.i ar.lc = tmp -;; } -.l1bx: - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 32 - ;; } - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 32 - ;; } - { .mmi - stf.spill [ptr2] = f0, 32 - stf.spill [ptr0] = f0, 64 - cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? - ;; } -{ .mmb - stf.spill [ptr2] = f0, 32 -(p_scr) stf.spill [ptr9] = f0, 128 - br.cloop.dptk.few .l1bx -;; } -{ .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? 
-(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // -;; } - -.fraction_of_line: -{ .mib - add ptr2 = 16, ptr1 - shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 -;; } -{ .mib - cmp.eq p_scr, p0 = loopcnt, r0 - add loopcnt = -1, loopcnt -(p_scr) br.cond.dpnt.many .store_words -;; } -{ .mib - and cnt = 0x1f, cnt // compute the remaining cnt - mov.i ar.lc = loopcnt -;; } - TEXT_ALIGN(32) -.l2: // ------------------------------------ // L2A: store 32B in 2 cycles -{ .mmb - stf8 [ptr1] = fvalue, 8 - stf8 [ptr2] = fvalue, 8 -;; } { .mmb - stf8 [ptr1] = fvalue, 24 - stf8 [ptr2] = fvalue, 24 - br.cloop.dptk.many .l2 -;; } -.store_words: -{ .mib - cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? -(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch -;; } - -{ .mmi - stf8 [ptr1] = fvalue, 8 // store - cmp.le p_y, p_n = 16, cnt - add cnt = -8, cnt // subtract -;; } -{ .mmi -(p_y) stf8 [ptr1] = fvalue, 8 // store -(p_y) cmp.le.unc p_yy, p_nn = 16, cnt -(p_y) add cnt = -8, cnt // subtract -;; } -{ .mmi // store -(p_yy) stf8 [ptr1] = fvalue, 8 -(p_yy) add cnt = -8, cnt // subtract -;; } - -.move_bytes_from_alignment: -{ .mib - cmp.eq p_scr, p0 = cnt, r0 - tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? -(p_scr) br.cond.dpnt.few .restore_and_exit -;; } -{ .mib -(p_y) st4 [ptr1] = value,4 - tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? -;; } -{ .mib -(p_yy) st2 [ptr1] = value,2 - tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? -;; } - -{ .mib -(p_y) st1 [ptr1] = value -;; } -.restore_and_exit: -{ .mib - nop.m 0 - mov.i ar.lc = save_lc - br.ret.sptk.many rp -;; } - -.move_bytes_unaligned: -{ .mmi - .pred.rel "mutex",p_y, p_n - .pred.rel "mutex",p_yy, p_nn -(p_n) cmp.le p_yy, p_nn = 4, cnt -(p_y) cmp.le p_yy, p_nn = 5, cnt -(p_n) add ptr2 = 2, ptr1 -} { .mmi -(p_y) add ptr2 = 3, ptr1 -(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left] -(p_y) add cnt = -1, cnt -;; } -{ .mmi -(p_yy) cmp.le.unc p_y, p0 = 8, cnt - add ptr3 = ptr1, cnt // prepare last store - mov.i ar.lc = save_lc -} { .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (o less) left] -(p_yy) add cnt = -4, cnt -;; } -{ .mmi -(p_y) cmp.le.unc p_yy, p0 = 8, cnt - add ptr3 = -1, ptr3 // last store - tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? -} { .mmi -(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left] -(p_y) add cnt = -4, cnt -;; } -{ .mmi -(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes -(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left] - tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? 
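[Editorial aside, not part of this changeset: the staging laid out at the top of this memset.S hunk — crawl to a 16-byte boundary, then 128-byte lines, then 32-byte chunks, then words, then trailing bytes — looks like this in plain C. The early-store-as-prefetch trick and the stf.spill fast path for value 0 are not modelled:]

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative only: the staging from the header comment above. */
static void *memset_staged(void *dest, int c, size_t n)
{
    unsigned char *p = dest;
    uint64_t v = (unsigned char)c;
    v |= v << 8;  v |= v << 16;  v |= v << 32;   /* 8 copies of the byte  */

    while (((uintptr_t)p & 15) && n) {           /* crawl to 16B boundary */
        *p++ = (unsigned char)c;  n--;
    }
    while (n >= 128) {                           /* whole 128B lines      */
        for (int i = 0; i < 16; i++)
            memcpy(p + 8 * i, &v, 8);
        p += 128;  n -= 128;
    }
    while (n >= 32) {                            /* 32B chunks            */
        for (int i = 0; i < 4; i++)
            memcpy(p + 8 * i, &v, 8);
        p += 32;  n -= 32;
    }
    while (n >= 8) {                             /* remaining words       */
        memcpy(p, &v, 8);  p += 8;  n -= 8;
    }
    while (n--)                                  /* remaining bytes       */
        *p++ = (unsigned char)c;
    return dest;
}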
-} { .mmi -(p_yy) add cnt = -4, cnt -;; } -{ .mmb -(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes -(p_y) st1 [ptr3] = value // fill last byte (using ptr3) - br.ret.sptk.many rp -} -END(memset) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/linux/lib/strlen.S --- a/xen/arch/ia64/linux/lib/strlen.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,192 +0,0 @@ -/* - * - * Optimized version of the standard strlen() function - * - * - * Inputs: - * in0 address of string - * - * Outputs: - * ret0 the number of characters in the string (0 if empty string) - * does not count the \0 - * - * Copyright (C) 1999, 2001 Hewlett-Packard Co - * Stephane Eranian <eranian@xxxxxxxxxx> - * - * 09/24/99 S.Eranian add speculation recovery code - */ - -#include <asm/asmmacro.h> - -// -// -// This is an enhanced version of the basic strlen. it includes a combination -// of compute zero index (czx), parallel comparisons, speculative loads and -// loop unroll using rotating registers. -// -// General Ideas about the algorithm: -// The goal is to look at the string in chunks of 8 bytes. -// so we need to do a few extra checks at the beginning because the -// string may not be 8-byte aligned. In this case we load the 8byte -// quantity which includes the start of the string and mask the unused -// bytes with 0xff to avoid confusing czx. -// We use speculative loads and software pipelining to hide memory -// latency and do read ahead safely. This way we defer any exception. -// -// Because we don't want the kernel to be relying on particular -// settings of the DCR register, we provide recovery code in case -// speculation fails. The recovery code is going to "redo" the work using -// only normal loads. If we still get a fault then we generate a -// kernel panic. Otherwise we return the strlen as usual. -// -// The fact that speculation may fail can be caused, for instance, by -// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., -// a NaT bit will be set if the translation is not present. The normal -// load, on the other hand, will cause the translation to be inserted -// if the mapping exists. -// -// It should be noted that we execute recovery code only when we need -// to use the data that has been speculatively loaded: we don't execute -// recovery code on pure read ahead data. -// -// Remarks: -// - the cmp r0,r0 is used as a fast way to initialize a predicate -// register to 1. This is required to make sure that we get the parallel -// compare correct. -// -// - we don't use the epilogue counter to exit the loop but we need to set -// it to zero beforehand. -// -// - after the loop we must test for Nat values because neither the -// czx nor cmp instruction raise a NaT consumption fault. We must be -// careful not to look too far for a Nat for which we don't care. -// For instance we don't need to look at a NaT in val2 if the zero byte -// was in val1. -// -// - Clearly performance tuning is required. 
-// -// -// -#define saved_pfs r11 -#define tmp r10 -#define base r16 -#define orig r17 -#define saved_pr r18 -#define src r19 -#define mask r20 -#define val r21 -#define val1 r22 -#define val2 r23 - -GLOBAL_ENTRY(strlen) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8 - - .rotr v[2], w[2] // declares our 4 aliases - - extr.u tmp=in0,0,3 // tmp=least significant 3 bits - mov orig=in0 // keep trackof initial byte address - dep src=0,in0,0,3 // src=8byte-aligned in0 address - .save pr, saved_pr - mov saved_pr=pr // preserve predicates (rotation) - ;; - - .body - - ld8 v[1]=[src],8 // must not speculate: can fail here - shl tmp=tmp,3 // multiply by 8bits/byte - mov mask=-1 // our mask - ;; - ld8.s w[1]=[src],8 // speculatively load next - cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and - sub tmp=64,tmp // how many bits to shift our mask on the right - ;; - shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part - mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) - ;; - add base=-16,src // keep track of aligned base - or v[1]=v[1],mask // now we have a safe initial byte pattern - ;; -1: - ld8.s v[0]=[src],8 // speculatively load next - czx1.r val1=v[1] // search 0 byte from right - czx1.r val2=w[1] // search 0 byte from right following 8bytes - ;; - ld8.s w[0]=[src],8 // speculatively load next to next - cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 - cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 -(p6) br.wtop.dptk 1b // loop until p6 == 0 - ;; - // - // We must return try the recovery code iff - // val1_is_nat || (val1==8 && val2_is_nat) - // - // XXX Fixme - // - there must be a better way of doing the test - // - cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) - tnat.nz p6,p7=val1 // test NaT on val1 -(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT - ;; - // - // if we come here p7 is true, i.e., initialized for // cmp - // - cmp.eq.and p7,p0=8,val1// val1==8? - tnat.nz.and p7,p0=val2 // test NaT if val2 -(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT - ;; -(p8) mov val1=val2 // the other test got us out of the loop -(p8) adds src=-16,src // correct position when 3 ahead -(p9) adds src=-24,src // correct position when 4 ahead - ;; - sub ret0=src,orig // distance from base - sub tmp=8,val1 // which byte in word - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // adjust - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of normal execution - - // - // Outlined recovery code when speculation failed - // - // This time we don't use speculation and rely on the normal exception - // mechanism. that's why the loop is not as good as the previous one - // because read ahead is not possible - // - // IMPORTANT: - // Please note that in the case of strlen() as opposed to strlen_user() - // we don't use the exception mechanism, as this function is not - // supposed to fail. If that happens it means we have a bug and the - // code will cause of kernel fault. - // - // XXX Fixme - // - today we restart from the beginning of the string instead - // of trying to continue where we left off. 
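[Editorial aside, not part of this changeset: the chunked scan described in the header comment of this strlen.S hunk has a well-known portable counterpart. The sketch below uses the "haszero" bit trick in place of czx1.r and forces the unused leading bytes of the first aligned word to 0xff exactly as the assembly does. It assumes little-endian byte order and, like the original, relies on aligned 8-byte loads never crossing into an unmapped page past the terminator, which is outside what ISO C guarantees; speculation and the NaT recovery path are not modelled:]

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative only; see the caveats in the note above. */
static size_t strlen_words(const char *s)
{
    uintptr_t start = (uintptr_t)s;
    const unsigned char *p = (const unsigned char *)(start & ~(uintptr_t)7);
    unsigned lead = (unsigned)(start & 7);       /* bytes before the string */
    uint64_t w;

    memcpy(&w, p, 8);
    if (lead)                                    /* mask unused low bytes    */
        w |= ~(uint64_t)0 >> ((8 - lead) * 8);   /* with 0xff, never '\0'    */

    for (;;) {
        /* "haszero" trick in place of czx1.r: the lowest set 0x80 bit
         * of z marks the first zero byte of w, if any. */
        uint64_t z = (w - 0x0101010101010101ull) & ~w & 0x8080808080808080ull;
        if (z) {
            unsigned byte = (unsigned)(__builtin_ctzll(z) / 8);
            return ((uintptr_t)p + byte) - start;
        }
        p += 8;
        memcpy(&w, p, 8);                        /* next aligned 8-byte chunk */
    }
}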
- // -.recover: - ld8 val=[base],8 // will fail if unrecoverable fault - ;; - or val=val,mask // remask first bytes - cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop - ;; - // - // ar.ec is still zero here - // -2: -(p6) ld8 val=[base],8 // will fail if unrecoverable fault - ;; - czx1.r val1=val // search 0 byte from right - ;; - cmp.eq p6,p0=8,val1 // val1==8 ? -(p6) br.wtop.dptk 2b // loop until p6 == 0 - ;; // (avoid WAW on p63) - sub ret0=base,orig // distance from base - sub tmp=8,val1 - mov pr=saved_pr,0xffffffffffff0000 - ;; - sub ret0=ret0,tmp // length=now - back -1 - mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what - br.ret.sptk.many rp // end of successful recovery code -END(strlen) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/mm.c --- a/xen/arch/ia64/mm.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,152 +0,0 @@ -/****************************************************************************** - * arch/ia64/mm.c - * - * Copyright (c) 2002-2005 K A Fraser - * Copyright (c) 2004 Christian Limpach - * Copyright (c) 2005, Intel Corporation. - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * A description of the x86 page table API: - * - * Domains trap to do_mmu_update with a list of update requests. - * This is a list of (ptr, val) pairs, where the requested operation - * is *ptr = val. - * - * Reference counting of pages: - * ---------------------------- - * Each page has two refcounts: tot_count and type_count. - * - * TOT_COUNT is the obvious reference count. It counts all uses of a - * physical page frame by a domain, including uses as a page directory, - * a page table, or simple mappings via a PTE. This count prevents a - * domain from releasing a frame back to the free pool when it still holds - * a reference to it. - * - * TYPE_COUNT is more subtle. A frame can be put to one of three - * mutually-exclusive uses: it might be used as a page directory, or a - * page table, or it may be mapped writable by the domain [of course, a - * frame may not be used in any of these three ways!]. - * So, type_count is a count of the number of times a frame is being - * referred to in its current incarnation. Therefore, a page can only - * change its type when its type count is zero. - * - * Pinning the page type: - * ---------------------- - * The type of a page can be pinned/unpinned with the commands - * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is, - * pinning is not reference counted, so it can't be nested). - * This is useful to prevent a page's type count falling to zero, at which - * point safety checks would need to be carried out next time the count - * is increased again. 
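[Editorial aside, not part of this changeset: the tot_count/type_count rule spelled out above — a frame's type may only change while its type count is zero — can be restated in a few lines of C. The names and single-threaded logic below are illustrative only; the real code uses lock-free cmpxchg loops and a richer set of page-type states:]

#include <stdbool.h>

/* Illustrative restatement of the rule described above; not Xen's API. */
enum pg_type { PGT_NONE, PGT_PAGE_TABLE, PGT_PAGE_DIR, PGT_WRITABLE };

struct pg_info {
    unsigned long tot_count;   /* every reference to the frame           */
    unsigned long type_count;  /* references in its current incarnation  */
    enum pg_type  type;
};

static bool get_type_ref(struct pg_info *pg, enum pg_type wanted)
{
    if (pg->type_count == 0)
        pg->type = wanted;        /* type may only change while unused   */
    else if (pg->type != wanted)
        return false;             /* frame is pinned to a different use  */
    pg->type_count++;
    return true;
}

static void put_type_ref(struct pg_info *pg)
{
    if (--pg->type_count == 0)
        pg->type = PGT_NONE;      /* free to take a new type later       */
}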
- * - * A further note on writable page mappings: - * ----------------------------------------- - * For simplicity, the count of writable mappings for a page may not - * correspond to reality. The 'writable count' is incremented for every - * PTE which maps the page with the _PAGE_RW flag set. However, for - * write access to be possible the page directory entry must also have - * its _PAGE_RW bit set. We do not check this as it complicates the - * reference counting considerably [consider the case of multiple - * directory entries referencing a single page table, some with the RW - * bit set, others not -- it starts getting a bit messy]. - * In normal use, this simplification shouldn't be a problem. - * However, the logic can be added if required. - * - * One more note on read-only page mappings: - * ----------------------------------------- - * We want domains to be able to map pages for read-only access. The - * main reason is that page tables and directories should be readable - * by a domain, but it would not be safe for them to be writable. - * However, domains have free access to rings 1 & 2 of the Intel - * privilege model. In terms of page protection, these are considered - * to be part of 'supervisor mode'. The WP bit in CR0 controls whether - * read-only restrictions are respected in supervisor mode -- if the - * bit is clear then any mapped page is writable. - * - * We get round this by always setting the WP bit and disallowing - * updates to it. This is very unlikely to cause a problem for guest - * OS's, which will generally use the WP bit to simplify copy-on-write - * implementation (in that case, OS wants a fault when it writes to - * an application-supplied buffer). - */ - -#include <xen/config.h> -#include <public/xen.h> -#include <xen/init.h> -#include <xen/lib.h> -#include <xen/mm.h> -#include <xen/errno.h> -#include <asm/vmx_vcpu.h> -#include <asm/vmmu.h> -#include <asm/regionreg.h> -#include <asm/vmx_mm_def.h> -/* - uregs->ptr is virtual address - uregs->val is pte value - */ -#ifdef CONFIG_VTI -int do_mmu_update(mmu_update_t *ureqs,u64 count,u64 *pdone,u64 foreigndom) -{ - int i,cmd; - u64 mfn, gpfn; - VCPU *vcpu; - mmu_update_t req; - ia64_rr rr; - thash_cb_t *hcb; - thash_data_t entry={0},*ovl; - vcpu = current; - search_section_t sections; - hcb = vmx_vcpu_get_vtlb(vcpu); - for ( i = 0; i < count; i++ ) - { - copy_from_user(&req, ureqs, sizeof(req)); - cmd = req.ptr&3; - req.ptr &= ~3; - if(cmd ==MMU_NORMAL_PT_UPDATE){ - entry.page_flags = req.val; - entry.locked = 1; - entry.tc = 1; - entry.cl = DSIDE_TLB; - rr = vmx_vcpu_rr(vcpu, req.ptr); - entry.ps = rr.ps; - entry.key = redistribute_rid(rr.rid); - entry.rid = rr.rid; - entry.vadr = PAGEALIGN(req.ptr,entry.ps); - sections.tr = 1; - sections.tc = 0; - ovl = thash_find_overlap(hcb, &entry, sections); - if (ovl) { - // generate MCA. - panic("Tlb conflict!!"); - return; - } - thash_purge_and_insert(hcb, &entry); - }else if(cmd == MMU_MACHPHYS_UPDATE){ - mfn = req.ptr >>PAGE_SHIFT; - gpfn = req.val; - set_machinetophys(mfn,gpfn); - }else{ - printf("Unkown command of mmu_update:ptr: %lx,val: %lx \n",req.ptr,req.val); - while(1); - } - ureqs ++; - } - return 0; -} -#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/mm_init.c --- a/xen/arch/ia64/mm_init.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,547 +0,0 @@ -/* - * Initialize MMU support. 
- * - * Copyright (C) 1998-2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@xxxxxxxxxx> - */ -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/init.h> - -#ifdef XEN -#include <xen/sched.h> -#endif -#include <linux/bootmem.h> -#include <linux/efi.h> -#include <linux/elf.h> -#include <linux/mm.h> -#include <linux/mmzone.h> -#include <linux/module.h> -#ifndef XEN -#include <linux/personality.h> -#endif -#include <linux/reboot.h> -#include <linux/slab.h> -#include <linux/swap.h> -#ifndef XEN -#include <linux/proc_fs.h> -#endif - -#ifndef XEN -#include <asm/a.out.h> -#endif -#include <asm/bitops.h> -#include <asm/dma.h> -#ifndef XEN -#include <asm/ia32.h> -#endif -#include <asm/io.h> -#include <asm/machvec.h> -#include <asm/numa.h> -#include <asm/patch.h> -#include <asm/pgalloc.h> -#include <asm/sal.h> -#include <asm/sections.h> -#include <asm/system.h> -#include <asm/tlb.h> -#include <asm/uaccess.h> -#include <asm/unistd.h> -#include <asm/mca.h> - -#ifndef XEN -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -#endif - -extern void ia64_tlb_init (void); - -unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; - -#ifdef CONFIG_VIRTUAL_MEM_MAP -unsigned long vmalloc_end = VMALLOC_END_INIT; -EXPORT_SYMBOL(vmalloc_end); -struct page *vmem_map; -EXPORT_SYMBOL(vmem_map); -#endif - -static int pgt_cache_water[2] = { 25, 50 }; - -struct page *zero_page_memmap_ptr; /* map entry for zero page */ -EXPORT_SYMBOL(zero_page_memmap_ptr); - -#ifdef XEN -void *high_memory; -EXPORT_SYMBOL(high_memory); - -///////////////////////////////////////////// -// following from linux-2.6.7/mm/mmap.c -/* description of effects of mapping type and prot in current implementation. - * this is due to the limited x86 page protection hardware. The expected - * behavior is in parens: - * - * map_type prot - * PROT_NONE PROT_READ PROT_WRITE PROT_EXEC - * MAP_SHARED r: (no) no r: (yes) yes r: (no) yes r: (no) yes - * w: (no) no w: (no) no w: (yes) yes w: (no) no - * x: (no) no x: (no) yes x: (no) yes x: (yes) yes - * - * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes - * w: (no) no w: (no) no w: (copy) copy w: (no) no - * x: (no) no x: (no) yes x: (no) yes x: (yes) yes - * - */ -pgprot_t protection_map[16] = { - __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, - __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 -}; - -void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) -{ - printf("insert_vm_struct: called, not implemented yet\n"); -} - -///////////////////////////////////////////// -//following from linux/mm/memory.c - -#ifndef __ARCH_HAS_4LEVEL_HACK -/* - * Allocate page upper directory. - * - * We've already handled the fast-path in-line, and we own the - * page table lock. - * - * On a two-level or three-level page table, this ends up actually being - * entirely optimized away. - */ -pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) -{ - pud_t *new; - - spin_unlock(&mm->page_table_lock); - new = pud_alloc_one(mm, address); - spin_lock(&mm->page_table_lock); - if (!new) - return NULL; - - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (pgd_present(*pgd)) { - pud_free(new); - goto out; - } - pgd_populate(mm, pgd, new); - out: - return pud_offset(pgd, address); -} - -/* - * Allocate page middle directory. - * - * We've already handled the fast-path in-line, and we own the - * page table lock. 
- * - * On a two-level page table, this ends up actually being entirely - * optimized away. - */ -pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) -{ - pmd_t *new; - - spin_unlock(&mm->page_table_lock); - new = pmd_alloc_one(mm, address); - spin_lock(&mm->page_table_lock); - if (!new) - return NULL; - - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (pud_present(*pud)) { - pmd_free(new); - goto out; - } - pud_populate(mm, pud, new); - out: - return pmd_offset(pud, address); -} -#endif - -pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address) -{ - if (!pmd_present(*pmd)) { - struct page *new; - - spin_unlock(&mm->page_table_lock); - new = pte_alloc_one(mm, address); - spin_lock(&mm->page_table_lock); - if (!new) - return NULL; - - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (pmd_present(*pmd)) { - pte_free(new); - goto out; - } - inc_page_state(nr_page_table_pages); - pmd_populate(mm, pmd, new); - } -out: - return pte_offset_map(pmd, address); -} -///////////////////////////////////////////// -#endif /* XEN */ - -void -update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte) -{ - unsigned long addr; - struct page *page; - - if (!pte_exec(pte)) - return; /* not an executable page... */ - - page = pte_page(pte); - /* don't use VADDR: it may not be mapped on this CPU (or may have just been flushed): */ - addr = (unsigned long) page_address(page); - - if (test_bit(PG_arch_1, &page->flags)) - return; /* i-cache is already coherent with d-cache */ - - flush_icache_range(addr, addr + PAGE_SIZE); - set_bit(PG_arch_1, &page->flags); /* mark page as clean */ -} - -inline void -ia64_set_rbs_bot (void) -{ -#ifdef XEN - unsigned stack_size = MAX_USER_STACK_SIZE; -#else - unsigned long stack_size = current->rlim[RLIMIT_STACK].rlim_max & -16; -#endif - - if (stack_size > MAX_USER_STACK_SIZE) - stack_size = MAX_USER_STACK_SIZE; - current->arch._thread.rbs_bot = STACK_TOP - stack_size; -} - -/* - * This performs some platform-dependent address space initialization. - * On IA-64, we want to setup the VM area for the register backing - * store (which grows upwards) and install the gateway page which is - * used for signal trampolines, etc. - */ -void -ia64_init_addr_space (void) -{ -#ifdef XEN -printf("ia64_init_addr_space: called, not implemented\n"); -#else - struct vm_area_struct *vma; - - ia64_set_rbs_bot(); - - /* - * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore - * the problem. When the process attempts to write to the register backing store - * for the first time, it will get a SEGFAULT in this case. 
- */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (vma) { - memset(vma, 0, sizeof(*vma)); - vma->vm_mm = current->mm; - vma->vm_start = current->arch._thread.rbs_bot & PAGE_MASK; - vma->vm_end = vma->vm_start + PAGE_SIZE; - vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7]; - vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP; - insert_vm_struct(current->mm, vma); - } - - /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ - if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); - if (vma) { - memset(vma, 0, sizeof(*vma)); - vma->vm_mm = current->mm; - vma->vm_end = PAGE_SIZE; - vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); - vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; - insert_vm_struct(current->mm, vma); - } - } -#endif -} - -setup_gate (void) -{ - printk("setup_gate not-implemented.\n"); -} - -void __devinit -ia64_mmu_init (void *my_cpu_data) -{ - unsigned long psr, pta, impl_va_bits; - extern void __devinit tlb_init (void); - int cpu; - -#ifdef CONFIG_DISABLE_VHPT -# define VHPT_ENABLE_BIT 0 -#else -# define VHPT_ENABLE_BIT 1 -#endif - - /* Pin mapping for percpu area into TLB */ - psr = ia64_clear_ic(); - ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR, - pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), - PERCPU_PAGE_SHIFT); - - ia64_set_psr(psr); - ia64_srlz_i(); - - /* - * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped - * address space. The IA-64 architecture guarantees that at least 50 bits of - * virtual address space are implemented but if we pick a large enough page size - * (e.g., 64KB), the mapped address space is big enough that it will overlap with - * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages, - * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a - * problem in practice. Alternatively, we could truncate the top of the mapped - * address space to not permit mappings that would overlap with the VMLPT. - * --davidm 00/12/06 - */ -# define pte_bits 3 -# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT) - /* - * The virtual page table has to cover the entire implemented address space within - * a region even though not all of this space may be mappable. The reason for - * this is that the Access bit and Dirty bit fault handlers perform - * non-speculative accesses to the virtual page table, so the address range of the - * virtual page table itself needs to be covered by virtual page table. - */ -# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) -# define POW2(n) (1ULL << (n)) - - impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61))); - - if (impl_va_bits < 51 || impl_va_bits > 61) - panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1); - -#ifdef XEN - vhpt_init(); -#endif -#if 0 - /* place the VMLPT at the end of each page-table mapped region: */ - pta = POW2(61) - POW2(vmlpt_bits); - - if (POW2(mapped_space_bits) >= pta) - panic("mm/init: overlap between virtually mapped linear page table and " - "mapped kernel space!"); - /* - * Set the (virtually mapped linear) page table address. Bit - * 8 selects between the short and long format, bits 2-7 the - * size of the table, and bit 0 whether the VHPT walker is - * enabled. 
- */ - ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT); -#endif - ia64_tlb_init(); - -#ifdef CONFIG_HUGETLB_PAGE - ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2); - ia64_srlz_d(); -#endif - - cpu = smp_processor_id(); - -#ifndef XEN - /* mca handler uses cr.lid as key to pick the right entry */ - ia64_mca_tlb_list[cpu].cr_lid = ia64_getreg(_IA64_REG_CR_LID); - - /* insert this percpu data information into our list for MCA recovery purposes */ - ia64_mca_tlb_list[cpu].percpu_paddr = pte_val(mk_pte_phys(__pa(my_cpu_data), PAGE_KERNEL)); - /* Also save per-cpu tlb flush recipe for use in physical mode mca handler */ - ia64_mca_tlb_list[cpu].ptce_base = local_cpu_data->ptce_base; - ia64_mca_tlb_list[cpu].ptce_count[0] = local_cpu_data->ptce_count[0]; - ia64_mca_tlb_list[cpu].ptce_count[1] = local_cpu_data->ptce_count[1]; - ia64_mca_tlb_list[cpu].ptce_stride[0] = local_cpu_data->ptce_stride[0]; - ia64_mca_tlb_list[cpu].ptce_stride[1] = local_cpu_data->ptce_stride[1]; -#endif -} - -#ifdef CONFIG_VIRTUAL_MEM_MAP - -int -create_mem_map_page_table (u64 start, u64 end, void *arg) -{ - unsigned long address, start_page, end_page; - struct page *map_start, *map_end; - int node; - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - - map_start = vmem_map + (__pa(start) >> PAGE_SHIFT); - map_end = vmem_map + (__pa(end) >> PAGE_SHIFT); - - start_page = (unsigned long) map_start & PAGE_MASK; - end_page = PAGE_ALIGN((unsigned long) map_end); - node = paddr_to_nid(__pa(start)); - - for (address = start_page; address < end_page; address += PAGE_SIZE) { - pgd = pgd_offset_k(address); - if (pgd_none(*pgd)) - pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)); - pmd = pmd_offset(pgd, address); - - if (pmd_none(*pmd)) - pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)); - pte = pte_offset_kernel(pmd, address); - - if (pte_none(*pte)) - set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT, - PAGE_KERNEL)); - } - return 0; -} - -struct memmap_init_callback_data { - struct page *start; - struct page *end; - int nid; - unsigned long zone; -}; - -static int -virtual_memmap_init (u64 start, u64 end, void *arg) -{ - struct memmap_init_callback_data *args; - struct page *map_start, *map_end; - - args = (struct memmap_init_callback_data *) arg; - - map_start = vmem_map + (__pa(start) >> PAGE_SHIFT); - map_end = vmem_map + (__pa(end) >> PAGE_SHIFT); - - if (map_start < args->start) - map_start = args->start; - if (map_end > args->end) - map_end = args->end; - - /* - * We have to initialize "out of bounds" struct page elements that fit completely - * on the same pages that were allocated for the "in bounds" elements because they - * may be referenced later (and found to be "reserved"). 
- */ - map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page); - map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end) - / sizeof(struct page)); - - if (map_start < map_end) - memmap_init_zone(map_start, (unsigned long) (map_end - map_start), - args->nid, args->zone, page_to_pfn(map_start)); - return 0; -} - -void -memmap_init (struct page *start, unsigned long size, int nid, - unsigned long zone, unsigned long start_pfn) -{ - if (!vmem_map) - memmap_init_zone(start, size, nid, zone, start_pfn); - else { - struct memmap_init_callback_data args; - - args.start = start; - args.end = start + size; - args.nid = nid; - args.zone = zone; - - efi_memmap_walk(virtual_memmap_init, &args); - } -} - -int -ia64_pfn_valid (unsigned long pfn) -{ - char byte; - struct page *pg = pfn_to_page(pfn); - - return (__get_user(byte, (char *) pg) == 0) - && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK)) - || (__get_user(byte, (char *) (pg + 1) - 1) == 0)); -} -EXPORT_SYMBOL(ia64_pfn_valid); - -int -find_largest_hole (u64 start, u64 end, void *arg) -{ - u64 *max_gap = arg; - - static u64 last_end = PAGE_OFFSET; - - /* NOTE: this algorithm assumes efi memmap table is ordered */ - -#ifdef XEN -//printf("find_largest_hole: start=%lx,end=%lx,max_gap=%lx\n",start,end,*(unsigned long *)arg); -#endif - if (*max_gap < (start - last_end)) - *max_gap = start - last_end; - last_end = end; -#ifdef XEN -//printf("find_largest_hole2: max_gap=%lx,last_end=%lx\n",*max_gap,last_end); -#endif - return 0; -} -#endif /* CONFIG_VIRTUAL_MEM_MAP */ - -static int -count_reserved_pages (u64 start, u64 end, void *arg) -{ - unsigned long num_reserved = 0; - unsigned long *count = arg; - - for (; start < end; start += PAGE_SIZE) - if (PageReserved(virt_to_page(start))) - ++num_reserved; - *count += num_reserved; - return 0; -} - -/* - * Boot command-line option "nolwsys" can be used to disable the use of any light-weight - * system call handler. When this option is in effect, all fsyscalls will end up bubbling - * down into the kernel and calling the normal (heavy-weight) syscall handler. This is - * useful for performance testing, but conceivably could also come in handy for debugging - * purposes. - */ - -static int nolwsys; - -static int __init -nolwsys_setup (char *s) -{ - nolwsys = 1; - return 1; -} - -__setup("nolwsys", nolwsys_setup); - -void -mem_init (void) -{ -#ifdef CONFIG_PCI - /* - * This needs to be called _after_ the command line has been parsed but _before_ - * any drivers that may need the PCI DMA interface are initialized or bootmem has - * been freed. - */ - platform_dma_init(); -#endif - -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/mmio.c --- a/xen/arch/ia64/mmio.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,515 +0,0 @@ - -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * mmio.c: MMIO emulation components. - * Copyright (c) 2004, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) - * Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx) - */ - -#include <linux/sched.h> -#include <asm/tlb.h> -#include <asm/vmx_mm_def.h> -#include <asm/gcc_intrin.h> -#include <linux/interrupt.h> -#include <asm/vmx_vcpu.h> -#include <asm/privop.h> -#include <asm/types.h> -#include <public/io/ioreq.h> -#include <asm/mm.h> -#include <asm/vmx.h> - -/* -struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base) -{ - int i; - for (i=0; mio_base[i].iot != NOT_IO; i++ ) { - if ( gpa >= mio_base[i].start && gpa <= mio_base[i].end ) - return &mio_base[i]; - } - return NULL; -} -*/ - -#define PIB_LOW_HALF(ofst) !(ofst&(1<<20)) -#define PIB_OFST_INTA 0x1E0000 -#define PIB_OFST_XTP 0x1E0008 - -static void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma) -{ - switch (pib_off) { - case PIB_OFST_INTA: - panic("Undefined write on PIB INTA\n"); - break; - case PIB_OFST_XTP: - if ( s == 1 && ma == 4 /* UC */) { - vmx_vcpu_get_plat(vcpu)->xtp = *(uint8_t *)src; - } - else { - panic("Undefined write on PIB XTP\n"); - } - break; - default: - if ( PIB_LOW_HALF(pib_off) ) { // lower half - if ( s != 8 || ma != 0x4 /* UC */ ) { - panic("Undefined IPI-LHF write with s %d, ma %d!\n", s, ma); - } - else { - write_ipi(vcpu, pib_off, *(uint64_t *)src); - // TODO for SM-VP - } - } - else { // upper half - printf("IPI-UHF write %lx\n",pib_off); - panic("Not support yet for SM-VP\n"); - } - break; - } -} - -static void pib_read(VCPU *vcpu, uint64_t pib_off, void *dest, size_t s, int ma) -{ - switch (pib_off) { - case PIB_OFST_INTA: - // todo --- emit on processor system bus. 
- if ( s == 1 && ma == 4) { // 1 byte load - // TODO: INTA read from IOSAPIC - } - else { - panic("Undefined read on PIB INTA\n"); - } - break; - case PIB_OFST_XTP: - if ( s == 1 && ma == 4) { - *((uint8_t*)dest) = vmx_vcpu_get_plat(vcpu)->xtp; - } - else { - panic("Undefined read on PIB XTP\n"); - } - break; - default: - if ( PIB_LOW_HALF(pib_off) ) { // lower half - if ( s != 8 || ma != 4 ) { - panic("Undefined IPI-LHF read!\n"); - } - else { -#ifdef IPI_DEBUG - printf("IPI-LHF read %lx\n",pib_off); -#endif - *(uint64_t *)dest = 0; // TODO for SM-VP - } - } - else { // upper half - if ( s != 1 || ma != 4 ) { - panic("Undefined PIB-UHF read!\n"); - } - else { -#ifdef IPI_DEBUG - printf("IPI-UHF read %lx\n",pib_off); -#endif - *(uint8_t *)dest = 0; // TODO for SM-VP - } - } - break; - } -} - -static void low_mmio_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir) -{ - struct vcpu *v = current; - vcpu_iodata_t *vio; - ioreq_t *p; - unsigned long addr; - - vio = get_vio(v->domain, v->vcpu_id); - if (vio == 0) { - panic("bad shared page: %lx", (unsigned long)vio); - } - p = &vio->vp_ioreq; - p->addr = pa; - p->size = s; - p->count = 1; - p->dir = dir; - if(dir==IOREQ_WRITE) //write; - p->u.data = *val; - p->pdata_valid = 0; - p->port_mm = 1; - p->df = 0; - - set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); - p->state = STATE_IOREQ_READY; - evtchn_send(iopacket_port(v->domain)); - vmx_wait_io(); - if(dir==IOREQ_READ){ //read - *val=p->u.data; - } - return; -} -#define TO_LEGACY_IO(pa) (((pa)>>12<<2)|((pa)&0x3)) - -static void legacy_io_access(VCPU *vcpu, u64 pa, u64 *val, size_t s, int dir) -{ - struct vcpu *v = current; - vcpu_iodata_t *vio; - ioreq_t *p; - unsigned long addr; - - vio = get_vio(v->domain, v->vcpu_id); - if (vio == 0) { - panic("bad shared page: %lx"); - } - p = &vio->vp_ioreq; - p->addr = TO_LEGACY_IO(pa&0x3ffffffUL); - p->size = s; - p->count = 1; - p->dir = dir; - if(dir==IOREQ_WRITE) //write; - p->u.data = *val; - p->pdata_valid = 0; - p->port_mm = 0; - p->df = 0; - - set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); - p->state = STATE_IOREQ_READY; - evtchn_send(iopacket_port(v->domain)); - - vmx_wait_io(); - if(dir==IOREQ_READ){ //read - *val=p->u.data; - } -#ifdef DEBUG_PCI - if(dir==IOREQ_WRITE) - if(p->addr == 0xcf8UL) - printk("Write 0xcf8, with val [0x%lx]\n", p->u.data); - else - if(p->addr == 0xcfcUL) - printk("Read 0xcfc, with val [0x%lx]\n", p->u.data); -#endif //DEBUG_PCI - return; -} - -static void mmio_access(VCPU *vcpu, u64 src_pa, u64 *dest, size_t s, int ma, int dir) -{ - struct virutal_platform_def *v_plat; - //mmio_type_t iot; - unsigned long iot; - iot=__gpfn_is_io(vcpu->domain, src_pa>>PAGE_SHIFT); - v_plat = vmx_vcpu_get_plat(vcpu); - - switch (iot) { - case GPFN_PIB: - if(!dir) - pib_write(vcpu, dest, src_pa - v_plat->pib_base, s, ma); - else - pib_read(vcpu, src_pa - v_plat->pib_base, dest, s, ma); - break; - case GPFN_GFW: - break; - case GPFN_IOSAPIC: - case GPFN_FRAME_BUFFER: - case GPFN_LOW_MMIO: - low_mmio_access(vcpu, src_pa, dest, s, dir); - break; - case GPFN_LEGACY_IO: - legacy_io_access(vcpu, src_pa, dest, s, dir); - break; - default: - panic("Bad I/O access\n"); - break; - } - return; -} - -/* - * Read or write data in guest virtual address mode. 
- */ -/* -void -memwrite_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s) -{ - uint64_t pa; - - if (!vtlb->nomap) - panic("Normal memory write shouldn't go to this point!"); - pa = PPN_2_PA(vtlb->ppn); - pa += POFFSET((u64)dest, vtlb->ps); - mmio_write (vcpu, src, pa, s, vtlb->ma); -} - - -void -memwrite_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s) -{ - uint64_t pa = (uint64_t)dest; - int ma; - - if ( pa & (1UL <<63) ) { - // UC - ma = 4; - pa <<=1; - pa >>=1; - } - else { - // WBL - ma = 0; // using WB for WBL - } - mmio_write (vcpu, src, pa, s, ma); -} - -void -memread_v(VCPU *vcpu, thash_data_t *vtlb, u64 *src, u64 *dest, size_t s) -{ - uint64_t pa; - - if (!vtlb->nomap) - panic("Normal memory write shouldn't go to this point!"); - pa = PPN_2_PA(vtlb->ppn); - pa += POFFSET((u64)src, vtlb->ps); - - mmio_read(vcpu, pa, dest, s, vtlb->ma); -} - -void -memread_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s) -{ - uint64_t pa = (uint64_t)src; - int ma; - - if ( pa & (1UL <<63) ) { - // UC - ma = 4; - pa <<=1; - pa >>=1; - } - else { - // WBL - ma = 0; // using WB for WBL - } - mmio_read(vcpu, pa, dest, s, ma); -} -*/ - - -/* - * Deliver IPI message. (Only U-VP is supported now) - * offset: address offset to IPI space. - * value: deliver value. - */ -static void deliver_ipi (VCPU *vcpu, uint64_t dm, uint64_t vector) -{ -#ifdef IPI_DEBUG - printf ("deliver_ipi %lx %lx\n",dm,vector); -#endif - switch ( dm ) { - case 0: // INT - vmx_vcpu_pend_interrupt (vcpu, vector); - break; - case 2: // PMI - // TODO -- inject guest PMI - panic ("Inject guest PMI!\n"); - break; - case 4: // NMI - vmx_vcpu_pend_interrupt (vcpu, 2); - break; - case 5: // INIT - // TODO -- inject guest INIT - panic ("Inject guest INIT!\n"); - break; - case 7: // ExtINT - vmx_vcpu_pend_interrupt (vcpu, 0); - break; - case 1: - case 3: - case 6: - default: - panic ("Deliver reserved IPI!\n"); - break; - } -} - -/* - * TODO: Use hash table for the lookup. - */ -static inline VCPU *lid_2_vcpu (struct domain *d, u64 id, u64 eid) -{ - int i; - VCPU *vcpu; - LID lid; - for (i=0; i<MAX_VIRT_CPUS; i++) { - vcpu = d->vcpu[i]; - if (!vcpu) - continue; - lid.val = VPD_CR(vcpu, lid); - if ( lid.id == id && lid.eid == eid ) { - return vcpu; - } - } - return NULL; -} - -/* - * execute write IPI op. 
- */ -static int write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value) -{ - VCPU *target_cpu; - - target_cpu = lid_2_vcpu(vcpu->domain, - ((ipi_a_t)addr).id, ((ipi_a_t)addr).eid); - if ( target_cpu == NULL ) panic("Unknown IPI cpu\n"); - if ( target_cpu == vcpu ) { - // IPI to self - deliver_ipi (vcpu, ((ipi_d_t)value).dm, - ((ipi_d_t)value).vector); - return 1; - } - else { - // TODO: send Host IPI to inject guest SMP IPI interruption - panic ("No SM-VP supported!\n"); - return 0; - } -} - - -/* - dir 1: read 0:write - inst_type 0:integer 1:floating point - */ -extern IA64_BUNDLE __vmx_get_domain_bundle(u64 iip); -#define SL_INTEGER 0 // store/load interger -#define SL_FLOATING 1 // store/load floating - -void emulate_io_inst(VCPU *vcpu, u64 padr, u64 ma) -{ - REGS *regs; - IA64_BUNDLE bundle; - int slot, dir, inst_type; - size_t size; - u64 data, value,post_update, slot1a, slot1b, temp; - INST64 inst; - regs=vcpu_regs(vcpu); - bundle = __vmx_get_domain_bundle(regs->cr_iip); - slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; - if (!slot) inst.inst = bundle.slot0; - else if (slot == 1){ - slot1a=bundle.slot1a; - slot1b=bundle.slot1b; - inst.inst =slot1a + (slot1b<<18); - } - else if (slot == 2) inst.inst = bundle.slot2; - - - // Integer Load/Store - if(inst.M1.major==4&&inst.M1.m==0&&inst.M1.x==0){ - inst_type = SL_INTEGER; // - size=(inst.M1.x6&0x3); - if((inst.M1.x6>>2)>0xb){ // write - dir=IOREQ_WRITE; //write - vmx_vcpu_get_gr(vcpu,inst.M4.r2,&data); - }else if((inst.M1.x6>>2)<0xb){ // read - dir=IOREQ_READ; - vmx_vcpu_get_gr(vcpu,inst.M1.r1,&value); - } - } - // Integer Load + Reg update - else if(inst.M2.major==4&&inst.M2.m==1&&inst.M2.x==0){ - inst_type = SL_INTEGER; - dir = IOREQ_READ; //write - size = (inst.M2.x6&0x3); - vmx_vcpu_get_gr(vcpu,inst.M2.r1,&value); - vmx_vcpu_get_gr(vcpu,inst.M2.r3,&temp); - vmx_vcpu_get_gr(vcpu,inst.M2.r2,&post_update); - temp += post_update; - vmx_vcpu_set_gr(vcpu,inst.M2.r3,temp,0); - } - // Integer Load/Store + Imm update - else if(inst.M3.major==5){ - inst_type = SL_INTEGER; // - size=(inst.M3.x6&0x3); - if((inst.M5.x6>>2)>0xb){ // write - dir=IOREQ_WRITE; //write - vmx_vcpu_get_gr(vcpu,inst.M5.r2,&data); - vmx_vcpu_get_gr(vcpu,inst.M5.r3,&temp); - post_update = (inst.M5.i<<7)+inst.M5.imm7; - if(inst.M5.s) - temp -= post_update; - else - temp += post_update; - vmx_vcpu_set_gr(vcpu,inst.M5.r3,temp,0); - - }else if((inst.M3.x6>>2)<0xb){ // read - dir=IOREQ_READ; - vmx_vcpu_get_gr(vcpu,inst.M3.r1,&value); - vmx_vcpu_get_gr(vcpu,inst.M3.r3,&temp); - post_update = (inst.M3.i<<7)+inst.M3.imm7; - if(inst.M3.s) - temp -= post_update; - else - temp += post_update; - vmx_vcpu_set_gr(vcpu,inst.M3.r3,temp,0); - - } - } - // Floating-point Load/Store -// else if(inst.M6.major==6&&inst.M6.m==0&&inst.M6.x==0&&inst.M6.x6==3){ -// inst_type=SL_FLOATING; //fp -// dir=IOREQ_READ; -// size=3; //ldfd -// } - else{ - printf("This memory access instruction can't be emulated two: %lx\n ",inst.inst); - while(1); - } - - size = 1 << size; - if(dir==IOREQ_WRITE){ - mmio_access(vcpu, padr, &data, size, ma, dir); - }else{ - mmio_access(vcpu, padr, &data, size, ma, dir); - if(size==0) - data = (value & 0xffffffffffffff00U) | (data & 0xffU); - else if(size==1) - data = (value & 0xffffffffffff0000U) | (data & 0xffffU); - else if(size==2) - data = (value & 0xffffffff00000000U) | (data & 0xffffffffU); - - if(inst_type==SL_INTEGER){ //gp - vmx_vcpu_set_gr(vcpu,inst.M1.r1,data,0); - }else{ - panic("Don't support ldfd now !"); -/* switch(inst.M6.f1){ - - case 6: - regs->f6=(struct 
ia64_fpreg)data; - case 7: - regs->f7=(struct ia64_fpreg)data; - case 8: - regs->f8=(struct ia64_fpreg)data; - case 9: - regs->f9=(struct ia64_fpreg)data; - case 10: - regs->f10=(struct ia64_fpreg)data; - case 11: - regs->f11=(struct ia64_fpreg)data; - default : - ia64_ldfs(inst.M6.f1,&data); - } -*/ - } - } - vmx_vcpu_increment_iip(vcpu); -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/pal_emul.c --- a/xen/arch/ia64/pal_emul.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,280 +0,0 @@ -/* - * PAL/SAL call delegation - * - * Copyright (c) 2004 Li Susie <susie.li@xxxxxxxxx> - * Copyright (c) 2005 Yu Ke <ke.yu@xxxxxxxxx> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - */ - -#include <asm/vmx_vcpu.h> - -static void -get_pal_parameters (VCPU *vcpu, UINT64 *gr29, - UINT64 *gr30, UINT64 *gr31) { - - vmx_vcpu_get_gr(vcpu,29,gr29); - vmx_vcpu_get_gr(vcpu,30,gr30); - vmx_vcpu_get_gr(vcpu,31,gr31); -} - -static void -set_pal_result (VCPU *vcpu,struct ia64_pal_retval result) { - - vmx_vcpu_set_gr(vcpu,8, result.status,0); - vmx_vcpu_set_gr(vcpu,9, result.v0,0); - vmx_vcpu_set_gr(vcpu,10, result.v1,0); - vmx_vcpu_set_gr(vcpu,11, result.v2,0); -} - - -static struct ia64_pal_retval -pal_cache_flush (VCPU *vcpu) { - UINT64 gr28,gr29, gr30, gr31; - struct ia64_pal_retval result; - - get_pal_parameters (vcpu, &gr29, &gr30, &gr31); - vmx_vcpu_get_gr(vcpu,28,&gr28); - - /* Always call Host Pal in int=1 */ - gr30 = gr30 &(~(0x2UL)); - - /* call Host PAL cache flush */ - result=ia64_pal_call_static(gr28 ,gr29, gr30,gr31,1); // Clear psr.ic when call PAL_CACHE_FLUSH - - /* If host PAL call is interrupted, then loop to complete it */ -// while (result.status == 1) { -// ia64_pal_call_static(gr28 ,gr29, gr30, -// result.v1,1LL); -// } - while (result.status != 0) { - panic("PAL_CACHE_FLUSH ERROR, status %d", result.status); - } - - return result; -} - -static struct ia64_pal_retval -pal_vm_tr_read (VCPU *vcpu ) { -#warning pal_vm_tr_read: to be implemented - struct ia64_pal_retval result; - - result.status= -1; //unimplemented - - return result; -} - - -static struct ia64_pal_retval -pal_prefetch_visibility (VCPU *vcpu) { - /* Due to current MM virtualization algorithm, - * We do not allow guest to change mapping attribute. - * Thus we will not support PAL_PREFETCH_VISIBILITY - */ - struct ia64_pal_retval result; - - result.status= -1; //unimplemented - - return result; -} - -static struct ia64_pal_retval -pal_platform_addr(VCPU *vcpu) { - struct ia64_pal_retval result; - - result.status= 0; //success - - return result; -} - -static struct ia64_pal_retval -pal_halt (VCPU *vcpu) { -#warning pal_halt: to be implemented - //bugbug: to be implement. 
- struct ia64_pal_retval result; - - result.status= -1; //unimplemented - - return result; -} - - -static struct ia64_pal_retval -pal_halt_light (VCPU *vcpu) { - struct ia64_pal_retval result; - - result.status= -1; //unimplemented - - return result; -} - -static struct ia64_pal_retval -pal_cache_read (VCPU *vcpu) { - struct ia64_pal_retval result; - - result.status= -1; //unimplemented - - return result; -} - -static struct ia64_pal_retval -pal_cache_write (VCPU *vcpu) { - struct ia64_pal_retval result; - - result.status= -1; //unimplemented - - return result; -} - -static struct ia64_pal_retval -pal_bus_get_features(VCPU *vcpu){ - -} - -static struct ia64_pal_retval -pal_cache_summary(VCPU *vcpu){ - -} - -static struct ia64_pal_retval -pal_cache_init(VCPU *vcpu){ - struct ia64_pal_retval result; - result.status=0; - return result; -} - -static struct ia64_pal_retval -pal_cache_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_cache_prot_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_cache_shared_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_mem_attrib(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_debug_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_fixed_addr(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_freq_base(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_freq_ratios(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_halt_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_logical_to_physica(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_perf_mon_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_proc_get_features(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_ptce_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_register_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_rse_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_test_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_vm_summary(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_vm_info(VCPU *vcpu){ -} - -static struct ia64_pal_retval -pal_vm_page_size(VCPU *vcpu){ -} - -void -pal_emul( VCPU *vcpu) { - UINT64 gr28; - struct ia64_pal_retval result; - - - vmx_vcpu_get_gr(vcpu,28,&gr28); //bank1 - - switch (gr28) { - case PAL_CACHE_FLUSH: - result = pal_cache_flush (vcpu); - break; - - case PAL_PREFETCH_VISIBILITY: - result = pal_prefetch_visibility (vcpu); - break; - - case PAL_VM_TR_READ: - result = pal_vm_tr_read (vcpu); - break; - - case PAL_HALT: - result = pal_halt (vcpu); - break; - - case PAL_HALT_LIGHT: - result = pal_halt_light (vcpu); - break; - - case PAL_CACHE_READ: - result = pal_cache_read (vcpu); - break; - - case PAL_CACHE_WRITE: - result = pal_cache_write (vcpu); - break; - - case PAL_PLATFORM_ADDR: - result = pal_platform_addr (vcpu); - break; - - default: - panic("pal_emul(): guest call unsupported pal" ); - } - set_pal_result (vcpu, result); -} - - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/pcdp.c --- a/xen/arch/ia64/pcdp.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,120 +0,0 @@ -/* - * Parse the EFI PCDP table to locate the console device. - * - * (c) Copyright 2002, 2003, 2004 Hewlett-Packard Development Company, L.P. - * Khalid Aziz <khalid.aziz@xxxxxx> - * Alex Williamson <alex.williamson@xxxxxx> - * Bjorn Helgaas <bjorn.helgaas@xxxxxx> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include <linux/acpi.h> -#include <linux/console.h> -#include <linux/efi.h> -#include <linux/serial.h> -#ifdef XEN -#include <linux/errno.h> -#endif -#include "pcdp.h" - -static int __init -setup_serial_console(struct pcdp_uart *uart) -{ -#ifdef XEN - extern struct ns16550_defaults ns16550_com1; - ns16550_com1.baud = uart->baud; - ns16550_com1.io_base = uart->addr.address; - if (uart->bits) - ns16550_com1.data_bits = uart->bits; - return 0; -#else -#ifdef CONFIG_SERIAL_8250_CONSOLE - int mmio; - static char options[64]; - - mmio = (uart->addr.address_space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY); - snprintf(options, sizeof(options), "console=uart,%s,0x%lx,%lun%d", - mmio ? "mmio" : "io", uart->addr.address, uart->baud, - uart->bits ? uart->bits : 8); - - return early_serial_console_init(options); -#else - return -ENODEV; -#endif -#endif -} - -#ifndef XEN -static int __init -setup_vga_console(struct pcdp_vga *vga) -{ -#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) - if (efi_mem_type(0xA0000) == EFI_CONVENTIONAL_MEMORY) { - printk(KERN_ERR "PCDP: VGA selected, but frame buffer is not MMIO!\n"); - return -ENODEV; - } - - conswitchp = &vga_con; - printk(KERN_INFO "PCDP: VGA console\n"); - return 0; -#else - return -ENODEV; -#endif -} -#endif - -int __init -efi_setup_pcdp_console(char *cmdline) -{ - struct pcdp *pcdp; - struct pcdp_uart *uart; - struct pcdp_device *dev, *end; - int i, serial = 0; - - pcdp = efi.hcdp; - if (!pcdp) - return -ENODEV; - -#ifndef XEN - printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, __pa(pcdp)); -#endif - - if (strstr(cmdline, "console=hcdp")) { - if (pcdp->rev < 3) - serial = 1; - } else if (strstr(cmdline, "console=")) { -#ifndef XEN - printk(KERN_INFO "Explicit \"console=\"; ignoring PCDP\n"); -#endif - return -ENODEV; - } - - if (pcdp->rev < 3 && efi_uart_console_only()) - serial = 1; - - for (i = 0, uart = pcdp->uart; i < pcdp->num_uarts; i++, uart++) { - if (uart->flags & PCDP_UART_PRIMARY_CONSOLE || serial) { - if (uart->type == PCDP_CONSOLE_UART) { - return setup_serial_console(uart); - } - } - } - -#ifndef XEN - end = (struct pcdp_device *) ((u8 *) pcdp + pcdp->length); - for (dev = (struct pcdp_device *) (pcdp->uart + pcdp->num_uarts); - dev < end; - dev = (struct pcdp_device *) ((u8 *) dev + dev->length)) { - if (dev->flags & PCDP_PRIMARY_CONSOLE) { - if (dev->type == PCDP_CONSOLE_VGA) { - return setup_vga_console((struct pcdp_vga *) dev); - } - } - } -#endif - - return -ENODEV; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/privop.c --- a/xen/arch/ia64/privop.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1130 +0,0 @@ -/* - * Privileged operation "API" handling functions. - * - * Copyright (C) 2004 Hewlett-Packard Co. 
- * Dan Magenheimer (dan.magenheimer@xxxxxx) - * - */ - -#include <asm/privop.h> -#include <asm/vcpu.h> -#include <asm/processor.h> -#include <asm/delay.h> // Debug only -//#include <debug.h> - -long priv_verbose=0; - -/************************************************************************** -Hypercall bundle creation -**************************************************************************/ - - -void build_hypercall_bundle(UINT64 *imva, UINT64 brkimm, UINT64 hypnum, UINT64 ret) -{ - INST64_A5 slot0; - INST64_I19 slot1; - INST64_B4 slot2; - IA64_BUNDLE bundle; - - // slot1: mov r2 = hypnum (low 20 bits) - slot0.inst = 0; - slot0.qp = 0; slot0.r1 = 2; slot0.r3 = 0; slot0.major = 0x9; - slot0.imm7b = hypnum; slot0.imm9d = hypnum >> 7; - slot0.imm5c = hypnum >> 16; slot0.s = 0; - // slot1: break brkimm - slot1.inst = 0; - slot1.qp = 0; slot1.x6 = 0; slot1.x3 = 0; slot1.major = 0x0; - slot1.imm20 = brkimm; slot1.i = brkimm >> 20; - // if ret slot2: br.ret.sptk.many rp - // else slot2: br.cond.sptk.many rp - slot2.inst = 0; slot2.qp = 0; slot2.p = 1; slot2.b2 = 0; - slot2.wh = 0; slot2.d = 0; slot2.major = 0x0; - if (ret) { - slot2.btype = 4; slot2.x6 = 0x21; - } - else { - slot2.btype = 0; slot2.x6 = 0x20; - } - - bundle.i64[0] = 0; bundle.i64[1] = 0; - bundle.template = 0x11; - bundle.slot0 = slot0.inst; bundle.slot2 = slot2.inst; - bundle.slot1a = slot1.inst; bundle.slot1b = slot1.inst >> 18; - - *imva++ = bundle.i64[0]; *imva = bundle.i64[1]; -} - -/************************************************************************** -Privileged operation emulation routines -**************************************************************************/ - -IA64FAULT priv_rfi(VCPU *vcpu, INST64 inst) -{ - return vcpu_rfi(vcpu); -} - -IA64FAULT priv_bsw0(VCPU *vcpu, INST64 inst) -{ - return vcpu_bsw0(vcpu); -} - -IA64FAULT priv_bsw1(VCPU *vcpu, INST64 inst) -{ - return vcpu_bsw1(vcpu); -} - -IA64FAULT priv_cover(VCPU *vcpu, INST64 inst) -{ - return vcpu_cover(vcpu); -} - -IA64FAULT priv_ptc_l(VCPU *vcpu, INST64 inst) -{ - UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); - UINT64 addr_range; - - addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); - return vcpu_ptc_l(vcpu,vadr,addr_range); -} - -IA64FAULT priv_ptc_e(VCPU *vcpu, INST64 inst) -{ - UINT src = inst.M28.r3; - - // NOTE: ptc_e with source gr > 63 is emulated as a fc r(y-64) - if (src > 63) return(vcpu_fc(vcpu,vcpu_get_gr(vcpu,src - 64))); - return vcpu_ptc_e(vcpu,vcpu_get_gr(vcpu,src)); -} - -IA64FAULT priv_ptc_g(VCPU *vcpu, INST64 inst) -{ - UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); - UINT64 addr_range; - - addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); - return vcpu_ptc_g(vcpu,vadr,addr_range); -} - -IA64FAULT priv_ptc_ga(VCPU *vcpu, INST64 inst) -{ - UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); - UINT64 addr_range; - - addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); - return vcpu_ptc_ga(vcpu,vadr,addr_range); -} - -IA64FAULT priv_ptr_d(VCPU *vcpu, INST64 inst) -{ - UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); - UINT64 addr_range; - - addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); - return vcpu_ptr_d(vcpu,vadr,addr_range); -} - -IA64FAULT priv_ptr_i(VCPU *vcpu, INST64 inst) -{ - UINT64 vadr = vcpu_get_gr(vcpu,inst.M45.r3); - UINT64 addr_range; - - addr_range = 1 << ((vcpu_get_gr(vcpu,inst.M45.r2) & 0xfc) >> 2); - return vcpu_ptr_i(vcpu,vadr,addr_range); -} - -IA64FAULT priv_tpa(VCPU *vcpu, INST64 inst) -{ - UINT64 padr; - UINT fault; - UINT src = inst.M46.r3; - - // NOTE: tpa 
with source gr > 63 is emulated as a ttag rx=r(y-64) - if (src > 63) - fault = vcpu_ttag(vcpu,vcpu_get_gr(vcpu,src-64),&padr); - else fault = vcpu_tpa(vcpu,vcpu_get_gr(vcpu,src),&padr); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M46.r1, padr); - else return fault; -} - -IA64FAULT priv_tak(VCPU *vcpu, INST64 inst) -{ - UINT64 key; - UINT fault; - UINT src = inst.M46.r3; - - // NOTE: tak with source gr > 63 is emulated as a thash rx=r(y-64) - if (src > 63) - fault = vcpu_thash(vcpu,vcpu_get_gr(vcpu,src-64),&key); - else fault = vcpu_tak(vcpu,vcpu_get_gr(vcpu,src),&key); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M46.r1, key); - else return fault; -} - -/************************************ - * Insert translation register/cache -************************************/ - -IA64FAULT priv_itr_d(VCPU *vcpu, INST64 inst) -{ - UINT64 fault, itir, ifa, pte, slot; - - //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT); - if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT) - return(IA64_ILLOP_FAULT); - if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) - return(IA64_ILLOP_FAULT); - pte = vcpu_get_gr(vcpu,inst.M42.r2); - slot = vcpu_get_gr(vcpu,inst.M42.r3); - - return (vcpu_itr_d(vcpu,slot,pte,itir,ifa)); -} - -IA64FAULT priv_itr_i(VCPU *vcpu, INST64 inst) -{ - UINT64 fault, itir, ifa, pte, slot; - - //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT); - if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT) - return(IA64_ILLOP_FAULT); - if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) - return(IA64_ILLOP_FAULT); - pte = vcpu_get_gr(vcpu,inst.M42.r2); - slot = vcpu_get_gr(vcpu,inst.M42.r3); - - return (vcpu_itr_i(vcpu,slot,pte,itir,ifa)); -} - -IA64FAULT priv_itc_d(VCPU *vcpu, INST64 inst) -{ - UINT64 fault, itir, ifa, pte; - - //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT); - if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT) - return(IA64_ILLOP_FAULT); - if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) - return(IA64_ILLOP_FAULT); - pte = vcpu_get_gr(vcpu,inst.M41.r2); - - return (vcpu_itc_d(vcpu,pte,itir,ifa)); -} - -IA64FAULT priv_itc_i(VCPU *vcpu, INST64 inst) -{ - UINT64 fault, itir, ifa, pte; - - //if (!vcpu_get_psr_ic(vcpu)) return(IA64_ILLOP_FAULT); - if ((fault = vcpu_get_itir(vcpu,&itir)) != IA64_NO_FAULT) - return(IA64_ILLOP_FAULT); - if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) - return(IA64_ILLOP_FAULT); - pte = vcpu_get_gr(vcpu,inst.M41.r2); - - return (vcpu_itc_i(vcpu,pte,itir,ifa)); -} - -/************************************* - * Moves to semi-privileged registers -*************************************/ - -IA64FAULT priv_mov_to_ar_imm(VCPU *vcpu, INST64 inst) -{ - // I27 and M30 are identical for these fields - UINT64 ar3 = inst.M30.ar3; - UINT64 imm = vcpu_get_gr(vcpu,inst.M30.imm); - return (vcpu_set_ar(vcpu,ar3,imm)); -} - -IA64FAULT priv_mov_to_ar_reg(VCPU *vcpu, INST64 inst) -{ - // I26 and M29 are identical for these fields - UINT64 ar3 = inst.M29.ar3; - - if (inst.M29.r2 > 63 && inst.M29.ar3 < 8) { // privified mov from kr - UINT64 val; - if (vcpu_get_ar(vcpu,ar3,&val) != IA64_ILLOP_FAULT) - return vcpu_set_gr(vcpu, inst.M29.r2-64, val); - else return IA64_ILLOP_FAULT; - } - else { - UINT64 r2 = vcpu_get_gr(vcpu,inst.M29.r2); - return (vcpu_set_ar(vcpu,ar3,r2)); - } -} - -/******************************** - * Moves to privileged registers -********************************/ - -IA64FAULT priv_mov_to_pkr(VCPU *vcpu, INST64 inst) -{ - UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); - UINT64 
r2 = vcpu_get_gr(vcpu,inst.M42.r2); - return (vcpu_set_pkr(vcpu,r3,r2)); -} - -IA64FAULT priv_mov_to_rr(VCPU *vcpu, INST64 inst) -{ - UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); - UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); - return (vcpu_set_rr(vcpu,r3,r2)); -} - -IA64FAULT priv_mov_to_dbr(VCPU *vcpu, INST64 inst) -{ - UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); - UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); - return (vcpu_set_dbr(vcpu,r3,r2)); -} - -IA64FAULT priv_mov_to_ibr(VCPU *vcpu, INST64 inst) -{ - UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); - UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); - return (vcpu_set_ibr(vcpu,r3,r2)); -} - -IA64FAULT priv_mov_to_pmc(VCPU *vcpu, INST64 inst) -{ - UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); - UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); - return (vcpu_set_pmc(vcpu,r3,r2)); -} - -IA64FAULT priv_mov_to_pmd(VCPU *vcpu, INST64 inst) -{ - UINT64 r3 = vcpu_get_gr(vcpu,inst.M42.r3); - UINT64 r2 = vcpu_get_gr(vcpu,inst.M42.r2); - return (vcpu_set_pmd(vcpu,r3,r2)); -} - -unsigned long to_cr_cnt[128] = { 0 }; - -IA64FAULT priv_mov_to_cr(VCPU *vcpu, INST64 inst) -{ - UINT64 val = vcpu_get_gr(vcpu, inst.M32.r2); - to_cr_cnt[inst.M32.cr3]++; - switch (inst.M32.cr3) { - case 0: return vcpu_set_dcr(vcpu,val); - case 1: return vcpu_set_itm(vcpu,val); - case 2: return vcpu_set_iva(vcpu,val); - case 8: return vcpu_set_pta(vcpu,val); - case 16:return vcpu_set_ipsr(vcpu,val); - case 17:return vcpu_set_isr(vcpu,val); - case 19:return vcpu_set_iip(vcpu,val); - case 20:return vcpu_set_ifa(vcpu,val); - case 21:return vcpu_set_itir(vcpu,val); - case 22:return vcpu_set_iipa(vcpu,val); - case 23:return vcpu_set_ifs(vcpu,val); - case 24:return vcpu_set_iim(vcpu,val); - case 25:return vcpu_set_iha(vcpu,val); - case 64:return vcpu_set_lid(vcpu,val); - case 65:return IA64_ILLOP_FAULT; - case 66:return vcpu_set_tpr(vcpu,val); - case 67:return vcpu_set_eoi(vcpu,val); - case 68:return IA64_ILLOP_FAULT; - case 69:return IA64_ILLOP_FAULT; - case 70:return IA64_ILLOP_FAULT; - case 71:return IA64_ILLOP_FAULT; - case 72:return vcpu_set_itv(vcpu,val); - case 73:return vcpu_set_pmv(vcpu,val); - case 74:return vcpu_set_cmcv(vcpu,val); - case 80:return vcpu_set_lrr0(vcpu,val); - case 81:return vcpu_set_lrr1(vcpu,val); - default: return IA64_ILLOP_FAULT; - } -} - -IA64FAULT priv_rsm(VCPU *vcpu, INST64 inst) -{ - UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; - return vcpu_reset_psr_sm(vcpu,imm24); -} - -IA64FAULT priv_ssm(VCPU *vcpu, INST64 inst) -{ - UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; - return vcpu_set_psr_sm(vcpu,imm24); -} - -/** - * @todo Check for reserved bits and return IA64_RSVDREG_FAULT. 
- */ -IA64FAULT priv_mov_to_psr(VCPU *vcpu, INST64 inst) -{ - UINT64 val = vcpu_get_gr(vcpu, inst.M35.r2); - return vcpu_set_psr_l(vcpu,val); -} - -/********************************** - * Moves from privileged registers - **********************************/ - -IA64FAULT priv_mov_from_rr(VCPU *vcpu, INST64 inst) -{ - UINT64 val; - IA64FAULT fault; - - if (inst.M43.r1 > 63) { // privified mov from cpuid - fault = vcpu_get_cpuid(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M43.r1-64, val); - } - else { - fault = vcpu_get_rr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M43.r1, val); - } - return fault; -} - -IA64FAULT priv_mov_from_pkr(VCPU *vcpu, INST64 inst) -{ - UINT64 val; - IA64FAULT fault; - - fault = vcpu_get_pkr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M43.r1, val); - else return fault; -} - -IA64FAULT priv_mov_from_dbr(VCPU *vcpu, INST64 inst) -{ - UINT64 val; - IA64FAULT fault; - - fault = vcpu_get_dbr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M43.r1, val); - else return fault; -} - -IA64FAULT priv_mov_from_ibr(VCPU *vcpu, INST64 inst) -{ - UINT64 val; - IA64FAULT fault; - - fault = vcpu_get_ibr(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M43.r1, val); - else return fault; -} - -IA64FAULT priv_mov_from_pmc(VCPU *vcpu, INST64 inst) -{ - UINT64 val; - IA64FAULT fault; - - if (inst.M43.r1 > 63) { // privified mov from pmd - fault = vcpu_get_pmd(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M43.r1-64, val); - } - else { - fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M43.r1, val); - } - return fault; -} - -unsigned long from_cr_cnt[128] = { 0 }; - -#define cr_get(cr) \ - ((fault = vcpu_get_##cr(vcpu,&val)) == IA64_NO_FAULT) ? 
\ - vcpu_set_gr(vcpu, tgt, val) : fault; - -IA64FAULT priv_mov_from_cr(VCPU *vcpu, INST64 inst) -{ - UINT64 tgt = inst.M33.r1; - UINT64 val; - IA64FAULT fault; - - from_cr_cnt[inst.M33.cr3]++; - switch (inst.M33.cr3) { - case 0: return cr_get(dcr); - case 1: return cr_get(itm); - case 2: return cr_get(iva); - case 8: return cr_get(pta); - case 16:return cr_get(ipsr); - case 17:return cr_get(isr); - case 19:return cr_get(iip); - case 20:return cr_get(ifa); - case 21:return cr_get(itir); - case 22:return cr_get(iipa); - case 23:return cr_get(ifs); - case 24:return cr_get(iim); - case 25:return cr_get(iha); - case 64:return cr_get(lid); - case 65:return cr_get(ivr); - case 66:return cr_get(tpr); - case 67:return vcpu_set_gr(vcpu,tgt,0L); - case 68:return cr_get(irr0); - case 69:return cr_get(irr1); - case 70:return cr_get(irr2); - case 71:return cr_get(irr3); - case 72:return cr_get(itv); - case 73:return cr_get(pmv); - case 74:return cr_get(cmcv); - case 80:return cr_get(lrr0); - case 81:return cr_get(lrr1); - default: return IA64_ILLOP_FAULT; - } - return IA64_ILLOP_FAULT; -} - -IA64FAULT priv_mov_from_psr(VCPU *vcpu, INST64 inst) -{ - UINT64 tgt = inst.M33.r1; - UINT64 val; - IA64FAULT fault; - - if ((fault = vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, tgt, val); - else return fault; -} - -/************************************************************************** -Privileged operation decode and dispatch routines -**************************************************************************/ - -IA64_SLOT_TYPE slot_types[0x20][3] = { - {M, I, I}, {M, I, I}, {M, I, I}, {M, I, I}, - {M, I, ILLEGAL}, {M, I, ILLEGAL}, - {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL}, - {M, M, I}, {M, M, I}, {M, M, I}, {M, M, I}, - {M, F, I}, {M, F, I}, - {M, M, F}, {M, M, F}, - {M, I, B}, {M, I, B}, - {M, B, B}, {M, B, B}, - {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL}, - {B, B, B}, {B, B, B}, - {M, M, B}, {M, M, B}, - {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL}, - {M, F, B}, {M, F, B}, - {ILLEGAL, ILLEGAL, ILLEGAL}, {ILLEGAL, ILLEGAL, ILLEGAL} -}; - -// pointer to privileged emulation function -typedef IA64FAULT (*PPEFCN)(VCPU *vcpu, INST64 inst); - -PPEFCN Mpriv_funcs[64] = { - priv_mov_to_rr, priv_mov_to_dbr, priv_mov_to_ibr, priv_mov_to_pkr, - priv_mov_to_pmc, priv_mov_to_pmd, 0, 0, - 0, priv_ptc_l, priv_ptc_g, priv_ptc_ga, - priv_ptr_d, priv_ptr_i, priv_itr_d, priv_itr_i, - priv_mov_from_rr, priv_mov_from_dbr, priv_mov_from_ibr, priv_mov_from_pkr, - priv_mov_from_pmc, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, priv_tpa, priv_tak, - 0, 0, 0, 0, - priv_mov_from_cr, priv_mov_from_psr, 0, 0, - 0, 0, 0, 0, - priv_mov_to_cr, priv_mov_to_psr, priv_itc_d, priv_itc_i, - 0, 0, 0, 0, - priv_ptc_e, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 -}; - -struct { - unsigned long mov_to_ar_imm; - unsigned long mov_to_ar_reg; - unsigned long mov_from_ar; - unsigned long ssm; - unsigned long rsm; - unsigned long rfi; - unsigned long bsw0; - unsigned long bsw1; - unsigned long cover; - unsigned long fc; - unsigned long cpuid; - unsigned long Mpriv_cnt[64]; -} privcnt = { 0 }; - -unsigned long privop_trace = 0; - -IA64FAULT -priv_handle_op(VCPU *vcpu, REGS *regs, int privlvl) -{ - IA64_BUNDLE bundle; - IA64_BUNDLE __get_domain_bundle(UINT64); - int slot; - IA64_SLOT_TYPE slot_type; - INST64 inst; - PPEFCN pfunc; - unsigned long ipsr = regs->cr_ipsr; - UINT64 iip = regs->cr_iip; - int x6; - - // make a local copy of the bundle containing the privop -#if 1 - bundle = __get_domain_bundle(iip); - 
if (!bundle.i64[0] && !bundle.i64[1]) -#else - if (__copy_from_user(&bundle,iip,sizeof(bundle))) -#endif - { -//printf("*** priv_handle_op: privop bundle @%p not mapped, retrying\n",iip); - return vcpu_force_data_miss(vcpu,regs->cr_iip); - } -#if 0 - if (iip==0xa000000100001820) { - static int firstpagefault = 1; - if (firstpagefault) { - printf("*** First time to domain page fault!\n"); firstpagefault=0; - } - } -#endif - if (privop_trace) { - static long i = 400; - //if (i > 0) printf("privop @%p\n",iip); - if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n", - iip,ia64_get_itc(),ia64_get_itm()); - i--; - } - slot = ((struct ia64_psr *)&ipsr)->ri; - if (!slot) inst.inst = (bundle.i64[0]>>5) & MASK_41; - else if (slot == 1) - inst.inst = ((bundle.i64[0]>>46) | bundle.i64[1]<<18) & MASK_41; - else if (slot == 2) inst.inst = (bundle.i64[1]>>23) & MASK_41; - else printf("priv_handle_op: illegal slot: %d\n", slot); - - slot_type = slot_types[bundle.template][slot]; - if (priv_verbose) { - printf("priv_handle_op: checking bundle at 0x%lx (op=0x%016lx) slot %d (type=%d)\n", - iip, (UINT64)inst.inst, slot, slot_type); - } - if (slot_type == B && inst.generic.major == 0 && inst.B8.x6 == 0x0) { - // break instr for privified cover - } - else if (privlvl != 2) return (IA64_ILLOP_FAULT); - switch (slot_type) { - case M: - if (inst.generic.major == 0) { -#if 0 - if (inst.M29.x6 == 0 && inst.M29.x3 == 0) { - privcnt.cover++; - return priv_cover(vcpu,inst); - } -#endif - if (inst.M29.x3 != 0) break; - if (inst.M30.x4 == 8 && inst.M30.x2 == 2) { - privcnt.mov_to_ar_imm++; - return priv_mov_to_ar_imm(vcpu,inst); - } - if (inst.M44.x4 == 6) { - privcnt.ssm++; - return priv_ssm(vcpu,inst); - } - if (inst.M44.x4 == 7) { - privcnt.rsm++; - return priv_rsm(vcpu,inst); - } - break; - } - else if (inst.generic.major != 1) break; - x6 = inst.M29.x6; - if (x6 == 0x2a) { - if (inst.M29.r2 > 63 && inst.M29.ar3 < 8) - privcnt.mov_from_ar++; // privified mov from kr - else privcnt.mov_to_ar_reg++; - return priv_mov_to_ar_reg(vcpu,inst); - } - if (inst.M29.x3 != 0) break; - if (!(pfunc = Mpriv_funcs[x6])) break; - if (x6 == 0x1e || x6 == 0x1f) { // tpa or tak are "special" - if (inst.M46.r3 > 63) { - if (x6 == 0x1e) x6 = 0x1b; - else x6 = 0x1a; - } - } - if (x6 == 52 && inst.M28.r3 > 63) - privcnt.fc++; - else if (x6 == 16 && inst.M43.r3 > 63) - privcnt.cpuid++; - else privcnt.Mpriv_cnt[x6]++; - return (*pfunc)(vcpu,inst); - break; - case B: - if (inst.generic.major != 0) break; - if (inst.B8.x6 == 0x08) { - IA64FAULT fault; - privcnt.rfi++; - fault = priv_rfi(vcpu,inst); - if (fault == IA64_NO_FAULT) fault = IA64_RFI_IN_PROGRESS; - return fault; - } - if (inst.B8.x6 == 0x0c) { - privcnt.bsw0++; - return priv_bsw0(vcpu,inst); - } - if (inst.B8.x6 == 0x0d) { - privcnt.bsw1++; - return priv_bsw1(vcpu,inst); - } - if (inst.B8.x6 == 0x0) { // break instr for privified cover - privcnt.cover++; - return priv_cover(vcpu,inst); - } - break; - case I: - if (inst.generic.major != 0) break; -#if 0 - if (inst.I26.x6 == 0 && inst.I26.x3 == 0) { - privcnt.cover++; - return priv_cover(vcpu,inst); - } -#endif - if (inst.I26.x3 != 0) break; // I26.x3 == I27.x3 - if (inst.I26.x6 == 0x2a) { - if (inst.I26.r2 > 63 && inst.I26.ar3 < 8) - privcnt.mov_from_ar++; // privified mov from kr - else privcnt.mov_to_ar_reg++; - return priv_mov_to_ar_reg(vcpu,inst); - } - if (inst.I27.x6 == 0x0a) { - privcnt.mov_to_ar_imm++; - return priv_mov_to_ar_imm(vcpu,inst); - } - break; - default: - break; - } - //printf("We who are about do die 
salute you\n"); - printf("handle_op: can't handle privop at 0x%lx (op=0x%016lx) slot %d (type=%d), ipsr=%p\n", - iip, (UINT64)inst.inst, slot, slot_type, ipsr); - //printf("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip)); - //thread_mozambique("privop fault\n"); - return (IA64_ILLOP_FAULT); -} - -/** Emulate a privileged operation. - * - * This should probably return 0 on success and the "trap number" - * (e.g. illegal operation for bad register, priv op for an - * instruction that isn't allowed, etc.) on "failure" - * - * @param vcpu virtual cpu - * @param isrcode interrupt service routine code - * @return fault - */ -IA64FAULT -priv_emulate(VCPU *vcpu, REGS *regs, UINT64 isr) -{ - IA64FAULT fault; - UINT64 ipsr = regs->cr_ipsr; - UINT64 isrcode = (isr >> 4) & 0xf; - int privlvl; - - // handle privops masked as illops? and breaks (6) - if (isrcode != 1 && isrcode != 2 && isrcode != 0 && isrcode != 6) { - printf("priv_emulate: isrcode != 0 or 1 or 2\n"); - printf("priv_emulate: returning ILLOP, not implemented!\n"); - while (1); - return IA64_ILLOP_FAULT; - } - //if (isrcode != 1 && isrcode != 2) return 0; - vcpu_set_regs(vcpu,regs); - privlvl = (ipsr & IA64_PSR_CPL) >> IA64_PSR_CPL0_BIT; - // its OK for a privified-cover to be executed in user-land - fault = priv_handle_op(vcpu,regs,privlvl); - if ((fault == IA64_NO_FAULT) || (fault == IA64_EXTINT_VECTOR)) { // success!! - // update iip/ipsr to point to the next instruction - (void)vcpu_increment_iip(vcpu); - } - if (fault == IA64_ILLOP_FAULT) - printf("priv_emulate: priv_handle_op fails, isr=%p\n",isr); - return fault; -} - - -// FIXME: Move these to include/public/arch-ia64? -#define HYPERPRIVOP_RFI 0x1 -#define HYPERPRIVOP_RSM_DT 0x2 -#define HYPERPRIVOP_SSM_DT 0x3 -#define HYPERPRIVOP_COVER 0x4 -#define HYPERPRIVOP_ITC_D 0x5 -#define HYPERPRIVOP_ITC_I 0x6 -#define HYPERPRIVOP_SSM_I 0x7 -#define HYPERPRIVOP_GET_IVR 0x8 -#define HYPERPRIVOP_GET_TPR 0x9 -#define HYPERPRIVOP_SET_TPR 0xa -#define HYPERPRIVOP_EOI 0xb -#define HYPERPRIVOP_SET_ITM 0xc -#define HYPERPRIVOP_THASH 0xd -#define HYPERPRIVOP_PTC_GA 0xe -#define HYPERPRIVOP_ITR_D 0xf -#define HYPERPRIVOP_GET_RR 0x10 -#define HYPERPRIVOP_SET_RR 0x11 -#define HYPERPRIVOP_MAX 0x11 - -char *hyperpriv_str[HYPERPRIVOP_MAX+1] = { - 0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", "ssm.i", - "=ivr", "=tpr", "tpr=", "eoi", "itm=", "thash", "ptc.ga", "itr.d", - "=rr", "rr=", - 0 -}; - -unsigned long slow_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 }; -unsigned long fast_hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 }; - -/* hyperprivops are generally executed in assembly (with physical psr.ic off) - * so this code is primarily used for debugging them */ -int -ia64_hyperprivop(unsigned long iim, REGS *regs) -{ - struct vcpu *v = (struct domain *) current; - INST64 inst; - UINT64 val; - UINT64 itir, ifa; - -// FIXME: Handle faults appropriately for these - if (!iim || iim > HYPERPRIVOP_MAX) { - printf("bad hyperprivop; ignored\n"); - printf("iim=%d, iip=%p\n",iim,regs->cr_iip); - return 1; - } - slow_hyperpriv_cnt[iim]++; - switch(iim) { - case HYPERPRIVOP_RFI: - (void)vcpu_rfi(v); - return 0; // don't update iip - case HYPERPRIVOP_RSM_DT: - (void)vcpu_reset_psr_dt(v); - return 1; - case HYPERPRIVOP_SSM_DT: - (void)vcpu_set_psr_dt(v); - return 1; - case HYPERPRIVOP_COVER: - (void)vcpu_cover(v); - return 1; - case HYPERPRIVOP_ITC_D: - (void)vcpu_get_itir(v,&itir); - (void)vcpu_get_ifa(v,&ifa); - (void)vcpu_itc_d(v,regs->r8,itir,ifa); - return 1; - case HYPERPRIVOP_ITC_I: - (void)vcpu_get_itir(v,&itir); - 
(void)vcpu_get_ifa(v,&ifa); - (void)vcpu_itc_i(v,regs->r8,itir,ifa); - return 1; - case HYPERPRIVOP_SSM_I: - (void)vcpu_set_psr_i(v); - return 1; - case HYPERPRIVOP_GET_IVR: - (void)vcpu_get_ivr(v,&val); - regs->r8 = val; - return 1; - case HYPERPRIVOP_GET_TPR: - (void)vcpu_get_tpr(v,&val); - regs->r8 = val; - return 1; - case HYPERPRIVOP_SET_TPR: - (void)vcpu_set_tpr(v,regs->r8); - return 1; - case HYPERPRIVOP_EOI: - (void)vcpu_set_eoi(v,0L); - return 1; - case HYPERPRIVOP_SET_ITM: - (void)vcpu_set_itm(v,regs->r8); - return 1; - case HYPERPRIVOP_THASH: - (void)vcpu_thash(v,regs->r8,&val); - regs->r8 = val; - return 1; - case HYPERPRIVOP_PTC_GA: - (void)vcpu_ptc_ga(v,regs->r8,(1L << ((regs->r9 & 0xfc) >> 2))); - return 1; - case HYPERPRIVOP_ITR_D: - (void)vcpu_get_itir(v,&itir); - (void)vcpu_get_ifa(v,&ifa); - (void)vcpu_itr_d(v,regs->r8,regs->r9,itir,ifa); - return 1; - case HYPERPRIVOP_GET_RR: - (void)vcpu_get_rr(v,regs->r8,&val); - regs->r8 = val; - return 1; - case HYPERPRIVOP_SET_RR: - (void)vcpu_set_rr(v,regs->r8,regs->r9); - return 1; - } - return 0; -} - - -/************************************************************************** -Privileged operation instrumentation routines -**************************************************************************/ - -char *Mpriv_str[64] = { - "mov_to_rr", "mov_to_dbr", "mov_to_ibr", "mov_to_pkr", - "mov_to_pmc", "mov_to_pmd", "<0x06>", "<0x07>", - "<0x08>", "ptc_l", "ptc_g", "ptc_ga", - "ptr_d", "ptr_i", "itr_d", "itr_i", - "mov_from_rr", "mov_from_dbr", "mov_from_ibr", "mov_from_pkr", - "mov_from_pmc", "<0x15>", "<0x16>", "<0x17>", - "<0x18>", "<0x19>", "privified-thash", "privified-ttag", - "<0x1c>", "<0x1d>", "tpa", "tak", - "<0x20>", "<0x21>", "<0x22>", "<0x23>", - "mov_from_cr", "mov_from_psr", "<0x26>", "<0x27>", - "<0x28>", "<0x29>", "<0x2a>", "<0x2b>", - "mov_to_cr", "mov_to_psr", "itc_d", "itc_i", - "<0x30>", "<0x31>", "<0x32>", "<0x33>", - "ptc_e", "<0x35>", "<0x36>", "<0x37>", - "<0x38>", "<0x39>", "<0x3a>", "<0x3b>", - "<0x3c>", "<0x3d>", "<0x3e>", "<0x3f>" -}; - -#define RS "Rsvd" -char *cr_str[128] = { - "dcr","itm","iva",RS,RS,RS,RS,RS, - "pta",RS,RS,RS,RS,RS,RS,RS, - "ipsr","isr",RS,"iip","ifa","itir","iipa","ifs", - "iim","iha",RS,RS,RS,RS,RS,RS, - RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS, - RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS, - "lid","ivr","tpr","eoi","irr0","irr1","irr2","irr3", - "itv","pmv","cmcv",RS,RS,RS,RS,RS, - "lrr0","lrr1",RS,RS,RS,RS,RS,RS, - RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS, - RS,RS,RS,RS,RS,RS,RS,RS, RS,RS,RS,RS,RS,RS,RS,RS, - RS,RS,RS,RS,RS,RS,RS,RS -}; - -// FIXME: should use snprintf to ensure no buffer overflow -int dump_privop_counts(char *buf) -{ - int i, j; - UINT64 sum = 0; - char *s = buf; - - // this is ugly and should probably produce sorted output - // but it will have to do for now - sum += privcnt.mov_to_ar_imm; sum += privcnt.mov_to_ar_reg; - sum += privcnt.ssm; sum += privcnt.rsm; - sum += privcnt.rfi; sum += privcnt.bsw0; - sum += privcnt.bsw1; sum += privcnt.cover; - for (i=0; i < 64; i++) sum += privcnt.Mpriv_cnt[i]; - s += sprintf(s,"Privop statistics: (Total privops: %ld)\n",sum); - if (privcnt.mov_to_ar_imm) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_imm, - "mov_to_ar_imm", (privcnt.mov_to_ar_imm*100L)/sum); - if (privcnt.mov_to_ar_reg) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_reg, - "mov_to_ar_reg", (privcnt.mov_to_ar_reg*100L)/sum); - if (privcnt.mov_from_ar) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_from_ar, - 
"privified-mov_from_ar", (privcnt.mov_from_ar*100L)/sum); - if (privcnt.ssm) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.ssm, - "ssm", (privcnt.ssm*100L)/sum); - if (privcnt.rsm) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rsm, - "rsm", (privcnt.rsm*100L)/sum); - if (privcnt.rfi) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rfi, - "rfi", (privcnt.rfi*100L)/sum); - if (privcnt.bsw0) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw0, - "bsw0", (privcnt.bsw0*100L)/sum); - if (privcnt.bsw1) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw1, - "bsw1", (privcnt.bsw1*100L)/sum); - if (privcnt.cover) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cover, - "cover", (privcnt.cover*100L)/sum); - if (privcnt.fc) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.fc, - "privified-fc", (privcnt.fc*100L)/sum); - if (privcnt.cpuid) - s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cpuid, - "privified-getcpuid", (privcnt.cpuid*100L)/sum); - for (i=0; i < 64; i++) if (privcnt.Mpriv_cnt[i]) { - if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\n"); - else s += sprintf(s,"%10d %s [%d%%]\n", privcnt.Mpriv_cnt[i], - Mpriv_str[i], (privcnt.Mpriv_cnt[i]*100L)/sum); - if (i == 0x24) { // mov from CR - s += sprintf(s," ["); - for (j=0; j < 128; j++) if (from_cr_cnt[j]) { - if (!cr_str[j]) - s += sprintf(s,"PRIVSTRING NULL!!\n"); - s += sprintf(s,"%s(%d),",cr_str[j],from_cr_cnt[j]); - } - s += sprintf(s,"]\n"); - } - else if (i == 0x2c) { // mov to CR - s += sprintf(s," ["); - for (j=0; j < 128; j++) if (to_cr_cnt[j]) { - if (!cr_str[j]) - s += sprintf(s,"PRIVSTRING NULL!!\n"); - s += sprintf(s,"%s(%d),",cr_str[j],to_cr_cnt[j]); - } - s += sprintf(s,"]\n"); - } - } - return s - buf; -} - -int zero_privop_counts(char *buf) -{ - int i, j; - char *s = buf; - - // this is ugly and should probably produce sorted output - // but it will have to do for now - privcnt.mov_to_ar_imm = 0; privcnt.mov_to_ar_reg = 0; - privcnt.mov_from_ar = 0; - privcnt.ssm = 0; privcnt.rsm = 0; - privcnt.rfi = 0; privcnt.bsw0 = 0; - privcnt.bsw1 = 0; privcnt.cover = 0; - privcnt.fc = 0; privcnt.cpuid = 0; - for (i=0; i < 64; i++) privcnt.Mpriv_cnt[i] = 0; - for (j=0; j < 128; j++) from_cr_cnt[j] = 0; - for (j=0; j < 128; j++) to_cr_cnt[j] = 0; - s += sprintf(s,"All privop statistics zeroed\n"); - return s - buf; -} - -#ifdef PRIVOP_ADDR_COUNT - -extern struct privop_addr_count privop_addr_counter[]; - -void privop_count_addr(unsigned long iip, int inst) -{ - struct privop_addr_count *v = &privop_addr_counter[inst]; - int i; - - for (i = 0; i < PRIVOP_COUNT_NADDRS; i++) { - if (!v->addr[i]) { v->addr[i] = iip; v->count[i]++; return; } - else if (v->addr[i] == iip) { v->count[i]++; return; } - } - v->overflow++;; -} - -int dump_privop_addrs(char *buf) -{ - int i,j; - char *s = buf; - s += sprintf(s,"Privop addresses:\n"); - for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) { - struct privop_addr_count *v = &privop_addr_counter[i]; - s += sprintf(s,"%s:\n",v->instname); - for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) { - if (!v->addr[j]) break; - s += sprintf(s," @%p #%ld\n",v->addr[j],v->count[j]); - } - if (v->overflow) - s += sprintf(s," other #%ld\n",v->overflow); - } - return s - buf; -} - -void zero_privop_addrs(void) -{ - int i,j; - for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) { - struct privop_addr_count *v = &privop_addr_counter[i]; - for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) - v->addr[j] = v->count[j] = 0; - v->overflow = 0; - } -} -#endif - -extern unsigned long dtlb_translate_count; -extern unsigned long tr_translate_count; -extern unsigned long 
phys_translate_count; -extern unsigned long vhpt_translate_count; -extern unsigned long lazy_cover_count; -extern unsigned long idle_when_pending; -extern unsigned long pal_halt_light_count; -extern unsigned long context_switch_count; - -int dump_misc_stats(char *buf) -{ - char *s = buf; - s += sprintf(s,"Virtual TR translations: %d\n",tr_translate_count); - s += sprintf(s,"Virtual VHPT translations: %d\n",vhpt_translate_count); - s += sprintf(s,"Virtual DTLB translations: %d\n",dtlb_translate_count); - s += sprintf(s,"Physical translations: %d\n",phys_translate_count); - s += sprintf(s,"Idle when pending: %d\n",idle_when_pending); - s += sprintf(s,"PAL_HALT_LIGHT (no pending): %d\n",pal_halt_light_count); - s += sprintf(s,"context switches: %d\n",context_switch_count); - s += sprintf(s,"Lazy covers: %d\n",lazy_cover_count); - return s - buf; -} - -void zero_misc_stats(void) -{ - dtlb_translate_count = 0; - tr_translate_count = 0; - phys_translate_count = 0; - vhpt_translate_count = 0; - lazy_cover_count = 0; - pal_halt_light_count = 0; - idle_when_pending = 0; - context_switch_count = 0; -} - -int dump_hyperprivop_counts(char *buf) -{ - int i; - char *s = buf; - unsigned long total = 0; - for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += slow_hyperpriv_cnt[i]; - s += sprintf(s,"Slow hyperprivops (total %d):\n",total); - for (i = 1; i <= HYPERPRIVOP_MAX; i++) - if (slow_hyperpriv_cnt[i]) - s += sprintf(s,"%10d %s\n", - slow_hyperpriv_cnt[i], hyperpriv_str[i]); - total = 0; - for (i = 1; i <= HYPERPRIVOP_MAX; i++) total += fast_hyperpriv_cnt[i]; - s += sprintf(s,"Fast hyperprivops (total %d):\n",total); - for (i = 1; i <= HYPERPRIVOP_MAX; i++) - if (fast_hyperpriv_cnt[i]) - s += sprintf(s,"%10d %s\n", - fast_hyperpriv_cnt[i], hyperpriv_str[i]); - return s - buf; -} - -void zero_hyperprivop_counts(void) -{ - int i; - for (i = 0; i <= HYPERPRIVOP_MAX; i++) slow_hyperpriv_cnt[i] = 0; - for (i = 0; i <= HYPERPRIVOP_MAX; i++) fast_hyperpriv_cnt[i] = 0; -} - -#define TMPBUFLEN 8*1024 -int dump_privop_counts_to_user(char __user *ubuf, int len) -{ - char buf[TMPBUFLEN]; - int n = dump_privop_counts(buf); - - n += dump_hyperprivop_counts(buf + n); - n += dump_reflect_counts(buf + n); -#ifdef PRIVOP_ADDR_COUNT - n += dump_privop_addrs(buf + n); -#endif - n += dump_misc_stats(buf + n); - if (len < TMPBUFLEN) return -1; - if (__copy_to_user(ubuf,buf,n)) return -1; - return n; -} - -int zero_privop_counts_to_user(char __user *ubuf, int len) -{ - char buf[TMPBUFLEN]; - int n = zero_privop_counts(buf); - - zero_hyperprivop_counts(); -#ifdef PRIVOP_ADDR_COUNT - zero_privop_addrs(); -#endif - zero_misc_stats(); - zero_reflect_counts(); - if (len < TMPBUFLEN) return -1; - if (__copy_to_user(ubuf,buf,n)) return -1; - return n; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/process.c --- a/xen/arch/ia64/process.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,749 +0,0 @@ -/* - * Miscellaneous process/domain related routines - * - * Copyright (C) 2004 Hewlett-Packard Co. 
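(The dump_privop_counts_to_user()/zero_privop_counts_to_user() pair above follows a simple shape: format every statistics group into one fixed-size scratch buffer, then copy the text out only if the caller's buffer can hold it. A rough user-space sketch of that shape, with plain memcpy standing in for __copy_to_user and a toy formatter; the buffer size and names here are assumptions.

#include <stdio.h>
#include <string.h>

#define TMPBUFLEN (8 * 1024)   /* mirrors the fixed scratch size used above */

/* Hypothetical formatter: writes stats text, returns bytes it wanted to write. */
static int dump_stats(char *buf, size_t size)
{
    return snprintf(buf, size, "Privop statistics: (Total privops: %d)\n", 42);
}

/* Format into scratch space, then hand at most 'len' bytes to the caller. */
static int dump_to_caller(char *ubuf, size_t len)
{
    char buf[TMPBUFLEN];
    int n = dump_stats(buf, sizeof buf);

    if (n < 0 || (size_t)n >= sizeof buf || (size_t)n >= len)
        return -1;              /* formatting failed or caller's buffer too small */
    memcpy(ubuf, buf, (size_t)n + 1);
    return n;
}

int main(void)
{
    char out[256];
    int n = dump_to_caller(out, sizeof out);
    if (n > 0)
        fputs(out, stdout);
    return 0;
}

End of illustrative sketch; the patch resumes below.)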
- * Dan Magenheimer (dan.magenheimer@xxxxxx) - * - */ - -#include <xen/config.h> -#include <xen/lib.h> -#include <xen/errno.h> -#include <xen/sched.h> -#include <xen/smp.h> -#include <asm/ptrace.h> -#include <xen/delay.h> - -#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */ -#include <asm/sal.h> /* FOR struct ia64_sal_retval */ - -#include <asm/system.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/desc.h> -//#include <asm/ldt.h> -#include <xen/irq.h> -#include <xen/event.h> -#include <asm/regionreg.h> -#include <asm/privop.h> -#include <asm/vcpu.h> -#include <asm/ia64_int.h> -#include <asm/dom_fw.h> -#include "hpsim_ssc.h" - -extern unsigned long vcpu_get_itir_on_fault(struct vcpu *, UINT64); -extern struct ia64_sal_retval pal_emulator_static(UINT64); -extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64); - -extern unsigned long dom0_start, dom0_size; - -#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT) -// note IA64_PSR_PK removed from following, why is this necessary? -#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \ - IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \ - IA64_PSR_IT | IA64_PSR_BN) - -#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \ - IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \ - IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \ - IA64_PSR_CPL | IA64_PSR_MC | IA64_PSR_IS | \ - IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ - IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA) - -#define PSCB(x,y) VCPU(x,y) -#define PSCBX(x,y) x->arch.y - -extern unsigned long vcpu_verbose; - -long do_iopl(domid_t domain, unsigned int new_io_pl) -{ - dummy(); - return 0; -} - -void schedule_tail(struct vcpu *next) -{ - unsigned long rr7; - //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info); - //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info); -#ifdef CONFIG_VTI - /* rr7 will be postponed to last point when resuming back to guest */ - vmx_load_all_rr(current); -#else // CONFIG_VTI - if (rr7 = load_region_regs(current)) { - printk("schedule_tail: change to rr7 not yet implemented\n"); - } -#endif // CONFIG_VTI -} - -void tdpfoo(void) { } - -// given a domain virtual address, pte and pagesize, extract the metaphysical -// address, convert the pte for a physical address for (possibly different) -// Xen PAGE_SIZE and return modified pte. (NOTE: TLB insert should use -// PAGE_SIZE!) -unsigned long translate_domain_pte(unsigned long pteval, - unsigned long address, unsigned long itir) -{ - struct domain *d = current->domain; - unsigned long mask, pteval2, mpaddr; - unsigned long lookup_domain_mpa(struct domain *,unsigned long); - extern struct domain *dom0; - extern unsigned long dom0_start, dom0_size; - - // FIXME address had better be pre-validated on insert - mask = (1L << ((itir >> 2) & 0x3f)) - 1; - mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask); - if (d == dom0) { - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - //printk("translate_domain_pte: out-of-bounds dom0 mpaddr %p! 
itc=%lx...\n",mpaddr,ia64_get_itc()); - tdpfoo(); - } - } - else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) { - printf("translate_domain_pte: bad mpa=%p (> %p),vadr=%p,pteval=%p,itir=%p\n", - mpaddr,d->max_pages<<PAGE_SHIFT,address,pteval,itir); - tdpfoo(); - } - pteval2 = lookup_domain_mpa(d,mpaddr); - pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits - pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected) - pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2; - return pteval2; -} - -// given a current domain metaphysical address, return the physical address -unsigned long translate_domain_mpaddr(unsigned long mpaddr) -{ - extern unsigned long lookup_domain_mpa(struct domain *,unsigned long); - unsigned long pteval; - - if (current->domain == dom0) { - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr %p! continuing...\n",mpaddr); - tdpfoo(); - } - } - pteval = lookup_domain_mpa(current->domain,mpaddr); - return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK)); -} - -unsigned long slow_reflect_count[0x80] = { 0 }; -unsigned long fast_reflect_count[0x80] = { 0 }; - -#define inc_slow_reflect_count(vec) slow_reflect_count[vec>>8]++; - -void zero_reflect_counts(void) -{ - int i; - for (i=0; i<0x80; i++) slow_reflect_count[i] = 0; - for (i=0; i<0x80; i++) fast_reflect_count[i] = 0; -} - -int dump_reflect_counts(char *buf) -{ - int i,j,cnt; - char *s = buf; - - s += sprintf(s,"Slow reflections by vector:\n"); - for (i = 0, j = 0; i < 0x80; i++) { - if (cnt = slow_reflect_count[i]) { - s += sprintf(s,"0x%02x00:%10d, ",i,cnt); - if ((j++ & 3) == 3) s += sprintf(s,"\n"); - } - } - if (j & 3) s += sprintf(s,"\n"); - s += sprintf(s,"Fast reflections by vector:\n"); - for (i = 0, j = 0; i < 0x80; i++) { - if (cnt = fast_reflect_count[i]) { - s += sprintf(s,"0x%02x00:%10d, ",i,cnt); - if ((j++ & 3) == 3) s += sprintf(s,"\n"); - } - } - if (j & 3) s += sprintf(s,"\n"); - return s - buf; -} - -void reflect_interruption(unsigned long ifa, unsigned long isr, unsigned long itiriim, struct pt_regs *regs, unsigned long vector) -{ - unsigned long vcpu_get_ipsr_int_state(struct vcpu *,unsigned long); - unsigned long vcpu_get_rr_ve(struct vcpu *,unsigned long); - struct domain *d = current->domain; - struct vcpu *v = current; - - if (vector == IA64_EXTINT_VECTOR) { - - extern unsigned long vcpu_verbose, privop_trace; - static first_extint = 1; - if (first_extint) { - printf("Delivering first extint to domain: ifa=%p, isr=%p, itir=%p, iip=%p\n",ifa,isr,itiriim,regs->cr_iip); - //privop_trace = 1; vcpu_verbose = 1; - first_extint = 0; - } - } - if (!PSCB(v,interrupt_collection_enabled)) { - if (!(PSCB(v,ipsr) & IA64_PSR_DT)) { - panic_domain(regs,"psr.dt off, trying to deliver nested dtlb!\n"); - } - vector &= ~0xf; - if (vector != IA64_DATA_TLB_VECTOR && - vector != IA64_ALT_DATA_TLB_VECTOR && - vector != IA64_VHPT_TRANS_VECTOR) { -panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%p,iip=%p,ifa=%p,isr=%p,PSCB.iip=%p\n", - vector,regs->cr_ipsr,regs->cr_iip,ifa,isr,PSCB(v,iip)); - - } -//printf("Delivering NESTED DATA TLB fault\n"); - vector = IA64_DATA_NESTED_TLB_VECTOR; - regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL; - regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; -// NOTE: nested trap must NOT pass PSCB address - //regs->r31 = (unsigned long) &PSCB(v); - inc_slow_reflect_count(vector); - return; - - } - if ((vector & 0xf) == IA64_FORCED_IFA) - ifa = PSCB(v,tmp[0]); - vector &= ~0xf; - 
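(translate_domain_pte() above rebuilds a PTE for Xen's use: the page size comes from itir bits 2..7, the metaphysical address is the guest PTE's ppn combined with the low bits of the faulting address, and that ppn is then swapped for the machine frame returned by lookup_domain_mpa(); the real routine also forces the privilege-level field, which is omitted here. The address arithmetic as a small self-contained sketch; the ppn mask value and the fake lookup are assumptions.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* Illustrative constant; the real _PAGE_PPN_MASK lives in the ia64 headers. */
#define PPN_MASK 0x0003fffffffff000UL   /* assumed ppn bits of a PTE */

/* Fake metaphysical->machine lookup: pretend guest memory starts at 256M. */
static uint64_t lookup_machine(uint64_t mpaddr)
{
    return (mpaddr + 0x10000000UL) & PPN_MASK;
}

/* Rebuild a PTE from a guest PTE, the faulting address and the guest itir. */
static uint64_t translate_pte(uint64_t pteval, uint64_t address, uint64_t itir)
{
    uint64_t ps   = (itir >> 2) & 0x3f;            /* page size field of itir */
    uint64_t mask = (1UL << ps) - 1;
    uint64_t mpaddr  = ((pteval & PPN_MASK) & ~mask) | (address & mask);
    uint64_t machine = lookup_machine(mpaddr) & PPN_MASK;
    return (pteval & ~PPN_MASK) | machine;         /* keep permission/attr bits */
}

int main(void)
{
    uint64_t pte  = 0x0000000004000761UL;          /* some ppn plus attribute bits */
    uint64_t addr = 0x2000000000003a10UL;
    uint64_t itir = 14UL << 2;                     /* guest mapping uses 16K pages */
    printf("new pte = 0x%016" PRIx64 "\n", translate_pte(pte, addr, itir));
    return 0;
}

End of illustrative sketch; the patch resumes below.)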
PSCB(v,ifa) = ifa; - if (vector < IA64_DATA_NESTED_TLB_VECTOR) /* VHPT miss, TLB miss, Alt TLB miss */ - vcpu_thash(v,ifa,&PSCB(current,iha)); - PSCB(v,unat) = regs->ar_unat; // not sure if this is really needed? - PSCB(v,precover_ifs) = regs->cr_ifs; - vcpu_bsw0(v); - PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr); - if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) - PSCB(v,iim) = itiriim; - else PSCB(v,itir) = vcpu_get_itir_on_fault(v,ifa); - PSCB(v,isr) = isr; // this is unnecessary except for interrupts! - PSCB(v,iip) = regs->cr_iip; - PSCB(v,ifs) = 0; - PSCB(v,incomplete_regframe) = 0; - - regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL; - regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; -#ifdef CONFIG_SMP -#warning "SMP FIXME: sharedinfo doesn't handle smp yet, need page per vcpu" -#endif - regs->r31 = &(((mapped_regs_t *)SHARED_ARCHINFO_ADDR)->ipsr); - - PSCB(v,interrupt_delivery_enabled) = 0; - PSCB(v,interrupt_collection_enabled) = 0; - - inc_slow_reflect_count(vector); -} - -void foodpi(void) {} - -unsigned long pending_false_positive = 0; - -// ONLY gets called from ia64_leave_kernel -// ONLY call with interrupts disabled?? (else might miss one?) -// NEVER successful if already reflecting a trap/fault because psr.i==0 -void deliver_pending_interrupt(struct pt_regs *regs) -{ - struct domain *d = current->domain; - struct vcpu *v = current; - // FIXME: Will this work properly if doing an RFI??? - if (!is_idle_task(d) && user_mode(regs)) { - //vcpu_poke_timer(v); - if (vcpu_deliverable_interrupts(v)) { - unsigned long isr = regs->cr_ipsr & IA64_PSR_RI; - if (vcpu_timer_pending_early(v)) -printf("*#*#*#* about to deliver early timer to domain %d!!!\n",v->domain->domain_id); - reflect_interruption(0,isr,0,regs,IA64_EXTINT_VECTOR); - } - else if (PSCB(v,pending_interruption)) - ++pending_false_positive; - } -} -unsigned long lazy_cover_count = 0; - -int handle_lazy_cover(struct vcpu *v, unsigned long isr, struct pt_regs *regs) -{ - if (!PSCB(v,interrupt_collection_enabled)) { - PSCB(v,ifs) = regs->cr_ifs; - PSCB(v,incomplete_regframe) = 1; - regs->cr_ifs = 0; - lazy_cover_count++; - return(1); // retry same instruction with cr.ifs off - } - return(0); -} - -void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir) -{ - unsigned long iip = regs->cr_iip; - // FIXME should validate address here - unsigned long pteval; - unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL); - IA64FAULT fault; - - if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, isr, regs)) return; - if ((isr & IA64_ISR_SP) - || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) - { - /* - * This fault was due to a speculative load or lfetch.fault, set the "ed" - * bit in the psr to ensure forward progress. (Target register will get a - * NaT for ld.s, lfetch will be canceled.) - */ - ia64_psr(regs)->ed = 1; - return; - } - - fault = vcpu_translate(current,address,is_data,&pteval,&itir); - if (fault == IA64_NO_FAULT) - { - pteval = translate_domain_pte(pteval,address,itir); - vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,(itir>>2)&0x3f); - return; - } - else if (IS_VMM_ADDRESS(iip)) - { - if (!ia64_done_with_exception(regs)) { - // should never happen. 
If it does, region 0 addr may - // indicate a bad xen pointer - printk("*** xen_handle_domain_access: exception table" - " lookup failed, iip=%p, addr=%p, spinning...\n", - iip,address); - panic_domain(regs,"*** xen_handle_domain_access: exception table" - " lookup failed, iip=%p, addr=%p, spinning...\n", - iip,address); - } - return; - } - - reflect_interruption(address, isr, 0, regs, fault); -} - -void -ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, - unsigned long iim, unsigned long itir, unsigned long arg5, - unsigned long arg6, unsigned long arg7, unsigned long stack) -{ - struct pt_regs *regs = (struct pt_regs *) &stack; - unsigned long code, error = isr; - char buf[128]; - int result, sig; - static const char *reason[] = { - "IA-64 Illegal Operation fault", - "IA-64 Privileged Operation fault", - "IA-64 Privileged Register fault", - "IA-64 Reserved Register/Field fault", - "Disabled Instruction Set Transition fault", - "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", - "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", - "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" - }; -#if 0 -printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n", - vector, ifa, regs->cr_iip, regs->cr_ipsr, isr); -#endif - - if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { - /* - * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel - * the lfetch. - */ - ia64_psr(regs)->ed = 1; - printf("ia64_fault: handled lfetch.fault\n"); - return; - } - - switch (vector) { - case 24: /* General Exception */ - code = (isr >> 4) & 0xf; - sprintf(buf, "General Exception: %s%s", reason[code], - (code == 3) ? ((isr & (1UL << 37)) - ? 
" (RSE access)" : " (data access)") : ""); - if (code == 8) { -# ifdef CONFIG_IA64_PRINT_HAZARDS - printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", - current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, - regs->pr); -# endif - printf("ia64_fault: returning on hazard\n"); - return; - } - break; - - case 25: /* Disabled FP-Register */ - if (isr & 2) { - //disabled_fph_fault(regs); - //return; - } - sprintf(buf, "Disabled FPL fault---not supposed to happen!"); - break; - - case 26: /* NaT Consumption */ - if (user_mode(regs)) { - void *addr; - - if (((isr >> 4) & 0xf) == 2) { - /* NaT page consumption */ - //sig = SIGSEGV; - //code = SEGV_ACCERR; - addr = (void *) ifa; - } else { - /* register NaT consumption */ - //sig = SIGILL; - //code = ILL_ILLOPN; - addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - } - //siginfo.si_signo = sig; - //siginfo.si_code = code; - //siginfo.si_errno = 0; - //siginfo.si_addr = addr; - //siginfo.si_imm = vector; - //siginfo.si_flags = __ISR_VALID; - //siginfo.si_isr = isr; - //force_sig_info(sig, &siginfo, current); - //return; - } //else if (ia64_done_with_exception(regs)) - //return; - sprintf(buf, "NaT consumption"); - break; - - case 31: /* Unsupported Data Reference */ - if (user_mode(regs)) { - //siginfo.si_signo = SIGILL; - //siginfo.si_code = ILL_ILLOPN; - //siginfo.si_errno = 0; - //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - //siginfo.si_imm = vector; - //siginfo.si_flags = __ISR_VALID; - //siginfo.si_isr = isr; - //force_sig_info(SIGILL, &siginfo, current); - //return; - } - sprintf(buf, "Unsupported data reference"); - break; - - case 29: /* Debug */ - case 35: /* Taken Branch Trap */ - case 36: /* Single Step Trap */ - //if (fsys_mode(current, regs)) {} - switch (vector) { - case 29: - //siginfo.si_code = TRAP_HWBKPT; -#ifdef CONFIG_ITANIUM - /* - * Erratum 10 (IFA may contain incorrect address) now has - * "NoFix" status. There are no plans for fixing this. - */ - if (ia64_psr(regs)->is == 0) - ifa = regs->cr_iip; -#endif - break; - case 35: ifa = 0; break; - case 36: ifa = 0; break; - //case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; - //case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; - } - //siginfo.si_signo = SIGTRAP; - //siginfo.si_errno = 0; - //siginfo.si_addr = (void *) ifa; - //siginfo.si_imm = 0; - //siginfo.si_flags = __ISR_VALID; - //siginfo.si_isr = isr; - //force_sig_info(SIGTRAP, &siginfo, current); - //return; - - case 32: /* fp fault */ - case 33: /* fp trap */ - //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr); - //if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { - //siginfo.si_signo = SIGFPE; - //siginfo.si_errno = 0; - //siginfo.si_code = FPE_FLTINV; - //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - //siginfo.si_flags = __ISR_VALID; - //siginfo.si_isr = isr; - //siginfo.si_imm = 0; - //force_sig_info(SIGFPE, &siginfo, current); - //} - //return; - sprintf(buf, "FP fault/trap"); - break; - - case 34: - if (isr & 0x2) { - /* Lower-Privilege Transfer Trap */ - /* - * Just clear PSR.lp and then return immediately: all the - * interesting work (e.g., signal delivery is done in the kernel - * exit path). - */ - //ia64_psr(regs)->lp = 0; - //return; - sprintf(buf, "Lower-Privilege Transfer trap"); - } else { - /* Unimplemented Instr. 
Address Trap */ - if (user_mode(regs)) { - //siginfo.si_signo = SIGILL; - //siginfo.si_code = ILL_BADIADDR; - //siginfo.si_errno = 0; - //siginfo.si_flags = 0; - //siginfo.si_isr = 0; - //siginfo.si_imm = 0; - //siginfo.si_addr = (void *) (regs->cr_iip + ia64_psr(regs)->ri); - //force_sig_info(SIGILL, &siginfo, current); - //return; - } - sprintf(buf, "Unimplemented Instruction Address fault"); - } - break; - - case 45: - printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); - printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", - regs->cr_iip, ifa, isr); - //force_sig(SIGSEGV, current); - break; - - case 46: - printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); - printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", - regs->cr_iip, ifa, isr, iim); - //force_sig(SIGSEGV, current); - return; - - case 47: - sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); - break; - - default: - sprintf(buf, "Fault %lu", vector); - break; - } - //die_if_kernel(buf, regs, error); -printk("ia64_fault: %s: reflecting\n",buf); -reflect_interruption(ifa,isr,iim,regs,IA64_GENEX_VECTOR); -//while(1); - //force_sig(SIGILL, current); -} - -unsigned long running_on_sim = 0; - -void -do_ssc(unsigned long ssc, struct pt_regs *regs) -{ - extern unsigned long lookup_domain_mpa(struct domain *,unsigned long); - unsigned long arg0, arg1, arg2, arg3, retval; - char buf[2]; -/**/ static int last_fd, last_count; // FIXME FIXME FIXME -/**/ // BROKEN FOR MULTIPLE DOMAINS & SMP -/**/ struct ssc_disk_stat { int fd; unsigned count;} *stat, last_stat; - extern unsigned long vcpu_verbose, privop_trace; - - arg0 = vcpu_get_gr(current,32); - switch(ssc) { - case SSC_PUTCHAR: - buf[0] = arg0; - buf[1] = '\0'; - printf(buf); - break; - case SSC_GETCHAR: - retval = ia64_ssc(0,0,0,0,ssc); - vcpu_set_gr(current,8,retval); - break; - case SSC_WAIT_COMPLETION: - if (arg0) { // metaphysical address - - arg0 = translate_domain_mpaddr(arg0); -/**/ stat = (struct ssc_disk_stat *)__va(arg0); -///**/ if (stat->fd == last_fd) stat->count = last_count; -/**/ stat->count = last_count; -//if (last_count >= PAGE_SIZE) printf("ssc_wait: stat->fd=%d,last_fd=%d,last_count=%d\n",stat->fd,last_fd,last_count); -///**/ retval = ia64_ssc(arg0,0,0,0,ssc); -/**/ retval = 0; - } - else retval = -1L; - vcpu_set_gr(current,8,retval); - break; - case SSC_OPEN: - arg1 = vcpu_get_gr(current,33); // access rights -if (!running_on_sim) { printf("SSC_OPEN, not implemented on hardware. 
(ignoring...)\n"); arg0 = 0; } - if (arg0) { // metaphysical address - arg0 = translate_domain_mpaddr(arg0); - retval = ia64_ssc(arg0,arg1,0,0,ssc); - } - else retval = -1L; - vcpu_set_gr(current,8,retval); - break; - case SSC_WRITE: - case SSC_READ: -//if (ssc == SSC_WRITE) printf("DOING AN SSC_WRITE\n"); - arg1 = vcpu_get_gr(current,33); - arg2 = vcpu_get_gr(current,34); - arg3 = vcpu_get_gr(current,35); - if (arg2) { // metaphysical address of descriptor - struct ssc_disk_req *req; - unsigned long mpaddr, paddr; - long len; - - arg2 = translate_domain_mpaddr(arg2); - req = (struct disk_req *)__va(arg2); - req->len &= 0xffffffffL; // avoid strange bug - len = req->len; -/**/ last_fd = arg1; -/**/ last_count = len; - mpaddr = req->addr; -//if (last_count >= PAGE_SIZE) printf("do_ssc: read fd=%d, addr=%p, len=%lx ",last_fd,mpaddr,len); - retval = 0; - if ((mpaddr & PAGE_MASK) != ((mpaddr+len-1) & PAGE_MASK)) { - // do partial page first - req->addr = translate_domain_mpaddr(mpaddr); - req->len = PAGE_SIZE - (req->addr & ~PAGE_MASK); - len -= req->len; mpaddr += req->len; - retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); - arg3 += req->len; // file offset -/**/ last_stat.fd = last_fd; -/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION); -//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)[part]=%x ",req->addr,req->len,retval); - } - if (retval >= 0) while (len > 0) { - req->addr = translate_domain_mpaddr(mpaddr); - req->len = (len > PAGE_SIZE) ? PAGE_SIZE : len; - len -= PAGE_SIZE; mpaddr += PAGE_SIZE; - retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); - arg3 += req->len; // file offset -// TEMP REMOVED AGAIN arg3 += req->len; // file offset -/**/ last_stat.fd = last_fd; -/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION); -//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)=%x ",req->addr,req->len,retval); - } - // set it back to the original value - req->len = last_count; - } - else retval = -1L; - vcpu_set_gr(current,8,retval); -//if (last_count >= PAGE_SIZE) printf("retval=%x\n",retval); - break; - case SSC_CONNECT_INTERRUPT: - arg1 = vcpu_get_gr(current,33); - arg2 = vcpu_get_gr(current,34); - arg3 = vcpu_get_gr(current,35); - if (!running_on_sim) { printf("SSC_CONNECT_INTERRUPT, not implemented on hardware. (ignoring...)\n"); break; } - (void)ia64_ssc(arg0,arg1,arg2,arg3,ssc); - break; - case SSC_NETDEV_PROBE: - vcpu_set_gr(current,8,-1L); - break; - default: - printf("ia64_handle_break: bad ssc code %lx, iip=%p, b0=%p... 
spinning\n",ssc,regs->cr_iip,regs->b0); - while(1); - break; - } - vcpu_increment_iip(current); -} - -int first_break = 1; - -void -ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim) -{ - struct domain *d = (struct domain *) current->domain; - struct vcpu *v = (struct domain *) current; - extern unsigned long running_on_sim; - - if (first_break) { - if (platform_is_hp_ski()) running_on_sim = 1; - else running_on_sim = 0; - first_break = 0; - } - if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant - if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs); - else do_ssc(vcpu_get_gr(current,36), regs); - } - else if (iim == d->arch.breakimm) { - if (ia64_hypercall(regs)) - vcpu_increment_iip(current); - } - else if (!PSCB(v,interrupt_collection_enabled)) { - if (ia64_hyperprivop(iim,regs)) - vcpu_increment_iip(current); - } - else reflect_interruption(ifa,isr,iim,regs,IA64_BREAK_VECTOR); -} - -void -ia64_handle_privop (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long itir) -{ - IA64FAULT vector; - struct domain *d = current->domain; - struct vcpu *v = current; - // FIXME: no need to pass itir in to this routine as we need to - // compute the virtual itir anyway (based on domain's RR.ps) - // AND ACTUALLY reflect_interruption doesn't use it anyway! - itir = vcpu_get_itir_on_fault(v,ifa); - vector = priv_emulate(current,regs,isr); - if (vector != IA64_NO_FAULT && vector != IA64_RFI_IN_PROGRESS) { - reflect_interruption(ifa,isr,itir,regs,vector); - } -} - -#define INTR_TYPE_MAX 10 -UINT64 int_counts[INTR_TYPE_MAX]; - -void -ia64_handle_reflection (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim, unsigned long vector) -{ - struct domain *d = (struct domain *) current->domain; - struct vcpu *v = (struct domain *) current; - unsigned long check_lazy_cover = 0; - unsigned long psr = regs->cr_ipsr; - unsigned long itir = vcpu_get_itir_on_fault(v,ifa); - - if (!(psr & IA64_PSR_CPL)) { - printk("ia64_handle_reflection: reflecting with priv=0!!\n"); - } - // FIXME: no need to pass itir in to this routine as we need to - // compute the virtual itir anyway (based on domain's RR.ps) - // AND ACTUALLY reflect_interruption doesn't use it anyway! - itir = vcpu_get_itir_on_fault(v,ifa); - switch(vector) { - case 8: - vector = IA64_DIRTY_BIT_VECTOR; break; - case 9: - vector = IA64_INST_ACCESS_BIT_VECTOR; break; - case 10: - check_lazy_cover = 1; - vector = IA64_DATA_ACCESS_BIT_VECTOR; break; - case 20: - check_lazy_cover = 1; - vector = IA64_PAGE_NOT_PRESENT_VECTOR; break; - case 22: - vector = IA64_INST_ACCESS_RIGHTS_VECTOR; break; - case 23: - check_lazy_cover = 1; - vector = IA64_DATA_ACCESS_RIGHTS_VECTOR; break; - case 25: - vector = IA64_DISABLED_FPREG_VECTOR; - break; - case 26: -printf("*** NaT fault... attempting to handle as privop\n"); -printf("isr=%p, ifa=%p,iip=%p,ipsr=%p\n",isr,ifa,regs->cr_iip,psr); - vector = priv_emulate(v,regs,isr); - if (vector == IA64_NO_FAULT) { -printf("*** Handled privop masquerading as NaT fault\n"); - return; - } - vector = IA64_NAT_CONSUMPTION_VECTOR; break; - case 27: -//printf("*** Handled speculation vector, itc=%lx!\n",ia64_get_itc()); - itir = iim; - vector = IA64_SPECULATION_VECTOR; break; - case 30: - // FIXME: Should we handle unaligned refs in Xen?? 
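(ia64_handle_break() above demultiplexes a break by its immediate: the two hardcoded simulator SSC values, the domain's registered hypercall immediate, hyperprivops (only taken while the guest has interrupt collection off), and anything else reflected back to the guest. A compact sketch of that classification; the per-domain hypercall immediate below is a made-up value.

#include <stdio.h>

/* Simulator SSC immediates, mirroring the hardcoded constants above. */
#define SSC_IMM_A 0x80001
#define SSC_IMM_B 0x80002

enum break_action { DO_SSC, DO_HYPERCALL, DO_HYPERPRIVOP, REFLECT_TO_GUEST };

/* Classify a break immediate the way the handler above does. */
static enum break_action classify_break(unsigned long iim,
                                        unsigned long breakimm,
                                        int guest_ic_enabled)
{
    if (iim == SSC_IMM_A || iim == SSC_IMM_B)
        return DO_SSC;                  /* simulator system call */
    if (iim == breakimm)
        return DO_HYPERCALL;            /* regular hypercall */
    if (!guest_ic_enabled)
        return DO_HYPERPRIVOP;          /* fast privileged-op path */
    return REFLECT_TO_GUEST;            /* guest's own use of break */
}

int main(void)
{
    static const char *name[] = { "ssc", "hypercall", "hyperprivop", "reflect" };
    unsigned long breakimm = 0x1000;    /* hypothetical per-domain value */
    unsigned long samples[] = { 0x80001, 0x1000, 0x5, 0x42 };
    for (unsigned i = 0; i < 4; i++)
        printf("iim=0x%lx -> %s\n", samples[i],
               name[classify_break(samples[i], breakimm, i != 2)]);
    return 0;
}

End of illustrative sketch; the patch resumes below.)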
- vector = IA64_UNALIGNED_REF_VECTOR; break; - default: - printf("ia64_handle_reflection: unhandled vector=0x%lx\n",vector); - while(vector); - return; - } - if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, isr, regs)) return; - reflect_interruption(ifa,isr,itir,regs,vector); -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/regionreg.c --- a/xen/arch/ia64/regionreg.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,376 +0,0 @@ -/* - * Region register and region id management - * - * Copyright (C) 2001-2004 Hewlett-Packard Co. - * Dan Magenheimer (dan.magenheimer@xxxxxx - * Bret Mckee (bret.mckee@xxxxxx) - * - */ - - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/sched.h> -#include <asm/page.h> -#include <asm/regionreg.h> -#include <asm/vhpt.h> -#include <asm/vcpu.h> -extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info); - - -#define IA64_MIN_IMPL_RID_BITS (IA64_MIN_IMPL_RID_MSB+1) -#define IA64_MAX_IMPL_RID_BITS 24 - -#define MIN_RIDS (1 << IA64_MIN_IMPL_RID_BITS) -#define MIN_RID_MAX (MIN_RIDS - 1) -#define MIN_RID_MASK (MIN_RIDS - 1) -#define MAX_RIDS (1 << (IA64_MAX_IMPL_RID_BITS)) -#define MAX_RID (MAX_RIDS - 1) -#define MAX_RID_BLOCKS (1 << (IA64_MAX_IMPL_RID_BITS-IA64_MIN_IMPL_RID_BITS)) -#define RIDS_PER_RIDBLOCK MIN_RIDS - -#if 0 -// following already defined in include/asm-ia64/gcc_intrin.h -// it should probably be ifdef'd out from there to ensure all region -// register usage is encapsulated in this file -static inline unsigned long -ia64_get_rr (unsigned long rr) -{ - unsigned long r; - __asm__ __volatile__ (";;mov %0=rr[%1];;":"=r"(r):"r"(rr):"memory"); - return r; -} - -static inline void -ia64_set_rr (unsigned long rr, unsigned long rrv) -{ - __asm__ __volatile__ (";;mov rr[%0]=%1;;"::"r"(rr),"r"(rrv):"memory"); -} -#endif - -// use this to allocate a rid out of the "Xen reserved rid block" -unsigned long allocate_reserved_rid(void) -{ - static unsigned long currentrid = XEN_DEFAULT_RID; - unsigned long t = currentrid; - - unsigned long max = RIDS_PER_RIDBLOCK; - - if (++currentrid >= max) return(-1UL); - return t; -} - - -// returns -1 if none available -unsigned long allocate_metaphysical_rr(void) -{ - ia64_rr rrv; - - rrv.rid = allocate_reserved_rid(); - rrv.ps = PAGE_SHIFT; - rrv.ve = 0; - return rrv.rrval; -} - -int deallocate_metaphysical_rid(unsigned long rid) -{ - // fix this when the increment allocation mechanism is fixed. 
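(The macros at the top of regionreg.c size the region-ID space from the implemented RID width: 2^(minimum implemented bits) RIDs per block, with block 0 reserved for metaphysical/Xen use, and the 24-bit space divided into MAX_RID_BLOCKS such blocks. A short sketch of that arithmetic together with a bump allocator for the reserved block; the minimum width and starting rid below are assumptions.

#include <stdio.h>

/* Widths as used above; the minimum is really derived from PAL on hardware. */
#define MIN_IMPL_RID_BITS 18      /* assumed value of IA64_MIN_IMPL_RID_BITS */
#define MAX_IMPL_RID_BITS 24

#define MIN_RIDS       (1UL << MIN_IMPL_RID_BITS)            /* rids per block */
#define MAX_RID_BLOCKS (1UL << (MAX_IMPL_RID_BITS - MIN_IMPL_RID_BITS))

#define XEN_DEFAULT_RID 7         /* hypothetical first rid handed out in block 0 */

/* Bump allocator for the reserved block; returns -1UL when block 0 is exhausted. */
static unsigned long allocate_reserved_rid(void)
{
    static unsigned long currentrid = XEN_DEFAULT_RID;
    if (currentrid >= MIN_RIDS)
        return (unsigned long)-1;
    return currentrid++;
}

int main(void)
{
    unsigned long r1 = allocate_reserved_rid();
    unsigned long r2 = allocate_reserved_rid();
    printf("%lu rids per block, %lu blocks\n",
           (unsigned long)MIN_RIDS, (unsigned long)MAX_RID_BLOCKS);
    printf("first two reserved rids: %lu, %lu\n", r1, r2);
    return 0;
}

End of illustrative sketch; the patch resumes below.)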
- return 1; -} - -/************************************* - Region Block setup/management -*************************************/ - -static int implemented_rid_bits = 0; -static struct domain *ridblock_owner[MAX_RID_BLOCKS] = { 0 }; - -void get_impl_rid_bits(void) -{ - // FIXME (call PAL) -//#ifdef CONFIG_MCKINLEY - implemented_rid_bits = IA64_MAX_IMPL_RID_BITS; -//#else -//#error "rid ranges won't work on Merced" -//#endif - if (implemented_rid_bits <= IA64_MIN_IMPL_RID_BITS || - implemented_rid_bits > IA64_MAX_IMPL_RID_BITS) - BUG(); -} - - -/* - * Allocate a power-of-two-sized chunk of region id space -- one or more - * "rid blocks" - */ -int allocate_rid_range(struct domain *d, unsigned long ridbits) -{ - int i, j, n_rid_blocks; - - if (implemented_rid_bits == 0) get_impl_rid_bits(); - - if (ridbits >= IA64_MAX_IMPL_RID_BITS) - ridbits = IA64_MAX_IMPL_RID_BITS - 1; - - if (ridbits < IA64_MIN_IMPL_RID_BITS) - ridbits = IA64_MIN_IMPL_RID_BITS; - - // convert to rid_blocks and find one - n_rid_blocks = ridbits - IA64_MIN_IMPL_RID_BITS + 1; - - // skip over block 0, reserved for "meta-physical mappings (and Xen)" - for (i = n_rid_blocks; i < MAX_RID_BLOCKS; i += n_rid_blocks) { - if (ridblock_owner[i] == NULL) { - for (j = i; j < i + n_rid_blocks; ++j) { - if (ridblock_owner[j]) break; - } - if (ridblock_owner[j] == NULL) break; - } - } - - if (i >= MAX_RID_BLOCKS) return 0; - - // found an unused block: - // (i << min_rid_bits) <= rid < ((i + n) << min_rid_bits) - // mark this block as owned - for (j = i; j < i + n_rid_blocks; ++j) ridblock_owner[j] = d; - - // setup domain struct - d->arch.rid_bits = ridbits; - d->arch.starting_rid = i << IA64_MIN_IMPL_RID_BITS; d->arch.ending_rid = (i+n_rid_blocks) << IA64_MIN_IMPL_RID_BITS; -printf("###allocating rid_range, domain %p: starting_rid=%lx, ending_rid=%lx\n", -d,d->arch.starting_rid, d->arch.ending_rid); - - return 1; -} - - -int deallocate_rid_range(struct domain *d) -{ - int i; - int rid_block_end = d->arch.ending_rid >> IA64_MIN_IMPL_RID_BITS; - int rid_block_start = d->arch.starting_rid >> IA64_MIN_IMPL_RID_BITS; - - return 1; // KLUDGE ALERT - // - // not all domains will have allocated RIDs (physical mode loaders for instance) - // - if (d->arch.rid_bits == 0) return 1; - -#ifdef DEBUG - for (i = rid_block_start; i < rid_block_end; ++i) { - ASSERT(ridblock_owner[i] == d); - } -#endif - - for (i = rid_block_start; i < rid_block_end; ++i) - ridblock_owner[i] = NULL; - - d->arch.rid_bits = 0; - d->arch.starting_rid = 0; - d->arch.ending_rid = 0; - return 1; -} - - -static inline void -set_rr_no_srlz(unsigned long rr, unsigned long rrval) -{ - ia64_set_rr(rr, vmMangleRID(rrval)); -} - -void -set_rr(unsigned long rr, unsigned long rrval) -{ - ia64_set_rr(rr, vmMangleRID(rrval)); - ia64_srlz_d(); -} - -unsigned long -get_rr(unsigned long rr) -{ - return vmUnmangleRID(ia64_get_rr(rr)); -} - -static inline int validate_page_size(unsigned long ps) -{ - switch(ps) { - case 12: case 13: case 14: case 16: case 18: - case 20: case 22: case 24: case 26: case 28: - return 1; - default: - return 0; - } -} - -// validates and changes a single region register -// in the currently executing domain -// Passing a value of -1 is a (successful) no-op -// NOTE: DOES NOT SET VCPU's rrs[x] value!! 
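(allocate_rid_range() above hands a domain a power-of-two run of rid blocks by scanning the ridblock_owner[] array first-fit, stepping in units of the run length and skipping block 0. A self-contained sketch of that scan over a small table; the block count and return convention are simplified here.

#include <stdio.h>
#include <string.h>

#define NBLOCKS 64                /* stand-in for MAX_RID_BLOCKS */

static int owner[NBLOCKS];        /* 0 = free, otherwise a domain id */

/*
 * First-fit search for n contiguous free blocks, stepping in units of n and
 * skipping block 0 (reserved), like allocate_rid_range() above.
 * Returns the first block index of the run, or -1 if none is free.
 */
static int alloc_blocks(int dom, int n)
{
    for (int i = n; i + n <= NBLOCKS; i += n) {
        int j;
        for (j = i; j < i + n; j++)
            if (owner[j])
                break;
        if (j == i + n) {                 /* whole run is free: claim it */
            for (j = i; j < i + n; j++)
                owner[j] = dom;
            return i;
        }
    }
    return -1;
}

int main(void)
{
    memset(owner, 0, sizeof owner);
    printf("dom1 gets blocks starting at %d\n", alloc_blocks(1, 4));
    printf("dom2 gets blocks starting at %d\n", alloc_blocks(2, 4));
    return 0;
}

End of illustrative sketch; the patch resumes below.)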
-int set_one_rr(unsigned long rr, unsigned long val) -{ - struct vcpu *v = current; - unsigned long rreg = REGION_NUMBER(rr); - ia64_rr rrv, newrrv, memrrv; - unsigned long newrid; - - if (val == -1) return 1; - - rrv.rrval = val; - newrrv.rrval = 0; - newrid = v->arch.starting_rid + rrv.rid; - - if (newrid > v->arch.ending_rid) { - printk("can't set rr%d to %lx, starting_rid=%lx," - "ending_rid=%lx, val=%lx\n", rreg, newrid, - v->arch.starting_rid,v->arch.ending_rid,val); - return 0; - } - -#ifdef CONFIG_VTI - memrrv.rrval = rrv.rrval; - if (rreg == 7) { - newrrv.rid = newrid; - newrrv.ve = VHPT_ENABLED_REGION_7; - newrrv.ps = IA64_GRANULE_SHIFT; - ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info, - v->vcpu_info->arch.privregs); - } - else { - newrrv.rid = newrid; - // FIXME? region 6 needs to be uncached for EFI to work - if (rreg == 6) newrrv.ve = VHPT_ENABLED_REGION_7; - else newrrv.ve = VHPT_ENABLED_REGION_0_TO_6; - newrrv.ps = PAGE_SHIFT; - if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval; - set_rr(rr,newrrv.rrval); - } -#else - memrrv.rrval = rrv.rrval; - newrrv.rid = newrid; - newrrv.ve = 1; // VHPT now enabled for region 7!! - newrrv.ps = PAGE_SHIFT; - if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval; - if (rreg == 7) ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info, - v->vcpu_info->arch.privregs); - else set_rr(rr,newrrv.rrval); -#endif - return 1; -} - -// set rr0 to the passed rid (for metaphysical mode so don't use domain offset -int set_metaphysical_rr0(void) -{ - struct vcpu *v = current; - ia64_rr rrv; - -// rrv.ve = 1; FIXME: TURN ME BACK ON WHEN VHPT IS WORKING - set_rr(0,v->arch.metaphysical_rr0); -} - -// validates/changes region registers 0-6 in the currently executing domain -// Note that this is the one and only SP API (other than executing a privop) -// for a domain to use to change region registers -int set_all_rr( u64 rr0, u64 rr1, u64 rr2, u64 rr3, - u64 rr4, u64 rr5, u64 rr6, u64 rr7) -{ - if (!set_one_rr(0x0000000000000000L, rr0)) return 0; - if (!set_one_rr(0x2000000000000000L, rr1)) return 0; - if (!set_one_rr(0x4000000000000000L, rr2)) return 0; - if (!set_one_rr(0x6000000000000000L, rr3)) return 0; - if (!set_one_rr(0x8000000000000000L, rr4)) return 0; - if (!set_one_rr(0xa000000000000000L, rr5)) return 0; - if (!set_one_rr(0xc000000000000000L, rr6)) return 0; - if (!set_one_rr(0xe000000000000000L, rr7)) return 0; - return 1; -} - -void init_all_rr(struct vcpu *v) -{ - ia64_rr rrv; - - rrv.rrval = 0; - rrv.rrval = v->domain->arch.metaphysical_rr0; - rrv.ps = PAGE_SHIFT; - rrv.ve = 1; -if (!v->vcpu_info) { printf("Stopping in init_all_rr\n"); dummy(); } - VCPU(v,rrs[0]) = -1; - VCPU(v,rrs[1]) = rrv.rrval; - VCPU(v,rrs[2]) = rrv.rrval; - VCPU(v,rrs[3]) = rrv.rrval; - VCPU(v,rrs[4]) = rrv.rrval; - VCPU(v,rrs[5]) = rrv.rrval; - rrv.ve = 0; - VCPU(v,rrs[6]) = rrv.rrval; -// v->shared_info->arch.rrs[7] = rrv.rrval; -} - - -/* XEN/ia64 INTERNAL ROUTINES */ - -unsigned long physicalize_rid(struct vcpu *v, unsigned long rrval) -{ - ia64_rr rrv; - - rrv.rrval = rrval; - rrv.rid += v->arch.starting_rid; - return rrv.rrval; -} - -unsigned long -virtualize_rid(struct vcpu *v, unsigned long rrval) -{ - ia64_rr rrv; - - rrv.rrval = rrval; - rrv.rid -= v->arch.starting_rid; - return rrv.rrval; -} - -// loads a thread's region register (0-6) state into -// the real physical region registers. 
Returns the -// (possibly mangled) bits to store into rr7 -// iff it is different than what is currently in physical -// rr7 (because we have to to assembly and physical mode -// to change rr7). If no change to rr7 is required, returns 0. -// -unsigned long load_region_regs(struct vcpu *v) -{ - unsigned long rr0, rr1,rr2, rr3, rr4, rr5, rr6, rr7; - // TODO: These probably should be validated - unsigned long bad = 0; - - if (VCPU(v,metaphysical_mode)) { - ia64_rr rrv; - - rrv.rrval = 0; - rrv.rid = v->domain->arch.metaphysical_rr0; - rrv.ps = PAGE_SHIFT; - rrv.ve = 1; - rr0 = rrv.rrval; - set_rr_no_srlz(0x0000000000000000L, rr0); - ia64_srlz_d(); - } - else { - rr0 = VCPU(v,rrs[0]); - if (!set_one_rr(0x0000000000000000L, rr0)) bad |= 1; - } - rr1 = VCPU(v,rrs[1]); - rr2 = VCPU(v,rrs[2]); - rr3 = VCPU(v,rrs[3]); - rr4 = VCPU(v,rrs[4]); - rr5 = VCPU(v,rrs[5]); - rr6 = VCPU(v,rrs[6]); - rr7 = VCPU(v,rrs[7]); - if (!set_one_rr(0x2000000000000000L, rr1)) bad |= 2; - if (!set_one_rr(0x4000000000000000L, rr2)) bad |= 4; - if (!set_one_rr(0x6000000000000000L, rr3)) bad |= 8; - if (!set_one_rr(0x8000000000000000L, rr4)) bad |= 0x10; - if (!set_one_rr(0xa000000000000000L, rr5)) bad |= 0x20; - if (!set_one_rr(0xc000000000000000L, rr6)) bad |= 0x40; - if (!set_one_rr(0xe000000000000000L, rr7)) bad |= 0x80; - if (bad) { - panic_domain(0,"load_region_regs: can't set! bad=%lx\n",bad); - } - return 0; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/sn_console.c --- a/xen/arch/ia64/sn_console.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,84 +0,0 @@ -/* - * C-Brick Serial Port (and console) driver for SGI Altix machines. - * - * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. - */ - -#include <asm/acpi.h> -#include <asm/sn/sn_sal.h> -#include <xen/serial.h> - -void sn_putc(struct serial_port *, char); - -static struct uart_driver sn_sal_console = { - .putc = sn_putc, -}; - -/** - * early_sn_setup - early setup routine for SN platforms - * - * pulled from arch/ia64/sn/kernel/setup.c - */ -static void __init early_sn_setup(void) -{ - efi_system_table_t *efi_systab; - efi_config_table_t *config_tables; - struct ia64_sal_systab *sal_systab; - struct ia64_sal_desc_entry_point *ep; - char *p; - int i, j; - - /* - * Parse enough of the SAL tables to locate the SAL entry point. Since, console - * IO on SN2 is done via SAL calls, early_printk won't work without this. - * - * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c. - * Any changes to those file may have to be made hereas well. - */ - efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab); - config_tables = __va(efi_systab->tables); - for (i = 0; i < efi_systab->nr_tables; i++) { - if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == - 0) { - sal_systab = __va(config_tables[i].table); - p = (char *)(sal_systab + 1); - for (j = 0; j < sal_systab->entry_count; j++) { - if (*p == SAL_DESC_ENTRY_POINT) { - ep = (struct ia64_sal_desc_entry_point - *)p; - ia64_sal_handler_init(__va - (ep->sal_proc), - __va(ep->gp)); - return; - } - p += SAL_DESC_SIZE(*p); - } - } - } - /* Uh-oh, SAL not available?? 
*/ - printk(KERN_ERR "failed to find SAL entry point\n"); -} - -/** - * sn_serial_console_early_setup - Sets up early console output support - * - * pulled from drivers/serial/sn_console.c - */ -int __init sn_serial_console_early_setup(void) -{ - if (strcmp("sn2",acpi_get_sysname())) - return -1; - - early_sn_setup(); /* Find SAL entry points */ - serial_register_uart(0, &sn_sal_console, NULL); - - return 0; -} - -/* - * sn_putc - Send a character to the console, polled or interrupt mode - */ -void sn_putc(struct serial_port *port, char c) -{ - return ia64_sn_console_putc(c); -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vcpu.c --- a/xen/arch/ia64/vcpu.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1843 +0,0 @@ -/* - * Virtualized CPU functions - * - * Copyright (C) 2004 Hewlett-Packard Co. - * Dan Magenheimer (dan.magenheimer@xxxxxx) - * - */ - -#include <linux/sched.h> -#include <public/arch-ia64.h> -#include <asm/ia64_int.h> -#include <asm/vcpu.h> -#include <asm/regionreg.h> -#include <asm/tlb.h> -#include <asm/processor.h> -#include <asm/delay.h> -#include <asm/vmx_vcpu.h> - -typedef union { - struct ia64_psr ia64_psr; - unsigned long i64; -} PSR; - -//typedef struct pt_regs REGS; -//typedef struct domain VCPU; - -// this def for vcpu_regs won't work if kernel stack is present -#define vcpu_regs(vcpu) ((struct pt_regs *) vcpu->arch.regs) -#define PSCB(x,y) VCPU(x,y) -#define PSCBX(x,y) x->arch.y - -#define TRUE 1 -#define FALSE 0 -#define IA64_PTA_SZ_BIT 2 -#define IA64_PTA_VF_BIT 8 -#define IA64_PTA_BASE_BIT 15 -#define IA64_PTA_LFMT (1UL << IA64_PTA_VF_BIT) -#define IA64_PTA_SZ(x) (x##UL << IA64_PTA_SZ_BIT) - -#define STATIC - -#ifdef PRIVOP_ADDR_COUNT -struct privop_addr_count privop_addr_counter[PRIVOP_COUNT_NINSTS] = { - { "=ifa", { 0 }, { 0 }, 0 }, - { "thash", { 0 }, { 0 }, 0 }, - 0 -}; -extern void privop_count_addr(unsigned long addr, int inst); -#define PRIVOP_COUNT_ADDR(regs,inst) privop_count_addr(regs->cr_iip,inst) -#else -#define PRIVOP_COUNT_ADDR(x,y) do {} while (0) -#endif - -unsigned long dtlb_translate_count = 0; -unsigned long tr_translate_count = 0; -unsigned long phys_translate_count = 0; - -unsigned long vcpu_verbose = 0; -#define verbose(a...) 
do {if (vcpu_verbose) printf(a);} while(0) - -extern TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa); -extern TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa); - -/************************************************************************** - VCPU general register access routines -**************************************************************************/ - -UINT64 -vcpu_get_gr(VCPU *vcpu, unsigned reg) -{ - REGS *regs = vcpu_regs(vcpu); - UINT64 val; - - if (!reg) return 0; - getreg(reg,&val,0,regs); // FIXME: handle NATs later - return val; -} - -// returns: -// IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault -// IA64_NO_FAULT otherwise -IA64FAULT -vcpu_set_gr(VCPU *vcpu, unsigned reg, UINT64 value) -{ - REGS *regs = vcpu_regs(vcpu); - long sof = (regs->cr_ifs) & 0x7f; - - if (!reg) return IA64_ILLOP_FAULT; - if (reg >= sof + 32) return IA64_ILLOP_FAULT; - setreg(reg,value,0,regs); // FIXME: handle NATs later - return IA64_NO_FAULT; -} - -/************************************************************************** - VCPU privileged application register access routines -**************************************************************************/ - -IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val) -{ - if (reg == 44) return (vcpu_set_itc(vcpu,val)); - else if (reg == 27) return (IA64_ILLOP_FAULT); - else if (reg == 24) - printf("warning: setting ar.eflg is a no-op; no IA-32 support\n"); - else if (reg > 7) return (IA64_ILLOP_FAULT); - else PSCB(vcpu,krs[reg]) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val) -{ - if (reg == 24) - printf("warning: getting ar.eflg is a no-op; no IA-32 support\n"); - else if (reg > 7) return (IA64_ILLOP_FAULT); - else *val = PSCB(vcpu,krs[reg]); - return IA64_NO_FAULT; -} - -/************************************************************************** - VCPU processor status register access routines -**************************************************************************/ - -void vcpu_set_metaphysical_mode(VCPU *vcpu, BOOLEAN newmode) -{ - /* only do something if mode changes */ - if (!!newmode ^ !!PSCB(vcpu,metaphysical_mode)) { - if (newmode) set_metaphysical_rr0(); - else if (PSCB(vcpu,rrs[0]) != -1) - set_one_rr(0, PSCB(vcpu,rrs[0])); - PSCB(vcpu,metaphysical_mode) = newmode; - } -} - -IA64FAULT vcpu_reset_psr_dt(VCPU *vcpu) -{ - vcpu_set_metaphysical_mode(vcpu,TRUE); - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24) -{ - struct ia64_psr psr, imm, *ipsr; - REGS *regs = vcpu_regs(vcpu); - - //PRIVOP_COUNT_ADDR(regs,_RSM); - // TODO: All of these bits need to be virtualized - // TODO: Only allowed for current vcpu - __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); - ipsr = (struct ia64_psr *)®s->cr_ipsr; - imm = *(struct ia64_psr *)&imm24; - // interrupt flag - if (imm.i) PSCB(vcpu,interrupt_delivery_enabled) = 0; - if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 0; - // interrupt collection flag - //if (imm.ic) PSCB(vcpu,interrupt_delivery_enabled) = 0; - // just handle psr.up and psr.pp for now - if (imm24 & ~(IA64_PSR_BE | IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP - | IA64_PSR_I | IA64_PSR_IC | IA64_PSR_DT - | IA64_PSR_DFL | IA64_PSR_DFH)) - return (IA64_ILLOP_FAULT); - if (imm.dfh) ipsr->dfh = 0; - if (imm.dfl) ipsr->dfl = 0; - if (imm.pp) { ipsr->pp = 0; psr.pp = 0; } - if (imm.up) { ipsr->up = 0; psr.up = 0; } - if (imm.sp) { ipsr->sp = 0; psr.sp = 0; } - if (imm.be) ipsr->be = 0; - if (imm.dt) 
vcpu_set_metaphysical_mode(vcpu,TRUE); - __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory"); - return IA64_NO_FAULT; -} - -extern UINT64 vcpu_check_pending_interrupts(VCPU *vcpu); -#define SPURIOUS_VECTOR 0xf - -IA64FAULT vcpu_set_psr_dt(VCPU *vcpu) -{ - vcpu_set_metaphysical_mode(vcpu,FALSE); - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_psr_i(VCPU *vcpu) -{ - PSCB(vcpu,interrupt_delivery_enabled) = 1; - PSCB(vcpu,interrupt_collection_enabled) = 1; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24) -{ - struct ia64_psr psr, imm, *ipsr; - REGS *regs = vcpu_regs(vcpu); - UINT64 mask, enabling_interrupts = 0; - - //PRIVOP_COUNT_ADDR(regs,_SSM); - // TODO: All of these bits need to be virtualized - __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); - imm = *(struct ia64_psr *)&imm24; - ipsr = (struct ia64_psr *)®s->cr_ipsr; - // just handle psr.sp,pp and psr.i,ic (and user mask) for now - mask = IA64_PSR_PP|IA64_PSR_SP|IA64_PSR_I|IA64_PSR_IC|IA64_PSR_UM | - IA64_PSR_DT|IA64_PSR_DFL|IA64_PSR_DFH; - if (imm24 & ~mask) return (IA64_ILLOP_FAULT); - if (imm.dfh) ipsr->dfh = 1; - if (imm.dfl) ipsr->dfl = 1; - if (imm.pp) { ipsr->pp = 1; psr.pp = 1; } - if (imm.sp) { ipsr->sp = 1; psr.sp = 1; } - if (imm.i) { - if (!PSCB(vcpu,interrupt_delivery_enabled)) { -//printf("vcpu_set_psr_sm: psr.ic 0->1 "); - enabling_interrupts = 1; - } - PSCB(vcpu,interrupt_delivery_enabled) = 1; - } - if (imm.ic) PSCB(vcpu,interrupt_collection_enabled) = 1; - // TODO: do this faster - if (imm.mfl) { ipsr->mfl = 1; psr.mfl = 1; } - if (imm.mfh) { ipsr->mfh = 1; psr.mfh = 1; } - if (imm.ac) { ipsr->ac = 1; psr.ac = 1; } - if (imm.up) { ipsr->up = 1; psr.up = 1; } - if (imm.be) { - printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n"); - return (IA64_ILLOP_FAULT); - } - if (imm.dt) vcpu_set_metaphysical_mode(vcpu,FALSE); - __asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory"); -#if 0 // now done with deliver_pending_interrupts - if (enabling_interrupts) { - if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) { -//printf("with interrupts pending\n"); - return IA64_EXTINT_VECTOR; - } -//else printf("but nothing pending\n"); - } -#endif - if (enabling_interrupts && - vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) - PSCB(vcpu,pending_interruption) = 1; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_psr_l(VCPU *vcpu, UINT64 val) -{ - struct ia64_psr psr, newpsr, *ipsr; - REGS *regs = vcpu_regs(vcpu); - UINT64 enabling_interrupts = 0; - - // TODO: All of these bits need to be virtualized - __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); - newpsr = *(struct ia64_psr *)&val; - ipsr = (struct ia64_psr *)®s->cr_ipsr; - // just handle psr.up and psr.pp for now - //if (val & ~(IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP)) return (IA64_ILLOP_FAULT); - // however trying to set other bits can't be an error as it is in ssm - if (newpsr.dfh) ipsr->dfh = 1; - if (newpsr.dfl) ipsr->dfl = 1; - if (newpsr.pp) { ipsr->pp = 1; psr.pp = 1; } - if (newpsr.up) { ipsr->up = 1; psr.up = 1; } - if (newpsr.sp) { ipsr->sp = 1; psr.sp = 1; } - if (newpsr.i) { - if (!PSCB(vcpu,interrupt_delivery_enabled)) - enabling_interrupts = 1; - PSCB(vcpu,interrupt_delivery_enabled) = 1; - } - if (newpsr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1; - if (newpsr.mfl) { ipsr->mfl = 1; psr.mfl = 1; } - if (newpsr.mfh) { ipsr->mfh = 1; psr.mfh = 1; } - if (newpsr.ac) { ipsr->ac = 1; psr.ac = 1; } - if (newpsr.up) { ipsr->up = 1; psr.up = 1; } - if (newpsr.dt && 
newpsr.rt) vcpu_set_metaphysical_mode(vcpu,FALSE); - else vcpu_set_metaphysical_mode(vcpu,TRUE); - if (newpsr.be) { - printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n"); - return (IA64_ILLOP_FAULT); - } - //__asm__ __volatile (";; mov psr.l=%0;; srlz.d"::"r"(psr):"memory"); -#if 0 // now done with deliver_pending_interrupts - if (enabling_interrupts) { - if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) - return IA64_EXTINT_VECTOR; - } -#endif - if (enabling_interrupts && - vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) - PSCB(vcpu,pending_interruption) = 1; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_get_psr(VCPU *vcpu, UINT64 *pval) -{ - UINT64 psr; - struct ia64_psr newpsr; - - // TODO: This needs to return a "filtered" view of - // the psr, not the actual psr. Probably the psr needs - // to be a field in regs (in addition to ipsr). - __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); - newpsr = *(struct ia64_psr *)&psr; - if (newpsr.cpl == 2) newpsr.cpl = 0; - if (PSCB(vcpu,interrupt_delivery_enabled)) newpsr.i = 1; - else newpsr.i = 0; - if (PSCB(vcpu,interrupt_collection_enabled)) newpsr.ic = 1; - else newpsr.ic = 0; - *pval = *(unsigned long *)&newpsr; - return IA64_NO_FAULT; -} - -BOOLEAN vcpu_get_psr_ic(VCPU *vcpu) -{ - return !!PSCB(vcpu,interrupt_collection_enabled); -} - -BOOLEAN vcpu_get_psr_i(VCPU *vcpu) -{ - return !!PSCB(vcpu,interrupt_delivery_enabled); -} - -UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr) -{ - UINT64 dcr = PSCBX(vcpu,dcr); - PSR psr = {0}; - - //printf("*** vcpu_get_ipsr_int_state (0x%016lx)...",prevpsr); - psr.i64 = prevpsr; - psr.ia64_psr.be = 0; if (dcr & IA64_DCR_BE) psr.ia64_psr.be = 1; - psr.ia64_psr.pp = 0; if (dcr & IA64_DCR_PP) psr.ia64_psr.pp = 1; - psr.ia64_psr.ic = PSCB(vcpu,interrupt_collection_enabled); - psr.ia64_psr.i = PSCB(vcpu,interrupt_delivery_enabled); - psr.ia64_psr.bn = PSCB(vcpu,banknum); - psr.ia64_psr.dt = 1; psr.ia64_psr.it = 1; psr.ia64_psr.rt = 1; - if (psr.ia64_psr.cpl == 2) psr.ia64_psr.cpl = 0; // !!!! 
fool domain - // psr.pk = 1; - //printf("returns 0x%016lx...",psr.i64); - return psr.i64; -} - -/************************************************************************** - VCPU control register access routines -**************************************************************************/ - -IA64FAULT vcpu_get_dcr(VCPU *vcpu, UINT64 *pval) -{ -extern unsigned long privop_trace; -//privop_trace=0; -//verbose("vcpu_get_dcr: called @%p\n",PSCB(vcpu,iip)); - // Reads of cr.dcr on Xen always have the sign bit set, so - // a domain can differentiate whether it is running on SP or not - *pval = PSCBX(vcpu,dcr) | 0x8000000000000000L; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_iva(VCPU *vcpu, UINT64 *pval) -{ - *pval = PSCBX(vcpu,iva) & ~0x7fffL; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_pta(VCPU *vcpu, UINT64 *pval) -{ - *pval = PSCB(vcpu,pta); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_ipsr(VCPU *vcpu, UINT64 *pval) -{ - //REGS *regs = vcpu_regs(vcpu); - //*pval = regs->cr_ipsr; - *pval = PSCB(vcpu,ipsr); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_isr(VCPU *vcpu, UINT64 *pval) -{ - *pval = PSCB(vcpu,isr); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_iip(VCPU *vcpu, UINT64 *pval) -{ - //REGS *regs = vcpu_regs(vcpu); - //*pval = regs->cr_iip; - *pval = PSCB(vcpu,iip); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_ifa(VCPU *vcpu, UINT64 *pval) -{ - UINT64 val = PSCB(vcpu,ifa); - REGS *regs = vcpu_regs(vcpu); - PRIVOP_COUNT_ADDR(regs,_GET_IFA); - *pval = val; - return (IA64_NO_FAULT); -} - -unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr) -{ - ia64_rr rr; - - rr.rrval = PSCB(vcpu,rrs)[vadr>>61]; - return(rr.ps); -} - -unsigned long vcpu_get_rr_rid(VCPU *vcpu,UINT64 vadr) -{ - ia64_rr rr; - - rr.rrval = PSCB(vcpu,rrs)[vadr>>61]; - return(rr.rid); -} - -unsigned long vcpu_get_itir_on_fault(VCPU *vcpu, UINT64 ifa) -{ - ia64_rr rr; - - rr.rrval = 0; - rr.ps = vcpu_get_rr_ps(vcpu,ifa); - rr.rid = vcpu_get_rr_rid(vcpu,ifa); - return (rr.rrval); -} - - -IA64FAULT vcpu_get_itir(VCPU *vcpu, UINT64 *pval) -{ - UINT64 val = PSCB(vcpu,itir); - *pval = val; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_iipa(VCPU *vcpu, UINT64 *pval) -{ - UINT64 val = PSCB(vcpu,iipa); - // SP entry code does not save iipa yet nor does it get - // properly delivered in the pscb - printf("*** vcpu_get_iipa: cr.iipa not fully implemented yet!!\n"); - *pval = val; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_ifs(VCPU *vcpu, UINT64 *pval) -{ - //PSCB(vcpu,ifs) = PSCB(vcpu)->regs.cr_ifs; - //*pval = PSCB(vcpu,regs).cr_ifs; - *pval = PSCB(vcpu,ifs); - PSCB(vcpu,incomplete_regframe) = 0; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_iim(VCPU *vcpu, UINT64 *pval) -{ - UINT64 val = PSCB(vcpu,iim); - *pval = val; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_iha(VCPU *vcpu, UINT64 *pval) -{ - //return vcpu_thash(vcpu,PSCB(vcpu,ifa),pval); - UINT64 val = PSCB(vcpu,iha); - REGS *regs = vcpu_regs(vcpu); - PRIVOP_COUNT_ADDR(regs,_THASH); - *pval = val; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_dcr(VCPU *vcpu, UINT64 val) -{ -extern unsigned long privop_trace; -//privop_trace=1; - // Reads of cr.dcr on SP always have the sign bit set, so - // a domain can differentiate whether it is running on SP or not - // Thus, writes of DCR should ignore the sign bit -//verbose("vcpu_set_dcr: called\n"); - PSCBX(vcpu,dcr) = val & ~0x8000000000000000L; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val) -{ - PSCBX(vcpu,iva) = val & ~0x7fffL; - return 
(IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_pta(VCPU *vcpu, UINT64 val) -{ - if (val & IA64_PTA_LFMT) { - printf("*** No support for VHPT long format yet!!\n"); - return (IA64_ILLOP_FAULT); - } - if (val & (0x3f<<9)) /* reserved fields */ return IA64_RSVDREG_FAULT; - if (val & 2) /* reserved fields */ return IA64_RSVDREG_FAULT; - PSCB(vcpu,pta) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_ipsr(VCPU *vcpu, UINT64 val) -{ - PSCB(vcpu,ipsr) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_isr(VCPU *vcpu, UINT64 val) -{ - PSCB(vcpu,isr) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_iip(VCPU *vcpu, UINT64 val) -{ - PSCB(vcpu,iip) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_increment_iip(VCPU *vcpu) -{ - REGS *regs = vcpu_regs(vcpu); - struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; - if (ipsr->ri == 2) { ipsr->ri=0; regs->cr_iip += 16; } - else ipsr->ri++; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_ifa(VCPU *vcpu, UINT64 val) -{ - PSCB(vcpu,ifa) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_itir(VCPU *vcpu, UINT64 val) -{ - PSCB(vcpu,itir) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_iipa(VCPU *vcpu, UINT64 val) -{ - // SP entry code does not save iipa yet nor does it get - // properly delivered in the pscb - printf("*** vcpu_set_iipa: cr.iipa not fully implemented yet!!\n"); - PSCB(vcpu,iipa) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_ifs(VCPU *vcpu, UINT64 val) -{ - //REGS *regs = vcpu_regs(vcpu); - PSCB(vcpu,ifs) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_iim(VCPU *vcpu, UINT64 val) -{ - PSCB(vcpu,iim) = val; - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_set_iha(VCPU *vcpu, UINT64 val) -{ - PSCB(vcpu,iha) = val; - return IA64_NO_FAULT; -} - -/************************************************************************** - VCPU interrupt control register access routines -**************************************************************************/ - -void vcpu_pend_unspecified_interrupt(VCPU *vcpu) -{ - PSCB(vcpu,pending_interruption) = 1; -} - -void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector) -{ - if (vector & ~0xff) { - printf("vcpu_pend_interrupt: bad vector\n"); - return; - } -#ifdef CONFIG_VTI - if ( VMX_DOMAIN(vcpu) ) { - set_bit(vector,VPD_CR(vcpu,irr)); - } else -#endif // CONFIG_VTI - { - /* if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return; */ - if (test_bit(vector,PSCBX(vcpu,irr))) { -//printf("vcpu_pend_interrupt: overrun\n"); - } - set_bit(vector,PSCBX(vcpu,irr)); - PSCB(vcpu,pending_interruption) = 1; - } - -#if 0 - /* Keir: I think you should unblock when an interrupt is pending. 
*/ - { - int running = test_bit(_VCPUF_running, &vcpu->vcpu_flags); - vcpu_unblock(vcpu); - if ( running ) - smp_send_event_check_cpu(vcpu->processor); - } -#endif -} - -void early_tick(VCPU *vcpu) -{ - UINT64 *p = &PSCBX(vcpu,irr[3]); - printf("vcpu_check_pending: about to deliver early tick\n"); - printf("&irr[0]=%p, irr[0]=0x%lx\n",p,*p); -} - -#define IA64_TPR_MMI 0x10000 -#define IA64_TPR_MIC 0x000f0 - -/* checks to see if a VCPU has any unmasked pending interrupts - * if so, returns the highest, else returns SPURIOUS_VECTOR */ -/* NOTE: Since this gets called from vcpu_get_ivr() and the - * semantics of "mov rx=cr.ivr" ignore the setting of the psr.i bit, - * this routine also ignores pscb.interrupt_delivery_enabled - * and this must be checked independently; see vcpu_deliverable interrupts() */ -UINT64 vcpu_check_pending_interrupts(VCPU *vcpu) -{ - UINT64 *p, *q, *r, bits, bitnum, mask, i, vector; - - p = &PSCBX(vcpu,irr[3]); - /* q = &PSCB(vcpu,delivery_mask[3]); */ - r = &PSCBX(vcpu,insvc[3]); - for (i = 3; ; p--, q--, r--, i--) { - bits = *p /* & *q */; - if (bits) break; // got a potential interrupt - if (*r) { - // nothing in this word which is pending+inservice - // but there is one inservice which masks lower - return SPURIOUS_VECTOR; - } - if (i == 0) { - // checked all bits... nothing pending+inservice - return SPURIOUS_VECTOR; - } - } - // have a pending,deliverable interrupt... see if it is masked - bitnum = ia64_fls(bits); -//printf("XXXXXXX vcpu_check_pending_interrupts: got bitnum=%p...",bitnum); - vector = bitnum+(i*64); - mask = 1L << bitnum; -//printf("XXXXXXX vcpu_check_pending_interrupts: got vector=%p...",vector); - if (*r >= mask) { - // masked by equal inservice -//printf("but masked by equal inservice\n"); - return SPURIOUS_VECTOR; - } - if (PSCB(vcpu,tpr) & IA64_TPR_MMI) { - // tpr.mmi is set -//printf("but masked by tpr.mmi\n"); - return SPURIOUS_VECTOR; - } - if (((PSCB(vcpu,tpr) & IA64_TPR_MIC) + 15) >= vector) { - //tpr.mic masks class -//printf("but masked by tpr.mic\n"); - return SPURIOUS_VECTOR; - } - -//printf("returned to caller\n"); -#if 0 -if (vector == (PSCB(vcpu,itv) & 0xff)) { - UINT64 now = ia64_get_itc(); - UINT64 itm = PSCBX(vcpu,domain_itm); - if (now < itm) early_tick(vcpu); - -} -#endif - return vector; -} - -UINT64 vcpu_deliverable_interrupts(VCPU *vcpu) -{ - return (vcpu_get_psr_i(vcpu) && - vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR); -} - -UINT64 vcpu_deliverable_timer(VCPU *vcpu) -{ - return (vcpu_get_psr_i(vcpu) && - vcpu_check_pending_interrupts(vcpu) == PSCB(vcpu,itv)); -} - -IA64FAULT vcpu_get_lid(VCPU *vcpu, UINT64 *pval) -{ -extern unsigned long privop_trace; -//privop_trace=1; - //TODO: Implement this - printf("vcpu_get_lid: WARNING: Getting cr.lid always returns zero\n"); - //*pval = 0; - *pval = ia64_getreg(_IA64_REG_CR_LID); - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval) -{ - int i; - UINT64 vector, mask; - -#define HEARTBEAT_FREQ 16 // period in seconds -#ifdef HEARTBEAT_FREQ -#define N_DOMS 16 // period in seconds - static long count[N_DOMS] = { 0 }; - static long nonclockcount[N_DOMS] = { 0 }; - REGS *regs = vcpu_regs(vcpu); - unsigned domid = vcpu->domain->domain_id; -#endif -#ifdef IRQ_DEBUG - static char firstivr = 1; - static char firsttime[256]; - if (firstivr) { - int i; - for (i=0;i<256;i++) firsttime[i]=1; - firstivr=0; - } -#endif - - vector = vcpu_check_pending_interrupts(vcpu); - if (vector == SPURIOUS_VECTOR) { - PSCB(vcpu,pending_interruption) = 0; - *pval = vector; 
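For reference, the tpr-based part of the masking test in vcpu_check_pending_interrupts above reduces to: a pending vector is deliverable only when tpr.mmi is clear and the vector's interrupt class (vector/16) is strictly greater than tpr.mic (equal-or-higher in-service vectors mask independently). A minimal stand-alone sketch of that check, using plain integers instead of the PSCB fields; masked_by_tpr is an illustrative name, not part of the hypervisor:

#include <stdint.h>

#define TPR_MMI 0x10000   /* mask-all-maskable bit, as IA64_TPR_MMI above        */
#define TPR_MIC 0x000f0   /* interrupt-class field (bits 4..7), as IA64_TPR_MIC  */

/* Return 1 if vector (0..255) would be masked by the given tpr value. */
static int masked_by_tpr(uint64_t tpr, uint64_t vector)
{
    if (tpr & TPR_MMI)
        return 1;                              /* all maskable interrupts masked */
    return ((tpr & TPR_MIC) + 15) >= vector;   /* class <= tpr.mic => masked     */
}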
- return IA64_NO_FAULT; - } -#ifdef HEARTBEAT_FREQ - if (domid >= N_DOMS) domid = N_DOMS-1; - if (vector == (PSCB(vcpu,itv) & 0xff)) { - if (!(++count[domid] & ((HEARTBEAT_FREQ*1024)-1))) { - printf("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n", - domid, count[domid], nonclockcount[domid]); - //count[domid] = 0; - //dump_runq(); - } - } - else nonclockcount[domid]++; -#endif - // now have an unmasked, pending, deliverable vector! - // getting ivr has "side effects" -#ifdef IRQ_DEBUG - if (firsttime[vector]) { - printf("*** First get_ivr on vector=%d,itc=%lx\n", - vector,ia64_get_itc()); - firsttime[vector]=0; - } -#endif - i = vector >> 6; - mask = 1L << (vector & 0x3f); -//printf("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %ld\n",vector); - PSCBX(vcpu,insvc[i]) |= mask; - PSCBX(vcpu,irr[i]) &= ~mask; - //PSCB(vcpu,pending_interruption)--; - *pval = vector; - // if delivering a timer interrupt, remember domain_itm - if (vector == (PSCB(vcpu,itv) & 0xff)) { - PSCBX(vcpu,domain_itm_last) = PSCBX(vcpu,domain_itm); - } - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_get_tpr(VCPU *vcpu, UINT64 *pval) -{ - *pval = PSCB(vcpu,tpr); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_eoi(VCPU *vcpu, UINT64 *pval) -{ - *pval = 0L; // reads of eoi always return 0 - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_irr0(VCPU *vcpu, UINT64 *pval) -{ -#ifndef IRR_USE_FIXED - printk("vcpu_get_irr: called, not implemented yet\n"); - return IA64_ILLOP_FAULT; -#else - *pval = vcpu->irr[0]; - return (IA64_NO_FAULT); -#endif -} - -IA64FAULT vcpu_get_irr1(VCPU *vcpu, UINT64 *pval) -{ -#ifndef IRR_USE_FIXED - printk("vcpu_get_irr: called, not implemented yet\n"); - return IA64_ILLOP_FAULT; -#else - *pval = vcpu->irr[1]; - return (IA64_NO_FAULT); -#endif -} - -IA64FAULT vcpu_get_irr2(VCPU *vcpu, UINT64 *pval) -{ -#ifndef IRR_USE_FIXED - printk("vcpu_get_irr: called, not implemented yet\n"); - return IA64_ILLOP_FAULT; -#else - *pval = vcpu->irr[2]; - return (IA64_NO_FAULT); -#endif -} - -IA64FAULT vcpu_get_irr3(VCPU *vcpu, UINT64 *pval) -{ -#ifndef IRR_USE_FIXED - printk("vcpu_get_irr: called, not implemented yet\n"); - return IA64_ILLOP_FAULT; -#else - *pval = vcpu->irr[3]; - return (IA64_NO_FAULT); -#endif -} - -IA64FAULT vcpu_get_itv(VCPU *vcpu, UINT64 *pval) -{ - *pval = PSCB(vcpu,itv); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_pmv(VCPU *vcpu, UINT64 *pval) -{ - *pval = PSCB(vcpu,pmv); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_cmcv(VCPU *vcpu, UINT64 *pval) -{ - *pval = PSCB(vcpu,cmcv); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_lrr0(VCPU *vcpu, UINT64 *pval) -{ - // fix this when setting values other than m-bit is supported - printf("vcpu_get_lrr0: Unmasked interrupts unsupported\n"); - *pval = (1L << 16); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_lrr1(VCPU *vcpu, UINT64 *pval) -{ - // fix this when setting values other than m-bit is supported - printf("vcpu_get_lrr1: Unmasked interrupts unsupported\n"); - *pval = (1L << 16); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_lid(VCPU *vcpu, UINT64 val) -{ - printf("vcpu_set_lid: Setting cr.lid is unsupported\n"); - return (IA64_ILLOP_FAULT); -} - -IA64FAULT vcpu_set_tpr(VCPU *vcpu, UINT64 val) -{ - if (val & 0xff00) return IA64_RSVDREG_FAULT; - PSCB(vcpu,tpr) = val; - if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) - PSCB(vcpu,pending_interruption) = 1; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val) -{ - UINT64 *p, bits, vec, bitnum; - int i; - - p = &PSCBX(vcpu,insvc[3]); - 
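The cr.ivr read above and this cr.eoi write manipulate the same pair of 256-bit maps: irr (pending) and insvc (in-service), each held as four 64-bit words scanned from the top word down. A self-contained sketch of that bookkeeping with the masking checks omitted; fls64, take_highest_pending and eoi_highest_inservice are illustrative names, and ia64_fls is approximated with a GCC builtin:

#include <stdint.h>

/* Highest set bit (0..63) of a non-zero word; stands in for ia64_fls(). */
static int fls64(uint64_t w) { return 63 - __builtin_clzll(w); }

/* cr.ivr read: move the highest pending vector from irr[] to insvc[].
 * Returns the vector, or -1 if nothing is pending. */
static int take_highest_pending(uint64_t irr[4], uint64_t insvc[4])
{
    for (int i = 3; i >= 0; i--) {
        if (!irr[i])
            continue;
        int bit = fls64(irr[i]);
        irr[i]   &= ~(1ULL << bit);   /* no longer merely pending */
        insvc[i] |=  (1ULL << bit);   /* in service until EOI     */
        return i * 64 + bit;
    }
    return -1;
}

/* cr.eoi write: retire the highest in-service vector, if any. */
static void eoi_highest_inservice(uint64_t insvc[4])
{
    for (int i = 3; i >= 0; i--) {
        if (insvc[i]) {
            insvc[i] &= ~(1ULL << fls64(insvc[i]));
            return;
        }
    }
}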
for (i = 3; (i >= 0) && !(bits = *p); i--, p--); - if (i < 0) { - printf("Trying to EOI interrupt when none are in-service.\r\n"); - return; - } - bitnum = ia64_fls(bits); - vec = bitnum + (i*64); - /* clear the correct bit */ - bits &= ~(1L << bitnum); - *p = bits; - /* clearing an eoi bit may unmask another pending interrupt... */ - if (PSCB(vcpu,interrupt_delivery_enabled)) { // but only if enabled... - // worry about this later... Linux only calls eoi - // with interrupts disabled - printf("Trying to EOI interrupt with interrupts enabled\r\n"); - } - if (vcpu_check_pending_interrupts(vcpu) != SPURIOUS_VECTOR) - PSCB(vcpu,pending_interruption) = 1; -//printf("YYYYY vcpu_set_eoi: Successful\n"); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_lrr0(VCPU *vcpu, UINT64 val) -{ - if (!(val & (1L << 16))) { - printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n"); - return (IA64_ILLOP_FAULT); - } - // no place to save this state but nothing to do anyway - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_lrr1(VCPU *vcpu, UINT64 val) -{ - if (!(val & (1L << 16))) { - printf("vcpu_set_lrr0: Unmasked interrupts unsupported\n"); - return (IA64_ILLOP_FAULT); - } - // no place to save this state but nothing to do anyway - return (IA64_NO_FAULT); -} - -// parameter is a time interval specified in cycles -void vcpu_enable_timer(VCPU *vcpu,UINT64 cycles) -{ - PSCBX(vcpu,xen_timer_interval) = cycles; - vcpu_set_next_timer(vcpu); - printf("vcpu_enable_timer(%d): interval set to %d cycles\n", - PSCBX(vcpu,xen_timer_interval)); - __set_bit(PSCB(vcpu,itv), PSCB(vcpu,delivery_mask)); -} - -IA64FAULT vcpu_set_itv(VCPU *vcpu, UINT64 val) -{ -extern unsigned long privop_trace; -//privop_trace=1; - if (val & 0xef00) return (IA64_ILLOP_FAULT); - PSCB(vcpu,itv) = val; - if (val & 0x10000) { -printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 0\n",val,PSCBX(vcpu,domain_itm)); - PSCBX(vcpu,domain_itm) = 0; - } - else vcpu_enable_timer(vcpu,1000000L); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_pmv(VCPU *vcpu, UINT64 val) -{ - if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT; - PSCB(vcpu,pmv) = val; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_cmcv(VCPU *vcpu, UINT64 val) -{ - if (val & 0xef00) /* reserved fields */ return IA64_RSVDREG_FAULT; - PSCB(vcpu,cmcv) = val; - return (IA64_NO_FAULT); -} - -/************************************************************************** - VCPU temporary register access routines -**************************************************************************/ -UINT64 vcpu_get_tmp(VCPU *vcpu, UINT64 index) -{ - if (index > 7) return 0; - return PSCB(vcpu,tmp[index]); -} - -void vcpu_set_tmp(VCPU *vcpu, UINT64 index, UINT64 val) -{ - if (index <= 7) PSCB(vcpu,tmp[index]) = val; -} - -/************************************************************************** -Interval timer routines -**************************************************************************/ - -BOOLEAN vcpu_timer_disabled(VCPU *vcpu) -{ - UINT64 itv = PSCB(vcpu,itv); - return(!itv || !!(itv & 0x10000)); -} - -BOOLEAN vcpu_timer_inservice(VCPU *vcpu) -{ - UINT64 itv = PSCB(vcpu,itv); - return (test_bit(itv, PSCBX(vcpu,insvc))); -} - -BOOLEAN vcpu_timer_expired(VCPU *vcpu) -{ - unsigned long domain_itm = PSCBX(vcpu,domain_itm); - unsigned long now = ia64_get_itc(); - - if (!domain_itm) return FALSE; - if (now < domain_itm) return FALSE; - if (vcpu_timer_disabled(vcpu)) return FALSE; - return TRUE; -} - -void vcpu_safe_set_itm(unsigned long val) -{ - unsigned long epsilon = 100; - UINT64 
now = ia64_get_itc(); - - local_irq_disable(); - while (1) { -//printf("*** vcpu_safe_set_itm: Setting itm to %lx, itc=%lx\n",val,now); - ia64_set_itm(val); - if (val > (now = ia64_get_itc())) break; - val = now + epsilon; - epsilon <<= 1; - } - local_irq_enable(); -} - -void vcpu_set_next_timer(VCPU *vcpu) -{ - UINT64 d = PSCBX(vcpu,domain_itm); - //UINT64 s = PSCBX(vcpu,xen_itm); - UINT64 s = local_cpu_data->itm_next; - UINT64 now = ia64_get_itc(); - //UINT64 interval = PSCBX(vcpu,xen_timer_interval); - - /* gloss over the wraparound problem for now... we know it exists - * but it doesn't matter right now */ - -#if 0 - /* ensure at least next SP tick is in the future */ - if (!interval) PSCBX(vcpu,xen_itm) = now + -#if 0 - (running_on_sim() ? SIM_DEFAULT_CLOCK_RATE : - DEFAULT_CLOCK_RATE); -#else - 3000000; -//printf("vcpu_set_next_timer: HACK!\n"); -#endif -#if 0 - if (PSCBX(vcpu,xen_itm) < now) - while (PSCBX(vcpu,xen_itm) < now + (interval>>1)) - PSCBX(vcpu,xen_itm) += interval; -#endif -#endif - - if (is_idle_task(vcpu->domain)) { - printf("****** vcpu_set_next_timer called during idle!!\n"); - } - //s = PSCBX(vcpu,xen_itm); - if (d && (d > now) && (d < s)) { - vcpu_safe_set_itm(d); - //using_domain_as_itm++; - } - else { - vcpu_safe_set_itm(s); - //using_xen_as_itm++; - } -} - -IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT64 val) -{ - UINT now = ia64_get_itc(); - - //if (val < now) val = now + 1000; -//printf("*** vcpu_set_itm: called with %lx\n",val); - PSCBX(vcpu,domain_itm) = val; - vcpu_set_next_timer(vcpu); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_itc(VCPU *vcpu, UINT64 val) -{ - - UINT64 oldnow = ia64_get_itc(); - UINT64 olditm = PSCBX(vcpu,domain_itm); - unsigned long d = olditm - oldnow; - unsigned long x = local_cpu_data->itm_next - oldnow; - - UINT64 newnow = val, min_delta; - -#define DISALLOW_SETTING_ITC_FOR_NOW -#ifdef DISALLOW_SETTING_ITC_FOR_NOW -printf("vcpu_set_itc: Setting ar.itc is currently disabled\n"); -#else - local_irq_disable(); - if (olditm) { -printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d); - PSCBX(vcpu,domain_itm) = newnow + d; - } - local_cpu_data->itm_next = newnow + x; - d = PSCBX(vcpu,domain_itm); - x = local_cpu_data->itm_next; - - ia64_set_itc(newnow); - if (d && (d > newnow) && (d < x)) { - vcpu_safe_set_itm(d); - //using_domain_as_itm++; - } - else { - vcpu_safe_set_itm(x); - //using_xen_as_itm++; - } - local_irq_enable(); -#endif - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_itm(VCPU *vcpu, UINT64 *pval) -{ - //FIXME: Implement this - printf("vcpu_get_itm: Getting cr.itm is unsupported... 
continuing\n"); - return (IA64_NO_FAULT); - //return (IA64_ILLOP_FAULT); -} - -IA64FAULT vcpu_get_itc(VCPU *vcpu, UINT64 *pval) -{ - //TODO: Implement this - printf("vcpu_get_itc: Getting ar.itc is unsupported\n"); - return (IA64_ILLOP_FAULT); -} - -void vcpu_pend_timer(VCPU *vcpu) -{ - UINT64 itv = PSCB(vcpu,itv) & 0xff; - - if (vcpu_timer_disabled(vcpu)) return; - //if (vcpu_timer_inservice(vcpu)) return; - if (PSCBX(vcpu,domain_itm_last) == PSCBX(vcpu,domain_itm)) { - // already delivered an interrupt for this so - // don't deliver another - return; - } -#if 0 - // attempt to flag "timer tick before its due" source - { - UINT64 itm = PSCBX(vcpu,domain_itm); - UINT64 now = ia64_get_itc(); - if (now < itm) printf("******* vcpu_pend_timer: pending before due!\n"); - } -#endif - vcpu_pend_interrupt(vcpu, itv); -} - -// returns true if ready to deliver a timer interrupt too early -UINT64 vcpu_timer_pending_early(VCPU *vcpu) -{ - UINT64 now = ia64_get_itc(); - UINT64 itm = PSCBX(vcpu,domain_itm); - - if (vcpu_timer_disabled(vcpu)) return 0; - if (!itm) return 0; - return (vcpu_deliverable_timer(vcpu) && (now < itm)); -} - -//FIXME: This is a hack because everything dies if a timer tick is lost -void vcpu_poke_timer(VCPU *vcpu) -{ - UINT64 itv = PSCB(vcpu,itv) & 0xff; - UINT64 now = ia64_get_itc(); - UINT64 itm = PSCBX(vcpu,domain_itm); - UINT64 irr; - - if (vcpu_timer_disabled(vcpu)) return; - if (!itm) return; - if (itv != 0xefL) { - printf("vcpu_poke_timer: unimplemented itv=%lx!\n",itv); - while(1); - } - // using 0xef instead of itv so can get real irr - if (now > itm && !test_bit(0xefL, PSCBX(vcpu,insvc))) { - if (!test_bit(0xefL,PSCBX(vcpu,irr))) { - irr = ia64_getreg(_IA64_REG_CR_IRR3); - if (irr & (1L<<(0xef-0xc0))) return; -if (now-itm>0x800000) -printf("*** poking timer: now=%lx,vitm=%lx,xitm=%lx,itm=%lx\n",now,itm,local_cpu_data->itm_next,ia64_get_itm()); - vcpu_pend_timer(vcpu); - } - } -} - - -/************************************************************************** -Privileged operation emulation routines -**************************************************************************/ - -IA64FAULT vcpu_force_data_miss(VCPU *vcpu, UINT64 ifa) -{ - PSCB(vcpu,tmp[0]) = ifa; // save ifa in vcpu structure, then specify IA64_FORCED_IFA - return (vcpu_get_rr_ve(vcpu,ifa) ? IA64_DATA_TLB_VECTOR : IA64_ALT_DATA_TLB_VECTOR) | IA64_FORCED_IFA; -} - - -IA64FAULT vcpu_rfi(VCPU *vcpu) -{ - // TODO: Only allowed for current vcpu - PSR psr; - UINT64 int_enable, regspsr = 0; - UINT64 ifs; - REGS *regs = vcpu_regs(vcpu); - extern void dorfirfi(void); - - psr.i64 = PSCB(vcpu,ipsr); - if (psr.ia64_psr.cpl < 3) psr.ia64_psr.cpl = 2; - if (psr.ia64_psr.i) PSCB(vcpu,interrupt_delivery_enabled) = 1; - int_enable = psr.ia64_psr.i; - if (psr.ia64_psr.ic) PSCB(vcpu,interrupt_collection_enabled) = 1; - if (psr.ia64_psr.dt && psr.ia64_psr.rt && psr.ia64_psr.it) vcpu_set_metaphysical_mode(vcpu,FALSE); - else vcpu_set_metaphysical_mode(vcpu,TRUE); - psr.ia64_psr.ic = 1; psr.ia64_psr.i = 1; - psr.ia64_psr.dt = 1; psr.ia64_psr.rt = 1; psr.ia64_psr.it = 1; - psr.ia64_psr.bn = 1; - //psr.pk = 1; // checking pkeys shouldn't be a problem but seems broken - if (psr.ia64_psr.be) { - printf("*** DOMAIN TRYING TO TURN ON BIG-ENDIAN!!!\n"); - return (IA64_ILLOP_FAULT); - } - PSCB(vcpu,incomplete_regframe) = 0; // is this necessary? 
- ifs = PSCB(vcpu,ifs); - //if ((ifs & regs->cr_ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) { - //if ((ifs & 0x8000000000000000L) && ifs != regs->cr_ifs) { - if (ifs & regs->cr_ifs & 0x8000000000000000L) { - // TODO: validate PSCB(vcpu,iip) - // TODO: PSCB(vcpu,ipsr) = psr; - PSCB(vcpu,ipsr) = psr.i64; - // now set up the trampoline - regs->cr_iip = *(unsigned long *)dorfirfi; // function pointer!! - __asm__ __volatile ("mov %0=psr;;":"=r"(regspsr)::"memory"); - regs->cr_ipsr = regspsr & ~(IA64_PSR_I | IA64_PSR_IC | IA64_PSR_BN); - } - else { - regs->cr_ipsr = psr.i64; - regs->cr_iip = PSCB(vcpu,iip); - } - PSCB(vcpu,interrupt_collection_enabled) = 1; - vcpu_bsw1(vcpu); - PSCB(vcpu,interrupt_delivery_enabled) = int_enable; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_cover(VCPU *vcpu) -{ - // TODO: Only allowed for current vcpu - REGS *regs = vcpu_regs(vcpu); - - if (!PSCB(vcpu,interrupt_collection_enabled)) { - if (!PSCB(vcpu,incomplete_regframe)) - PSCB(vcpu,ifs) = regs->cr_ifs; - else PSCB(vcpu,incomplete_regframe) = 0; - } - regs->cr_ifs = 0; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval) -{ - UINT64 pta = PSCB(vcpu,pta); - UINT64 pta_sz = (pta & IA64_PTA_SZ(0x3f)) >> IA64_PTA_SZ_BIT; - UINT64 pta_base = pta & ~((1UL << IA64_PTA_BASE_BIT)-1); - UINT64 Mask = (1L << pta_sz) - 1; - UINT64 Mask_60_15 = (Mask >> 15) & 0x3fffffffffff; - UINT64 compMask_60_15 = ~Mask_60_15; - //UINT64 rr_ps = RR_TO_PS(get_rr(vadr)); - UINT64 rr_ps = vcpu_get_rr_ps(vcpu,vadr); - UINT64 VHPT_offset = (vadr >> rr_ps) << 3; - UINT64 VHPT_addr1 = vadr & 0xe000000000000000L; - UINT64 VHPT_addr2a = - ((pta_base >> 15) & 0x3fffffffffff) & compMask_60_15; - UINT64 VHPT_addr2b = - ((VHPT_offset >> 15) & 0x3fffffffffff) & Mask_60_15;; - UINT64 VHPT_addr3 = VHPT_offset & 0x7fff; - UINT64 VHPT_addr = VHPT_addr1 | ((VHPT_addr2a | VHPT_addr2b) << 15) | - VHPT_addr3; - -#if 0 - if (VHPT_addr1 == 0xe000000000000000L) { - printf("vcpu_thash: thash unsupported with rr7 @%lx\n", - PSCB(vcpu,iip)); - return (IA64_ILLOP_FAULT); - } -#endif -//verbose("vcpu_thash: vadr=%p, VHPT_addr=%p\n",vadr,VHPT_addr); - *pval = VHPT_addr; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *padr) -{ - printf("vcpu_ttag: ttag instruction unsupported\n"); - return (IA64_ILLOP_FAULT); -} - -#define itir_ps(itir) ((itir >> 2) & 0x3f) -#define itir_mask(itir) (~((1UL << itir_ps(itir)) - 1)) - -unsigned long vhpt_translate_count = 0; - -IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir) -{ - unsigned long pta, pta_mask, iha, pte, ps; - TR_ENTRY *trp; - ia64_rr rr; - - if (!(address >> 61)) { - if (!PSCB(vcpu,metaphysical_mode)) { - REGS *regs = vcpu_regs(vcpu); - unsigned long viip = PSCB(vcpu,iip); - unsigned long vipsr = PSCB(vcpu,ipsr); - unsigned long iip = regs->cr_iip; - unsigned long ipsr = regs->cr_ipsr; - printk("vcpu_translate: bad address %p, viip=%p, vipsr=%p, iip=%p, ipsr=%p continuing\n", address, viip, vipsr, iip, ipsr); - } - - *pteval = (address & _PAGE_PPN_MASK) | __DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX; - *itir = PAGE_SHIFT << 2; - phys_translate_count++; - return IA64_NO_FAULT; - } - - /* check translation registers */ - if ((trp = match_tr(vcpu,address))) { - tr_translate_count++; - *pteval = trp->page_flags; - *itir = trp->itir; - return IA64_NO_FAULT; - } - - /* check 1-entry TLB */ - if ((trp = match_dtlb(vcpu,address))) { - dtlb_translate_count++; - *pteval = trp->page_flags; - *itir = 
trp->itir; - return IA64_NO_FAULT; - } - - /* check guest VHPT */ - pta = PSCB(vcpu,pta); - rr.rrval = PSCB(vcpu,rrs)[address>>61]; - if (rr.ve && (pta & IA64_PTA_VE)) - { - if (pta & IA64_PTA_VF) - { - /* long format VHPT - not implemented */ - return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR); - } - else - { - /* short format VHPT */ - - /* avoid recursively walking VHPT */ - pta_mask = (itir_mask(pta) << 3) >> 3; - if (((address ^ pta) & pta_mask) == 0) - return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR); - - vcpu_thash(vcpu, address, &iha); - if (__copy_from_user(&pte, (void *)iha, sizeof(pte)) != 0) - return IA64_VHPT_TRANS_VECTOR; - - /* - * Optimisation: this VHPT walker aborts on not-present pages - * instead of inserting a not-present translation, this allows - * vectoring directly to the miss handler. - \ */ - if (pte & _PAGE_P) - { - *pteval = pte; - *itir = vcpu_get_itir_on_fault(vcpu,address); - vhpt_translate_count++; - return IA64_NO_FAULT; - } - return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR); - } - } - return (is_data ? IA64_ALT_DATA_TLB_VECTOR : IA64_ALT_INST_TLB_VECTOR); -} - -IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr) -{ - UINT64 pteval, itir, mask; - IA64FAULT fault; - - fault = vcpu_translate(vcpu, vadr, 1, &pteval, &itir); - if (fault == IA64_NO_FAULT) - { - mask = itir_mask(itir); - *padr = (pteval & _PAGE_PPN_MASK & mask) | (vadr & ~mask); - return (IA64_NO_FAULT); - } - else - { - PSCB(vcpu,tmp[0]) = vadr; // save ifa in vcpu structure, then specify IA64_FORCED_IFA - return (fault | IA64_FORCED_IFA); - } -} - -IA64FAULT vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key) -{ - printf("vcpu_tak: tak instruction unsupported\n"); - return (IA64_ILLOP_FAULT); - // HACK ALERT: tak does a thash for now - //return vcpu_thash(vcpu,vadr,key); -} - -/************************************************************************** - VCPU debug breakpoint register access routines -**************************************************************************/ - -IA64FAULT vcpu_set_dbr(VCPU *vcpu, UINT64 reg, UINT64 val) -{ - // TODO: unimplemented DBRs return a reserved register fault - // TODO: Should set Logical CPU state, not just physical - ia64_set_dbr(reg,val); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_ibr(VCPU *vcpu, UINT64 reg, UINT64 val) -{ - // TODO: unimplemented IBRs return a reserved register fault - // TODO: Should set Logical CPU state, not just physical - ia64_set_ibr(reg,val); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_dbr(VCPU *vcpu, UINT64 reg, UINT64 *pval) -{ - // TODO: unimplemented DBRs return a reserved register fault - UINT64 val = ia64_get_dbr(reg); - *pval = val; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_ibr(VCPU *vcpu, UINT64 reg, UINT64 *pval) -{ - // TODO: unimplemented IBRs return a reserved register fault - UINT64 val = ia64_get_ibr(reg); - *pval = val; - return (IA64_NO_FAULT); -} - -/************************************************************************** - VCPU performance monitor register access routines -**************************************************************************/ - -IA64FAULT vcpu_set_pmc(VCPU *vcpu, UINT64 reg, UINT64 val) -{ - // TODO: Should set Logical CPU state, not just physical - // NOTE: Writes to unimplemented PMC registers are discarded - ia64_set_pmc(reg,val); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_set_pmd(VCPU *vcpu, UINT64 reg, UINT64 val) -{ - // TODO: Should set Logical CPU state, not just physical - // NOTE: Writes to unimplemented 
PMD registers are discarded - ia64_set_pmd(reg,val); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_pmc(VCPU *vcpu, UINT64 reg, UINT64 *pval) -{ - // NOTE: Reads from unimplemented PMC registers return zero - UINT64 val = (UINT64)ia64_get_pmc(reg); - *pval = val; - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_pmd(VCPU *vcpu, UINT64 reg, UINT64 *pval) -{ - // NOTE: Reads from unimplemented PMD registers return zero - UINT64 val = (UINT64)ia64_get_pmd(reg); - *pval = val; - return (IA64_NO_FAULT); -} - -/************************************************************************** - VCPU banked general register access routines -**************************************************************************/ - -IA64FAULT vcpu_bsw0(VCPU *vcpu) -{ - // TODO: Only allowed for current vcpu - REGS *regs = vcpu_regs(vcpu); - unsigned long *r = ®s->r16; - unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]); - unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]); - int i; - - if (PSCB(vcpu,banknum)) { - for (i = 0; i < 16; i++) { *b1++ = *r; *r++ = *b0++; } - PSCB(vcpu,banknum) = 0; - } - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_bsw1(VCPU *vcpu) -{ - // TODO: Only allowed for current vcpu - REGS *regs = vcpu_regs(vcpu); - unsigned long *r = ®s->r16; - unsigned long *b0 = &PSCB(vcpu,bank0_regs[0]); - unsigned long *b1 = &PSCB(vcpu,bank1_regs[0]); - int i; - - if (!PSCB(vcpu,banknum)) { - for (i = 0; i < 16; i++) { *b0++ = *r; *r++ = *b1++; } - PSCB(vcpu,banknum) = 1; - } - return (IA64_NO_FAULT); -} - -/************************************************************************** - VCPU cpuid access routines -**************************************************************************/ - - -IA64FAULT vcpu_get_cpuid(VCPU *vcpu, UINT64 reg, UINT64 *pval) -{ - // FIXME: This could get called as a result of a rsvd-reg fault - // if reg > 3 - switch(reg) { - case 0: - memcpy(pval,"Xen/ia64",8); - break; - case 1: - *pval = 0; - break; - case 2: - *pval = 0; - break; - case 3: - *pval = ia64_get_cpuid(3); - break; - case 4: - *pval = ia64_get_cpuid(4); - break; - default: - if (reg > (ia64_get_cpuid(3) & 0xff)) - return IA64_RSVDREG_FAULT; - *pval = ia64_get_cpuid(reg); - break; - } - return (IA64_NO_FAULT); -} - -/************************************************************************** - VCPU region register access routines -**************************************************************************/ - -unsigned long vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr) -{ - ia64_rr rr; - - rr.rrval = PSCB(vcpu,rrs)[vadr>>61]; - return(rr.ve); -} - -IA64FAULT vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val) -{ - PSCB(vcpu,rrs)[reg>>61] = val; - // warning: set_one_rr() does it "live" - set_one_rr(reg,val); - return (IA64_NO_FAULT); -} - -IA64FAULT vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval) -{ - UINT val = PSCB(vcpu,rrs)[reg>>61]; - *pval = val; - return (IA64_NO_FAULT); -} - -/************************************************************************** - VCPU protection key register access routines -**************************************************************************/ - -IA64FAULT vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval) -{ -#ifndef PKR_USE_FIXED - printk("vcpu_get_pkr: called, not implemented yet\n"); - return IA64_ILLOP_FAULT; -#else - UINT64 val = (UINT64)ia64_get_pkr(reg); - *pval = val; - return (IA64_NO_FAULT); -#endif -} - -IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val) -{ -#ifndef PKR_USE_FIXED - printk("vcpu_set_pkr: called, not implemented yet\n"); - return IA64_ILLOP_FAULT; -#else -// if 
(reg >= NPKRS) return (IA64_ILLOP_FAULT); - vcpu->pkrs[reg] = val; - ia64_set_pkr(reg,val); - return (IA64_NO_FAULT); -#endif -} - -/************************************************************************** - VCPU translation register access routines -**************************************************************************/ - -static void vcpu_purge_tr_entry(TR_ENTRY *trp) -{ - trp->p = 0; -} - -static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 ifa) -{ - UINT64 ps; - - trp->itir = itir; - trp->rid = virtualize_rid(current, get_rr(ifa) & RR_RID_MASK); - trp->p = 1; - ps = trp->ps; - trp->page_flags = pte; - if (trp->pl < 2) trp->pl = 2; - trp->vadr = ifa & ~0xfff; - if (ps > 12) { // "ignore" relevant low-order bits - trp->ppn &= ~((1UL<<(ps-12))-1); - trp->vadr &= ~((1UL<<ps)-1); - } -} - -TR_ENTRY *vcpu_match_tr_entry(VCPU *vcpu, TR_ENTRY *trp, UINT64 ifa, int count) -{ - unsigned long rid = (get_rr(ifa) & RR_RID_MASK); - int i; - - for (i = 0; i < count; i++, trp++) { - if (!trp->p) continue; - if (physicalize_rid(vcpu,trp->rid) != rid) continue; - if (ifa < trp->vadr) continue; - if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue; - //if (trp->key && !match_pkr(vcpu,trp->key)) continue; - return trp; - } - return 0; -} - -TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa) -{ - TR_ENTRY *trp; - - trp = vcpu_match_tr_entry(vcpu,vcpu->arch.dtrs,ifa,NDTRS); - if (trp) return trp; - trp = vcpu_match_tr_entry(vcpu,vcpu->arch.itrs,ifa,NITRS); - if (trp) return trp; - return 0; -} - -IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte, - UINT64 itir, UINT64 ifa) -{ - TR_ENTRY *trp; - - if (slot >= NDTRS) return IA64_RSVDREG_FAULT; - trp = &PSCBX(vcpu,dtrs[slot]); -//printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa); - vcpu_set_tr_entry(trp,pte,itir,ifa); - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte, - UINT64 itir, UINT64 ifa) -{ - TR_ENTRY *trp; - - if (slot >= NITRS) return IA64_RSVDREG_FAULT; - trp = &PSCBX(vcpu,itrs[slot]); -//printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa); - vcpu_set_tr_entry(trp,pte,itir,ifa); - return IA64_NO_FAULT; -} - -/************************************************************************** - VCPU translation cache access routines -**************************************************************************/ - -void foobar(void) { /*vcpu_verbose = 1;*/ } - -extern struct domain *dom0; - -void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps) -{ - unsigned long psr; - unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT; - - // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK! - // FIXME, must be inlined or potential for nested fault here! - if ((vcpu->domain==dom0) && (logps < PAGE_SHIFT)) { - printf("vcpu_itc_no_srlz: domain0 use of smaller page size!\n"); - //FIXME: kill domain here - while(1); - } - psr = ia64_clear_ic(); - ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings - ia64_set_psr(psr); - // ia64_srlz_i(); // no srls req'd, will rfi later -#ifdef VHPT_GLOBAL - if (vcpu->domain==dom0 && ((vaddr >> 61) == 7)) { - // FIXME: this is dangerous... vhpt_flush_address ensures these - // addresses never get flushed. More work needed if this - // ever happens. 
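For comparison with vcpu_match_tr_entry above: a translation-register lookup hits when the entry is present, was inserted under the same region id, and covers the faulting address. A minimal model; struct tr_entry and tr_hit are illustrative, and the upper bound is written here as the conventional half-open range, whereas the loop above excludes the last byte of the page:

#include <stdint.h>

/* Minimal model of a translation-register entry. */
struct tr_entry {
    int      p;        /* present                                */
    uint64_t rid;      /* region id the entry was inserted under */
    uint64_t vadr;     /* virtual address, page-aligned          */
    unsigned ps;       /* log2 of the page size                  */
};

static int tr_hit(const struct tr_entry *tr, uint64_t rid, uint64_t ifa)
{
    if (!tr->p || tr->rid != rid)
        return 0;
    return ifa >= tr->vadr && ifa < tr->vadr + ((uint64_t)1 << tr->ps);
}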
-//printf("vhpt_insert(%p,%p,%p)\n",vaddr,pte,1L<<logps); - if (logps > PAGE_SHIFT) vhpt_multiple_insert(vaddr,pte,logps); - else vhpt_insert(vaddr,pte,logps<<2); - } - // even if domain pagesize is larger than PAGE_SIZE, just put - // PAGE_SIZE mapping in the vhpt for now, else purging is complicated - else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2); -#endif - if (IorD & 0x4) return; // don't place in 1-entry TLB - if (IorD & 0x1) { - vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr); - PSCBX(vcpu,itlb_pte) = mp_pte; - } - if (IorD & 0x2) { - vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr); - PSCBX(vcpu,dtlb_pte) = mp_pte; - } -} - -// NOTE: returns a physical pte, NOT a "metaphysical" pte, so do not check -// the physical address contained for correctness -TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa) -{ - TR_ENTRY *trp; - - if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1)) - return (&vcpu->arch.dtlb); - return 0UL; -} - -IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) -{ - unsigned long pteval, logps = (itir >> 2) & 0x3f; - unsigned long translate_domain_pte(UINT64,UINT64,UINT64); - - if (logps < PAGE_SHIFT) { - printf("vcpu_itc_d: domain trying to use smaller page size!\n"); - //FIXME: kill domain here - while(1); - } - //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize - pteval = translate_domain_pte(pte,ifa,itir); - if (!pteval) return IA64_ILLOP_FAULT; - vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps); - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) -{ - unsigned long pteval, logps = (itir >> 2) & 0x3f; - unsigned long translate_domain_pte(UINT64,UINT64,UINT64); - - // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK! - if (logps < PAGE_SHIFT) { - printf("vcpu_itc_i: domain trying to use smaller page size!\n"); - //FIXME: kill domain here - while(1); - } - //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize - pteval = translate_domain_pte(pte,ifa,itir); - // FIXME: what to do if bad physical address? (machine check?) - if (!pteval) return IA64_ILLOP_FAULT; - vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps); - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 addr_range) -{ - printk("vcpu_ptc_l: called, not implemented yet\n"); - return IA64_ILLOP_FAULT; -} - -// At privlvl=0, fc performs no access rights or protection key checks, while -// at privlvl!=0, fc performs access rights checks as if it were a 1-byte -// read but no protection key check. Thus in order to avoid an unexpected -// access rights fault, we have to translate the virtual address to a -// physical address (possibly via a metaphysical address) and do the fc -// on the physical address, which is guaranteed to flush the same cache line -IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vadr) -{ - // TODO: Only allowed for current vcpu - UINT64 mpaddr, paddr; - IA64FAULT fault; - unsigned long translate_domain_mpaddr(unsigned long); - IA64FAULT vcpu_tpa(VCPU *, UINT64, UINT64 *); - - fault = vcpu_tpa(vcpu, vadr, &mpaddr); - if (fault == IA64_NO_FAULT) { - paddr = translate_domain_mpaddr(mpaddr); - ia64_fc(__va(paddr)); - } - return fault; -} - -int ptce_count = 0; -IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr) -{ - // Note that this only needs to be called once, i.e. 
the - // architected loop to purge the entire TLB, should use - // base = stride1 = stride2 = 0, count0 = count 1 = 1 - -#ifdef VHPT_GLOBAL - vhpt_flush(); // FIXME: This is overdoing it -#endif - local_flush_tlb_all(); - // just invalidate the "whole" tlb - vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); - vcpu_purge_tr_entry(&PSCBX(vcpu,itlb)); - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 addr_range) -{ - printk("vcpu_ptc_g: called, not implemented yet\n"); - return IA64_ILLOP_FAULT; -} - -IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) -{ - extern ia64_global_tlb_purge(UINT64 start, UINT64 end, UINT64 nbits); - // FIXME: validate not flushing Xen addresses - // if (Xen address) return(IA64_ILLOP_FAULT); - // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE -//printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range); -#ifdef VHPT_GLOBAL - vhpt_flush_address(vadr,addr_range); -#endif - ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT); - vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); - vcpu_purge_tr_entry(&PSCBX(vcpu,itlb)); - return IA64_NO_FAULT; -} - -IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) -{ - printf("vcpu_ptr_d: Purging TLB is unsupported\n"); - return (IA64_ILLOP_FAULT); -} - -IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) -{ - printf("vcpu_ptr_i: Purging TLB is unsupported\n"); - return (IA64_ILLOP_FAULT); -} - -void vcpu_set_regs(VCPU *vcpu, REGS *regs) -{ - vcpu->arch.regs = regs; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vhpt.c --- a/xen/arch/ia64/vhpt.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,151 +0,0 @@ -/* - * Initialize VHPT support. - * - * Copyright (C) 2004 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@xxxxxx> - */ -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/init.h> - -#include <asm/processor.h> -#include <asm/system.h> -#include <asm/pgalloc.h> -#include <asm/page.h> -#include <asm/dma.h> -#include <asm/vhpt.h> - -unsigned long vhpt_paddr, vhpt_pend, vhpt_pte; - -void vhpt_flush(void) -{ - struct vhpt_lf_entry *v = (void *)VHPT_ADDR; - int i, cnt = 0; -#if 0 -static int firsttime = 2; - -if (firsttime) firsttime--; -else { -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: flushing vhpt (seems to crash at rid wrap?)...\n"); -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: *********************************************\n"); -printf("vhpt_flush: *********************************************\n"); -} -#endif - for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) { - v->itir = 0; - v->CChain = 0; - v->page_flags = 0; - v->ti_tag = INVALID_TI_TAG; - } - // initialize cache too??? 
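The loop above is the whole flush: for a long-format VHPT the walker compares the stored tag before using an entry, and a tag marked invalid never matches, so writing an invalid tag takes every entry out of service with no purge of the table itself. A stand-alone sketch of the same idea; struct lf_entry, INVALID_TAG and flush_vhpt are illustrative stand-ins for the real entry layout and INVALID_TI_TAG:

#include <stdint.h>
#include <stddef.h>

/* Simplified long-format VHPT entry (tag, itir, pte, collision chain). */
struct lf_entry {
    uint64_t page_flags;
    uint64_t itir;
    uint64_t cchain;
    uint64_t ti_tag;
};

#define INVALID_TAG ((uint64_t)1 << 63)   /* illustrative invalid-tag marker */

static void flush_vhpt(struct lf_entry *t, size_t nr_entries)
{
    for (size_t i = 0; i < nr_entries; i++) {
        t[i].page_flags = 0;
        t[i].itir = 0;
        t[i].ti_tag = INVALID_TAG;        /* entry can no longer match */
    }
}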
-} - -#ifdef VHPT_GLOBAL -void vhpt_flush_address(unsigned long vadr, unsigned long addr_range) -{ - unsigned long ps; - struct vhpt_lf_entry *vlfe; - - if ((vadr >> 61) == 7) { - // no vhpt for region 7 yet, see vcpu_itc_no_srlz - printf("vhpt_flush_address: region 7, spinning...\n"); - while(1); - } -#if 0 - // this only seems to occur at shutdown, but it does occur - if ((!addr_range) || addr_range & (addr_range - 1)) { - printf("vhpt_flush_address: weird range, spinning...\n"); - while(1); - } -//printf("************** vhpt_flush_address(%p,%p)\n",vadr,addr_range); -#endif - while ((long)addr_range > 0) { - vlfe = (struct vhpt_lf_entry *)ia64_thash(vadr); - // FIXME: for now, just blow it away even if it belongs to - // another domain. Later, use ttag to check for match -//if (!(vlfe->ti_tag & INVALID_TI_TAG)) { -//printf("vhpt_flush_address: blowing away valid tag for vadr=%p\n",vadr); -//} - vlfe->ti_tag |= INVALID_TI_TAG; - addr_range -= PAGE_SIZE; - vadr += PAGE_SIZE; - } -} -#endif - -void vhpt_map(void) -{ - unsigned long psr; - - psr = ia64_clear_ic(); - ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, vhpt_pte, VHPT_SIZE_LOG2); - ia64_set_psr(psr); - ia64_srlz_i(); -} - -void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, unsigned long logps) -{ - unsigned long mask = (1L << logps) - 1; - extern long running_on_sim; - int i; - - if (logps-PAGE_SHIFT > 10 && !running_on_sim) { - // if this happens, we may want to revisit this algorithm - printf("vhpt_multiple_insert:logps-PAGE_SHIFT>10,spinning..\n"); - while(1); - } - if (logps-PAGE_SHIFT > 2) { - // FIXME: Should add counter here to see how often this - // happens (e.g. for 16MB pages!) and determine if it - // is a performance problem. On a quick look, it takes - // about 39000 instrs for a 16MB page and it seems to occur - // only a few times/second, so OK for now. - // An alternate solution would be to just insert the one - // 16KB in the vhpt (but with the full mapping)? - //printf("vhpt_multiple_insert: logps-PAGE_SHIFT==%d," - //"va=%p, pa=%p, pa-masked=%p\n", - //logps-PAGE_SHIFT,vaddr,pte&_PFN_MASK, - //(pte&_PFN_MASK)&~mask); - } - vaddr &= ~mask; - pte = ((pte & _PFN_MASK) & ~mask) | (pte & ~_PFN_MASK); - for (i = 1L << (logps-PAGE_SHIFT); i > 0; i--) { - vhpt_insert(vaddr,pte,logps<<2); - vaddr += PAGE_SIZE; - } -} - -void vhpt_init(void) -{ - unsigned long vhpt_total_size, vhpt_alignment, vhpt_imva; -#if !VHPT_ENABLED - return; -#endif - // allocate a huge chunk of physical memory.... how??? - vhpt_total_size = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB - vhpt_alignment = 1 << VHPT_SIZE_LOG2; // 4MB, 16MB, 64MB, or 256MB - printf("vhpt_init: vhpt size=%p, align=%p\n",vhpt_total_size,vhpt_alignment); - /* This allocation only holds true if vhpt table is unique for - * all domains. Or else later new vhpt table should be allocated - * from domain heap when each domain is created. Assume xen buddy - * allocator can provide natural aligned page by order? 
- */ - vhpt_imva = alloc_xenheap_pages(VHPT_SIZE_LOG2 - PAGE_SHIFT); - if (!vhpt_imva) { - printf("vhpt_init: can't allocate VHPT!\n"); - while(1); - } - vhpt_paddr = __pa(vhpt_imva); - vhpt_pend = vhpt_paddr + vhpt_total_size - 1; - printf("vhpt_init: vhpt paddr=%p, end=%p\n",vhpt_paddr,vhpt_pend); - vhpt_pte = pte_val(pfn_pte(vhpt_paddr >> PAGE_SHIFT, PAGE_KERNEL)); - vhpt_map(); - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); - vhpt_flush(); -} - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vlsapic.c --- a/xen/arch/ia64/vlsapic.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,620 +0,0 @@ - -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vlsapic.c: virtual lsapic model including ITC timer. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) - */ - -#include <linux/sched.h> -#include <public/arch-ia64.h> -#include <asm/ia64_int.h> -#include <asm/vcpu.h> -#include <asm/regionreg.h> -#include <asm/tlb.h> -#include <asm/processor.h> -#include <asm/delay.h> -#include <asm/vmx_vcpu.h> -#include <asm/vmx_vcpu.h> -#include <asm/regs.h> -#include <asm/gcc_intrin.h> -#include <asm/vmx_mm_def.h> -#include <asm/vmx.h> -#include <asm/hw_irq.h> -#include <asm/vmx_pal_vsa.h> -#include <asm/kregs.h> - -#define SHARED_VLAPIC_INF -#ifdef V_IOSAPIC_READY -static inline vl_apic_info* get_psapic(VCPU *vcpu) -{ - shared_iopage_t *sp = get_sp(vcpu->domain); - return &(sp->vcpu_iodata[vcpu->vcpu_id].apic_intr); -} -#endif -//u64 fire_itc; -//u64 fire_itc2; -//u64 fire_itm; -//u64 fire_itm2; -/* - * Update the checked last_itc. - */ -static void update_last_itc(vtime_t *vtm, uint64_t cur_itc) -{ - vtm->last_itc = cur_itc; -} - -/* - * ITC value saw in guest (host+offset+drift). - */ -static uint64_t now_itc(vtime_t *vtm) -{ - uint64_t guest_itc=vtm->vtm_offset+ia64_get_itc(); - - if ( vtm->vtm_local_drift ) { -// guest_itc -= vtm->vtm_local_drift; - } - if ( (long)(guest_itc - vtm->last_itc) > 0 ) { - return guest_itc; - - } - else { - /* guest ITC backwarded due after LP switch */ - return vtm->last_itc; - } -} - -/* - * Interval time components reset. 
- */ -static void vtm_reset(VCPU *vcpu) -{ - uint64_t cur_itc; - vtime_t *vtm; - - vtm=&(vcpu->arch.arch_vmx.vtm); - vtm->vtm_offset = 0; - vtm->vtm_local_drift = 0; - VPD_CR(vcpu, itm) = 0; - VPD_CR(vcpu, itv) = 0x10000; - cur_itc = ia64_get_itc(); - vtm->last_itc = vtm->vtm_offset + cur_itc; -} - -/* callback function when vtm_timer expires */ -static void vtm_timer_fn(void *data) -{ - vtime_t *vtm; - VCPU *vcpu = data; - u64 cur_itc,vitm; - - UINT64 vec; - - vec = VPD_CR(vcpu, itv) & 0xff; - vmx_vcpu_pend_interrupt(vcpu, vec); - - vtm=&(vcpu->arch.arch_vmx.vtm); - cur_itc = now_itc(vtm); - vitm =VPD_CR(vcpu, itm); - //fire_itc2 = cur_itc; - //fire_itm2 = vitm; - update_last_itc(vtm,cur_itc); // pseudo read to update vITC -} - -void vtm_init(VCPU *vcpu) -{ - vtime_t *vtm; - uint64_t itc_freq; - - vtm=&(vcpu->arch.arch_vmx.vtm); - - itc_freq = local_cpu_data->itc_freq; - vtm->cfg_max_jump=itc_freq*MAX_JUMP_STEP/1000; - vtm->cfg_min_grun=itc_freq*MIN_GUEST_RUNNING_TIME/1000; - init_ac_timer(&vtm->vtm_timer, vtm_timer_fn, vcpu, 0); - vtm_reset(vcpu); -} - -/* - * Action when guest read ITC. - */ -uint64_t vtm_get_itc(VCPU *vcpu) -{ - uint64_t guest_itc, spsr; - vtime_t *vtm; - - vtm=&(vcpu->arch.arch_vmx.vtm); - // FIXME: should use local_irq_disable & local_irq_enable ?? - local_irq_save(spsr); - guest_itc = now_itc(vtm); -// update_last_itc(vtm, guest_itc); - - local_irq_restore(spsr); - return guest_itc; -} - -void vtm_set_itc(VCPU *vcpu, uint64_t new_itc) -{ - uint64_t spsr; - vtime_t *vtm; - - vtm=&(vcpu->arch.arch_vmx.vtm); - local_irq_save(spsr); - vtm->vtm_offset = new_itc - ia64_get_itc(); - vtm->last_itc = new_itc; - vtm_interruption_update(vcpu, vtm); - local_irq_restore(spsr); -} - -void vtm_set_itv(VCPU *vcpu) -{ - uint64_t spsr,itv; - vtime_t *vtm; - - vtm=&(vcpu->arch.arch_vmx.vtm); - local_irq_save(spsr); - itv = VPD_CR(vcpu, itv); - if ( ITV_IRQ_MASK(itv) ) - rem_ac_timer(&vtm->vtm_timer); - vtm_interruption_update(vcpu, vtm); - local_irq_restore(spsr); -} - - -/* - * Update interrupt or hook the vtm ac_timer for fire - * At this point vtm_timer should be removed if itv is masked. - */ -/* Interrupt must be disabled at this point */ - -extern u64 tick_to_ns(u64 tick); -#define TIMER_SLOP (50*1000) /* ns */ /* copy from ac_timer.c */ -void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm) -{ - uint64_t cur_itc,vitm,vitv; - uint64_t expires; - long diff_now, diff_last; - uint64_t spsr; - - vitv = VPD_CR(vcpu, itv); - if ( ITV_IRQ_MASK(vitv) ) { - return; - } - - vitm =VPD_CR(vcpu, itm); - local_irq_save(spsr); - cur_itc =now_itc(vtm); - diff_last = vtm->last_itc - vitm; - diff_now = cur_itc - vitm; - update_last_itc (vtm,cur_itc); - - if ( diff_last >= 0 ) { - // interrupt already fired. - rem_ac_timer(&vtm->vtm_timer); - } - else if ( diff_now >= 0 ) { - // ITV is fired. - vmx_vcpu_pend_interrupt(vcpu, vitv&0xff); - } - /* Both last_itc & cur_itc < itm, wait for fire condition */ - else { - expires = NOW() + tick_to_ns(0-diff_now) + TIMER_SLOP; - set_ac_timer(&vtm->vtm_timer, expires); - } - local_irq_restore(spsr); -} - -/* - * Action for vtm when the domain is scheduled out. - * Remove the ac_timer for vtm. - */ -void vtm_domain_out(VCPU *vcpu) -{ - if(!is_idle_task(vcpu->domain)) - rem_ac_timer(&vcpu->arch.arch_vmx.vtm.vtm_timer); -} - -/* - * Action for vtm when the domain is scheduled in. - * Fire vtm IRQ or add the ac_timer for vtm. 
- */ -void vtm_domain_in(VCPU *vcpu) -{ - vtime_t *vtm; - - if(!is_idle_task(vcpu->domain)) { - vtm=&(vcpu->arch.arch_vmx.vtm); - vtm_interruption_update(vcpu, vtm); - } -} - -/* - * Next for vLSapic - */ - -#define NMI_VECTOR 2 -#define ExtINT_VECTOR 0 -#define NULL_VECTOR -1 -#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.arch_vmx.in_service[i]) -static void update_vhpi(VCPU *vcpu, int vec) -{ - u64 vhpi; - if ( vec == NULL_VECTOR ) { - vhpi = 0; - } - else if ( vec == NMI_VECTOR ) { // NMI - vhpi = 32; - } else if (vec == ExtINT_VECTOR) { //ExtINT - vhpi = 16; - } - else { - vhpi = vec / 16; - } - - VMX_VPD(vcpu,vhpi) = vhpi; - // TODO: Add support for XENO - if ( VMX_VPD(vcpu,vac).a_int ) { - ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT, - (uint64_t) &(vcpu->arch.arch_vmx.vpd), 0, 0,0,0,0,0); - } -} - -#ifdef V_IOSAPIC_READY -void vlapic_update_shared_info(VCPU *vcpu) -{ - //int i; - - vl_apic_info *ps; - - if (vcpu->domain == dom0) - return; - - ps = get_psapic(vcpu); - ps->vl_lapic_id = ((VPD_CR(vcpu, lid) >> 16) & 0xffff) << 16; - printf("vl_lapic_id = %x\n", ps->vl_lapic_id); - ps->vl_apr = 0; - // skip ps->vl_logical_dest && ps->vl_dest_format - // IPF support physical destination mode only - ps->vl_arb_id = 0; - /* - for ( i=0; i<4; i++ ) { - ps->tmr[i] = 0; // edge trigger - } - */ -} - -void vlapic_update_ext_irq(VCPU *vcpu) -{ - int vec; - - vl_apic_info *ps = get_psapic(vcpu); - while ( (vec = highest_bits(ps->irr)) != NULL_VECTOR ) { - clear_bit (vec, ps->irr); - vmx_vcpu_pend_interrupt(vcpu, vec); - } -} -#endif - -void vlsapic_reset(VCPU *vcpu) -{ - int i; -#ifdef V_IOSAPIC_READY - vl_apic_info *psapic; // shared lapic inf. -#endif - - VPD_CR(vcpu, lid) = ia64_getreg(_IA64_REG_CR_LID); - VPD_CR(vcpu, ivr) = 0; - VPD_CR(vcpu,tpr) = 0x10000; - VPD_CR(vcpu, eoi) = 0; - VPD_CR(vcpu, irr[0]) = 0; - VPD_CR(vcpu, irr[1]) = 0; - VPD_CR(vcpu, irr[2]) = 0; - VPD_CR(vcpu, irr[3]) = 0; - VPD_CR(vcpu, pmv) = 0x10000; - VPD_CR(vcpu, cmcv) = 0x10000; - VPD_CR(vcpu, lrr0) = 0x10000; // default reset value? - VPD_CR(vcpu, lrr1) = 0x10000; // default reset value? - update_vhpi(vcpu, NULL_VECTOR); - for ( i=0; i<4; i++) { - VLSAPIC_INSVC(vcpu,i) = 0; - } -#ifdef V_IOSAPIC_READY - vlapic_update_shared_info(vcpu); - //vlapic_update_shared_irr(vcpu); -#endif - DPRINTK("VLSAPIC inservice base=%lp\n", &VLSAPIC_INSVC(vcpu,0) ); -} - -/* - * Find highest signaled bits in 4 words (long). - * - * return 0-255: highest bits. - * -1 : Not found. - */ -static __inline__ int highest_bits(uint64_t *dat) -{ - uint64_t bits, bitnum; - int i; - - /* loop for all 256 bits */ - for ( i=3; i >= 0 ; i -- ) { - bits = dat[i]; - if ( bits ) { - bitnum = ia64_fls(bits); - return i*64+bitnum; - } - } - return NULL_VECTOR; -} - -/* - * Return 0-255 for pending irq. - * NULL_VECTOR: when no pending. - */ -static int highest_pending_irq(VCPU *vcpu) -{ - if ( VPD_CR(vcpu, irr[0]) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR; - if ( VPD_CR(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR; - return highest_bits(&VPD_CR(vcpu, irr[0])); -} - -static int highest_inservice_irq(VCPU *vcpu) -{ - if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR; - if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR; - return highest_bits(&(VLSAPIC_INSVC(vcpu, 0))); -} - -/* - * The pending irq is higher than the inservice one. 
- * - */ -static int is_higher_irq(int pending, int inservice) -{ - return ( (pending >> 4) > (inservice>>4) || - ((pending != NULL_VECTOR) && (inservice == NULL_VECTOR)) ); -} - -static int is_higher_class(int pending, int mic) -{ - return ( (pending >> 4) > mic ); -} - -static int is_invalid_irq(int vec) -{ - return (vec == 1 || ((vec <= 14 && vec >= 3))); -} - -#define IRQ_NO_MASKED 0 -#define IRQ_MASKED_BY_VTPR 1 -#define IRQ_MASKED_BY_INSVC 2 // masked by inservice IRQ - -/* See Table 5-8 in SDM vol2 for the definition */ -static int -_xirq_masked(VCPU *vcpu, int h_pending, int h_inservice) -{ - tpr_t vtpr; - uint64_t mmi; - - vtpr.val = VPD_CR(vcpu, tpr); - - if ( h_inservice == NMI_VECTOR ) { - return IRQ_MASKED_BY_INSVC; - } - if ( h_pending == NMI_VECTOR ) { - // Non Maskable Interrupt - return IRQ_NO_MASKED; - } - if ( h_inservice == ExtINT_VECTOR ) { - return IRQ_MASKED_BY_INSVC; - } - mmi = vtpr.mmi; - if ( h_pending == ExtINT_VECTOR ) { - if ( mmi ) { - // mask all external IRQ - return IRQ_MASKED_BY_VTPR; - } - else { - return IRQ_NO_MASKED; - } - } - - if ( is_higher_irq(h_pending, h_inservice) ) { - if ( !mmi && is_higher_class(h_pending, vtpr.mic) ) { - return IRQ_NO_MASKED; - } - else { - return IRQ_MASKED_BY_VTPR; - } - } - else { - return IRQ_MASKED_BY_INSVC; - } -} - -static int irq_masked(VCPU *vcpu, int h_pending, int h_inservice) -{ - int mask; - - mask = _xirq_masked(vcpu, h_pending, h_inservice); - return mask; -} - - -/* - * May come from virtualization fault or - * nested host interrupt. - */ -void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector) -{ - uint64_t spsr; - - if (vector & ~0xff) { - DPRINTK("vmx_vcpu_pend_interrupt: bad vector\n"); - return; - } - local_irq_save(spsr); - VPD_CR(vcpu,irr[vector>>6]) |= 1UL<<(vector&63); - //vlapic_update_shared_irr(vcpu); - local_irq_restore(spsr); - vcpu->arch.irq_new_pending = 1; -} - -/* - * Add batch of pending interrupt. - * The interrupt source is contained in pend_irr[0-3] with - * each bits stand for one interrupt. - */ -void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, UINT64 *pend_irr) -{ - uint64_t spsr; - int i; - - local_irq_save(spsr); - for (i=0 ; i<4; i++ ) { - VPD_CR(vcpu,irr[i]) |= pend_irr[i]; - } - //vlapic_update_shared_irr(vcpu); - local_irq_restore(spsr); - vcpu->arch.irq_new_pending = 1; -} - -/* - * If the new pending interrupt is enabled and not masked, we directly inject - * it into the guest. Otherwise, we set the VHPI if vac.a_int=1 so that when - * the interrupt becomes unmasked, it gets injected. - * RETURN: - * TRUE: Interrupt is injected. - * FALSE: Not injected but may be in VHPI when vac.a_int=1 - * - * Optimization: We defer setting the VHPI until the EOI time, if a higher - * priority interrupt is in-service. The idea is to reduce the - * number of unnecessary calls to inject_vhpi. 
- */ -int vmx_check_pending_irq(VCPU *vcpu) -{ - uint64_t spsr, mask; - int h_pending, h_inservice; - int injected=0; - uint64_t isr; - IA64_PSR vpsr; - - local_irq_save(spsr); - h_pending = highest_pending_irq(vcpu); - if ( h_pending == NULL_VECTOR ) goto chk_irq_exit; - h_inservice = highest_inservice_irq(vcpu); - - vpsr.val = vmx_vcpu_get_psr(vcpu); - mask = irq_masked(vcpu, h_pending, h_inservice); - if ( vpsr.i && IRQ_NO_MASKED == mask ) { - isr = vpsr.val & IA64_PSR_RI; - if ( !vpsr.ic ) - panic("Interrupt when IC=0\n"); - vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ - injected = 1; - } - else if ( mask == IRQ_MASKED_BY_INSVC ) { - // cann't inject VHPI -// DPRINTK("IRQ masked by higher inservice\n"); - } - else { - // masked by vpsr.i or vtpr. - update_vhpi(vcpu,h_pending); - } - -chk_irq_exit: - local_irq_restore(spsr); - return injected; -} - -/* - * Only coming from virtualization fault. - */ -void guest_write_eoi(VCPU *vcpu) -{ - int vec; - uint64_t spsr; - - vec = highest_inservice_irq(vcpu); - if ( vec == NULL_VECTOR ) panic("Wrong vector to EOI\n"); - local_irq_save(spsr); - VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63)); - local_irq_restore(spsr); - VPD_CR(vcpu, eoi)=0; // overwrite the data - vmx_check_pending_irq(vcpu); -} - -uint64_t guest_read_vivr(VCPU *vcpu) -{ - int vec, next, h_inservice; - uint64_t spsr; - - local_irq_save(spsr); - vec = highest_pending_irq(vcpu); - h_inservice = highest_inservice_irq(vcpu); - if ( vec == NULL_VECTOR || - irq_masked(vcpu, vec, h_inservice) != IRQ_NO_MASKED ) { - local_irq_restore(spsr); - return IA64_SPURIOUS_INT_VECTOR; - } - - VLSAPIC_INSVC(vcpu,vec>>6) |= (1UL <<(vec&63)); - VPD_CR(vcpu, irr[vec>>6]) &= ~(1UL <<(vec&63)); - update_vhpi(vcpu, NULL_VECTOR); // clear VHPI till EOI or IRR write - //vlapic_update_shared_irr(vcpu); - local_irq_restore(spsr); - return (uint64_t)vec; -} - -static void generate_exirq(VCPU *vcpu) -{ - IA64_PSR vpsr; - uint64_t isr; - - vpsr.val = vmx_vcpu_get_psr(vcpu); - update_vhpi(vcpu, NULL_VECTOR); - isr = vpsr.val & IA64_PSR_RI; - if ( !vpsr.ic ) - panic("Interrupt when IC=0\n"); - vmx_reflect_interruption(0,isr,0, 12 ); // EXT IRQ -} - -vhpi_detection(VCPU *vcpu) -{ - uint64_t threshold,vhpi; - tpr_t vtpr; - IA64_PSR vpsr; - - vpsr.val = vmx_vcpu_get_psr(vcpu); - vtpr.val = VPD_CR(vcpu, tpr); - - threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; - vhpi = VMX_VPD(vcpu,vhpi); - if ( vhpi > threshold ) { - // interrupt actived - generate_exirq (vcpu); - } -} - -vmx_vexirq(VCPU *vcpu) -{ - static uint64_t vexirq_count=0; - - vexirq_count ++; - printk("Virtual ex-irq %ld\n", vexirq_count); - generate_exirq (vcpu); -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmmu.c --- a/xen/arch/ia64/vmmu.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,846 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmmu.c: virtual memory management unit components. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <asm/tlb.h> -#include <asm/gcc_intrin.h> -#include <asm/vcpu.h> -#include <linux/interrupt.h> -#include <asm/vmx_vcpu.h> -#include <asm/vmx_mm_def.h> -#include <asm/vmx.h> -#include <asm/hw_irq.h> -#include <asm/vmx_pal_vsa.h> -#include <asm/kregs.h> - -/* - * Architecture ppn is in 4KB unit while XEN - * page may be different(1<<PAGE_SHIFT). - */ -static inline u64 arch_ppn_to_xen_ppn(u64 appn) -{ - return (appn << ARCH_PAGE_SHIFT) >> PAGE_SHIFT; -} - -static inline u64 xen_ppn_to_arch_ppn(u64 xppn) -{ - return (xppn << PAGE_SHIFT) >> ARCH_PAGE_SHIFT; -} - - -/* - * Get the machine page frame number in 16KB unit - * Input: - * d: - */ -u64 get_mfn(domid_t domid, u64 gpfn, u64 pages) -{ - struct domain *d; - u64 i, xen_gppn, xen_mppn, mpfn; - - if ( domid == DOMID_SELF ) { - d = current->domain; - } - else { - d = find_domain_by_id(domid); - } - xen_gppn = arch_ppn_to_xen_ppn(gpfn); - xen_mppn = __gpfn_to_mfn(d, xen_gppn); -/* - for (i=0; i<pages; i++) { - if ( __gpfn_to_mfn(d, gpfn+i) == INVALID_MFN ) { - return INVALID_MFN; - } - } -*/ - mpfn= xen_ppn_to_arch_ppn(xen_mppn); - mpfn = mpfn | (((1UL <<(PAGE_SHIFT-12))-1)&gpfn); - return mpfn; - -} - -/* - * The VRN bits of va stand for which rr to get. - */ -ia64_rr vmmu_get_rr(VCPU *vcpu, u64 va) -{ - ia64_rr vrr; - vmx_vcpu_get_rr(vcpu, va, &vrr.rrval); - return vrr; -} - - -void recycle_message(thash_cb_t *hcb, u64 para) -{ - printk("hcb=%p recycled with %lx\n",hcb,para); -} - - -/* - * Purge all guest TCs in logical processor. - * Instead of purging all LP TCs, we should only purge - * TCs that belong to this guest. - */ -void -purge_machine_tc_by_domid(domid_t domid) -{ -#ifndef PURGE_GUEST_TC_ONLY - // purge all TCs - struct ia64_pal_retval result; - u64 addr; - u32 count1,count2; - u32 stride1,stride2; - u32 i,j; - u64 psr; - - - result = ia64_pal_call_static(PAL_PTCE_INFO,0,0,0, 0); - if ( result.status != 0 ) { - panic ("PAL_PTCE_INFO failed\n"); - } - addr = result.v0; - count1 = HIGH_32BITS(result.v1); - count2 = LOW_32BITS (result.v1); - stride1 = HIGH_32BITS(result.v2); - stride2 = LOW_32BITS (result.v2); - - local_irq_save(psr); - for (i=0; i<count1; i++) { - for (j=0; j<count2; j++) { - ia64_ptce(addr); - addr += stride2; - } - addr += stride1; - } - local_irq_restore(psr); -#else - // purge all TCs belong to this guest. 
-#endif -} - -static thash_cb_t *init_domain_vhpt(struct vcpu *d) -{ - struct pfn_info *page; - void *vbase,*vcur; - vhpt_special *vs; - thash_cb_t *vhpt; - PTA pta_value; - - page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0); - if ( page == NULL ) { - panic("No enough contiguous memory for init_domain_mm\n"); - } - vbase = page_to_virt(page); - printk("Allocate domain vhpt at 0x%lx\n", (u64)vbase); - memset(vbase, 0, VCPU_TLB_SIZE); - vcur = (void*)((u64)vbase + VCPU_TLB_SIZE); - vhpt = --((thash_cb_t*)vcur); - vhpt->ht = THASH_VHPT; - vhpt->vcpu = d; - vhpt->hash_func = machine_thash; - vs = --((vhpt_special *)vcur); - - /* Setup guest pta */ - pta_value.val = 0; - pta_value.ve = 1; - pta_value.vf = 1; - pta_value.size = VCPU_TLB_SHIFT - 1; /* 2M */ - pta_value.base = ((u64)vbase) >> PTA_BASE_SHIFT; - d->arch.arch_vmx.mpta = pta_value.val; - - vhpt->vs = vs; - vhpt->vs->get_mfn = get_mfn; - vhpt->vs->tag_func = machine_ttag; - vhpt->hash = vbase; - vhpt->hash_sz = VCPU_TLB_SIZE/2; - vhpt->cch_buf = (u64)vbase + vhpt->hash_sz; - vhpt->cch_sz = (u64)vcur - (u64)vhpt->cch_buf; - vhpt->recycle_notifier = recycle_message; - thash_init(vhpt,VCPU_TLB_SHIFT-1); - return vhpt; -} - - -thash_cb_t *init_domain_tlb(struct vcpu *d) -{ - struct pfn_info *page; - void *vbase,*vcur; - tlb_special_t *ts; - thash_cb_t *tlb; - - page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0); - if ( page == NULL ) { - panic("No enough contiguous memory for init_domain_mm\n"); - } - vbase = page_to_virt(page); - printk("Allocate domain tlb at 0x%lx\n", (u64)vbase); - memset(vbase, 0, VCPU_TLB_SIZE); - vcur = (void*)((u64)vbase + VCPU_TLB_SIZE); - tlb = --((thash_cb_t*)vcur); - tlb->ht = THASH_TLB; - tlb->vcpu = d; - ts = --((tlb_special_t *)vcur); - tlb->ts = ts; - tlb->ts->vhpt = init_domain_vhpt(d); - tlb->hash_func = machine_thash; - tlb->hash = vbase; - tlb->hash_sz = VCPU_TLB_SIZE/2; - tlb->cch_buf = (u64)vbase + tlb->hash_sz; - tlb->cch_sz = (u64)vcur - (u64)tlb->cch_buf; - tlb->recycle_notifier = recycle_message; - thash_init(tlb,VCPU_TLB_SHIFT-1); - return tlb; -} - -/* Allocate physical to machine mapping table for domN - * FIXME: Later this interface may be removed, if that table is provided - * by control panel. Dom0 has gpfn identical to mfn, which doesn't need - * this interface at all. - */ -void -alloc_pmt(struct domain *d) -{ - struct pfn_info *page; - - /* Only called once */ - ASSERT(d->arch.pmt); - - page = alloc_domheap_pages(NULL, get_order(d->max_pages), 0); - ASSERT(page); - - d->arch.pmt = page_to_virt(page); - memset(d->arch.pmt, 0x55, d->max_pages * 8); -} - -/* - * Insert guest TLB to machine TLB. 
- * data: In TLB format - */ -void machine_tlb_insert(struct vcpu *d, thash_data_t *tlb) -{ - u64 saved_itir, saved_ifa, saved_rr; - u64 pages; - thash_data_t mtlb; - ia64_rr vrr; - unsigned int cl = tlb->cl; - - mtlb.ifa = tlb->vadr; - mtlb.itir = tlb->itir & ~ITIR_RV_MASK; - vrr = vmmu_get_rr(d,mtlb.ifa); - //vmx_vcpu_get_rr(d, mtlb.ifa, &vrr.value); - pages = PSIZE(vrr.ps) >> PAGE_SHIFT; - mtlb.page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK; - mtlb.ppn = get_mfn(DOMID_SELF,tlb->ppn, pages); - if (mtlb.ppn == INVALID_MFN) - panic("Machine tlb insert with invalid mfn number.\n"); - - __asm __volatile("rsm psr.ic|psr.i;; srlz.i" ); - - saved_itir = ia64_getreg(_IA64_REG_CR_ITIR); - saved_ifa = ia64_getreg(_IA64_REG_CR_IFA); - saved_rr = ia64_get_rr(mtlb.ifa); - - ia64_setreg(_IA64_REG_CR_ITIR, mtlb.itir); - ia64_setreg(_IA64_REG_CR_IFA, mtlb.ifa); - /* Only access memory stack which is mapped by TR, - * after rr is switched. - */ - ia64_set_rr(mtlb.ifa, vmx_vrrtomrr(d, vrr.rrval)); - ia64_srlz_d(); - if ( cl == ISIDE_TLB ) { - ia64_itci(mtlb.page_flags); - ia64_srlz_i(); - } - else { - ia64_itcd(mtlb.page_flags); - ia64_srlz_d(); - } - ia64_set_rr(mtlb.ifa,saved_rr); - ia64_srlz_d(); - ia64_setreg(_IA64_REG_CR_IFA, saved_ifa); - ia64_setreg(_IA64_REG_CR_ITIR, saved_itir); - __asm __volatile("ssm psr.ic|psr.i;; srlz.i" ); -} - -u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps) -{ - u64 saved_pta, saved_rr0; - u64 hash_addr, tag; - unsigned long psr; - struct vcpu *v = current; - ia64_rr vrr; - - - saved_pta = ia64_getreg(_IA64_REG_CR_PTA); - saved_rr0 = ia64_get_rr(0); - vrr.rrval = saved_rr0; - vrr.rid = rid; - vrr.ps = ps; - - va = (va << 3) >> 3; // set VRN to 0. - // TODO: Set to enforce lazy mode - local_irq_save(psr); - ia64_setreg(_IA64_REG_CR_PTA, pta.val); - ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval)); - ia64_srlz_d(); - - hash_addr = ia64_thash(va); - ia64_setreg(_IA64_REG_CR_PTA, saved_pta); - - ia64_set_rr(0, saved_rr0); - ia64_srlz_d(); - local_irq_restore(psr); - return hash_addr; -} - -u64 machine_ttag(PTA pta, u64 va, u64 rid, u64 ps) -{ - u64 saved_pta, saved_rr0; - u64 hash_addr, tag; - u64 psr; - struct vcpu *v = current; - ia64_rr vrr; - - // TODO: Set to enforce lazy mode - saved_pta = ia64_getreg(_IA64_REG_CR_PTA); - saved_rr0 = ia64_get_rr(0); - vrr.rrval = saved_rr0; - vrr.rid = rid; - vrr.ps = ps; - - va = (va << 3) >> 3; // set VRN to 0. - local_irq_save(psr); - ia64_setreg(_IA64_REG_CR_PTA, pta.val); - ia64_set_rr(0, vmx_vrrtomrr(v, vrr.rrval)); - ia64_srlz_d(); - - tag = ia64_ttag(va); - ia64_setreg(_IA64_REG_CR_PTA, saved_pta); - - ia64_set_rr(0, saved_rr0); - ia64_srlz_d(); - local_irq_restore(psr); - return tag; -} - -/* - * Purge machine tlb. - * INPUT - * rr: guest rr. - * va: only bits 0:60 is valid - * size: bits format (1<<size) for the address range to purge. - * - */ -void machine_tlb_purge(u64 rid, u64 va, u64 ps) -{ - u64 saved_rr0; - u64 psr; - ia64_rr vrr; - - va = (va << 3) >> 3; // set VRN to 0. 
- saved_rr0 = ia64_get_rr(0); - vrr.rrval = saved_rr0; - vrr.rid = rid; - vrr.ps = ps; - local_irq_save(psr); - ia64_set_rr( 0, vmx_vrrtomrr(current,vrr.rrval) ); - ia64_srlz_d(); - ia64_ptcl(va, ps << 2); - ia64_set_rr( 0, saved_rr0 ); - ia64_srlz_d(); - local_irq_restore(psr); -} - - -int vhpt_enabled(VCPU *vcpu, uint64_t vadr, vhpt_ref_t ref) -{ - ia64_rr vrr; - PTA vpta; - IA64_PSR vpsr; - - vpsr.val = vmx_vcpu_get_psr(vcpu); - vrr = vmx_vcpu_rr(vcpu, vadr); - vmx_vcpu_get_pta(vcpu,&vpta.val); - - if ( vrr.ve & vpta.ve ) { - switch ( ref ) { - case DATA_REF: - case NA_REF: - return vpsr.dt; - case INST_REF: - return vpsr.dt && vpsr.it && vpsr.ic; - case RSE_REF: - return vpsr.dt && vpsr.rt; - - } - } - return 0; -} - - -int unimplemented_gva(VCPU *vcpu,u64 vadr) -{ - int bit=vcpu->domain->arch.imp_va_msb; - u64 ladr =(vadr<<3)>>(3+bit); - if(!ladr||ladr==(1U<<(61-bit))-1){ - return 0; - }else{ - return 1; - } -} - - -/* - * Prefetch guest bundle code. - * INPUT: - * code: buffer pointer to hold the read data. - * num: number of dword (8byts) to read. - */ -int -fetch_code(VCPU *vcpu, u64 gip, u64 *code) -{ - u64 gpip; // guest physical IP - u64 mpa; - thash_data_t *tlb; - ia64_rr vrr; - u64 mfn; - - if ( !(VMX_VPD(vcpu, vpsr) & IA64_PSR_IT) ) { // I-side physical mode - gpip = gip; - } - else { - vmx_vcpu_get_rr(vcpu, gip, &vrr.rrval); - tlb = vtlb_lookup_ex (vmx_vcpu_get_vtlb(vcpu), - vrr.rid, gip, ISIDE_TLB ); - if ( tlb == NULL ) panic("No entry found in ITLB\n"); - gpip = (tlb->ppn << 12) | ( gip & (PSIZE(tlb->ps)-1) ); - } - mfn = __gpfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT); - if ( mfn == INVALID_MFN ) return 0; - - mpa = (gpip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT); - *code = *(u64*)__va(mpa); - return 1; -} - -IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) -{ - - thash_data_t data, *ovl; - thash_cb_t *hcb; - search_section_t sections; - ia64_rr vrr; - - hcb = vmx_vcpu_get_vtlb(vcpu); - data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; - data.itir=itir; - data.vadr=PAGEALIGN(ifa,data.ps); - data.tc = 1; - data.cl=ISIDE_TLB; - vmx_vcpu_get_rr(vcpu, ifa, &vrr); - data.rid = vrr.rid; - - sections.tr = 1; - sections.tc = 0; - - ovl = thash_find_overlap(hcb, &data, sections); - while (ovl) { - // generate MCA. - panic("Tlb conflict!!"); - return; - } - thash_purge_and_insert(hcb, &data); - return IA64_NO_FAULT; -} - - - - -IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) -{ - - thash_data_t data, *ovl; - thash_cb_t *hcb; - search_section_t sections; - ia64_rr vrr; - - hcb = vmx_vcpu_get_vtlb(vcpu); - data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; - data.itir=itir; - data.vadr=PAGEALIGN(ifa,data.ps); - data.tc = 1; - data.cl=DSIDE_TLB; - vmx_vcpu_get_rr(vcpu, ifa, &vrr); - data.rid = vrr.rid; - sections.tr = 1; - sections.tc = 0; - - ovl = thash_find_overlap(hcb, &data, sections); - if (ovl) { - // generate MCA. 
- panic("Tlb conflict!!"); - return; - } - thash_purge_and_insert(hcb, &data); - return IA64_NO_FAULT; -} - -/* - * Return TRUE/FALSE for success of lock operation - */ -int vmx_lock_guest_dtc (VCPU *vcpu, UINT64 va, int lock) -{ - - thash_cb_t *hcb; - ia64_rr vrr; - u64 preferred_size; - - vmx_vcpu_get_rr(vcpu, va, &vrr); - hcb = vmx_vcpu_get_vtlb(vcpu); - va = PAGEALIGN(va,vrr.ps); - preferred_size = PSIZE(vrr.ps); - return thash_lock_tc(hcb, va, preferred_size, vrr.rid, DSIDE_TLB, lock); -} - -IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx) -{ - - thash_data_t data, *ovl; - thash_cb_t *hcb; - search_section_t sections; - ia64_rr vrr; - - hcb = vmx_vcpu_get_vtlb(vcpu); - data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; - data.itir=itir; - data.vadr=PAGEALIGN(ifa,data.ps); - data.tc = 0; - data.cl=ISIDE_TLB; - vmx_vcpu_get_rr(vcpu, ifa, &vrr); - data.rid = vrr.rid; - sections.tr = 1; - sections.tc = 0; - - ovl = thash_find_overlap(hcb, &data, sections); - if (ovl) { - // generate MCA. - panic("Tlb conflict!!"); - return; - } - sections.tr = 0; - sections.tc = 1; - thash_purge_entries(hcb, &data, sections); - thash_tr_insert(hcb, &data, ifa, idx); - return IA64_NO_FAULT; -} - -IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx) -{ - - thash_data_t data, *ovl; - thash_cb_t *hcb; - search_section_t sections; - ia64_rr vrr; - - - hcb = vmx_vcpu_get_vtlb(vcpu); - data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; - data.itir=itir; - data.vadr=PAGEALIGN(ifa,data.ps); - data.tc = 0; - data.cl=DSIDE_TLB; - vmx_vcpu_get_rr(vcpu, ifa, &vrr); - data.rid = vrr.rid; - sections.tr = 1; - sections.tc = 0; - - ovl = thash_find_overlap(hcb, &data, sections); - while (ovl) { - // generate MCA. - panic("Tlb conflict!!"); - return; - } - sections.tr = 0; - sections.tc = 1; - thash_purge_entries(hcb, &data, sections); - thash_tr_insert(hcb, &data, ifa, idx); - return IA64_NO_FAULT; -} - - - -IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps) -{ - thash_cb_t *hcb; - ia64_rr rr; - search_section_t sections; - - hcb = vmx_vcpu_get_vtlb(vcpu); - rr=vmx_vcpu_rr(vcpu,vadr); - sections.tr = 1; - sections.tc = 1; - thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,DSIDE_TLB); - return IA64_NO_FAULT; -} - -IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps) -{ - thash_cb_t *hcb; - ia64_rr rr; - search_section_t sections; - hcb = vmx_vcpu_get_vtlb(vcpu); - rr=vmx_vcpu_rr(vcpu,vadr); - sections.tr = 1; - sections.tc = 1; - thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,ISIDE_TLB); - return IA64_NO_FAULT; -} - -IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps) -{ - thash_cb_t *hcb; - ia64_rr vrr; - search_section_t sections; - thash_data_t data, *ovl; - hcb = vmx_vcpu_get_vtlb(vcpu); - vrr=vmx_vcpu_rr(vcpu,vadr); - sections.tr = 0; - sections.tc = 1; - vadr = PAGEALIGN(vadr, ps); - - thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,DSIDE_TLB); - thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,ISIDE_TLB); - return IA64_NO_FAULT; -} - - -IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr) -{ - thash_cb_t *hcb; - hcb = vmx_vcpu_get_vtlb(vcpu); - thash_purge_all(hcb); - return IA64_NO_FAULT; -} - -IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps) -{ - vmx_vcpu_ptc_l(vcpu, vadr, ps); - return IA64_ILLOP_FAULT; -} - -IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 ps) -{ - vmx_vcpu_ptc_l(vcpu, vadr, ps); - return IA64_NO_FAULT; -} - - -IA64FAULT vmx_vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval) -{ - PTA vpta; - 
ia64_rr vrr; - u64 vhpt_offset,tmp; - vmx_vcpu_get_pta(vcpu, &vpta.val); - vrr=vmx_vcpu_rr(vcpu, vadr); - if(vpta.vf){ - panic("THASH,Don't support long format VHPT"); - *pval = ia64_call_vsa(PAL_VPS_THASH,vadr,vrr.rrval,vpta.val,0,0,0,0); - }else{ - vhpt_offset=((vadr>>vrr.ps)<<3)&((1UL<<(vpta.size))-1); - *pval = (vadr&VRN_MASK)| - (vpta.val<<3>>(vpta.size+3)<<(vpta.size))| - vhpt_offset; - } - return IA64_NO_FAULT; -} - - -IA64FAULT vmx_vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *pval) -{ - ia64_rr vrr; - PTA vpta; - vmx_vcpu_get_pta(vcpu, &vpta.val); - vrr=vmx_vcpu_rr(vcpu, vadr); - if(vpta.vf){ - panic("THASH,Don't support long format VHPT"); - *pval = ia64_call_vsa(PAL_VPS_TTAG,vadr,vrr.rrval,0,0,0,0,0); - }else{ - *pval = 1; - } - return IA64_NO_FAULT; -} - - - -IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr) -{ - thash_data_t *data; - thash_cb_t *hcb; - ia64_rr vrr; - ISR visr,pt_isr; - REGS *regs; - u64 vhpt_adr; - IA64_PSR vpsr; - hcb = vmx_vcpu_get_vtlb(vcpu); - vrr=vmx_vcpu_rr(vcpu,vadr); - regs=vcpu_regs(vcpu); - pt_isr.val=regs->cr_isr; - visr.val=0; - visr.ei=pt_isr.ei; - visr.ir=pt_isr.ir; - vpsr.val = vmx_vcpu_get_psr(vcpu); - if(vpsr.ic==0){ - visr.ni=1; - } - visr.na=1; - data = vtlb_lookup_ex(hcb, vrr.rid, vadr, DSIDE_TLB); - if(data){ - if(data->p==0){ - visr.na=1; - vmx_vcpu_set_isr(vcpu,visr.val); - page_not_present(vcpu, vadr); - return IA64_FAULT; - }else if(data->ma == VA_MATTR_NATPAGE){ - visr.na = 1; - vmx_vcpu_set_isr(vcpu, visr.val); - dnat_page_consumption(vcpu, vadr); - return IA64_FAULT; - }else{ - *padr = (data->ppn<<12) | (vadr&(PSIZE(data->ps)-1)); - return IA64_NO_FAULT; - } - }else{ - if(!vhpt_enabled(vcpu, vadr, NA_REF)){ - if(vpsr.ic){ - vmx_vcpu_set_isr(vcpu, visr.val); - alt_dtlb(vcpu, vadr); - return IA64_FAULT; - } - else{ - nested_dtlb(vcpu); - return IA64_FAULT; - } - } - else{ - vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); - vrr=vmx_vcpu_rr(vcpu,vhpt_adr); - data = vtlb_lookup_ex(hcb, vrr.rid, vhpt_adr, DSIDE_TLB); - if(data){ - if(vpsr.ic){ - vmx_vcpu_set_isr(vcpu, visr.val); - dtlb_fault(vcpu, vadr); - return IA64_FAULT; - } - else{ - nested_dtlb(vcpu); - return IA64_FAULT; - } - } - else{ - if(vpsr.ic){ - vmx_vcpu_set_isr(vcpu, visr.val); - dvhpt_fault(vcpu, vadr); - return IA64_FAULT; - } - else{ - nested_dtlb(vcpu); - return IA64_FAULT; - } - } - } - } -} - -IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key) -{ - thash_data_t *data; - thash_cb_t *hcb; - ia64_rr rr; - PTA vpta; - vmx_vcpu_get_pta(vcpu, &vpta.val); - if(vpta.vf==0 || unimplemented_gva(vcpu, vadr)){ - *key=1; - return IA64_NO_FAULT; - } - hcb = vmx_vcpu_get_vtlb(vcpu); - rr=vmx_vcpu_rr(vcpu,vadr); - data = vtlb_lookup_ex(hcb, rr.rid, vadr, DSIDE_TLB); - if(!data||!data->p){ - *key=1; - }else{ - *key=data->key; - } - return IA64_NO_FAULT; -} - -/* - * [FIXME] Is there any effective way to move this routine - * into vmx_uaccess.h? struct exec_domain is incomplete type - * in that way... - * - * This is the interface to lookup virtual TLB, and then - * return corresponding machine address in 2nd parameter. - * The 3rd parameter contains how many bytes mapped by - * matched vTLB entry, thus to allow caller copy more once. - * - * If failed to lookup, -EFAULT is returned. Or else reutrn - * 0. All upper domain access utilities rely on this routine - * to determine the real machine address. - * - * Yes, put_user and get_user seems to somhow slow upon it. 
- * However it's the necessary steps for any vmx domain virtual - * address, since that's difference address space as HV's one. - * Later some short-circuit may be created for special case - */ -long -__domain_va_to_ma(unsigned long va, unsigned long* ma, unsigned long *len) -{ - unsigned long mpfn, gpfn, m, n = *len; - thash_cb_t *vtlb; - unsigned long end; /* end of the area mapped by current entry */ - thash_data_t *entry; - struct vcpu *v = current; - ia64_rr vrr; - - vtlb = vmx_vcpu_get_vtlb(v); - vrr = vmx_vcpu_rr(v, va); - entry = vtlb_lookup_ex(vtlb, vrr.rid, va, DSIDE_TLB); - if (entry == NULL) - return -EFAULT; - - gpfn =(entry->ppn>>(PAGE_SHIFT-12)); - gpfn =PAGEALIGN(gpfn,(entry->ps-PAGE_SHIFT)); - gpfn = gpfn | POFFSET(va>>PAGE_SHIFT,(entry->ps-PAGE_SHIFT)); - - mpfn = __gpfn_to_mfn(v->domain, gpfn); - m = (mpfn<<PAGE_SHIFT) | (va & (PAGE_SIZE - 1)); - /* machine address may be not continuous */ - end = PAGEALIGN(m, PAGE_SHIFT) + PAGE_SIZE; - /*end = PAGEALIGN(m, entry->ps) + PSIZE(entry->ps);*/ - /* Current entry can't map all requested area */ - if ((m + n) > end) - n = end - m; - - *ma = m; - *len = n; - return 0; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_entry.S --- a/xen/arch/ia64/vmx_entry.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,611 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_entry.S: - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx) - * Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx) - */ - -#ifndef VCPU_TLB_SHIFT -#define VCPU_TLB_SHIFT 22 -#endif -#include <linux/config.h> -#include <asm/asmmacro.h> -#include <asm/cache.h> -#include <asm/kregs.h> -#include <asm/offsets.h> -#include <asm/pgtable.h> -#include <asm/percpu.h> -#include <asm/processor.h> -#include <asm/thread_info.h> -#include <asm/unistd.h> - -#include "vmx_minstate.h" - -/* - * prev_task <- vmx_ia64_switch_to(struct task_struct *next) - * With Ingo's new scheduler, interrupts are disabled when this routine gets - * called. The code starting at .map relies on this. The rest of the code - * doesn't care about the interrupt masking status. - * - * Since we allocate domain stack in xenheap, there's no need to map new - * domain's stack since all xenheap is mapped by TR. Another different task - * for vmx_ia64_switch_to is to switch to bank0 and change current pointer. - */ -GLOBAL_ENTRY(vmx_ia64_switch_to) - .prologue - alloc r16=ar.pfs,1,0,0,0 - DO_SAVE_SWITCH_STACK - .body - - bsw.0 // Switch to bank0, because bank0 r21 is current pointer - ;; - adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 - movl r25=init_task - adds r26=IA64_TASK_THREAD_KSP_OFFSET,in0 - ;; - st8 [r22]=sp // save kernel stack pointer of old task - ;; - /* - * TR always mapped this task's page, we can skip doing it again. 
- */ - ld8 sp=[r26] // load kernel stack pointer of new task - mov r21=in0 // update "current" application register - mov r8=r13 // return pointer to previously running task - mov r13=in0 // set "current" pointer - ;; - bsw.1 - ;; - DO_LOAD_SWITCH_STACK - -#ifdef CONFIG_SMP - sync.i // ensure "fc"s done by this CPU are visible on other CPUs -#endif - br.ret.sptk.many rp // boogie on out in new context -END(vmx_ia64_switch_to) - -GLOBAL_ENTRY(ia64_leave_nested) - rsm psr.i - ;; - adds r21=PT(PR)+16,r12 - ;; - - lfetch [r21],PT(CR_IPSR)-PT(PR) - adds r2=PT(B6)+16,r12 - adds r3=PT(R16)+16,r12 - ;; - lfetch [r21] - ld8 r28=[r2],8 // load b6 - adds r29=PT(R24)+16,r12 - - ld8.fill r16=[r3] - adds r3=PT(AR_CSD)-PT(R16),r3 - adds r30=PT(AR_CCV)+16,r12 - ;; - ld8.fill r24=[r29] - ld8 r15=[r30] // load ar.ccv - ;; - ld8 r29=[r2],16 // load b7 - ld8 r30=[r3],16 // load ar.csd - ;; - ld8 r31=[r2],16 // load ar.ssd - ld8.fill r8=[r3],16 - ;; - ld8.fill r9=[r2],16 - ld8.fill r10=[r3],PT(R17)-PT(R10) - ;; - ld8.fill r11=[r2],PT(R18)-PT(R11) - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; - rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - ld8.fill r22=[r2],24 - ld8.fill r23=[r3],24 - mov b6=r28 - ;; - ld8.fill r25=[r2],16 - ld8.fill r26=[r3],16 - mov b7=r29 - ;; - ld8.fill r27=[r2],16 - ld8.fill r28=[r3],16 - ;; - ld8.fill r29=[r2],16 - ld8.fill r30=[r3],24 - ;; - ld8.fill r31=[r2],PT(F9)-PT(R31) - adds r3=PT(F10)-PT(F6),r3 - ;; - ldf.fill f9=[r2],PT(F6)-PT(F9) - ldf.fill f10=[r3],PT(F8)-PT(F10) - ;; - ldf.fill f6=[r2],PT(F7)-PT(F6) - ;; - ldf.fill f7=[r2],PT(F11)-PT(F7) - ldf.fill f8=[r3],32 - ;; - srlz.i // ensure interruption collection is off - mov ar.ccv=r15 - ;; - bsw.0 // switch back to bank 0 (no stop bit required beforehand...) - ;; - ldf.fill f11=[r2] -// mov r18=r13 -// mov r21=r13 - adds r16=PT(CR_IPSR)+16,r12 - adds r17=PT(CR_IIP)+16,r12 - ;; - ld8 r29=[r16],16 // load cr.ipsr - ld8 r28=[r17],16 // load cr.iip - ;; - ld8 r30=[r16],16 // load cr.ifs - ld8 r25=[r17],16 // load ar.unat - ;; - ld8 r26=[r16],16 // load ar.pfs - ld8 r27=[r17],16 // load ar.rsc - cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs - ;; - ld8 r24=[r16],16 // load ar.rnat (may be garbage) - ld8 r23=[r17],16// load ar.bspstore (may be garbage) - ;; - ld8 r31=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r19=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 // load r1 - ;; - ld8.fill r12=[r16],16 - ld8.fill r13=[r17],16 - ;; - ld8 r20=[r16],16 // ar.fpsr - ld8.fill r15=[r17],16 - ;; - ld8.fill r14=[r16],16 - ld8.fill r2=[r17] - ;; - ld8.fill r3=[r16] - ;; - mov r16=ar.bsp // get existing backing store pointer - ;; - mov b0=r22 - mov ar.pfs=r26 - mov cr.ifs=r30 - mov cr.ipsr=r29 - mov ar.fpsr=r20 - mov cr.iip=r28 - ;; - mov ar.rsc=r27 - mov ar.unat=r25 - mov pr=r31,-1 - rfi -END(ia64_leave_nested) - - - -GLOBAL_ENTRY(ia64_leave_hypervisor) - PT_REGS_UNWIND_INFO(0) - /* - * work.need_resched etc. 
mustn't get changed by this CPU before it returns to - ;; - * user- or fsys-mode, hence we disable interrupts early on: - */ - rsm psr.i - ;; - alloc loc0=ar.pfs,0,1,1,0 - adds out0=16,r12 - ;; - br.call.sptk.many b0=leave_hypervisor_tail - mov ar.pfs=loc0 - adds r8=IA64_VPD_BASE_OFFSET,r13 - ;; - ld8 r8=[r8] - ;; - adds r9=VPD(VPSR),r8 - ;; - ld8 r9=[r9] - ;; - tbit.z pBN0,pBN1=r9,IA64_PSR_BN_BIT - ;; -(pBN0) add r7=VPD(VBNAT),r8; -(pBN1) add r7=VPD(VNAT),r8; - ;; - ld8 r7=[r7] - ;; - mov ar.unat=r7 -(pBN0) add r4=VPD(VBGR),r8; -(pBN1) add r4=VPD(VGR),r8; -(pBN0) add r5=VPD(VBGR)+0x8,r8; -(pBN1) add r5=VPD(VGR)+0x8,r8; - ;; - ld8.fill r16=[r4],16 - ld8.fill r17=[r5],16 - ;; - ld8.fill r18=[r4],16 - ld8.fill r19=[r5],16 - ;; - ld8.fill r20=[r4],16 - ld8.fill r21=[r5],16 - ;; - ld8.fill r22=[r4],16 - ld8.fill r23=[r5],16 - ;; - ld8.fill r24=[r4],16 - ld8.fill r25=[r5],16 - ;; - ld8.fill r26=[r4],16 - ld8.fill r27=[r5],16 - ;; - ld8.fill r28=[r4],16 - ld8.fill r29=[r5],16 - ;; - ld8.fill r30=[r4],16 - ld8.fill r31=[r5],16 - ;; - bsw.0 - ;; - mov r18=r8 //vpd - mov r19=r9 //vpsr - adds r20=PT(PR)+16,r12 - ;; - lfetch [r20],PT(CR_IPSR)-PT(PR) - adds r16=PT(B6)+16,r12 - adds r17=PT(B7)+16,r12 - ;; - lfetch [r20] - mov r21=r13 // get current - ;; - ld8 r30=[r16],16 // load b6 - ld8 r31=[r17],16 // load b7 - add r20=PT(EML_UNAT)+16,r12 - ;; - ld8 r29=[r20] //load ar_unat - mov b6=r30 - mov b7=r31 - ld8 r30=[r16],16 //load ar_csd - ld8 r31=[r17],16 //load ar_ssd - ;; - mov ar.unat=r29 - mov ar.csd=r30 - mov ar.ssd=r31 - ;; - ld8.fill r8=[r16],16 //load r8 - ld8.fill r9=[r17],16 //load r9 - ;; - ld8.fill r10=[r16],PT(R1)-PT(R10) //load r10 - ld8.fill r11=[r17],PT(R12)-PT(R11) //load r11 - ;; - ld8.fill r1=[r16],16 //load r1 - ld8.fill r12=[r17],16 //load r12 - ;; - ld8.fill r13=[r16],16 //load r13 - ld8 r30=[r17],16 //load ar_fpsr - ;; - ld8.fill r15=[r16],16 //load r15 - ld8.fill r14=[r17],16 //load r14 - mov ar.fpsr=r30 - ;; - ld8.fill r2=[r16],16 //load r2 - ld8.fill r3=[r17],16 //load r3 - ;; -/* -(pEml) ld8.fill r4=[r16],16 //load r4 -(pEml) ld8.fill r5=[r17],16 //load r5 - ;; -(pEml) ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6 -(pEml) ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7 - ;; -(pNonEml) adds r16=PT(AR_CCV)-PT(R4),r16 -(pNonEml) adds r17=PT(F7)-PT(R5),r17 - ;; -*/ - ld8.fill r4=[r16],16 //load r4 - ld8.fill r5=[r17],16 //load r5 - ;; - ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6 - ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7 - ;; - - ld8 r30=[r16],PT(F6)-PT(AR_CCV) - rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection - ;; - srlz.i // ensure interruption collection is off - ;; - invala // invalidate ALAT - ;; - ldf.fill f6=[r16],32 - ldf.fill f7=[r17],32 - ;; - ldf.fill f8=[r16],32 - ldf.fill f9=[r17],32 - ;; - ldf.fill f10=[r16] - ldf.fill f11=[r17] - ;; - mov ar.ccv=r30 - adds r16=PT(CR_IPSR)-PT(F10),r16 - adds r17=PT(CR_IIP)-PT(F11),r17 - ;; - ld8 r31=[r16],16 // load cr.ipsr - ld8 r30=[r17],16 // load cr.iip - ;; - ld8 r29=[r16],16 // load cr.ifs - ld8 r28=[r17],16 // load ar.unat - ;; - ld8 r27=[r16],16 // load ar.pfs - ld8 r26=[r17],16 // load ar.rsc - ;; - ld8 r25=[r16],16 // load ar.rnat (may be garbage) - ld8 r24=[r17],16// load ar.bspstore (may be garbage) - ;; - ld8 r23=[r16],16 // load predicates - ld8 r22=[r17],PT(RFI_PFS)-PT(B0) // load b0 - ;; - ld8 r20=[r16],16 // load ar.rsc value for "loadrs" - ;; -//rbs_switch - // loadrs has already been shifted - alloc r16=ar.pfs,0,0,0,0 // drop current register frame - ;; - mov ar.rsc=r20 - ;; - 
loadrs - ;; - mov ar.bspstore=r24 - ;; - ld8 r24=[r17] //load rfi_pfs - mov ar.unat=r28 - mov ar.rnat=r25 - mov ar.rsc=r26 - ;; - mov cr.ipsr=r31 - mov cr.iip=r30 - mov cr.ifs=r29 - cmp.ne p6,p0=r24,r0 -(p6)br.sptk vmx_dorfirfi - ;; -vmx_dorfirfi_back: - mov ar.pfs=r27 - -//vsa_sync_write_start - movl r20=__vsa_base - ;; - ld8 r20=[r20] // read entry point - mov r25=r18 - ;; - add r16=PAL_VPS_SYNC_WRITE,r20 - movl r24=switch_rr7 // calculate return address - ;; - mov b0=r16 - br.cond.sptk b0 // call the service - ;; -// switch rr7 and rr5 -switch_rr7: - adds r24=SWITCH_MRR5_OFFSET, r21 - adds r26=SWITCH_MRR6_OFFSET, r21 - adds r16=SWITCH_MRR7_OFFSET ,r21 - movl r25=(5<<61) - movl r27=(6<<61) - movl r17=(7<<61) - ;; - ld8 r24=[r24] - ld8 r26=[r26] - ld8 r16=[r16] - ;; - mov rr[r25]=r24 - mov rr[r27]=r26 - mov rr[r17]=r16 - ;; - srlz.i - ;; - add r24=SWITCH_MPTA_OFFSET, r21 - ;; - ld8 r24=[r24] - ;; - mov cr.pta=r24 - ;; - srlz.i - ;; -// fall through -GLOBAL_ENTRY(ia64_vmm_entry) -/* - * must be at bank 0 - * parameter: - * r18:vpd - * r19:vpsr - * r20:__vsa_base - * r22:b0 - * r23:predicate - */ - mov r24=r22 - mov r25=r18 - tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic - ;; - (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 - (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 - ;; - mov pr=r23,-2 - mov b0=r29 - ;; - br.cond.sptk b0 // call pal service -END(ia64_leave_hypervisor) - -//r24 rfi_pfs -//r17 address of rfi_pfs -GLOBAL_ENTRY(vmx_dorfirfi) - mov r16=ar.ec - movl r20 = vmx_dorfirfi_back - ;; -// clean rfi_pfs - st8 [r17]=r0 - mov b0=r20 -// pfs.pec=ar.ec - dep r24 = r16, r24, 52, 6 - ;; - mov ar.pfs=r24 - ;; - br.ret.sptk b0 - ;; -END(vmx_dorfirfi) - - -#define VMX_PURGE_RR7 0 -#define VMX_INSERT_RR7 1 -/* - * in0: old rr7 - * in1: virtual address of xen image - * in2: virtual address of vhpt table - */ -GLOBAL_ENTRY(vmx_purge_double_mapping) - alloc loc1 = ar.pfs,5,9,0,0 - mov loc0 = rp - movl r8 = 1f - ;; - movl loc4 = KERNEL_TR_PAGE_SHIFT - movl loc5 = VCPU_TLB_SHIFT - mov loc6 = psr - movl loc7 = XEN_RR7_SWITCH_STUB - mov loc8 = (1<<VMX_PURGE_RR7) - ;; - srlz.i - ;; - rsm psr.i | psr.ic - ;; - srlz.i - ;; - mov ar.rsc = 0 - mov b6 = loc7 - mov rp = r8 - ;; - br.sptk b6 -1: - mov ar.rsc = 3 - mov rp = loc0 - ;; - mov psr.l = loc6 - ;; - srlz.i - ;; - br.ret.sptk rp -END(vmx_purge_double_mapping) - -/* - * in0: new rr7 - * in1: virtual address of xen image - * in2: virtual address of vhpt table - * in3: pte entry of xen image - * in4: pte entry of vhpt table - */ -GLOBAL_ENTRY(vmx_insert_double_mapping) - alloc loc1 = ar.pfs,5,9,0,0 - mov loc0 = rp - movl loc2 = IA64_TR_XEN_IN_DOM // TR number for xen image - ;; - movl loc3 = IA64_TR_VHPT_IN_DOM // TR number for vhpt table - movl r8 = 1f - movl loc4 = KERNEL_TR_PAGE_SHIFT - ;; - movl loc5 = VCPU_TLB_SHIFT - mov loc6 = psr - movl loc7 = XEN_RR7_SWITCH_STUB - ;; - srlz.i - ;; - rsm psr.i | psr.ic - mov loc8 = (1<<VMX_INSERT_RR7) - ;; - srlz.i - ;; - mov ar.rsc = 0 - mov b6 = loc7 - mov rp = r8 - ;; - br.sptk b6 -1: - mov ar.rsc = 3 - mov rp = loc0 - ;; - mov psr.l = loc6 - ;; - srlz.i - ;; - br.ret.sptk rp -END(vmx_insert_double_mapping) - - .align PAGE_SIZE -/* - * Stub to add double mapping for new domain, which shouldn't - * access any memory when active. Before reaching this point, - * both psr.i/ic is cleared and rse is set in lazy mode. 
- * - * in0: new rr7 - * in1: virtual address of xen image - * in2: virtual address of vhpt table - * in3: pte entry of xen image - * in4: pte entry of vhpt table - * loc2: TR number for xen image - * loc3: TR number for vhpt table - * loc4: page size for xen image - * loc5: page size of vhpt table - * loc7: free to use - * loc8: purge or insert - * r8: will contain old rid value - */ -GLOBAL_ENTRY(vmx_switch_rr7) - movl loc7 = (7<<61) - dep.z loc4 = loc4, 2, 6 - dep.z loc5 = loc5, 2, 6 - ;; - tbit.nz p6,p7=loc8, VMX_INSERT_RR7 - mov r8 = rr[loc7] - ;; - mov rr[loc7] = in0 -(p6)mov cr.ifa = in1 -(p6)mov cr.itir = loc4 - ;; - srlz.i - ;; -(p6)itr.i itr[loc2] = in3 -(p7)ptr.i in1, loc4 - ;; -(p6)itr.d dtr[loc2] = in3 -(p7)ptr.d in1, loc4 - ;; - srlz.i - ;; -(p6)mov cr.ifa = in2 -(p6)mov cr.itir = loc5 - ;; -(p6)itr.d dtr[loc3] = in4 -(p7)ptr.d in2, loc5 - ;; - srlz.i - ;; - mov rr[loc7] = r8 - ;; - srlz.i - br.sptk rp -END(vmx_switch_rr7) - .align PAGE_SIZE diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_hypercall.c --- a/xen/arch/ia64/vmx_hypercall.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,235 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_hyparcall.c: handling hypercall from domain - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - */ - -#include <xen/config.h> -#include <xen/errno.h> -#include <asm/vmx_vcpu.h> -#include <public/xen.h> -#include <public/event_channel.h> -#include <asm/vmmu.h> -#include <asm/tlb.h> -#include <asm/regionreg.h> -#include <asm/page.h> -#include <xen/mm.h> -#include <xen/multicall.h> - - -void hyper_not_support(void) -{ - VCPU *vcpu=current; - vmx_vcpu_set_gr(vcpu, 8, -1, 0); - vmx_vcpu_increment_iip(vcpu); -} - -void hyper_mmu_update(void) -{ - VCPU *vcpu=current; - u64 r32,r33,r34,r35,ret; - vmx_vcpu_get_gr(vcpu,16,&r32); - vmx_vcpu_get_gr(vcpu,17,&r33); - vmx_vcpu_get_gr(vcpu,18,&r34); - vmx_vcpu_get_gr(vcpu,19,&r35); - ret=do_mmu_update((mmu_update_t*)r32,r33,r34,r35); - vmx_vcpu_set_gr(vcpu, 8, ret, 0); - vmx_vcpu_increment_iip(vcpu); -} - -unsigned long __hypercall_create_continuation( - unsigned int op, unsigned int nr_args, ...) 
-{ - struct mc_state *mcs = &mc_state[smp_processor_id()]; - VCPU *vcpu = current; - struct cpu_user_regs *regs = vcpu_regs(vcpu); - unsigned int i; - va_list args; - - va_start(args, nr_args); - if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) { - panic("PREEMPT happen in multicall\n"); // Not support yet - } else { - vmx_vcpu_set_gr(vcpu, 15, op, 0); - for ( i = 0; i < nr_args; i++) { - switch (i) { - case 0: vmx_vcpu_set_gr(vcpu, 16, va_arg(args, unsigned long), 0); - break; - case 1: vmx_vcpu_set_gr(vcpu, 17, va_arg(args, unsigned long), 0); - break; - case 2: vmx_vcpu_set_gr(vcpu, 18, va_arg(args, unsigned long), 0); - break; - case 3: vmx_vcpu_set_gr(vcpu, 19, va_arg(args, unsigned long), 0); - break; - case 4: vmx_vcpu_set_gr(vcpu, 20, va_arg(args, unsigned long), 0); - break; - default: panic("Too many args for hypercall continuation\n"); - break; - } - } - } - vcpu->arch.hypercall_continuation = 1; - va_end(args); - return op; -} - -void hyper_dom_mem_op(void) -{ - VCPU *vcpu=current; - u64 r32,r33,r34,r35,r36; - u64 ret; - vmx_vcpu_get_gr(vcpu,16,&r32); - vmx_vcpu_get_gr(vcpu,17,&r33); - vmx_vcpu_get_gr(vcpu,18,&r34); - vmx_vcpu_get_gr(vcpu,19,&r35); - vmx_vcpu_get_gr(vcpu,20,&r36); - ret=do_dom_mem_op(r32,(u64 *)r33,r34,r35,r36); - printf("do_dom_mem return value: %lx\n", ret); - vmx_vcpu_set_gr(vcpu, 8, ret, 0); - - /* Hard to define a special return value to indicate hypercall restart. - * So just add a new mark, which is SMP safe - */ - if (vcpu->arch.hypercall_continuation == 1) - vcpu->arch.hypercall_continuation = 0; - else - vmx_vcpu_increment_iip(vcpu); -} - - -void hyper_sched_op(void) -{ - VCPU *vcpu=current; - u64 r32,ret; - vmx_vcpu_get_gr(vcpu,16,&r32); - ret=do_sched_op(r32); - vmx_vcpu_set_gr(vcpu, 8, ret, 0); - - vmx_vcpu_increment_iip(vcpu); -} - -void hyper_dom0_op(void) -{ - VCPU *vcpu=current; - u64 r32,ret; - vmx_vcpu_get_gr(vcpu,16,&r32); - ret=do_dom0_op((dom0_op_t *)r32); - vmx_vcpu_set_gr(vcpu, 8, ret, 0); - - vmx_vcpu_increment_iip(vcpu); -} - -void hyper_event_channel_op(void) -{ - VCPU *vcpu=current; - u64 r32,ret; - vmx_vcpu_get_gr(vcpu,16,&r32); - ret=do_event_channel_op((evtchn_op_t *)r32); - vmx_vcpu_set_gr(vcpu, 8, ret, 0); - vmx_vcpu_increment_iip(vcpu); -} - -void hyper_xen_version(void) -{ - VCPU *vcpu=current; - u64 r32,ret; - vmx_vcpu_get_gr(vcpu,16,&r32); - ret=do_xen_version((int )r32); - vmx_vcpu_set_gr(vcpu, 8, ret, 0); - vmx_vcpu_increment_iip(vcpu); -} - -static int do_lock_page(VCPU *vcpu, u64 va, u64 lock) -{ - int i; - ia64_rr rr; - thash_cb_t *hcb; - hcb = vmx_vcpu_get_vtlb(vcpu); - rr = vmx_vcpu_rr(vcpu, va); - return thash_lock_tc(hcb, va ,1U<<rr.ps, rr.rid, DSIDE_TLB, lock); -} - -/* - * Lock guest page in vTLB, so that it's not relinquished by recycle - * session when HV is servicing that hypercall. 
- */ -void hyper_lock_page(void) -{ -//TODO: - VCPU *vcpu=current; - u64 va,lock, ret; - vmx_vcpu_get_gr(vcpu,16,&va); - vmx_vcpu_get_gr(vcpu,17,&lock); - ret=do_lock_page(vcpu, va, lock); - vmx_vcpu_set_gr(vcpu, 8, ret, 0); - - vmx_vcpu_increment_iip(vcpu); -} - -static int do_set_shared_page(VCPU *vcpu, u64 gpa) -{ - u64 shared_info, o_info; - struct domain *d = vcpu->domain; - struct vcpu *v; - if(vcpu->domain!=dom0) - return -EPERM; - shared_info = __gpa_to_mpa(vcpu->domain, gpa); - o_info = (u64)vcpu->domain->shared_info; - d->shared_info= (shared_info_t *)__va(shared_info); - - /* Copy existing shared info into new page */ - if (o_info) { - memcpy((void*)d->shared_info, (void*)o_info, PAGE_SIZE); - for_each_vcpu(d, v) { - v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id]; - } - /* If original page belongs to xen heap, then relinguish back - * to xen heap. Or else, leave to domain itself to decide. - */ - if (likely(IS_XEN_HEAP_FRAME(virt_to_page(o_info)))) - free_xenheap_page(o_info); - } else - memset(d->shared_info, 0, PAGE_SIZE); - return 0; -} - -void hyper_set_shared_page(void) -{ - VCPU *vcpu=current; - u64 gpa,ret; - vmx_vcpu_get_gr(vcpu,16,&gpa); - - ret=do_set_shared_page(vcpu, gpa); - vmx_vcpu_set_gr(vcpu, 8, ret, 0); - - vmx_vcpu_increment_iip(vcpu); -} - -/* -void hyper_grant_table_op(void) -{ - VCPU *vcpu=current; - u64 r32,r33,r34,ret; - vmx_vcpu_get_gr(vcpu,16,&r32); - vmx_vcpu_get_gr(vcpu,17,&r33); - vmx_vcpu_get_gr(vcpu,18,&r34); - - ret=do_grant_table_op((unsigned int)r32, (void *)r33, (unsigned int)r34); - vmx_vcpu_set_gr(vcpu, 8, ret, 0); -} -*/ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_init.c --- a/xen/arch/ia64/vmx_init.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,375 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_init.c: initialization work for vt specific domain - * Copyright (c) 2005, Intel Corporation. - * Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx> - * Xuefei Xu (Anthony Xu) <anthony.xu@xxxxxxxxx> - * Fred Yang <fred.yang@xxxxxxxxx> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - */ - -/* - * 05/08/16 Kun tian (Kevin Tian) <kevin.tian@xxxxxxxxx>: - * Disable doubling mapping - * - * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@xxxxxxxxx>: - * Simplied design in first step: - * - One virtual environment - * - Domain is bound to one LP - * Later to support guest SMP: - * - Need interface to handle VP scheduled to different LP - */ -#include <xen/config.h> -#include <xen/types.h> -#include <xen/sched.h> -#include <asm/pal.h> -#include <asm/page.h> -#include <asm/processor.h> -#include <asm/vmx_vcpu.h> -#include <xen/lib.h> -#include <asm/vmmu.h> -#include <public/arch-ia64.h> -#include <public/io/ioreq.h> -#include <asm/vmx_phy_mode.h> -#include <asm/processor.h> -#include <asm/vmx.h> -#include <xen/mm.h> - -/* Global flag to identify whether Intel vmx feature is on */ -u32 vmx_enabled = 0; -static u32 vm_order; -static u64 buffer_size; -static u64 vp_env_info; -static u64 vm_buffer = 0; /* Buffer required to bring up VMX feature */ -u64 __vsa_base = 0; /* Run-time service base of VMX */ - -/* Check whether vt feature is enabled or not. */ -void -identify_vmx_feature(void) -{ - pal_status_t ret; - u64 avail = 1, status = 1, control = 1; - - vmx_enabled = 0; - /* Check VT-i feature */ - ret = ia64_pal_proc_get_features(&avail, &status, &control); - if (ret != PAL_STATUS_SUCCESS) { - printk("Get proc features failed.\n"); - goto no_vti; - } - - /* FIXME: do we need to check status field, to see whether - * PSR.vm is actually enabled? If yes, aonther call to - * ia64_pal_proc_set_features may be reuqired then. - */ - printk("avail:0x%lx, status:0x%lx,control:0x%lx, vm?0x%lx\n", - avail, status, control, avail & PAL_PROC_VM_BIT); - if (!(avail & PAL_PROC_VM_BIT)) { - printk("No VT feature supported.\n"); - goto no_vti; - } - - ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info); - if (ret != PAL_STATUS_SUCCESS) { - printk("Get vp environment info failed.\n"); - goto no_vti; - } - - /* Does xen has ability to decode itself? */ - if (!(vp_env_info & VP_OPCODE)) - printk("WARNING: no opcode provided from hardware(%lx)!!!\n", vp_env_info); - vm_order = get_order(buffer_size); - printk("vm buffer size: %d, order: %d\n", buffer_size, vm_order); - - vmx_enabled = 1; -no_vti: - return; -} - -/* - * Init virtual environment on current LP - * vsa_base is the indicator whether it's first LP to be initialized - * for current domain. - */ -void -vmx_init_env(void) -{ - u64 status, tmp_base; - - if (!vm_buffer) { - vm_buffer = alloc_xenheap_pages(vm_order); - ASSERT(vm_buffer); - printk("vm_buffer: 0x%lx\n", vm_buffer); - } - - status=ia64_pal_vp_init_env(__vsa_base ? 
VP_INIT_ENV : VP_INIT_ENV_INITALIZE, - __pa(vm_buffer), - vm_buffer, - &tmp_base); - - if (status != PAL_STATUS_SUCCESS) { - printk("ia64_pal_vp_init_env failed.\n"); - return -1; - } - - if (!__vsa_base) - __vsa_base = tmp_base; - else - ASSERT(tmp_base != __vsa_base); - -#ifdef XEN_DBL_MAPPING - /* Init stub for rr7 switch */ - vmx_init_double_mapping_stub(); -#endif -} - -void vmx_setup_platform(struct vcpu *v, struct vcpu_guest_context *c) -{ - struct domain *d = v->domain; - shared_iopage_t *sp; - - ASSERT(d != dom0); /* only for non-privileged vti domain */ - d->arch.vmx_platform.shared_page_va = __va(c->share_io_pg); - sp = get_sp(d); - memset((char *)sp,0,PAGE_SIZE); - /* FIXME: temp due to old CP */ - sp->sp_global.eport = 2; -#ifdef V_IOSAPIC_READY - sp->vcpu_number = 1; -#endif - /* TEMP */ - d->arch.vmx_platform.pib_base = 0xfee00000UL; - - /* One more step to enable interrupt assist */ - set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags); - /* Only open one port for I/O and interrupt emulation */ - if (v == d->vcpu[0]) { - memset(&d->shared_info->evtchn_mask[0], 0xff, - sizeof(d->shared_info->evtchn_mask)); - clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]); - } - - /* FIXME: only support PMT table continuously by far */ - d->arch.pmt = __va(c->pt_base); - d->arch.max_pfn = c->pt_max_pfn; - - vmx_final_setup_domain(d); -} - -typedef union { - u64 value; - struct { - u64 number : 8; - u64 revision : 8; - u64 model : 8; - u64 family : 8; - u64 archrev : 8; - u64 rv : 24; - }; -} cpuid3_t; - -/* Allocate vpd from xenheap */ -static vpd_t *alloc_vpd(void) -{ - int i; - cpuid3_t cpuid3; - vpd_t *vpd; - - vpd = alloc_xenheap_pages(get_order(VPD_SIZE)); - if (!vpd) { - printk("VPD allocation failed.\n"); - return NULL; - } - - printk("vpd base: 0x%lx, vpd size:%d\n", vpd, sizeof(vpd_t)); - memset(vpd, 0, VPD_SIZE); - /* CPUID init */ - for (i = 0; i < 5; i++) - vpd->vcpuid[i] = ia64_get_cpuid(i); - - /* Limit the CPUID number to 5 */ - cpuid3.value = vpd->vcpuid[3]; - cpuid3.number = 4; /* 5 - 1 */ - vpd->vcpuid[3] = cpuid3.value; - - vpd->vdc.d_vmsw = 1; - return vpd; -} - - -#ifdef CONFIG_VTI -/* - * Create a VP on intialized VMX environment. - */ -static void -vmx_create_vp(struct vcpu *v) -{ - u64 ret; - vpd_t *vpd = v->arch.arch_vmx.vpd; - u64 ivt_base; - extern char vmx_ia64_ivt; - /* ia64_ivt is function pointer, so need this tranlation */ - ivt_base = (u64) &vmx_ia64_ivt; - printk("ivt_base: 0x%lx\n", ivt_base); - ret = ia64_pal_vp_create(vpd, ivt_base, 0); - if (ret != PAL_STATUS_SUCCESS) - panic("ia64_pal_vp_create failed. \n"); -} - -#ifdef XEN_DBL_MAPPING -void vmx_init_double_mapping_stub(void) -{ - u64 base, psr; - extern void vmx_switch_rr7(void); - - base = (u64) &vmx_switch_rr7; - base = *((u64*)base); - - psr = ia64_clear_ic(); - ia64_itr(0x1, IA64_TR_RR7_SWITCH_STUB, XEN_RR7_SWITCH_STUB, - pte_val(pfn_pte(__pa(base) >> PAGE_SHIFT, PAGE_KERNEL)), - RR7_SWITCH_SHIFT); - ia64_set_psr(psr); - ia64_srlz_i(); - printk("Add TR mapping for rr7 switch stub, with physical: 0x%lx\n", (u64)(__pa(base))); -} -#endif - -/* Other non-context related tasks can be done in context switch */ -void -vmx_save_state(struct vcpu *v) -{ - u64 status, psr; - u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt; - - /* FIXME: about setting of pal_proc_vector... 
time consuming */ - status = ia64_pal_vp_save(v->arch.arch_vmx.vpd, 0); - if (status != PAL_STATUS_SUCCESS) - panic("Save vp status failed\n"); - -#ifdef XEN_DBL_MAPPING - /* FIXME: Do we really need purge double mapping for old vcpu? - * Since rid is completely different between prev and next, - * it's not overlap and thus no MCA possible... */ - dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7])); - vmx_purge_double_mapping(dom_rr7, KERNEL_START, - (u64)v->arch.vtlb->ts->vhpt->hash); -#endif - - /* Need to save KR when domain switch, though HV itself doesn;t - * use them. - */ - v->arch.arch_vmx.vkr[0] = ia64_get_kr(0); - v->arch.arch_vmx.vkr[1] = ia64_get_kr(1); - v->arch.arch_vmx.vkr[2] = ia64_get_kr(2); - v->arch.arch_vmx.vkr[3] = ia64_get_kr(3); - v->arch.arch_vmx.vkr[4] = ia64_get_kr(4); - v->arch.arch_vmx.vkr[5] = ia64_get_kr(5); - v->arch.arch_vmx.vkr[6] = ia64_get_kr(6); - v->arch.arch_vmx.vkr[7] = ia64_get_kr(7); -} - -/* Even guest is in physical mode, we still need such double mapping */ -void -vmx_load_state(struct vcpu *v) -{ - u64 status, psr; - u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt; - u64 pte_xen, pte_vhpt; - int i; - - status = ia64_pal_vp_restore(v->arch.arch_vmx.vpd, 0); - if (status != PAL_STATUS_SUCCESS) - panic("Restore vp status failed\n"); - -#ifdef XEN_DBL_MAPPING - dom_rr7 = vmx_vrrtomrr(v, VMX(v, vrr[7])); - pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL)); - pte_vhpt = pte_val(pfn_pte((__pa(v->arch.vtlb->ts->vhpt->hash) >> PAGE_SHIFT), PAGE_KERNEL)); - vmx_insert_double_mapping(dom_rr7, KERNEL_START, - (u64)v->arch.vtlb->ts->vhpt->hash, - pte_xen, pte_vhpt); -#endif - - ia64_set_kr(0, v->arch.arch_vmx.vkr[0]); - ia64_set_kr(1, v->arch.arch_vmx.vkr[1]); - ia64_set_kr(2, v->arch.arch_vmx.vkr[2]); - ia64_set_kr(3, v->arch.arch_vmx.vkr[3]); - ia64_set_kr(4, v->arch.arch_vmx.vkr[4]); - ia64_set_kr(5, v->arch.arch_vmx.vkr[5]); - ia64_set_kr(6, v->arch.arch_vmx.vkr[6]); - ia64_set_kr(7, v->arch.arch_vmx.vkr[7]); - /* Guest vTLB is not required to be switched explicitly, since - * anchored in vcpu */ -} - -#ifdef XEN_DBL_MAPPING -/* Purge old double mapping and insert new one, due to rr7 change */ -void -vmx_change_double_mapping(struct vcpu *v, u64 oldrr7, u64 newrr7) -{ - u64 pte_xen, pte_vhpt, vhpt_base; - - vhpt_base = (u64)v->arch.vtlb->ts->vhpt->hash; - vmx_purge_double_mapping(oldrr7, KERNEL_START, - vhpt_base); - - pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL)); - pte_vhpt = pte_val(pfn_pte((__pa(vhpt_base) >> PAGE_SHIFT), PAGE_KERNEL)); - vmx_insert_double_mapping(newrr7, KERNEL_START, - vhpt_base, - pte_xen, pte_vhpt); -} -#endif // XEN_DBL_MAPPING -#endif // CONFIG_VTI - -/* - * Initialize VMX envirenment for guest. Only the 1st vp/vcpu - * is registered here. 
- */ -void -vmx_final_setup_domain(struct domain *d) -{ - struct vcpu *v = d->vcpu[0]; - vpd_t *vpd; - - /* Allocate resources for vcpu 0 */ - //memset(&v->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct)); - - vpd = alloc_vpd(); - ASSERT(vpd); - - v->arch.arch_vmx.vpd = vpd; - vpd->virt_env_vaddr = vm_buffer; - -#ifdef CONFIG_VTI - /* v->arch.schedule_tail = arch_vmx_do_launch; */ - vmx_create_vp(v); - - /* Set this ed to be vmx */ - set_bit(ARCH_VMX_VMCS_LOADED, &v->arch.arch_vmx.flags); - - /* Physical mode emulation initialization, including - * emulation ID allcation and related memory request - */ - physical_mode_init(v); - - vlsapic_reset(v); - vtm_init(v); -#endif - - /* Other vmx specific initialization work */ -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_interrupt.c --- a/xen/arch/ia64/vmx_interrupt.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,388 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_interrupt.c: handle inject interruption. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx> - * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx> - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - */ - - -#include <xen/types.h> -#include <asm/vmx_vcpu.h> -#include <asm/vmx_mm_def.h> -#include <asm/vmx_pal_vsa.h> -/* SDM vol2 5.5 - IVA based interruption handling */ -#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034 -void -collect_interruption(VCPU *vcpu) -{ - u64 ipsr; - u64 vdcr; - u64 vifs; - IA64_PSR vpsr; - REGS * regs = vcpu_regs(vcpu); - vpsr.val = vmx_vcpu_get_psr(vcpu); - - if(vpsr.ic){ - extern void vmx_dorfirfi(void); - if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi) - panic("COLLECT interruption for vmx_dorfirfi\n"); - - /* Sync mpsr id/da/dd/ss/ed bits to vipsr - * since after guest do rfi, we still want these bits on in - * mpsr - */ - - ipsr = regs->cr_ipsr; - vpsr.val = vpsr.val | (ipsr & (IA64_PSR_ID | IA64_PSR_DA - | IA64_PSR_DD |IA64_PSR_SS |IA64_PSR_ED)); - vmx_vcpu_set_ipsr(vcpu, vpsr.val); - - /* Currently, for trap, we do not advance IIP to next - * instruction. 
That's because we assume caller already - * set up IIP correctly - */ - - vmx_vcpu_set_iip(vcpu , regs->cr_iip); - - /* set vifs.v to zero */ - vifs = VPD_CR(vcpu,ifs); - vifs &= ~IA64_IFS_V; - vmx_vcpu_set_ifs(vcpu, vifs); - - vmx_vcpu_set_iipa(vcpu, regs->cr_iipa); - } - - vdcr = VPD_CR(vcpu,dcr); - - /* Set guest psr - * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged - * be: set to the value of dcr.be - * pp: set to the value of dcr.pp - */ - vpsr.val &= INITIAL_PSR_VALUE_AT_INTERRUPTION; - vpsr.val |= ( vdcr & IA64_DCR_BE); - - /* VDCR pp bit position is different from VPSR pp bit */ - if ( vdcr & IA64_DCR_PP ) { - vpsr.val |= IA64_PSR_PP; - } else { - vpsr.val &= ~IA64_PSR_PP;; - } - - vmx_vcpu_set_psr(vcpu, vpsr.val); - -} -int -inject_guest_interruption(VCPU *vcpu, u64 vec) -{ - u64 viva; - REGS *regs; - regs=vcpu_regs(vcpu); - - collect_interruption(vcpu); - - vmx_vcpu_get_iva(vcpu,&viva); - regs->cr_iip = viva + vec; -} - - -/* - * Set vIFA & vITIR & vIHA, when vPSR.ic =1 - * Parameter: - * set_ifa: if true, set vIFA - * set_itir: if true, set vITIR - * set_iha: if true, set vIHA - */ -void -set_ifa_itir_iha (VCPU *vcpu, u64 vadr, - int set_ifa, int set_itir, int set_iha) -{ - IA64_PSR vpsr; - u64 value; - vpsr.val = vmx_vcpu_get_psr(vcpu); - /* Vol2, Table 8-1 */ - if ( vpsr.ic ) { - if ( set_ifa){ - vmx_vcpu_set_ifa(vcpu, vadr); - } - if ( set_itir) { - value = vmx_vcpu_get_itir_on_fault(vcpu, vadr); - vmx_vcpu_set_itir(vcpu, value); - } - - if ( set_iha) { - vmx_vcpu_thash(vcpu, vadr, &value); - vmx_vcpu_set_iha(vcpu, value); - } - } - - -} - -/* - * Data TLB Fault - * @ Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -dtlb_fault (VCPU *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA */ - set_ifa_itir_iha (vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu,IA64_DATA_TLB_VECTOR); -} - -/* - * Instruction TLB Fault - * @ Instruction TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -itlb_fault (VCPU *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA */ - set_ifa_itir_iha (vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu,IA64_INST_TLB_VECTOR); -} - - - -/* - * Data Nested TLB Fault - * @ Data Nested TLB Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -nested_dtlb (VCPU *vcpu) -{ - inject_guest_interruption(vcpu,IA64_DATA_NESTED_TLB_VECTOR); -} - -/* - * Alternate Data TLB Fault - * @ Alternate Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -alt_dtlb (VCPU *vcpu, u64 vadr) -{ - set_ifa_itir_iha (vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu,IA64_ALT_DATA_TLB_VECTOR); -} - - -/* - * Data TLB Fault - * @ Data TLB vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -alt_itlb (VCPU *vcpu, u64 vadr) -{ - set_ifa_itir_iha (vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu,IA64_ALT_INST_TLB_VECTOR); -} - -/* Deal with: - * VHPT Translation Vector - */ -static void -_vhpt_fault(VCPU *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR, IHA*/ - set_ifa_itir_iha (vcpu, vadr, 1, 1, 1); - inject_guest_interruption(vcpu,IA64_VHPT_TRANS_VECTOR); - - -} - -/* - * VHPT Instruction Fault - * @ VHPT Translation vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -ivhpt_fault (VCPU *vcpu, u64 vadr) -{ - _vhpt_fault(vcpu, vadr); -} - - -/* - * VHPT Data Fault - * @ VHPT Translation vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -dvhpt_fault (VCPU *vcpu, u64 vadr) -{ - _vhpt_fault(vcpu, vadr); -} - - - -/* - * Deal with: - * General Exception vector - */ -void -_general_exception (VCPU *vcpu) -{ - 
inject_guest_interruption(vcpu,IA64_GENEX_VECTOR); -} - - -/* - * Illegal Operation Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -illegal_op (VCPU *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Illegal Dependency Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -illegal_dep (VCPU *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Reserved Register/Field Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -rsv_reg_field (VCPU *vcpu) -{ - _general_exception(vcpu); -} -/* - * Privileged Operation Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ - -void -privilege_op (VCPU *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Unimplement Data Address Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -unimpl_daddr (VCPU *vcpu) -{ - _general_exception(vcpu); -} - -/* - * Privileged Register Fault - * @ General Exception Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -privilege_reg (VCPU *vcpu) -{ - _general_exception(vcpu); -} - -/* Deal with - * Nat consumption vector - * Parameter: - * vaddr: Optional, if t == REGISTER - */ -static void -_nat_consumption_fault(VCPU *vcpu, u64 vadr, miss_type t) -{ - /* If vPSR.ic && t == DATA/INST, IFA */ - if ( t == DATA || t == INSTRUCTION ) { - /* IFA */ - set_ifa_itir_iha (vcpu, vadr, 1, 0, 0); - } - - inject_guest_interruption(vcpu,IA64_NAT_CONSUMPTION_VECTOR); -} - -/* - * IR Data Nat Page Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -static void -ir_nat_page_consumption (VCPU *vcpu, u64 vadr) -{ - _nat_consumption_fault(vcpu, vadr, DATA); -} - -/* - * Instruction Nat Page Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -inat_page_consumption (VCPU *vcpu, u64 vadr) -{ - _nat_consumption_fault(vcpu, vadr, INSTRUCTION); -} - -/* - * Register Nat Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -rnat_consumption (VCPU *vcpu) -{ - _nat_consumption_fault(vcpu, 0, REGISTER); -} - -/* - * Data Nat Page Consumption Fault - * @ Nat Consumption Vector - * Refer to SDM Vol2 Table 5-6 & 8-1 - */ -void -dnat_page_consumption (VCPU *vcpu, uint64_t vadr) -{ - _nat_consumption_fault(vcpu, vadr, DATA); -} - -/* Deal with - * Page not present vector - */ -void -page_not_present(VCPU *vcpu, u64 vadr) -{ - /* If vPSR.ic, IFA, ITIR */ - set_ifa_itir_iha (vcpu, vadr, 1, 1, 0); - inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); -} - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_irq_ia64.c --- a/xen/arch/ia64/vmx_irq_ia64.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,127 +0,0 @@ -#include <linux/config.h> -#include <linux/module.h> - -#include <linux/jiffies.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/ioport.h> -#include <linux/kernel_stat.h> -#include <linux/slab.h> -#include <linux/ptrace.h> -#include <linux/random.h> /* for rand_initialize_irq() */ -#include <linux/signal.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/threads.h> -#include <linux/bitops.h> - -#include <asm/delay.h> -#include <asm/intrinsics.h> -#include <asm/io.h> -#include <asm/hw_irq.h> -#include <asm/machvec.h> -#include <asm/pgtable.h> -#include <asm/system.h> - -#ifdef CONFIG_PERFMON -# include <asm/perfmon.h> -#endif - -#define IRQ_DEBUG 0 - -#ifdef 
CONFIG_VTI -#define vmx_irq_enter() \ - add_preempt_count(HARDIRQ_OFFSET); - -/* Now softirq will be checked when leaving hypervisor, or else - * scheduler irq will be executed too early. - */ -#define vmx_irq_exit(void) \ - sub_preempt_count(HARDIRQ_OFFSET); -/* - * That's where the IVT branches when we get an external - * interrupt. This branches to the correct hardware IRQ handler via - * function ptr. - */ -void -vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) -{ - unsigned long saved_tpr; - int wake_dom0 = 0; - - -#if IRQ_DEBUG - { - unsigned long bsp, sp; - - /* - * Note: if the interrupt happened while executing in - * the context switch routine (ia64_switch_to), we may - * get a spurious stack overflow here. This is - * because the register and the memory stack are not - * switched atomically. - */ - bsp = ia64_getreg(_IA64_REG_AR_BSP); - sp = ia64_getreg(_IA64_REG_AR_SP); - - if ((sp - bsp) < 1024) { - static unsigned char count; - static long last_time; - - if (jiffies - last_time > 5*HZ) - count = 0; - if (++count < 5) { - last_time = jiffies; - printk("ia64_handle_irq: DANGER: less than " - "1KB of free stack space!!\n" - "(bsp=0x%lx, sp=%lx)\n", bsp, sp); - } - } - } -#endif /* IRQ_DEBUG */ - - /* - * Always set TPR to limit maximum interrupt nesting depth to - * 16 (without this, it would be ~240, which could easily lead - * to kernel stack overflows). - */ - vmx_irq_enter(); - saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); - ia64_srlz_d(); - while (vector != IA64_SPURIOUS_INT_VECTOR) { - if (!IS_RESCHEDULE(vector)) { - ia64_setreg(_IA64_REG_CR_TPR, vector); - ia64_srlz_d(); - - if (vector != IA64_TIMER_VECTOR) { - /* FIXME: Leave IRQ re-route later */ - vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector); - wake_dom0 = 1; - } - else { // FIXME: Handle Timer only now - __do_IRQ(local_vector_to_irq(vector), regs); - } - - /* - * Disable interrupts and send EOI: - */ - local_irq_disable(); - ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); - } - else { - printf("Oops: RESCHEDULE IPI absorbed by HV\n"); - } - ia64_eoi(); - vector = ia64_get_ivr(); - } - /* - * This must be done *after* the ia64_eoi(). For example, the keyboard softirq - * handler needs to be able to wait for further keyboard interrupts, which can't - * come through until ia64_eoi() has been done. - */ - vmx_irq_exit(); - if ( wake_dom0 && current != dom0 ) - vcpu_wake(dom0->vcpu[0]); -} -#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_ivt.S --- a/xen/arch/ia64/vmx_ivt.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1085 +0,0 @@ -/* - * arch/ia64/kernel/vmx_ivt.S - * - * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co - * Stephane Eranian <eranian@xxxxxxxxxx> - * David Mosberger <davidm@xxxxxxxxxx> - * Copyright (C) 2000, 2002-2003 Intel Co - * Asit Mallick <asit.k.mallick@xxxxxxxxx> - * Suresh Siddha <suresh.b.siddha@xxxxxxxxx> - * Kenneth Chen <kenneth.w.chen@xxxxxxxxx> - * Fenghua Yu <fenghua.yu@xxxxxxxxx> - * - * - * 00/08/23 Asit Mallick <asit.k.mallick@xxxxxxxxx> TLB handling for SMP - * 00/12/20 David Mosberger-Tang <davidm@xxxxxxxxxx> DTLB/ITLB handler now uses virtual PT. - * - * 05/3/20 Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx) - * Supporting Intel virtualization architecture - * - */ - -/* - * This file defines the interruption vector table used by the CPU. - * It does not include one entry per possible cause of interruption. - * - * The first 20 entries of the table contain 64 bundles each while the - * remaining 48 entries contain only 16 bundles each. 
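For anyone checking the numbers in the comment above: IA-64 instruction bundles are 16 bytes, so the two entry sizes account exactly for the 32KB table size quoted a few lines further down:

    20 entries * 64 bundles * 16 bytes = 20480 bytes
    48 entries * 16 bundles * 16 bytes = 12288 bytes
    total                              = 32768 bytes = 32KB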
- * - * The 64 bundles are used to allow inlining the whole handler for critical - * interruptions like TLB misses. - * - * For each entry, the comment is as follows: - * - * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) - * entry offset ----/ / / / / - * entry number ---------/ / / / - * size of the entry -------------/ / / - * vector name -------------------------------------/ / - * interruptions triggering this vector ----------------------/ - * - * The table is 32KB in size and must be aligned on 32KB boundary. - * (The CPU ignores the 15 lower bits of the address) - * - * Table is based upon EAS2.6 (Oct 1999) - */ - -#include <linux/config.h> - -#include <asm/asmmacro.h> -#include <asm/break.h> -#include <asm/ia32.h> -#include <asm/kregs.h> -#include <asm/offsets.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/system.h> -#include <asm/thread_info.h> -#include <asm/unistd.h> -#include <asm/vhpt.h> - - -#if 0 - /* - * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't - * needed for something else before enabling this... - */ -# define VMX_DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 -#else -# define VMX_DBG_FAULT(i) -#endif - -#include "vmx_minstate.h" - - - -#define VMX_FAULT(n) \ -vmx_fault_##n:; \ - br.sptk vmx_fault_##n; \ - ;; \ - - -#define VMX_REFLECT(n) \ - mov r31=pr; \ - mov r19=n; /* prepare to save predicates */ \ - mov r29=cr.ipsr; \ - ;; \ - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ -(p7) br.sptk.many vmx_dispatch_reflection; \ - VMX_FAULT(n); \ - - -GLOBAL_ENTRY(vmx_panic) - br.sptk.many vmx_panic - ;; -END(vmx_panic) - - - - - - .section .text.ivt,"ax" - - .align 32768 // align on 32KB boundary - .global vmx_ia64_ivt -vmx_ia64_ivt: -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) -ENTRY(vmx_vhpt_miss) - VMX_FAULT(0) -END(vmx_vhpt_miss) - - .org vmx_ia64_ivt+0x400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0400 Entry 1 (size 64 bundles) ITLB (21) -ENTRY(vmx_itlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6) br.sptk vmx_fault_1 - mov r16 = cr.ifa - ;; - thash r17 = r16 - ttag r20 = r16 - ;; -vmx_itlb_loop: - cmp.eq p6,p0 = r0, r17 -(p6) br vmx_itlb_out - ;; - adds r22 = VLE_TITAG_OFFSET, r17 - adds r23 = VLE_CCHAIN_OFFSET, r17 - ;; - ld8 r24 = [r22] - ld8 r25 = [r23] - ;; - lfetch [r25] - cmp.eq p6,p7 = r20, r24 - ;; -(p7) mov r17 = r25; -(p7) br.sptk vmx_itlb_loop - ;; - adds r23 = VLE_PGFLAGS_OFFSET, r17 - adds r24 = VLE_ITIR_OFFSET, r17 - ;; - ld8 r26 = [r23] - ld8 r25 = [r24] - ;; - mov cr.itir = r25 - ;; - itc.i r26 - ;; - srlz.i - ;; - mov r23=r31 - mov r22=b0 - adds r16=IA64_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r16] - ;; - adds r19=VPD(VPSR),r18 - movl r20=__vsa_base - ;; - ld8 r19=[r19] - ld8 r20=[r20] - ;; - br.sptk ia64_vmm_entry - ;; -vmx_itlb_out: - mov r19 = 1 - br.sptk vmx_dispatch_tlb_miss - VMX_FAULT(1); -END(vmx_itlb_miss) - - .org vmx_ia64_ivt+0x0800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) -ENTRY(vmx_dtlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6)br.sptk vmx_fault_2 - mov r16 = cr.ifa - ;; - thash r17 = r16 - ttag r20 = r16 - ;; -vmx_dtlb_loop: - cmp.eq p6,p0 = r0, r17 -(p6)br vmx_dtlb_out - ;; 
- adds r22 = VLE_TITAG_OFFSET, r17 - adds r23 = VLE_CCHAIN_OFFSET, r17 - ;; - ld8 r24 = [r22] - ld8 r25 = [r23] - ;; - lfetch [r25] - cmp.eq p6,p7 = r20, r24 - ;; -(p7)mov r17 = r25; -(p7)br.sptk vmx_dtlb_loop - ;; - adds r23 = VLE_PGFLAGS_OFFSET, r17 - adds r24 = VLE_ITIR_OFFSET, r17 - ;; - ld8 r26 = [r23] - ld8 r25 = [r24] - ;; - mov cr.itir = r25 - ;; - itc.d r26 - ;; - srlz.d; - ;; - mov r23=r31 - mov r22=b0 - adds r16=IA64_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r16] - ;; - adds r19=VPD(VPSR),r18 - movl r20=__vsa_base - ;; - ld8 r19=[r19] - ld8 r20=[r20] - ;; - br.sptk ia64_vmm_entry - ;; -vmx_dtlb_out: - mov r19 = 2 - br.sptk vmx_dispatch_tlb_miss - VMX_FAULT(2); -END(vmx_dtlb_miss) - - .org vmx_ia64_ivt+0x0c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) -ENTRY(vmx_alt_itlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p7)br.sptk vmx_fault_3 - mov r16=cr.ifa // get address that caused the TLB miss - movl r17=PAGE_KERNEL - mov r24=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - shr.u r18=r16,55 // move address bit 59 to bit 4 - ;; - and r18=0x10,r18 // bit 4=address-bit(61) - or r19=r17,r19 // insert PTE control bits into r19 - ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 - ;; - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi - VMX_FAULT(3); -END(vmx_alt_itlb_miss) - - - .org vmx_ia64_ivt+0x1000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) -ENTRY(vmx_alt_dtlb_miss) - mov r31=pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p7)br.sptk vmx_fault_4 - mov r16=cr.ifa // get address that caused the TLB miss - movl r17=PAGE_KERNEL - mov r20=cr.isr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r24=cr.ipsr - ;; - and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field - tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? - shr.u r18=r16,55 // move address bit 59 to bit 4 - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? 
- ;; - and r18=0x10,r18 // bit 4=address-bit(61) -(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field - dep r24=-1,r24,IA64_PSR_ED_BIT,1 - or r19=r19,r17 // insert PTE control bits into r19 - ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 -(p6) mov cr.ipsr=r24 - ;; -(p7) itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi - VMX_FAULT(4); -END(vmx_alt_dtlb_miss) - - .org vmx_ia64_ivt+0x1400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) -ENTRY(vmx_nested_dtlb_miss) - VMX_FAULT(5) -END(vmx_nested_dtlb_miss) - - .org vmx_ia64_ivt+0x1800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) -ENTRY(vmx_ikey_miss) - VMX_REFLECT(6) -END(vmx_ikey_miss) - - .org vmx_ia64_ivt+0x1c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) -ENTRY(vmx_dkey_miss) - VMX_REFLECT(7) -END(vmx_dkey_miss) - - .org vmx_ia64_ivt+0x2000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) -ENTRY(vmx_dirty_bit) - VMX_REFLECT(8) -END(vmx_idirty_bit) - - .org vmx_ia64_ivt+0x2400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) -ENTRY(vmx_iaccess_bit) - VMX_REFLECT(9) -END(vmx_iaccess_bit) - - .org vmx_ia64_ivt+0x2800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) -ENTRY(vmx_daccess_bit) - VMX_REFLECT(10) -END(vmx_daccess_bit) - - .org vmx_ia64_ivt+0x2c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) -ENTRY(vmx_break_fault) - mov r31=pr - mov r19=11 - mov r30=cr.iim - movl r29=0x1100 - ;; - cmp.eq p6,p7=r30,r0 - (p6) br.sptk vmx_fault_11 - ;; - cmp.eq p6,p7=r29,r30 - (p6) br.dptk.few vmx_hypercall_dispatch - (p7) br.sptk.many vmx_dispatch_break_fault - ;; - VMX_FAULT(11); -END(vmx_break_fault) - - .org vmx_ia64_ivt+0x3000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) -ENTRY(vmx_interrupt) - mov r31=pr // prepare to save predicates - mov r19=12 - mov r29=cr.ipsr - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT - tbit.z p0,p15=r29,IA64_PSR_I_BIT - ;; -(p7) br.sptk vmx_dispatch_interrupt - ;; - mov r27=ar.rsc /* M */ - mov r20=r1 /* A */ - mov r25=ar.unat /* M */ - mov r26=ar.pfs /* I */ - mov r28=cr.iip /* M */ - cover /* B (or nothing) */ - ;; - mov r1=sp - ;; - invala /* M */ - mov r30=cr.ifs - ;; - addl r1=-IA64_PT_REGS_SIZE,r1 - ;; - adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ - adds r16=PT(CR_IPSR),r1 - ;; - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES - st8 [r16]=r29 /* save cr.ipsr */ - ;; - lfetch.fault.excl.nt1 [r17] - mov r29=b0 - ;; - adds r16=PT(R8),r1 /* initialize first base pointer */ - adds r17=PT(R9),r1 /* initialize second base pointer */ - mov r18=r0 /* make sure r18 isn't NaT */ - ;; -.mem.offset 0,0; st8.spill [r16]=r8,16 -.mem.offset 8,0; st8.spill [r17]=r9,16 - ;; -.mem.offset 0,0; st8.spill [r16]=r10,24 -.mem.offset 8,0; st8.spill 
[r17]=r11,24 - ;; - st8 [r16]=r28,16 /* save cr.iip */ - st8 [r17]=r30,16 /* save cr.ifs */ - mov r8=ar.fpsr /* M */ - mov r9=ar.csd - mov r10=ar.ssd - movl r11=FPSR_DEFAULT /* L-unit */ - ;; - st8 [r16]=r25,16 /* save ar.unat */ - st8 [r17]=r26,16 /* save ar.pfs */ - shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ - ;; - st8 [r16]=r27,16 /* save ar.rsc */ - adds r17=16,r17 /* skip over ar_rnat field */ - ;; /* avoid RAW on r16 & r17 */ - st8 [r17]=r31,16 /* save predicates */ - adds r16=16,r16 /* skip over ar_bspstore field */ - ;; - st8 [r16]=r29,16 /* save b0 */ - st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ - ;; -.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ -.mem.offset 8,0; st8.spill [r17]=r12,16 - adds r12=-16,r1 /* switch to kernel memory stack (with 16 bytes of scratch) */ - ;; -.mem.offset 0,0; st8.spill [r16]=r13,16 -.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ - mov r13=r21 /* establish `current' */ - ;; -.mem.offset 0,0; st8.spill [r16]=r15,16 -.mem.offset 8,0; st8.spill [r17]=r14,16 - dep r14=-1,r0,60,4 - ;; -.mem.offset 0,0; st8.spill [r16]=r2,16 -.mem.offset 8,0; st8.spill [r17]=r3,16 - adds r2=IA64_PT_REGS_R16_OFFSET,r1 - ;; - mov r8=ar.ccv - movl r1=__gp /* establish kernel global pointer */ - ;; \ - bsw.1 - ;; - alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group - mov out0=cr.ivr // pass cr.ivr as first arg - add out1=16,sp // pass pointer to pt_regs as second arg - - ssm psr.ic - ;; - srlz.i - ;; - (p15) ssm psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; -.mem.offset 0,0; st8.spill [r2]=r16,16 -.mem.offset 8,0; st8.spill [r3]=r17,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r18,16 -.mem.offset 8,0; st8.spill [r3]=r19,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r20,16 -.mem.offset 8,0; st8.spill [r3]=r21,16 - mov r18=b6 - ;; -.mem.offset 0,0; st8.spill [r2]=r22,16 -.mem.offset 8,0; st8.spill [r3]=r23,16 - mov r19=b7 - ;; -.mem.offset 0,0; st8.spill [r2]=r24,16 -.mem.offset 8,0; st8.spill [r3]=r25,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r26,16 -.mem.offset 8,0; st8.spill [r3]=r27,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r28,16 -.mem.offset 8,0; st8.spill [r3]=r29,16 - ;; -.mem.offset 0,0; st8.spill [r2]=r30,16 -.mem.offset 8,0; st8.spill [r3]=r31,32 - ;; - mov ar.fpsr=r11 /* M-unit */ - st8 [r2]=r8,8 /* ar.ccv */ - adds r24=PT(B6)-PT(F7),r3 - ;; - stf.spill [r2]=f6,32 - stf.spill [r3]=f7,32 - ;; - stf.spill [r2]=f8,32 - stf.spill [r3]=f9,32 - ;; - stf.spill [r2]=f10 - stf.spill [r3]=f11 - adds r25=PT(B7)-PT(F11),r3 - ;; - st8 [r24]=r18,16 /* b6 */ - st8 [r25]=r19,16 /* b7 */ - ;; - st8 [r24]=r9 /* ar.csd */ - st8 [r25]=r10 /* ar.ssd */ - ;; - srlz.d // make sure we see the effect of cr.ivr - movl r14=ia64_leave_nested - ;; - mov rp=r14 - br.call.sptk.many b6=vmx_ia64_handle_irq - ;; -END(vmx_interrupt) - - .org vmx_ia64_ivt+0x3400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3400 Entry 13 (size 64 bundles) Reserved -ENTRY(vmx_virtual_exirq) - VMX_DBG_FAULT(13) - mov r31=pr - mov r19=13 - br.sptk vmx_dispatch_vexirq -END(vmx_virtual_exirq) - - .org vmx_ia64_ivt+0x3800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3800 Entry 14 (size 64 bundles) Reserved - VMX_DBG_FAULT(14) - VMX_FAULT(14) - - - .org vmx_ia64_ivt+0x3c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x3c00 Entry 15 (size 64 bundles) 
Reserved - VMX_DBG_FAULT(15) - VMX_FAULT(15) - - - .org vmx_ia64_ivt+0x4000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4000 Entry 16 (size 64 bundles) Reserved - VMX_DBG_FAULT(16) - VMX_FAULT(16) - - .org vmx_ia64_ivt+0x4400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4400 Entry 17 (size 64 bundles) Reserved - VMX_DBG_FAULT(17) - VMX_FAULT(17) - - .org vmx_ia64_ivt+0x4800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4800 Entry 18 (size 64 bundles) Reserved - VMX_DBG_FAULT(18) - VMX_FAULT(18) - - .org vmx_ia64_ivt+0x4c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x4c00 Entry 19 (size 64 bundles) Reserved - VMX_DBG_FAULT(19) - VMX_FAULT(19) - - .org vmx_ia64_ivt+0x5000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5000 Entry 20 (size 16 bundles) Page Not Present -ENTRY(vmx_page_not_present) - VMX_REFLECT(20) -END(vmx_page_not_present) - - .org vmx_ia64_ivt+0x5100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5100 Entry 21 (size 16 bundles) Key Permission vector -ENTRY(vmx_key_permission) - VMX_REFLECT(21) -END(vmx_key_permission) - - .org vmx_ia64_ivt+0x5200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) -ENTRY(vmx_iaccess_rights) - VMX_REFLECT(22) -END(vmx_iaccess_rights) - - .org vmx_ia64_ivt+0x5300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) -ENTRY(vmx_daccess_rights) - VMX_REFLECT(23) -END(vmx_daccess_rights) - - .org vmx_ia64_ivt+0x5400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) -ENTRY(vmx_general_exception) - VMX_FAULT(24) -// VMX_REFLECT(24) -END(vmx_general_exception) - - .org vmx_ia64_ivt+0x5500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) -ENTRY(vmx_disabled_fp_reg) - VMX_REFLECT(25) -END(vmx_disabled_fp_reg) - - .org vmx_ia64_ivt+0x5600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) -ENTRY(vmx_nat_consumption) - VMX_REFLECT(26) -END(vmx_nat_consumption) - - .org vmx_ia64_ivt+0x5700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5700 Entry 27 (size 16 bundles) Speculation (40) -ENTRY(vmx_speculation_vector) - VMX_REFLECT(27) -END(vmx_speculation_vector) - - .org vmx_ia64_ivt+0x5800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5800 Entry 28 (size 16 bundles) Reserved - VMX_DBG_FAULT(28) - VMX_FAULT(28) - - .org vmx_ia64_ivt+0x5900 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) -ENTRY(vmx_debug_vector) - VMX_DBG_FAULT(29) - VMX_FAULT(29) -END(vmx_debug_vector) - - .org vmx_ia64_ivt+0x5a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5a00 Entry 30 (size 16 bundles) 
Unaligned Reference (57) -ENTRY(vmx_unaligned_access) - VMX_REFLECT(30) -END(vmx_unaligned_access) - - .org vmx_ia64_ivt+0x5b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) -ENTRY(vmx_unsupported_data_reference) - VMX_REFLECT(31) -END(vmx_unsupported_data_reference) - - .org vmx_ia64_ivt+0x5c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) -ENTRY(vmx_floating_point_fault) - VMX_REFLECT(32) -END(vmx_floating_point_fault) - - .org vmx_ia64_ivt+0x5d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) -ENTRY(vmx_floating_point_trap) - VMX_REFLECT(33) -END(vmx_floating_point_trap) - - .org vmx_ia64_ivt+0x5e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) -ENTRY(vmx_lower_privilege_trap) - VMX_REFLECT(34) -END(vmx_lower_privilege_trap) - - .org vmx_ia64_ivt+0x5f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) -ENTRY(vmx_taken_branch_trap) - VMX_REFLECT(35) -END(vmx_taken_branch_trap) - - .org vmx_ia64_ivt+0x6000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) -ENTRY(vmx_single_step_trap) - VMX_REFLECT(36) -END(vmx_single_step_trap) - - .org vmx_ia64_ivt+0x6100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault -ENTRY(vmx_virtualization_fault) - VMX_DBG_FAULT(37) - mov r31=pr - mov r19=37 - br.sptk vmx_dispatch_virtualization_fault -END(vmx_virtualization_fault) - - .org vmx_ia64_ivt+0x6200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6200 Entry 38 (size 16 bundles) Reserved - VMX_DBG_FAULT(38) - VMX_FAULT(38) - - .org vmx_ia64_ivt+0x6300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6300 Entry 39 (size 16 bundles) Reserved - VMX_DBG_FAULT(39) - VMX_FAULT(39) - - .org vmx_ia64_ivt+0x6400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6400 Entry 40 (size 16 bundles) Reserved - VMX_DBG_FAULT(40) - VMX_FAULT(40) - - .org vmx_ia64_ivt+0x6500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6500 Entry 41 (size 16 bundles) Reserved - VMX_DBG_FAULT(41) - VMX_FAULT(41) - - .org vmx_ia64_ivt+0x6600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6600 Entry 42 (size 16 bundles) Reserved - VMX_DBG_FAULT(42) - VMX_FAULT(42) - - .org vmx_ia64_ivt+0x6700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6700 Entry 43 (size 16 bundles) Reserved - VMX_DBG_FAULT(43) - VMX_FAULT(43) - - .org vmx_ia64_ivt+0x6800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6800 Entry 44 (size 16 bundles) Reserved - VMX_DBG_FAULT(44) - VMX_FAULT(44) - - .org vmx_ia64_ivt+0x6900 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) -ENTRY(vmx_ia32_exception) - VMX_DBG_FAULT(45) - VMX_FAULT(45) -END(vmx_ia32_exception) - - .org vmx_ia64_ivt+0x6a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) -ENTRY(vmx_ia32_intercept) - VMX_DBG_FAULT(46) - VMX_FAULT(46) -END(vmx_ia32_intercept) - - .org vmx_ia64_ivt+0x6b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) -ENTRY(vmx_ia32_interrupt) - VMX_DBG_FAULT(47) - VMX_FAULT(47) -END(vmx_ia32_interrupt) - - .org vmx_ia64_ivt+0x6c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6c00 Entry 48 (size 16 bundles) Reserved - VMX_DBG_FAULT(48) - VMX_FAULT(48) - - .org vmx_ia64_ivt+0x6d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6d00 Entry 49 (size 16 bundles) Reserved - VMX_DBG_FAULT(49) - VMX_FAULT(49) - - .org vmx_ia64_ivt+0x6e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6e00 Entry 50 (size 16 bundles) Reserved - VMX_DBG_FAULT(50) - VMX_FAULT(50) - - .org vmx_ia64_ivt+0x6f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x6f00 Entry 51 (size 16 bundles) Reserved - VMX_DBG_FAULT(51) - VMX_FAULT(51) - - .org vmx_ia64_ivt+0x7000 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7000 Entry 52 (size 16 bundles) Reserved - VMX_DBG_FAULT(52) - VMX_FAULT(52) - - .org vmx_ia64_ivt+0x7100 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7100 Entry 53 (size 16 bundles) Reserved - VMX_DBG_FAULT(53) - VMX_FAULT(53) - - .org vmx_ia64_ivt+0x7200 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7200 Entry 54 (size 16 bundles) Reserved - VMX_DBG_FAULT(54) - VMX_FAULT(54) - - .org vmx_ia64_ivt+0x7300 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7300 Entry 55 (size 16 bundles) Reserved - VMX_DBG_FAULT(55) - VMX_FAULT(55) - - .org vmx_ia64_ivt+0x7400 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7400 Entry 56 (size 16 bundles) Reserved - VMX_DBG_FAULT(56) - VMX_FAULT(56) - - .org vmx_ia64_ivt+0x7500 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7500 Entry 57 (size 16 bundles) Reserved - VMX_DBG_FAULT(57) - VMX_FAULT(57) - - .org vmx_ia64_ivt+0x7600 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7600 Entry 58 (size 16 bundles) Reserved - VMX_DBG_FAULT(58) - VMX_FAULT(58) - - .org vmx_ia64_ivt+0x7700 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7700 Entry 59 (size 16 bundles) Reserved - VMX_DBG_FAULT(59) - VMX_FAULT(59) - - .org vmx_ia64_ivt+0x7800 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7800 Entry 60 (size 16 bundles) Reserved - VMX_DBG_FAULT(60) - VMX_FAULT(60) - - .org vmx_ia64_ivt+0x7900 
-///////////////////////////////////////////////////////////////////////////////////////// -// 0x7900 Entry 61 (size 16 bundles) Reserved - VMX_DBG_FAULT(61) - VMX_FAULT(61) - - .org vmx_ia64_ivt+0x7a00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7a00 Entry 62 (size 16 bundles) Reserved - VMX_DBG_FAULT(62) - VMX_FAULT(62) - - .org vmx_ia64_ivt+0x7b00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7b00 Entry 63 (size 16 bundles) Reserved - VMX_DBG_FAULT(63) - VMX_FAULT(63) - - .org vmx_ia64_ivt+0x7c00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7c00 Entry 64 (size 16 bundles) Reserved - VMX_DBG_FAULT(64) - VMX_FAULT(64) - - .org vmx_ia64_ivt+0x7d00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7d00 Entry 65 (size 16 bundles) Reserved - VMX_DBG_FAULT(65) - VMX_FAULT(65) - - .org vmx_ia64_ivt+0x7e00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7e00 Entry 66 (size 16 bundles) Reserved - VMX_DBG_FAULT(66) - VMX_FAULT(66) - - .org vmx_ia64_ivt+0x7f00 -///////////////////////////////////////////////////////////////////////////////////////// -// 0x7f00 Entry 67 (size 16 bundles) Reserved - VMX_DBG_FAULT(67) - VMX_FAULT(67) - - .org vmx_ia64_ivt+0x8000 - // There is no particular reason for this code to be here, other than that - // there happens to be space here that would go unused otherwise. If this - // fault ever gets "unreserved", simply moved the following code to a more - // suitable spot... - - -ENTRY(vmx_dispatch_reflection) - /* - * Input: - * psr.ic: off - * r19: intr type (offset into ivt, see ia64_int.h) - * r31: contains saved predicates (pr) - */ - VMX_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,4,0 - mov out0=cr.ifa - mov out1=cr.isr - mov out2=cr.iim - mov out3=r15 - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - adds r3=16,r2 // set up second base pointer - ;; - VMX_SAVE_REST - movl r14=ia64_leave_hypervisor - ;; - mov rp=r14 - br.call.sptk.many b6=vmx_reflect_interruption -END(vmx_dispatch_reflection) - -ENTRY(vmx_dispatch_virtualization_fault) - VMX_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,3,0 // now it's safe (must be first in insn group!) 
- mov out0=r13 //vcpu - mov out1=r4 //cause - mov out2=r5 //opcode - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - adds r3=16,r2 // set up second base pointer - ;; - VMX_SAVE_REST - movl r14=ia64_leave_hypervisor - ;; - mov rp=r14 - br.call.sptk.many b6=vmx_emulate -END(vmx_dispatch_virtualization_fault) - - -ENTRY(vmx_dispatch_vexirq) - VMX_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r13 - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - adds r3=16,r2 // set up second base pointer - ;; - VMX_SAVE_REST - movl r14=ia64_leave_hypervisor - ;; - mov rp=r14 - br.call.sptk.many b6=vmx_vexirq -END(vmx_dispatch_vexirq) - -ENTRY(vmx_dispatch_tlb_miss) - VMX_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=r13 - mov out1=r15 - mov out2=cr.ifa - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - adds r3=16,r2 // set up second base pointer - ;; - VMX_SAVE_REST - movl r14=ia64_leave_hypervisor - ;; - mov rp=r14 - br.call.sptk.many b6=vmx_hpw_miss -END(vmx_dispatch_tlb_miss) - - -ENTRY(vmx_dispatch_break_fault) - VMX_SAVE_MIN_WITH_COVER_R19 - ;; - ;; - alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) - mov out0=cr.ifa - adds out1=16,sp - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.iim // FIXME: pity to make this slow access twice - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15)ssm psr.i // restore psr.i - adds r3=16,r2 // set up second base pointer - ;; - VMX_SAVE_REST - movl r14=ia64_leave_hypervisor - ;; - mov rp=r14 - br.call.sptk.many b6=vmx_ia64_handle_break - ;; -END(vmx_dispatch_break_fault) - - -ENTRY(vmx_hypercall_dispatch) - VMX_SAVE_MIN_WITH_COVER - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - (p15) ssm psr.i // restore psr.i - adds r3=16,r2 // set up second base pointer - ;; - VMX_SAVE_REST - ;; - movl r14=ia64_leave_hypervisor - movl r2=hyper_call_table - ;; - mov rp=r14 - shladd r2=r15,3,r2 - ;; - ld8 r2=[r2] - ;; - mov b6=r2 - ;; - br.call.sptk.many b6=b6 - ;; -END(vmx_hypercall_dispatch) - - - -ENTRY(vmx_dispatch_interrupt) - VMX_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 - ;; - alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group - mov out0=cr.ivr // pass cr.ivr as first arg - add out1=16,sp // pass pointer to pt_regs as second arg - - ssm psr.ic - ;; - srlz.i - ;; - (p15) ssm psr.i - adds r3=16,r2 // set up second base pointer for SAVE_REST - ;; - VMX_SAVE_REST - movl r14=ia64_leave_hypervisor - ;; - mov rp=r14 - br.call.sptk.many b6=vmx_ia64_handle_irq -END(vmx_dispatch_interrupt) - - - - .rodata - .align 8 - .globl hyper_call_table -hyper_call_table: - data8 hyper_not_support //hyper_set_trap_table /* 0 */ - data8 hyper_mmu_update - data8 hyper_not_support //hyper_set_gdt - data8 hyper_not_support //hyper_stack_switch - data8 hyper_not_support //hyper_set_callbacks - data8 hyper_not_support //hyper_fpu_taskswitch /* 5 */ - data8 hyper_sched_op - data8 hyper_dom0_op - data8 hyper_not_support //hyper_set_debugreg - data8 hyper_not_support //hyper_get_debugreg - data8 hyper_not_support //hyper_update_descriptor /* 10 */ - data8 hyper_not_support //hyper_set_fast_trap - data8 hyper_dom_mem_op - data8 hyper_not_support //hyper_multicall - data8 hyper_not_support //hyper_update_va_mapping - data8 
hyper_not_support //hyper_set_timer_op /* 15 */ - data8 hyper_event_channel_op - data8 hyper_xen_version - data8 hyper_not_support //hyper_console_io - data8 hyper_not_support //hyper_physdev_op - data8 hyper_not_support //hyper_grant_table_op /* 20 */ - data8 hyper_not_support //hyper_vm_assist - data8 hyper_not_support //hyper_update_va_mapping_otherdomain - data8 hyper_not_support //hyper_switch_vm86 - data8 hyper_not_support //hyper_boot_vcpu - data8 hyper_not_support //hyper_ni_hypercall /* 25 */ - data8 hyper_not_support //hyper_mmuext_op - data8 hyper_lock_page - data8 hyper_set_shared_page diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_minstate.h --- a/xen/arch/ia64/vmx_minstate.h Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,333 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_minstate.h: - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - */ - -#include <linux/config.h> - -#include <asm/asmmacro.h> -#include <asm/fpu.h> -#include <asm/mmu_context.h> -#include <asm/offsets.h> -#include <asm/pal.h> -#include <asm/pgtable.h> -#include <asm/processor.h> -#include <asm/ptrace.h> -#include <asm/system.h> -#include <asm/vmx_pal_vsa.h> -#include <asm/vmx_vpd.h> -#include <asm/cache.h> -#include "entry.h" - -#define VMX_MINSTATE_START_SAVE_MIN \ - mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ - ;; \ - mov.m r28=ar.rnat; \ - addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ - ;; \ - lfetch.fault.excl.nt1 [r22]; \ - addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ - mov r23=ar.bspstore; /* save ar.bspstore */ \ - ;; \ - mov ar.bspstore=r22; /* switch to kernel RBS */ \ - ;; \ - mov r18=ar.bsp; \ - mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ - - - -#define VMX_MINSTATE_END_SAVE_MIN \ - bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ - ;; - - -#define PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \ - /* begin to call pal vps sync_read and cleanup psr.pl */ \ - add r25=IA64_VPD_BASE_OFFSET, r21; \ - movl r20=__vsa_base; \ - ;; \ - ld8 r25=[r25]; /* read vpd base */ \ - ld8 r20=[r20]; /* read entry point */ \ - ;; \ - mov r6=r25; \ - add r20=PAL_VPS_SYNC_READ,r20; \ - ;; \ -{ .mii; \ - add r22=VPD(VPSR),r25; \ - mov r24=ip; \ - mov b0=r20; \ - ;; \ -}; \ -{ .mmb; \ - add r24 = 0x20, r24; \ - mov r16 = cr.ipsr; /* Temp workaround since psr.ic is off */ \ - br.cond.sptk b0; /* call the service */ \ - ;; \ -}; \ - ld8 r7=[r22]; \ - /* deposite ipsr bit cpl into vpd.vpsr, since epc will change */ \ - extr.u r30=r16, IA64_PSR_CPL0_BIT, 2; \ - ;; \ - dep r7=r30, r7, IA64_PSR_CPL0_BIT, 2; \ - ;; \ - extr.u r30=r16, IA64_PSR_BE_BIT, 5; \ - ;; \ - dep r7=r30, r7, IA64_PSR_BE_BIT, 5; \ - ;; \ - extr.u r30=r16, 
IA64_PSR_RI_BIT, 2; \ - ;; \ - dep r7=r30, r7, IA64_PSR_RI_BIT, 2; \ - ;; \ - st8 [r22]=r7; \ - ;; - - - -#define IA64_CURRENT_REG IA64_KR(CURRENT) /* r21 is reserved for current pointer */ -//#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=IA64_CURRENT_REG -#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=r21 - -/* - * VMX_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves - * the minimum state necessary that allows us to turn psr.ic back - * on. - * - * Assumed state upon entry: - * psr.ic: off - * r31: contains saved predicates (pr) - * - * Upon exit, the state is as follows: - * psr.ic: off - * r2 = points to &pt_regs.r16 - * r8 = contents of ar.ccv - * r9 = contents of ar.csd - * r10 = contents of ar.ssd - * r11 = FPSR_DEFAULT - * r12 = kernel sp (kernel virtual address) - * r13 = points to current task_struct (kernel virtual address) - * p15 = TRUE if psr.i is set in cr.ipsr - * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: - * preserved - * - * Note that psr.ic is NOT turned on by this macro. This is so that - * we can pass interruption state as arguments to a handler. - */ -#define VMX_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ -/* switch rr7 */ \ - movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \ - movl r17=(7<<61); \ - movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \ - movl r22=(6<<61); \ - movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1); \ - movl r23=(5<<61); \ - ;; \ - mov rr[r17]=r16; \ - mov rr[r22]=r20; \ - mov rr[r23]=r18; \ - ;; \ - srlz.i; \ - ;; \ - VMX_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ - mov r27=ar.rsc; /* M */ \ - mov r20=r1; /* A */ \ - mov r26=ar.unat; /* M */ \ - mov r29=cr.ipsr; /* M */ \ - mov r18=cr.isr; \ - COVER; /* B;; (or nothing) */ \ - ;; \ - tbit.z p6,p0=r29,IA64_PSR_VM_BIT; \ - tbit.nz.or p6,p0 = r18,39; \ - ;; \ -(p6) br.sptk.few vmx_panic; \ - tbit.z p0,p15=r29,IA64_PSR_I_BIT; \ - mov r1=r16; \ -/* mov r21=r16; */ \ - /* switch from user to kernel RBS: */ \ - ;; \ - invala; /* M */ \ - SAVE_IFS; \ - ;; \ - VMX_MINSTATE_START_SAVE_MIN \ - adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ - adds r16=PT(CR_IPSR),r1; \ - ;; \ - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ - st8 [r16]=r29; /* save cr.ipsr */ \ - ;; \ - lfetch.fault.excl.nt1 [r17]; \ - tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ - mov r29=b0 \ - ;; \ - adds r16=PT(R8),r1; /* initialize first base pointer */ \ - adds r17=PT(R9),r1; /* initialize second base pointer */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r8,16; \ -.mem.offset 8,0; st8.spill [r17]=r9,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r10,24; \ -.mem.offset 8,0; st8.spill [r17]=r11,24; \ - ;; \ - mov r8=ar.pfs; /* I */ \ - mov r9=cr.iip; /* M */ \ - mov r10=ar.fpsr; /* M */ \ - ;; \ - st8 [r16]=r9,16; /* save cr.iip */ \ - st8 [r17]=r30,16; /* save cr.ifs */ \ - sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ - ;; \ - st8 [r16]=r26,16; /* save ar.unat */ \ - st8 [r17]=r8,16; /* save ar.pfs */ \ - shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ - ;; \ - st8 [r16]=r27,16; /* save ar.rsc */ \ - st8 [r17]=r28,16; /* save ar.rnat */ \ - ;; /* avoid RAW on r16 & r17 */ \ - st8 [r16]=r23,16; /* save ar.bspstore */ \ - st8 [r17]=r31,16; /* save predicates */ \ - ;; \ - st8 [r16]=r29,16; /* save b0 */ \ - st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ -.mem.offset 8,0; 
st8.spill [r17]=r12,16; \ - adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r13,16; \ -.mem.offset 8,0; st8.spill [r17]=r10,16; /* save ar.fpsr */ \ - mov r13=r21; /* establish `current' */ \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r15,16; \ -.mem.offset 8,0; st8.spill [r17]=r14,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r16]=r2,16; \ -.mem.offset 8,0; st8.spill [r17]=r3,16; \ - adds r2=PT(F6),r1; \ - ;; \ - .mem.offset 0,0; st8.spill [r16]=r4,16; \ - .mem.offset 8,0; st8.spill [r17]=r5,16; \ - ;; \ - .mem.offset 0,0; st8.spill [r16]=r6,16; \ - .mem.offset 8,0; st8.spill [r17]=r7,16; \ - mov r20=ar.ccv; \ - ;; \ - mov r18=cr.iipa; \ - mov r4=cr.isr; \ - mov r22=ar.unat; \ - ;; \ - st8 [r16]=r18,16; \ - st8 [r17]=r4; \ - ;; \ - adds r16=PT(EML_UNAT),r1; \ - adds r17=PT(AR_CCV),r1; \ - ;; \ - st8 [r16]=r22,8; \ - st8 [r17]=r20; \ - mov r4=r24; \ - mov r5=r25; \ - ;; \ - st8 [r16]=r0; \ - EXTRA; \ - mov r9=ar.csd; \ - mov r10=ar.ssd; \ - movl r11=FPSR_DEFAULT; /* L-unit */ \ - movl r1=__gp; /* establish kernel global pointer */ \ - ;; \ - PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \ - VMX_MINSTATE_END_SAVE_MIN - -/* - * SAVE_REST saves the remainder of pt_regs (with psr.ic on). - * - * Assumed state upon entry: - * psr.ic: on - * r2: points to &pt_regs.f6 - * r3: points to &pt_regs.f7 - * r4,r5,scrach - * r6: points to vpd - * r7: vpsr - * r9: contents of ar.csd - * r10: contents of ar.ssd - * r11: FPSR_DEFAULT - * - * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. - */ -#define VMX_SAVE_REST \ - tbit.z pBN0,pBN1=r7,IA64_PSR_BN_BIT; /* guest bank0 or bank1 ? */ \ - ;; \ -(pBN0) add r4=VPD(VBGR),r6; \ -(pBN0) add r5=VPD(VBGR)+0x8,r6; \ -(pBN0) add r7=VPD(VBNAT),r6; \ - ;; \ -(pBN1) add r5=VPD(VGR)+0x8,r6; \ -(pBN1) add r4=VPD(VGR),r6; \ -(pBN1) add r7=VPD(VNAT),r6; \ - ;; \ -.mem.offset 0,0; st8.spill [r4]=r16,16; \ -.mem.offset 8,0; st8.spill [r5]=r17,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r4]=r18,16; \ -.mem.offset 8,0; st8.spill [r5]=r19,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r4]=r20,16; \ -.mem.offset 8,0; st8.spill [r5]=r21,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r4]=r22,16; \ -.mem.offset 8,0; st8.spill [r5]=r23,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r4]=r24,16; \ -.mem.offset 8,0; st8.spill [r5]=r25,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r4]=r26,16; \ -.mem.offset 8,0; st8.spill [r5]=r27,16; \ - ;; \ -.mem.offset 0,0; st8.spill [r4]=r28,16; \ -.mem.offset 8,0; st8.spill [r5]=r29,16; \ - mov r26=b6; \ - ;; \ -.mem.offset 0,0; st8.spill [r4]=r30,16; \ -.mem.offset 8,0; st8.spill [r5]=r31,16; \ - mov r27=b7; \ - ;; \ - mov r30=ar.unat; \ - ;; \ - st8 [r7]=r30; \ - mov ar.fpsr=r11; /* M-unit */ \ - ;; \ - stf.spill [r2]=f6,32; \ - stf.spill [r3]=f7,32; \ - ;; \ - stf.spill [r2]=f8,32; \ - stf.spill [r3]=f9,32; \ - ;; \ - stf.spill [r2]=f10; \ - stf.spill [r3]=f11; \ - ;; \ - adds r2=PT(B6)-PT(F10),r2; \ - adds r3=PT(B7)-PT(F11),r3; \ - ;; \ - st8 [r2]=r26,16; /* b6 */ \ - st8 [r3]=r27,16; /* b7 */ \ - ;; \ - st8 [r2]=r9; /* ar.csd */ \ - st8 [r3]=r10; /* ar.ssd */ \ - ;; - -#define VMX_SAVE_MIN_WITH_COVER VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs,) -#define VMX_SAVE_MIN_WITH_COVER_R19 VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19) -#define VMX_SAVE_MIN VMX_DO_SAVE_MIN( , mov r30=r0, ) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_phy_mode.c --- a/xen/arch/ia64/vmx_phy_mode.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,433 +0,0 @@ -/* -*- Mode:C; 
c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_phy_mode.c: emulating domain physical mode. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Arun Sharma (arun.sharma@xxxxxxxxx) - * Kun Tian (Kevin Tian) (kevin.tian@xxxxxxxxx) - * Xuefei Xu (Anthony Xu) (anthony.xu@xxxxxxxxx) - */ - - -#include <asm/processor.h> -#include <asm/gcc_intrin.h> -#include <asm/vmx_phy_mode.h> -#include <xen/sched.h> -#include <asm/pgtable.h> - - -int valid_mm_mode[8] = { - GUEST_PHYS, /* (it, dt, rt) -> (0, 0, 0) */ - INV_MODE, - INV_MODE, - GUEST_PHYS, /* (it, dt, rt) -> (0, 1, 1) */ - INV_MODE, - GUEST_PHYS, /* (it, dt, rt) -> (1, 0, 1) */ - INV_MODE, - GUEST_VIRT, /* (it, dt, rt) -> (1, 1, 1).*/ -}; - -/* - * Special notes: - * - Index by it/dt/rt sequence - * - Only existing mode transitions are allowed in this table - * - RSE is placed at lazy mode when emulating guest partial mode - * - If gva happens to be rr0 and rr4, only allowed case is identity - * mapping (gva=gpa), or panic! (How?) - */ -int mm_switch_table[8][8] = { - /* 2004/09/12(Kevin): Allow switch to self */ - /* - * (it,dt,rt): (0,0,0) -> (1,1,1) - * This kind of transition usually occurs in the very early - * stage of Linux boot up procedure. Another case is in efi - * and pal calls. (see "arch/ia64/kernel/head.S") - * - * (it,dt,rt): (0,0,0) -> (0,1,1) - * This kind of transition is found when OSYa exits efi boot - * service. Due to gva = gpa in this case (Same region), - * data access can be satisfied though itlb entry for physical - * emulation is hit. - */ - SW_SELF,0, 0, SW_NOP, 0, 0, 0, SW_P2V, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - /* - * (it,dt,rt): (0,1,1) -> (1,1,1) - * This kind of transition is found in OSYa. - * - * (it,dt,rt): (0,1,1) -> (0,0,0) - * This kind of transition is found in OSYa - */ - SW_NOP, 0, 0, SW_SELF,0, 0, 0, SW_P2V, - /* (1,0,0)->(1,1,1) */ - 0, 0, 0, 0, 0, 0, 0, SW_P2V, - /* - * (it,dt,rt): (1,0,1) -> (1,1,1) - * This kind of transition usually occurs when Linux returns - * from the low level TLB miss handlers. - * (see "arch/ia64/kernel/ivt.S") - */ - 0, 0, 0, 0, 0, SW_SELF,0, SW_P2V, - 0, 0, 0, 0, 0, 0, 0, 0, - /* - * (it,dt,rt): (1,1,1) -> (1,0,1) - * This kind of transition usually occurs in Linux low level - * TLB miss handler. (see "arch/ia64/kernel/ivt.S") - * - * (it,dt,rt): (1,1,1) -> (0,0,0) - * This kind of transition usually occurs in pal and efi calls, - * which requires running in physical mode. 
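A note on how the mode tables above (valid_mm_mode[] and mm_switch_table[][]) are indexed, since the MODE_IND() macro used by mm_switch_action() further below is defined in vmx_phy_mode.h rather than in this patch: judging from the per-entry (it,dt,rt) annotations, the index is simply the guest psr.it/dt/rt bits packed into a 3-bit value, so (0,0,0) is 0 and (1,1,1) is 7. A minimal sketch under that assumption, for illustration only:

    /* Illustration only -- the real MODE_IND() lives in vmx_phy_mode.h and is
     * not part of this diff; it appears to pack (it,dt,rt) into a 3-bit index. */
    static inline int mode_index(int it, int dt, int rt)
    {
        return (it << 2) | (dt << 1) | rt;  /* (0,1,1) -> 3, (1,0,1) -> 5, (1,1,1) -> 7 */
    }

    /* e.g. the pal/efi case described just above, (1,1,1) -> (0,0,0):
     *   mm_switch_table[mode_index(1,1,1)][mode_index(0,0,0)] == SW_V2P   */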
- * (see "arch/ia64/kernel/head.S") - * (1,1,1)->(1,0,0) - */ - - SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF, -}; - -void -physical_mode_init(VCPU *vcpu) -{ - UINT64 psr; - struct domain * d = vcpu->domain; - - vcpu->arch.old_rsc = 0; - vcpu->arch.mode_flags = GUEST_IN_PHY; -} - -extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages); -#if 0 -void -physical_itlb_miss_domn(VCPU *vcpu, u64 vadr) -{ - u64 psr; - IA64_PSR vpsr; - u64 mppn,gppn,mpp1,gpp1; - struct domain *d; - static u64 test=0; - d=vcpu->domain; - if(test) - panic("domn physical itlb miss happen\n"); - else - test=1; - vpsr.val=vmx_vcpu_get_psr(vcpu); - gppn=(vadr<<1)>>13; - mppn = get_mfn(DOMID_SELF,gppn,1); - mppn=(mppn<<12)|(vpsr.cpl<<7); - gpp1=0; - mpp1 = get_mfn(DOMID_SELF,gpp1,1); - mpp1=(mpp1<<12)|(vpsr.cpl<<7); -// if(vadr>>63) -// mppn |= PHY_PAGE_UC; -// else -// mppn |= PHY_PAGE_WB; - mpp1 |= PHY_PAGE_WB; - psr=ia64_clear_ic(); - ia64_itr(0x1, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24); - ia64_srlz_i(); - ia64_itr(0x2, IA64_TEMP_PHYSICAL, vadr&(~0xfff), (mppn|PHY_PAGE_WB), 24); - ia64_stop(); - ia64_srlz_i(); - ia64_itr(0x1, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24); - ia64_srlz_i(); - ia64_itr(0x2, IA64_TEMP_PHYSICAL+1, vadr&(~0x8000000000000fffUL), (mppn|PHY_PAGE_WB), 24); - ia64_stop(); - ia64_srlz_i(); - ia64_itr(0x1, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28); - ia64_srlz_i(); - ia64_itr(0x2, IA64_TEMP_PHYSICAL+2, gpp1&(~0xfff), mpp1, 28); - ia64_stop(); - ia64_srlz_i(); - ia64_set_psr(psr); - ia64_srlz_i(); - return; -} -#endif - -void -physical_itlb_miss(VCPU *vcpu, u64 vadr) -{ - physical_itlb_miss_dom0(vcpu, vadr); -} - - -void -physical_itlb_miss_dom0(VCPU *vcpu, u64 vadr) -{ - u64 psr; - IA64_PSR vpsr; - u64 mppn,gppn; - vpsr.val=vmx_vcpu_get_psr(vcpu); - gppn=(vadr<<1)>>13; - mppn = get_mfn(DOMID_SELF,gppn,1); - mppn=(mppn<<12)|(vpsr.cpl<<7); -// if(vadr>>63) -// mppn |= PHY_PAGE_UC; -// else - mppn |= PHY_PAGE_WB; - - psr=ia64_clear_ic(); - ia64_itc(1,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT); - ia64_set_psr(psr); - ia64_srlz_i(); - return; -} - - -void -physical_dtlb_miss(VCPU *vcpu, u64 vadr) -{ - u64 psr; - IA64_PSR vpsr; - u64 mppn,gppn; -// if(vcpu->domain!=dom0) -// panic("dom n physical dtlb miss happen\n"); - vpsr.val=vmx_vcpu_get_psr(vcpu); - gppn=(vadr<<1)>>13; - mppn = get_mfn(DOMID_SELF,gppn,1); - mppn=(mppn<<12)|(vpsr.cpl<<7); - if(vadr>>63) - mppn |= PHY_PAGE_UC; - else - mppn |= PHY_PAGE_WB; - - psr=ia64_clear_ic(); - ia64_itc(2,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT); - ia64_set_psr(psr); - ia64_srlz_i(); - return; -} - -void -vmx_init_all_rr(VCPU *vcpu) -{ - VMX(vcpu,vrr[VRN0]) = 0x38; - VMX(vcpu,vrr[VRN1]) = 0x38; - VMX(vcpu,vrr[VRN2]) = 0x38; - VMX(vcpu,vrr[VRN3]) = 0x38; - VMX(vcpu,vrr[VRN4]) = 0x38; - VMX(vcpu,vrr[VRN5]) = 0x38; - VMX(vcpu,vrr[VRN6]) = 0x60; - VMX(vcpu,vrr[VRN7]) = 0x60; - - VMX(vcpu,mrr5) = vmx_vrrtomrr(vcpu, 0x38); - VMX(vcpu,mrr6) = vmx_vrrtomrr(vcpu, 0x60); - VMX(vcpu,mrr7) = vmx_vrrtomrr(vcpu, 0x60); -} - -void -vmx_load_all_rr(VCPU *vcpu) -{ - unsigned long psr; - ia64_rr phy_rr; - - psr = ia64_clear_ic(); - - phy_rr.ps = EMUL_PHY_PAGE_SHIFT; - phy_rr.ve = 1; - - /* WARNING: not allow co-exist of both virtual mode and physical - * mode in same region - */ - if (is_physical_mode(vcpu)) { - if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) - panic("Unexpected domain switch in phy emul\n"); - phy_rr.rid = vcpu->domain->arch.metaphysical_rr0; - ia64_set_rr((VRN0 << VRN_SHIFT), phy_rr.rrval); - phy_rr.rid = 
vcpu->domain->arch.metaphysical_rr4; - ia64_set_rr((VRN4 << VRN_SHIFT), phy_rr.rrval); - } else { - ia64_set_rr((VRN0 << VRN_SHIFT), - vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0]))); - ia64_set_rr((VRN4 << VRN_SHIFT), - vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN4]))); - } - -#if 1 - /* rr567 will be postponed to last point when resuming back to guest */ - ia64_set_rr((VRN1 << VRN_SHIFT), - vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN1]))); - ia64_set_rr((VRN2 << VRN_SHIFT), - vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN2]))); - ia64_set_rr((VRN3 << VRN_SHIFT), - vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN3]))); -#endif - ia64_srlz_d(); - ia64_set_psr(psr); - ia64_srlz_i(); -} - -void -switch_to_physical_rid(VCPU *vcpu) -{ - UINT64 psr; - ia64_rr phy_rr; - - phy_rr.ps = EMUL_PHY_PAGE_SHIFT; - phy_rr.ve = 1; - - /* Save original virtual mode rr[0] and rr[4] */ - psr=ia64_clear_ic(); - phy_rr.rid = vcpu->domain->arch.metaphysical_rr0; - ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval); - ia64_srlz_d(); - phy_rr.rid = vcpu->domain->arch.metaphysical_rr4; - ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval); - ia64_srlz_d(); - - ia64_set_psr(psr); - ia64_srlz_i(); - return; -} - - -void -switch_to_virtual_rid(VCPU *vcpu) -{ - UINT64 psr; - ia64_rr mrr; - - psr=ia64_clear_ic(); - - mrr=vmx_vcpu_rr(vcpu,VRN0<<VRN_SHIFT); - ia64_set_rr(VRN0<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval)); - ia64_srlz_d(); - mrr=vmx_vcpu_rr(vcpu,VRN4<<VRN_SHIFT); - ia64_set_rr(VRN4<<VRN_SHIFT, vmx_vrrtomrr(vcpu, mrr.rrval)); - ia64_srlz_d(); - ia64_set_psr(psr); - ia64_srlz_i(); - return; -} - -static int mm_switch_action(IA64_PSR opsr, IA64_PSR npsr) -{ - return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; -} - -void -switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr) -{ - int act; - REGS * regs=vcpu_regs(vcpu); - act = mm_switch_action(old_psr, new_psr); - switch (act) { - case SW_V2P: - vcpu->arch.old_rsc = regs->ar_rsc; - switch_to_physical_rid(vcpu); - /* - * Set rse to enforced lazy, to prevent active rse save/restor when - * guest physical mode. - */ - regs->ar_rsc &= ~(IA64_RSC_MODE); - vcpu->arch.mode_flags |= GUEST_IN_PHY; - break; - case SW_P2V: - switch_to_virtual_rid(vcpu); - /* - * recover old mode which is saved when entering - * guest physical mode - */ - regs->ar_rsc = vcpu->arch.old_rsc; - vcpu->arch.mode_flags &= ~GUEST_IN_PHY; - break; - case SW_SELF: - printf("Switch to self-0x%lx!!! MM mode doesn't change...\n", - old_psr.val); - break; - case SW_NOP: - printf("No action required for mode transition: (0x%lx -> 0x%lx)\n", - old_psr.val, new_psr.val); - break; - default: - /* Sanity check */ - printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val); - panic("Unexpected virtual <--> physical mode transition"); - break; - } - return; -} - - - -/* - * In physical mode, insert tc/tr for region 0 and 4 uses - * RID[0] and RID[4] which is for physical mode emulation. - * However what those inserted tc/tr wants is rid for - * virtual mode. So original virtual rid needs to be restored - * before insert. - * - * Operations which required such switch include: - * - insertions (itc.*, itr.*) - * - purges (ptc.* and ptr.*) - * - tpa - * - tak - * - thash?, ttag? - * All above needs actual virtual rid for destination entry. 
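The entry point that drives the mode-switch machinery above is check_mm_mode_switch(), which follows immediately below. For orientation, a hypothetical caller would look roughly like this (the real callers are the guest-PSR emulation paths, presumably in vmx_vcpu.c, and are not part of this patch):

    /* Hypothetical caller sketch -- illustrative only, not code from the tree.
     * When the emulator commits a new guest PSR value, it hands the old and new
     * values down so that a change of psr.it/dt/rt triggers the region-register
     * switch implemented above. */
    static void example_commit_guest_psr(VCPU *vcpu, u64 new_val)
    {
        IA64_PSR old_psr, new_psr;

        old_psr.val = vmx_vcpu_get_psr(vcpu);   /* current virtual PSR */
        new_psr.val = new_val;
        /* ... store new_val into the VPD here ... */
        check_mm_mode_switch(vcpu, old_psr, new_psr);
    }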
- */ - -void -check_mm_mode_switch (VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr) -{ - - if ( (old_psr.dt != new_psr.dt ) || - (old_psr.it != new_psr.it ) || - (old_psr.rt != new_psr.rt ) - ) { - switch_mm_mode (vcpu, old_psr, new_psr); - } - - return 0; -} - - -/* - * In physical mode, insert tc/tr for region 0 and 4 uses - * RID[0] and RID[4] which is for physical mode emulation. - * However what those inserted tc/tr wants is rid for - * virtual mode. So original virtual rid needs to be restored - * before insert. - * - * Operations which required such switch include: - * - insertions (itc.*, itr.*) - * - purges (ptc.* and ptr.*) - * - tpa - * - tak - * - thash?, ttag? - * All above needs actual virtual rid for destination entry. - */ - -void -prepare_if_physical_mode(VCPU *vcpu) -{ - if (is_physical_mode(vcpu)) { - vcpu->arch.mode_flags |= GUEST_PHY_EMUL; - switch_to_virtual_rid(vcpu); - } - return; -} - -/* Recover always follows prepare */ -void -recover_if_physical_mode(VCPU *vcpu) -{ - if (is_physical_mode(vcpu)) { - vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; - switch_to_physical_rid(vcpu); - } - return; -} - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_process.c --- a/xen/arch/ia64/vmx_process.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,375 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_process.c: handling VMX architecture-related VM exits - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx> - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - */ - -#include <xen/config.h> -#include <xen/lib.h> -#include <xen/errno.h> -#include <xen/sched.h> -#include <xen/smp.h> -#include <asm/ptrace.h> -#include <xen/delay.h> - -#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */ -#include <asm/sal.h> /* FOR struct ia64_sal_retval */ - -#include <asm/system.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/desc.h> -//#include <asm/ldt.h> -#include <xen/irq.h> -#include <xen/event.h> -#include <asm/regionreg.h> -#include <asm/privop.h> -#include <asm/ia64_int.h> -#include <asm/hpsim_ssc.h> -#include <asm/dom_fw.h> -#include <asm/vmx_vcpu.h> -#include <asm/kregs.h> -#include <asm/vmx.h> -#include <asm/vmx_mm_def.h> -#include <xen/mm.h> -/* reset all PSR field to 0, except up,mfl,mfh,pk,dt,rt,mc,it */ -#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034 - - -extern struct ia64_sal_retval pal_emulator_static(UINT64); -extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64); -extern void rnat_consumption (VCPU *vcpu); -#define DOMN_PAL_REQUEST 0x110000 -IA64FAULT -vmx_ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim) -{ - static int first_time = 1; - struct domain *d = (struct domain *) current->domain; - struct vcpu *v = (struct domain *) current; - extern unsigned long running_on_sim; - unsigned long i, sal_param[8]; - -#if 0 - if (first_time) { - if (platform_is_hp_ski()) running_on_sim = 1; - else running_on_sim = 0; - first_time = 0; - } - if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant - if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs); - else do_ssc(vcpu_get_gr(current,36), regs); - } -#endif - if (iim == d->arch.breakimm) { - struct ia64_sal_retval x; - switch (regs->r2) { - case FW_HYPERCALL_PAL_CALL: - //printf("*** PAL hypercall: index=%d\n",regs->r28); - //FIXME: This should call a C routine - x = pal_emulator_static(VMX_VPD(v, vgr[12])); - regs->r8 = x.status; regs->r9 = x.v0; - regs->r10 = x.v1; regs->r11 = x.v2; -#if 0 - if (regs->r8) - printk("Failed vpal emulation, with index:0x%lx\n", - VMX_VPD(v, vgr[12])); -#endif - break; - case FW_HYPERCALL_SAL_CALL: - for (i = 0; i < 8; i++) - vmx_vcpu_get_gr(v, 32+i, &sal_param[i]); - x = sal_emulator(sal_param[0], sal_param[1], - sal_param[2], sal_param[3], - sal_param[4], sal_param[5], - sal_param[6], sal_param[7]); - regs->r8 = x.status; regs->r9 = x.v0; - regs->r10 = x.v1; regs->r11 = x.v2; -#if 0 - if (regs->r8) - printk("Failed vsal emulation, with index:0x%lx\n", - sal_param[0]); -#endif - break; - case FW_HYPERCALL_EFI_RESET_SYSTEM: - printf("efi.reset_system called "); - if (current->domain == dom0) { - printf("(by dom0)\n "); - (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL); - } - printf("(not supported for non-0 domain)\n"); - regs->r8 = EFI_UNSUPPORTED; - break; - case FW_HYPERCALL_EFI_GET_TIME: - { - unsigned long *tv, *tc; - vmx_vcpu_get_gr(v, 32, &tv); - vmx_vcpu_get_gr(v, 33, &tc); - printf("efi_get_time(%p,%p) called...",tv,tc); - tv = __va(translate_domain_mpaddr(tv)); - if (tc) tc = __va(translate_domain_mpaddr(tc)); - regs->r8 = (*efi.get_time)(tv,tc); - printf("and returns %lx\n",regs->r8); - } - break; - case FW_HYPERCALL_EFI_SET_TIME: - case FW_HYPERCALL_EFI_GET_WAKEUP_TIME: - case FW_HYPERCALL_EFI_SET_WAKEUP_TIME: - // FIXME: need fixes in efi.h from 2.6.9 - case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP: - // 
FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED - // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS - // POINTER ARGUMENTS WILL BE VIRTUAL!! - case FW_HYPERCALL_EFI_GET_VARIABLE: - // FIXME: need fixes in efi.h from 2.6.9 - case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE: - case FW_HYPERCALL_EFI_SET_VARIABLE: - case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT: - // FIXME: need fixes in efi.h from 2.6.9 - regs->r8 = EFI_UNSUPPORTED; - break; - } -#if 0 - if (regs->r8) - printk("Failed vgfw emulation, with index:0x%lx\n", - regs->r2); -#endif - vmx_vcpu_increment_iip(current); - }else if(iim == DOMN_PAL_REQUEST){ - pal_emul(current); - vmx_vcpu_increment_iip(current); - } else - vmx_reflect_interruption(ifa,isr,iim,11); -} - -static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800, - 0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,0x4000, - 0x4400,0x4800,0x4c00,0x5000,0x5100,0x5200,0x5300,0x5400,0x5500,0x5600, - 0x5700,0x5800,0x5900,0x5a00,0x5b00,0x5c00,0x5d00,0x5e00,0x5f00,0x6000, - 0x6100,0x6200,0x6300,0x6400,0x6500,0x6600,0x6700,0x6800,0x6900,0x6a00, - 0x6b00,0x6c00,0x6d00,0x6e00,0x6f00,0x7000,0x7100,0x7200,0x7300,0x7400, - 0x7500,0x7600,0x7700,0x7800,0x7900,0x7a00,0x7b00,0x7c00,0x7d00,0x7e00, - 0x7f00, -}; - - - -void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim, - UINT64 vector) -{ - VCPU *vcpu = current; - REGS *regs=vcpu_regs(vcpu); - UINT64 viha,vpsr = vmx_vcpu_get_psr(vcpu); - if(!(vpsr&IA64_PSR_IC)&&(vector!=5)){ - panic("Guest nested fault!"); - } - VPD_CR(vcpu,isr)=isr; - VPD_CR(vcpu,iipa) = regs->cr_iip; - vector=vec2off[vector]; - if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) - VPD_CR(vcpu,iim) = iim; - else { - set_ifa_itir_iha(vcpu,ifa,1,1,1); - } - inject_guest_interruption(vcpu, vector); -} - -// ONLY gets called from ia64_leave_kernel -// ONLY call with interrupts disabled?? (else might miss one?) -// NEVER successful if already reflecting a trap/fault because psr.i==0 -void leave_hypervisor_tail(struct pt_regs *regs) -{ - struct domain *d = current->domain; - struct vcpu *v = current; - // FIXME: Will this work properly if doing an RFI??? - if (!is_idle_task(d) ) { // always comes from guest - extern void vmx_dorfirfi(void); - struct pt_regs *user_regs = vcpu_regs(current); - - if (local_softirq_pending()) - do_softirq(); - local_irq_disable(); - - if (user_regs != regs) - printk("WARNING: checking pending interrupt in nested interrupt!!!\n"); - - /* VMX Domain N has other interrupt source, saying DM */ - if (test_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags)) - vmx_intr_assist(v); - - /* FIXME: Check event pending indicator, and set - * pending bit if necessary to inject back to guest. - * Should be careful about window between this check - * and above assist, since IOPACKET_PORT shouldn't be - * injected into vmx domain. 
- * - * Now hardcode the vector as 0x10 temporarily - */ - if (event_pending(v)&&(!((v->arch.arch_vmx.in_service[0])&(1UL<<0x10)))) { - VPD_CR(v, irr[0]) |= 1UL << 0x10; - v->arch.irq_new_pending = 1; - } - - if ( v->arch.irq_new_pending ) { - v->arch.irq_new_pending = 0; - vmx_check_pending_irq(v); - } - } -} - -extern ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr); - -/* We came here because the H/W VHPT walker failed to find an entry */ -void vmx_hpw_miss(VCPU *vcpu, u64 vec, u64 vadr) -{ - IA64_PSR vpsr; - CACHE_LINE_TYPE type; - u64 vhpt_adr; - ISR misr; - ia64_rr vrr; - REGS *regs; - thash_cb_t *vtlb, *vhpt; - thash_data_t *data, me; - vtlb=vmx_vcpu_get_vtlb(vcpu); -#ifdef VTLB_DEBUG - check_vtlb_sanity(vtlb); - dump_vtlb(vtlb); -#endif - vpsr.val = vmx_vcpu_get_psr(vcpu); - regs = vcpu_regs(vcpu); - misr.val=regs->cr_isr; -/* TODO - if(vcpu->domain->id && vec == 2 && - vpsr.dt == 0 && is_gpa_io(MASK_PMA(vaddr))){ - emulate_ins(&v); - return; - } -*/ - - if((vec==1)&&(!vpsr.it)){ - physical_itlb_miss(vcpu, vadr); - return; - } - if((vec==2)&&(!vpsr.dt)){ - if(vcpu->domain!=dom0&&__gpfn_is_io(vcpu->domain,(vadr<<1)>>(PAGE_SHIFT+1))){ - emulate_io_inst(vcpu,((vadr<<1)>>1),4); // UC - }else{ - physical_dtlb_miss(vcpu, vadr); - } - return; - } - vrr = vmx_vcpu_rr(vcpu,vadr); - if(vec == 1) type = ISIDE_TLB; - else if(vec == 2) type = DSIDE_TLB; - else panic("wrong vec\n"); - -// prepare_if_physical_mode(vcpu); - - if(data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type)){ - if(vcpu->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(vcpu->domain, data->ppn>>(PAGE_SHIFT-12))){ - vadr=(vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps); - emulate_io_inst(vcpu, vadr, data->ma); - return IA64_FAULT; - } - if ( data->ps != vrr.ps ) { - machine_tlb_insert(vcpu, data); - } - else { - thash_insert(vtlb->ts->vhpt,data,vadr); - } - }else if(type == DSIDE_TLB){ - if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){ - if(vpsr.ic){ - vmx_vcpu_set_isr(vcpu, misr.val); - alt_dtlb(vcpu, vadr); - return IA64_FAULT; - } else{ - if(misr.sp){ - //TODO lds emulation - panic("Don't support speculation load"); - }else{ - nested_dtlb(vcpu); - return IA64_FAULT; - } - } - } else{ - vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); - vrr=vmx_vcpu_rr(vcpu,vhpt_adr); - data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB); - if(data){ - if(vpsr.ic){ - vmx_vcpu_set_isr(vcpu, misr.val); - dtlb_fault(vcpu, vadr); - return IA64_FAULT; - }else{ - if(misr.sp){ - //TODO lds emulation - panic("Don't support speculation load"); - }else{ - nested_dtlb(vcpu); - return IA64_FAULT; - } - } - }else{ - if(vpsr.ic){ - vmx_vcpu_set_isr(vcpu, misr.val); - dvhpt_fault(vcpu, vadr); - return IA64_FAULT; - }else{ - if(misr.sp){ - //TODO lds emulation - panic("Don't support speculation load"); - }else{ - nested_dtlb(vcpu); - return IA64_FAULT; - } - } - } - } - }else if(type == ISIDE_TLB){ - if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){ - if(!vpsr.ic){ - misr.ni=1; - } - vmx_vcpu_set_isr(vcpu, misr.val); - alt_itlb(vcpu, vadr); - return IA64_FAULT; - } else{ - vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); - vrr=vmx_vcpu_rr(vcpu,vhpt_adr); - data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB); - if(data){ - if(!vpsr.ic){ - misr.ni=1; - } - vmx_vcpu_set_isr(vcpu, misr.val); - itlb_fault(vcpu, vadr); - return IA64_FAULT; - }else{ - if(!vpsr.ic){ - misr.ni=1; - } - vmx_vcpu_set_isr(vcpu, misr.val); - ivhpt_fault(vcpu, vadr); - return IA64_FAULT; - } - } - } -} - - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_support.c --- 
a/xen/arch/ia64/vmx_support.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,164 +0,0 @@ - -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_support.c: vmx specific support interface. - * Copyright (c) 2005, Intel Corporation. - * Kun Tian (Kevin Tian) (Kevin.tian@xxxxxxxxx) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ -#include <xen/config.h> -#include <xen/sched.h> -#include <public/io/ioreq.h> -#include <asm/vmx.h> -#include <asm/vmx_vcpu.h> - -/* - * I/O emulation should be atomic from domain point of view. However, - * when emulation code is waiting for I/O completion by do_block, - * other events like DM interrupt, VBD, etc. may come and unblock - * current exection flow. So we have to prepare for re-block if unblocked - * by non I/O completion event. - */ -void vmx_wait_io(void) -{ - struct vcpu *v = current; - struct domain *d = v->domain; - extern void do_block(); - int port = iopacket_port(d); - - do { - if (!test_bit(port, - &d->shared_info->evtchn_pending[0])) - do_block(); - - /* Unblocked when some event is coming. Clear pending indication - * immediately if deciding to go for io assist - */ - if (test_and_clear_bit(port, - &d->shared_info->evtchn_pending[0])) { - clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel); - clear_bit(0, &v->vcpu_info->evtchn_upcall_pending); - vmx_io_assist(v); - } - - - if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) { - /* - * Latest event is not I/O completion, so clear corresponding - * selector and pending indication, to allow real event coming - */ - clear_bit(0, &v->vcpu_info->evtchn_upcall_pending); - - /* Here atually one window is leaved before selector is cleared. - * However this window only delay the indication to coming event, - * nothing losed. Next loop will check I/O channel to fix this - * window. - */ - clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel); - } - else - break; - } while (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)); -} - -/* - * Only place to call vmx_io_assist is mmio/legacy_io emulation. - * Since I/O emulation is synchronous, it shouldn't be called in - * other places. This is not like x86, since IA-64 implements a - * per-vp stack without continuation. - */ -void vmx_io_assist(struct vcpu *v) -{ - vcpu_iodata_t *vio; - ioreq_t *p; - - /* - * This shared page contains I/O request between emulation code - * and device model. - */ - vio = get_vio(v->domain, v->vcpu_id); - if (!vio) - panic("Corruption: bad shared page: %lx\n", (unsigned long)vio); - - p = &vio->vp_ioreq; - - if (p->state == STATE_IORESP_HOOK) - panic("Not supported: No hook available for DM request\n"); - - if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) { - if (p->state != STATE_IORESP_READY) { - /* Can't do_block here, for the same reason as other places to - * use vmx_wait_io. 
Simple return is safe since vmx_wait_io will - * try to block again - */ - return; - } else - p->state = STATE_INVALID; - - clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags); - } else - return; /* Spurous event? */ -} - -/* - * VMX domainN has two types of interrupt source: lsapic model within - * HV, and device model within domain 0 (service OS). There're another - * pending array in share page, manipulated by device model directly. - * To conform to VT-i spec, we have to sync pending bits in shared page - * into VPD. This has to be done before checking pending interrupt at - * resume to guest. For domain 0, all the interrupt sources come from - * HV, which then doesn't require this assist. - */ -void vmx_intr_assist(struct vcpu *v) -{ - vcpu_iodata_t *vio; - struct domain *d = v->domain; - extern void vmx_vcpu_pend_batch_interrupt(VCPU *vcpu, - unsigned long *pend_irr); - int port = iopacket_port(d); - - /* I/O emulation is atomic, so it's impossible to see execution flow - * out of vmx_wait_io, when guest is still waiting for response. - */ - if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) - panic("!!!Bad resume to guest before I/O emulation is done.\n"); - - /* Clear indicator specific to interrupt delivered from DM */ - if (test_and_clear_bit(port, - &d->shared_info->evtchn_pending[0])) { - if (!d->shared_info->evtchn_pending[port >> 5]) - clear_bit(port>>5, &v->vcpu_info->evtchn_pending_sel); - - if (!v->vcpu_info->evtchn_pending_sel) - clear_bit(0, &v->vcpu_info->evtchn_upcall_pending); - } - - /* Even without event pending, we still need to sync pending bits - * between DM and vlsapic. The reason is that interrupt delivery - * shares same event channel as I/O emulation, with corresponding - * indicator possibly cleared when vmx_wait_io(). - */ - vio = get_vio(v->domain, v->vcpu_id); - if (!vio) - panic("Corruption: bad shared page: %lx\n", (unsigned long)vio); - -#ifdef V_IOSAPIC_READY - vlapic_update_ext_irq(v); -#else - panic("IOSAPIC model is missed in qemu\n"); -#endif - return; -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_utility.c --- a/xen/arch/ia64/vmx_utility.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,659 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_utility.c: - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx> - * Xiaoyan Feng (Fleming Feng) <fleming.feng@xxxxxxxxx> - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - */ - -#include <xen/types.h> -#include <asm/vmx_vcpu.h> -#include <asm/processor.h> -#include <asm/vmx_mm_def.h> - - -/* - * Return: - * 0: Not reserved indirect registers - * 1: Is reserved indirect registers - */ -int -is_reserved_indirect_register ( - int type, - int index ) -{ - switch (type) { - case IA64_CPUID: - if ( index >= 5 ) { - return 1; - } - - case IA64_DBR: - case IA64_IBR: - //bugbugbug:check with pal about the max ibr/dbr!!!! - break; - - case IA64_PMC: - //bugbugbug:check with pal about the max ibr/dbr!!!! - break; - - case IA64_PMD: - //bugbugbug:check with pal about the max ibr/dbr!!!! - break; - - case IA64_PKR: - //bugbugbug:check with pal about the max pkr!!!! - break; - - case IA64_RR: - //bugbugbug:check with pal about the max rr!!!! - break; - - default: - panic ("Unsupported instruction!"); - } - - return 0; - -} - -/* - * Return: - * Set all ignored fields in value to 0 and return - */ -u64 -indirect_reg_igfld_MASK ( - int type, - int index, - u64 value - ) -{ - u64 nvalue; - - nvalue = value; - switch ( type ) { - case IA64_CPUID: - if ( index == 2 ) { - nvalue = 0; - } - break; - - case IA64_DBR: - case IA64_IBR: - /* Refer to SDM Vol2 Table 7-1,7-2 */ - if ( index % 2 != 0) { - /* Ignore field: {61:60} */ - nvalue = value & (~MASK (60, 2)); - } - break; - case IA64_PMC: - if ( index == 0 ) { - /* Ignore field: 3:1 */ - nvalue = value & (~MASK (1, 3)); - } - break; - case IA64_PMD: - if ( index >= 4 ) { - /* Ignore field: 7:7 */ - /* bugbug: this code is correct for generic - * PMD. However, for implementation specific - * PMD, it's WRONG. need more info to judge - * what's implementation specific PMD. 
- */ - nvalue = value & (~MASK (7, 1)); - } - break; - case IA64_PKR: - case IA64_RR: - break; - default: - panic ("Unsupported instruction!"); - } - - return nvalue; -} - -/* - * Return: - * Set all ignored fields in value to 0 and return - */ -u64 -cr_igfld_mask (int index, u64 value) -{ - u64 nvalue; - - nvalue = value; - - switch ( index ) { - case IA64_REG_CR_IVA: - /* Ignore filed: 14:0 */ - nvalue = value & (~MASK (0, 15)); - break; - - case IA64_REG_CR_IHA: - /* Ignore filed: 1:0 */ - nvalue = value & (~MASK (0, 2)); - break; - - case IA64_REG_CR_LID: - /* Ignore filed: 63:32 */ - nvalue = value & (~MASK (32, 32)); - break; - - case IA64_REG_CR_TPR: - /* Ignore filed: 63:17,3:0 */ - nvalue = value & (~MASK (17, 47)); - nvalue = nvalue & (~MASK (0, 4)); - break; - - case IA64_REG_CR_EOI: - /* Ignore filed: 63:0 */ - nvalue = 0; - break; - - case IA64_REG_CR_ITV: - case IA64_REG_CR_PMV: - case IA64_REG_CR_CMCV: - case IA64_REG_CR_LRR0: - case IA64_REG_CR_LRR1: - /* Ignore filed: 63:17,12:12 */ - nvalue = value & (~MASK (17, 47)); - nvalue = nvalue & (~MASK (12, 1)); - break; - } - - return nvalue; -} - - -/* - * Return: - * 1: PSR reserved fields are not zero - * 0: PSR reserved fields are all zero - */ -int -check_psr_rsv_fields (u64 value) -{ - /* PSR reserved fields: 0, 12~6, 16, 31~28, 63~46 - * These reserved fields shall all be zero - * Otherwise we will panic - */ - - if ( value & MASK (0, 1) || - value & MASK (6, 7) || - value & MASK (16, 1) || - value & MASK (28, 4) || - value & MASK (46, 18) - ) { - return 1; - } - - return 0; -} - - - -/* - * Return: - * 1: CR reserved fields are not zero - * 0: CR reserved fields are all zero - */ -int -check_cr_rsv_fields (int index, u64 value) -{ - switch (index) { - case IA64_REG_CR_DCR: - if ( (value & MASK ( 3, 5 )) || - (value & MASK (15, 49))) { - return 1; - } - return 0; - - case IA64_REG_CR_ITM: - case IA64_REG_CR_IVA: - case IA64_REG_CR_IIP: - case IA64_REG_CR_IFA: - case IA64_REG_CR_IIPA: - case IA64_REG_CR_IIM: - case IA64_REG_CR_IHA: - case IA64_REG_CR_EOI: - return 0; - - case IA64_REG_CR_PTA: - if ( (value & MASK ( 1, 1 )) || - (value & MASK (9, 6))) { - return 1; - } - return 0; - - case IA64_REG_CR_IPSR: - return check_psr_rsv_fields (value); - - - case IA64_REG_CR_ISR: - if ( (value & MASK ( 24, 8 )) || - (value & MASK (44, 20))) { - return 1; - } - return 0; - - case IA64_REG_CR_ITIR: - if ( (value & MASK ( 0, 2 )) || - (value & MASK (32, 32))) { - return 1; - } - return 0; - - case IA64_REG_CR_IFS: - if ( (value & MASK ( 38, 25 ))) { - return 1; - } - return 0; - - case IA64_REG_CR_LID: - if ( (value & MASK ( 0, 16 ))) { - return 1; - } - return 0; - - case IA64_REG_CR_IVR: - if ( (value & MASK ( 8, 56 ))) { - return 1; - } - return 0; - - case IA64_REG_CR_TPR: - if ( (value & MASK ( 8, 8 ))) { - return 1; - } - return 0; - - case IA64_REG_CR_IRR0: - if ( (value & MASK ( 1, 1 )) || - (value & MASK (3, 13))) { - return 1; - } - return 0; - - case IA64_REG_CR_ITV: - case IA64_REG_CR_PMV: - case IA64_REG_CR_CMCV: - if ( (value & MASK ( 8, 4 )) || - (value & MASK (13, 3))) { - return 1; - } - return 0; - - case IA64_REG_CR_LRR0: - case IA64_REG_CR_LRR1: - if ( (value & MASK ( 11, 1 )) || - (value & MASK (14, 1))) { - return 1; - } - return 0; - } - - - panic ("Unsupported CR"); -} - - - -/* - * Return: - * 0: Indirect Reg reserved fields are not zero - * 1: Indirect Reg reserved fields are all zero - */ -int -check_indirect_reg_rsv_fields ( int type, int index, u64 value ) -{ - - switch ( type ) { - case IA64_CPUID: - if 
( index == 3 ) { - if ( value & MASK (40, 24 )) { - return 0; - } - } else if ( index == 4 ) { - if ( value & MASK (2, 62 )) { - return 0; - } - } - break; - - case IA64_DBR: - case IA64_IBR: - case IA64_PMC: - case IA64_PMD: - break; - - case IA64_PKR: - if ( value & MASK (4, 4) || - value & MASK (32, 32 )) { - return 0; - } - break; - - case IA64_RR: - if ( value & MASK (1, 1) || - value & MASK (32, 32 )) { - return 0; - } - break; - - default: - panic ("Unsupported instruction!"); - } - - return 1; -} - - - - -/* Return - * Same format as isr_t - * Only ei/ni bits are valid, all other bits are zero - */ -u64 -set_isr_ei_ni (VCPU *vcpu) -{ - - IA64_PSR vpsr,ipsr; - ISR visr; - REGS *regs; - - regs=vcpu_regs(vcpu); - - visr.val = 0; - - vpsr.val = vmx_vcpu_get_psr (vcpu); - - if (!vpsr.ic == 1 ) { - /* Set ISR.ni */ - visr.ni = 1; - } - ipsr.val = regs->cr_ipsr; - - visr.ei = ipsr.ri; - return visr.val; -} - - -/* Set up ISR.na/code{3:0}/r/w for no-access instructions - * Refer to SDM Vol Table 5-1 - * Parameter: - * setr: if 1, indicates this function will set up ISR.r - * setw: if 1, indicates this function will set up ISR.w - * Return: - * Same format as ISR. All fields are zero, except na/code{3:0}/r/w - */ -u64 -set_isr_for_na_inst(VCPU *vcpu, int op) -{ - ISR visr; - visr.val = 0; - switch (op) { - case IA64_INST_TPA: - visr.na = 1; - visr.code = 0; - break; - case IA64_INST_TAK: - visr.na = 1; - visr.code = 3; - break; - } - return visr.val; -} - - - -/* - * Set up ISR for registe Nat consumption fault - * Parameters: - * read: if 1, indicates this is a read access; - * write: if 1, indicates this is a write access; - */ -void -set_rnat_consumption_isr (VCPU *vcpu,int inst,int read,int write) -{ - ISR visr; - u64 value; - /* Need set up ISR: code, ei, ni, na, r/w */ - visr.val = 0; - - /* ISR.code{7:4} =1, - * Set up ISR.code{3:0}, ISR.na - */ - visr.code = (1 << 4); - if (inst) { - - value = set_isr_for_na_inst (vcpu,inst); - visr.val = visr.val | value; - } - - /* Set up ISR.r/w */ - visr.r = read; - visr.w = write; - - /* Set up ei/ni */ - value = set_isr_ei_ni (vcpu); - visr.val = visr.val | value; - - vmx_vcpu_set_isr (vcpu,visr.val); -} - - - -/* - * Set up ISR for break fault - */ -void set_break_isr (VCPU *vcpu) -{ - ISR visr; - u64 value; - - /* Need set up ISR: ei, ni */ - - visr.val = 0; - - /* Set up ei/ni */ - value = set_isr_ei_ni (vcpu); - visr.val = visr.val | value; - - vmx_vcpu_set_isr(vcpu, visr.val); -} - - - - - - -/* - * Set up ISR for Priviledged Operation fault - */ -void set_privileged_operation_isr (VCPU *vcpu,int inst) -{ - ISR visr; - u64 value; - - /* Need set up ISR: code, ei, ni, na */ - - visr.val = 0; - - /* Set up na, code{3:0} for no-access instruction */ - value = set_isr_for_na_inst (vcpu, inst); - visr.val = visr.val | value; - - - /* ISR.code{7:4} =1 */ - visr.code = (1 << 4) | visr.code; - - /* Set up ei/ni */ - value = set_isr_ei_ni (vcpu); - visr.val = visr.val | value; - - vmx_vcpu_set_isr (vcpu, visr.val); -} - - - - -/* - * Set up ISR for Priviledged Register fault - */ -void set_privileged_reg_isr (VCPU *vcpu, int inst) -{ - ISR visr; - u64 value; - - /* Need set up ISR: code, ei, ni */ - - visr.val = 0; - - /* ISR.code{7:4} =2 */ - visr.code = 2 << 4; - - /* Set up ei/ni */ - value = set_isr_ei_ni (vcpu); - visr.val = visr.val | value; - - vmx_vcpu_set_isr (vcpu, visr.val); -} - - - - - -/* - * Set up ISR for Reserved Register/Field fault - */ -void set_rsv_reg_field_isr (VCPU *vcpu) -{ - ISR visr; - u64 value; - - /* Need set up ISR: 
code, ei, ni */ - - visr.val = 0; - - /* ISR.code{7:4} =4 */ - visr.code = (3 << 4) | visr.code; - - /* Set up ei/ni */ - value = set_isr_ei_ni (vcpu); - visr.val = visr.val | value; - - vmx_vcpu_set_isr (vcpu, visr.val); -} - - - -/* - * Set up ISR for Illegal Operation fault - */ -void set_illegal_op_isr (VCPU *vcpu) -{ - ISR visr; - u64 value; - - /* Need set up ISR: ei, ni */ - - visr.val = 0; - - /* Set up ei/ni */ - value = set_isr_ei_ni (vcpu); - visr.val = visr.val | value; - - vmx_vcpu_set_isr (vcpu, visr.val); -} - - -void set_isr_reg_nat_consumption(VCPU *vcpu, u64 flag, u64 non_access) -{ - ISR isr; - - isr.val = 0; - isr.val = set_isr_ei_ni(vcpu); - isr.code = IA64_REG_NAT_CONSUMPTION_FAULT | flag; - isr.na = non_access; - isr.r = 1; - isr.w = 0; - vmx_vcpu_set_isr(vcpu, isr.val); - return; -} - -void set_isr_for_priv_fault(VCPU *vcpu, u64 non_access) -{ - u64 value; - ISR isr; - - isr.val = set_isr_ei_ni(vcpu); - isr.code = IA64_PRIV_OP_FAULT; - isr.na = non_access; - vmx_vcpu_set_isr(vcpu, isr.val); - - return; -} - - -IA64FAULT check_target_register(VCPU *vcpu, u64 reg_index) -{ - u64 sof; - REGS *regs; - regs=vcpu_regs(vcpu); - sof = regs->cr_ifs & 0x7f; - if(reg_index >= sof + 32) - return IA64_FAULT; - return IA64_NO_FAULT;; -} - - -int is_reserved_rr_register(VCPU* vcpu, int reg_index) -{ - return (reg_index >= 8); -} - -#define ITIR_RSV_MASK (0x3UL | (((1UL<<32)-1) << 32)) -int is_reserved_itir_field(VCPU* vcpu, u64 itir) -{ - if ( itir & ITIR_RSV_MASK ) { - return 1; - } - return 0; -} - -int is_reserved_rr_field(VCPU* vcpu, u64 reg_value) -{ - ia64_rr rr; - rr.rrval = reg_value; - - if(rr.reserved0 != 0 || rr.reserved1 != 0){ - return 1; - } - if(rr.ps < 12 || rr.ps > 28){ - // page too big or small. - return 1; - } - if(rr.ps > 15 && rr.ps % 2 != 0){ - // unsupported page size. - return 1; - } - return 0; -} - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_vcpu.c --- a/xen/arch/ia64/vmx_vcpu.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,446 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_vcpu.c: handling all virtual cpu related thing. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Fred yang (fred.yang@xxxxxxxxx) - * Arun Sharma (arun.sharma@xxxxxxxxx) - * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx> - * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - */ - -#include <xen/sched.h> -#include <public/arch-ia64.h> -#include <asm/ia64_int.h> -#include <asm/vmx_vcpu.h> -#include <asm/regionreg.h> -#include <asm/tlb.h> -#include <asm/processor.h> -#include <asm/delay.h> -#include <asm/regs.h> -#include <asm/gcc_intrin.h> -#include <asm/vmx_mm_def.h> -#include <asm/vmx.h> - -//u64 fire_itc; -//u64 fire_itc2; -//u64 fire_itm; -//u64 fire_itm2; -/* - * Copyright (c) 2005 Intel Corporation. - * Anthony Xu (anthony.xu@xxxxxxxxx) - * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -/************************************************************************** - VCPU general register access routines -**************************************************************************/ -#include <asm/hw_irq.h> -#include <asm/vmx_pal_vsa.h> -#include <asm/kregs.h> - -//unsigned long last_guest_rsm = 0x0; -struct guest_psr_bundle{ - unsigned long ip; - unsigned long psr; -}; - -struct guest_psr_bundle guest_psr_buf[100]; -unsigned long guest_psr_index = 0; - -void -vmx_vcpu_set_psr(VCPU *vcpu, unsigned long value) -{ - - UINT64 mask; - REGS *regs; - IA64_PSR old_psr, new_psr; - old_psr.val=vmx_vcpu_get_psr(vcpu); - - regs=vcpu_regs(vcpu); - /* We only support guest as: - * vpsr.pk = 0 - * vpsr.is = 0 - * Otherwise panic - */ - if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) { - panic ("Setting unsupport guest psr!"); - } - - /* - * For those IA64_PSR bits: id/da/dd/ss/ed/ia - * Since these bits will become 0, after success execution of each - * instruction, we will change set them to mIA64_PSR - */ - VMX_VPD(vcpu,vpsr) = value & - (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD | - IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA - )); - - if ( !old_psr.i && (value & IA64_PSR_I) ) { - // vpsr.i 0->1 - vcpu->arch.irq_new_condition = 1; - } - new_psr.val=vmx_vcpu_get_psr(vcpu); - { - struct pt_regs *regs = vcpu_regs(vcpu); - guest_psr_buf[guest_psr_index].ip = regs->cr_iip; - guest_psr_buf[guest_psr_index].psr = new_psr.val; - if (++guest_psr_index >= 100) - guest_psr_index = 0; - } -#if 0 - if (old_psr.i != new_psr.i) { - if (old_psr.i) - last_guest_rsm = vcpu_regs(vcpu)->cr_iip; - else - last_guest_rsm = 0; - } -#endif - - /* - * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) - * , except for the following bits: - * ic/i/dt/si/rt/mc/it/bn/vm - */ - mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + - IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + - IA64_PSR_VM; - - regs->cr_ipsr = (regs->cr_ipsr & mask ) | ( value & (~mask) ); - - check_mm_mode_switch(vcpu, old_psr, new_psr); - return IA64_NO_FAULT; -} - -/* Adjust slot both in pt_regs and 
vpd, upon vpsr.ri which - * should have sync with ipsr in entry. - * - * Clear some bits due to successfully emulation. - */ -IA64FAULT vmx_vcpu_increment_iip(VCPU *vcpu) -{ - // TODO: trap_bounce?? Eddie - REGS *regs = vcpu_regs(vcpu); - IA64_PSR vpsr; - IA64_PSR *ipsr = (IA64_PSR *)&regs->cr_ipsr; - - vpsr.val = vmx_vcpu_get_psr(vcpu); - if (vpsr.ri == 2) { - vpsr.ri = 0; - regs->cr_iip += 16; - } else { - vpsr.ri++; - } - - ipsr->ri = vpsr.ri; - vpsr.val &= - (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD | - IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA - )); - - VMX_VPD(vcpu, vpsr) = vpsr.val; - - ipsr->val &= - (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD | - IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA - )); - - return (IA64_NO_FAULT); -} - - -IA64FAULT vmx_vcpu_cover(VCPU *vcpu) -{ - REGS *regs = vcpu_regs(vcpu); - IA64_PSR vpsr; - vpsr.val = vmx_vcpu_get_psr(vcpu); - - if(!vpsr.ic) - VPD_CR(vcpu,ifs) = regs->cr_ifs; - regs->cr_ifs = IA64_IFS_V; - return (IA64_NO_FAULT); -} - - -thash_cb_t * -vmx_vcpu_get_vtlb(VCPU *vcpu) -{ - return vcpu->arch.vtlb; -} - - -struct virutal_platform_def * -vmx_vcpu_get_plat(VCPU *vcpu) -{ - return &(vcpu->domain->arch.vmx_platform); -} - - -ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr) -{ - return (ia64_rr)VMX(vcpu,vrr[vadr>>61]); -} - - -IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val) -{ - ia64_rr oldrr,newrr; - thash_cb_t *hcb; - oldrr=vmx_vcpu_rr(vcpu,reg); - newrr.rrval=val; -#if 1 - if(oldrr.ps!=newrr.ps){ - hcb = vmx_vcpu_get_vtlb(vcpu); - thash_purge_all(hcb); - } -#endif - VMX(vcpu,vrr[reg>>61]) = val; - switch((u64)(reg>>61)) { - case VRN5: - VMX(vcpu,mrr5)=vmx_vrrtomrr(vcpu,val); - break; - case VRN6: - VMX(vcpu,mrr6)=vmx_vrrtomrr(vcpu,val); - break; - case VRN7: - VMX(vcpu,mrr7)=vmx_vrrtomrr(vcpu,val); - /* Change double mapping for this domain */ -#ifdef XEN_DBL_MAPPING - vmx_change_double_mapping(vcpu, - vmx_vrrtomrr(vcpu,oldrr.rrval), - vmx_vrrtomrr(vcpu,newrr.rrval)); -#endif - break; - default: - ia64_set_rr(reg,vmx_vrrtomrr(vcpu,val)); - break; - } - - return (IA64_NO_FAULT); -} - - - -/************************************************************************** - VCPU protection key register access routines -**************************************************************************/ - -IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval) -{ - UINT64 val = (UINT64)ia64_get_pkr(reg); - *pval = val; - return (IA64_NO_FAULT); -} - -IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val) -{ - ia64_set_pkr(reg,val); - return (IA64_NO_FAULT); -} - -#if 0 -int tlb_debug=0; -check_entry(u64 va, u64 ps, char *str) -{ - va &= ~ (PSIZE(ps)-1); - if ( va == 0x2000000002908000UL || - va == 0x600000000000C000UL ) { - stop(); - } - if (tlb_debug) printf("%s at %lx %lx\n", str, va, 1UL<<ps); -} -#endif - - -u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa) -{ - ia64_rr rr,rr1; - rr=vmx_vcpu_rr(vcpu,ifa); - rr1.rrval=0; - rr1.ps=rr.ps; - rr1.rid=rr.rid; - return (rr1.rrval); -} - - - - -IA64FAULT vmx_vcpu_rfi(VCPU *vcpu) -{ - // TODO: Only allowed for current vcpu - UINT64 ifs, psr; - REGS *regs = vcpu_regs(vcpu); - psr = VPD_CR(vcpu,ipsr); - vmx_vcpu_set_psr(vcpu,psr); - ifs=VPD_CR(vcpu,ifs); - if((ifs>>63)&&(ifs<<1)){ - ifs=(regs->cr_ifs)&0x7f; - regs->rfi_pfs = (ifs<<7)|ifs; - regs->cr_ifs = VPD_CR(vcpu,ifs); - } - regs->cr_iip = VPD_CR(vcpu,iip); - return (IA64_NO_FAULT); -} - - -UINT64 -vmx_vcpu_get_psr(VCPU *vcpu) -{ - return VMX_VPD(vcpu,vpsr); -} - - -IA64FAULT -vmx_vcpu_get_bgr(VCPU *vcpu, unsigned int reg, UINT64 *val) -{ - 
IA64_PSR vpsr; - - vpsr.val = vmx_vcpu_get_psr(vcpu); - if ( vpsr.bn ) { - *val=VMX_VPD(vcpu,vgr[reg-16]); - // Check NAT bit - if ( VMX_VPD(vcpu,vnat) & (1UL<<(reg-16)) ) { - // TODO - //panic ("NAT consumption fault\n"); - return IA64_FAULT; - } - - } - else { - *val=VMX_VPD(vcpu,vbgr[reg-16]); - if ( VMX_VPD(vcpu,vbnat) & (1UL<<reg) ) { - //panic ("NAT consumption fault\n"); - return IA64_FAULT; - } - - } - return IA64_NO_FAULT; -} - -IA64FAULT -vmx_vcpu_set_bgr(VCPU *vcpu, unsigned int reg, u64 val,int nat) -{ - IA64_PSR vpsr; - vpsr.val = vmx_vcpu_get_psr(vcpu); - if ( vpsr.bn ) { - VMX_VPD(vcpu,vgr[reg-16]) = val; - if(nat){ - VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg-16) ); - }else{ - VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg-16) ); - } - } - else { - VMX_VPD(vcpu,vbgr[reg-16]) = val; - if(nat){ - VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg) ); - }else{ - VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg) ); - } - } - return IA64_NO_FAULT; -} - - - -IA64FAULT -vmx_vcpu_get_gr(VCPU *vcpu, unsigned reg, UINT64 * val) -{ - REGS *regs=vcpu_regs(vcpu); - int nat; - //TODO, Eddie - if (!regs) return 0; - if (reg >= 16 && reg < 32) { - return vmx_vcpu_get_bgr(vcpu,reg,val); - } - getreg(reg,val,&nat,regs); // FIXME: handle NATs later - if(nat){ - return IA64_FAULT; - } - return IA64_NO_FAULT; -} - -// returns: -// IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault -// IA64_NO_FAULT otherwise - -IA64FAULT -vmx_vcpu_set_gr(VCPU *vcpu, unsigned reg, u64 value, int nat) -{ - REGS *regs = vcpu_regs(vcpu); - long sof = (regs->cr_ifs) & 0x7f; - //TODO Eddie - - if (!regs) return IA64_ILLOP_FAULT; - if (reg >= sof + 32) return IA64_ILLOP_FAULT; - if ( reg >= 16 && reg < 32 ) { - return vmx_vcpu_set_bgr(vcpu,reg, value, nat); - } - setreg(reg,value,nat,regs); - return IA64_NO_FAULT; -} - - -IA64FAULT vmx_vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24) -{ - UINT64 vpsr; - vpsr = vmx_vcpu_get_psr(vcpu); - vpsr &= (~imm24); - vmx_vcpu_set_psr(vcpu, vpsr); - return IA64_NO_FAULT; -} - - -IA64FAULT vmx_vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24) -{ - UINT64 vpsr; - vpsr = vmx_vcpu_get_psr(vcpu); - vpsr |= imm24; - vmx_vcpu_set_psr(vcpu, vpsr); - return IA64_NO_FAULT; -} - - -IA64FAULT vmx_vcpu_set_psr_l(VCPU *vcpu, UINT64 val) -{ - vmx_vcpu_set_psr(vcpu, val); - return IA64_NO_FAULT; -} - -IA64FAULT -vmx_vcpu_set_tpr(VCPU *vcpu, u64 val) -{ - VPD_CR(vcpu,tpr)=val; - vcpu->arch.irq_new_condition = 1; - return IA64_NO_FAULT; -} - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_virt.c --- a/xen/arch/ia64/vmx_virt.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1511 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_virt.c: - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Fred yang (fred.yang@xxxxxxxxx) - * Shaofan Li (Susue Li) <susie.li@xxxxxxxxx> - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - */ - - - -#include <asm/privop.h> -#include <asm/vmx_vcpu.h> -#include <asm/processor.h> -#include <asm/delay.h> // Debug only -#include <asm/vmmu.h> -#include <asm/vmx_mm_def.h> -#include <asm/smp.h> - -#include <asm/virt_event.h> -extern UINT64 privop_trace; - -void -ia64_priv_decoder(IA64_SLOT_TYPE slot_type, INST64 inst, UINT64 * cause) -{ - *cause=0; - switch (slot_type) { - case M: - if (inst.generic.major==0){ - if(inst.M28.x3==0){ - if(inst.M44.x4==6){ - *cause=EVENT_SSM; - }else if(inst.M44.x4==7){ - *cause=EVENT_RSM; - }else if(inst.M30.x4==8&&inst.M30.x2==2){ - *cause=EVENT_MOV_TO_AR_IMM; - } - } - } - else if(inst.generic.major==1){ - if(inst.M28.x3==0){ - if(inst.M32.x6==0x2c){ - *cause=EVENT_MOV_TO_CR; - }else if(inst.M33.x6==0x24){ - *cause=EVENT_MOV_FROM_CR; - }else if(inst.M35.x6==0x2d){ - *cause=EVENT_MOV_TO_PSR; - }else if(inst.M36.x6==0x25){ - *cause=EVENT_MOV_FROM_PSR; - }else if(inst.M29.x6==0x2A){ - *cause=EVENT_MOV_TO_AR; - }else if(inst.M31.x6==0x22){ - *cause=EVENT_MOV_FROM_AR; - }else if(inst.M45.x6==0x09){ - *cause=EVENT_PTC_L; - }else if(inst.M45.x6==0x0A){ - *cause=EVENT_PTC_G; - }else if(inst.M45.x6==0x0B){ - *cause=EVENT_PTC_GA; - }else if(inst.M45.x6==0x0C){ - *cause=EVENT_PTR_D; - }else if(inst.M45.x6==0x0D){ - *cause=EVENT_PTR_I; - }else if(inst.M46.x6==0x1A){ - *cause=EVENT_THASH; - }else if(inst.M46.x6==0x1B){ - *cause=EVENT_TTAG; - }else if(inst.M46.x6==0x1E){ - *cause=EVENT_TPA; - }else if(inst.M46.x6==0x1F){ - *cause=EVENT_TAK; - }else if(inst.M47.x6==0x34){ - *cause=EVENT_PTC_E; - }else if(inst.M41.x6==0x2E){ - *cause=EVENT_ITC_D; - }else if(inst.M41.x6==0x2F){ - *cause=EVENT_ITC_I; - }else if(inst.M42.x6==0x00){ - *cause=EVENT_MOV_TO_RR; - }else if(inst.M42.x6==0x01){ - *cause=EVENT_MOV_TO_DBR; - }else if(inst.M42.x6==0x02){ - *cause=EVENT_MOV_TO_IBR; - }else if(inst.M42.x6==0x03){ - *cause=EVENT_MOV_TO_PKR; - }else if(inst.M42.x6==0x04){ - *cause=EVENT_MOV_TO_PMC; - }else if(inst.M42.x6==0x05){ - *cause=EVENT_MOV_TO_PMD; - }else if(inst.M42.x6==0x0E){ - *cause=EVENT_ITR_D; - }else if(inst.M42.x6==0x0F){ - *cause=EVENT_ITR_I; - }else if(inst.M43.x6==0x10){ - *cause=EVENT_MOV_FROM_RR; - }else if(inst.M43.x6==0x11){ - *cause=EVENT_MOV_FROM_DBR; - }else if(inst.M43.x6==0x12){ - *cause=EVENT_MOV_FROM_IBR; - }else if(inst.M43.x6==0x13){ - *cause=EVENT_MOV_FROM_PKR; - }else if(inst.M43.x6==0x14){ - *cause=EVENT_MOV_FROM_PMC; -/* - }else if(inst.M43.x6==0x15){ - *cause=EVENT_MOV_FROM_PMD; -*/ - }else if(inst.M43.x6==0x17){ - *cause=EVENT_MOV_FROM_CPUID; - } - } - } - break; - case B: - if(inst.generic.major==0){ - if(inst.B8.x6==0x02){ - *cause=EVENT_COVER; - }else if(inst.B8.x6==0x08){ - *cause=EVENT_RFI; - }else if(inst.B8.x6==0x0c){ - *cause=EVENT_BSW_0; - }else if(inst.B8.x6==0x0d){ - *cause=EVENT_BSW_1; - } - } - } -} - -IA64FAULT vmx_emul_rsm(VCPU *vcpu, INST64 inst) -{ - UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; - return vmx_vcpu_reset_psr_sm(vcpu,imm24); -} - -IA64FAULT vmx_emul_ssm(VCPU *vcpu, INST64 inst) -{ - UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; - return vmx_vcpu_set_psr_sm(vcpu,imm24); -} - -unsigned long last_guest_psr = 0x0; -IA64FAULT vmx_emul_mov_from_psr(VCPU *vcpu, INST64 inst) -{ - UINT64 tgt = inst.M33.r1; - UINT64 val; - IA64FAULT fault; - -/* - if ((fault = vmx_vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT) - return vmx_vcpu_set_gr(vcpu, tgt, val); - 
else return fault; - */ - val = vmx_vcpu_get_psr(vcpu); - val = (val & MASK(0, 32)) | (val & MASK(35, 2)); - last_guest_psr = val; - return vmx_vcpu_set_gr(vcpu, tgt, val, 0); -} - -/** - * @todo Check for reserved bits and return IA64_RSVDREG_FAULT. - */ -IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst) -{ - UINT64 val; - IA64FAULT fault; - if(vmx_vcpu_get_gr(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT) - panic(" get_psr nat bit fault\n"); - - val = (val & MASK(0, 32)) | (VMX_VPD(vcpu, vpsr) & MASK(32, 32)); -#if 0 - if (last_mov_from_psr && (last_guest_psr != (val & MASK(0,32)))) - while(1); - else - last_mov_from_psr = 0; -#endif - return vmx_vcpu_set_psr_l(vcpu,val); -} - - -/************************************************************************** -Privileged operation emulation routines -**************************************************************************/ - -IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST64 inst) -{ - IA64_PSR vpsr; - REGS *regs; -#ifdef CHECK_FAULT - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - regs=vcpu_regs(vcpu); - vpsr.val=regs->cr_ipsr; - if ( vpsr.is == 1 ) { - panic ("We do not support IA32 instruction yet"); - } - - return vmx_vcpu_rfi(vcpu); -} - -IA64FAULT vmx_emul_bsw0(VCPU *vcpu, INST64 inst) -{ -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - return vmx_vcpu_bsw0(vcpu); -} - -IA64FAULT vmx_emul_bsw1(VCPU *vcpu, INST64 inst) -{ -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - return vmx_vcpu_bsw1(vcpu); -} - -IA64FAULT vmx_emul_cover(VCPU *vcpu, INST64 inst) -{ - return vmx_vcpu_cover(vcpu); -} - -IA64FAULT vmx_emul_ptc_l(VCPU *vcpu, INST64 inst) -{ - u64 r2,r3; - ISR isr; - IA64_PSR vpsr; - - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } - if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&r2)){ -#ifdef VMAL_NO_FAULT_CHECK - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif // VMAL_NO_FAULT_CHECK - } -#ifdef VMAL_NO_FAULT_CHECK - if (unimplemented_gva(vcpu,r3) ) { - isr.val = set_isr_ei_ni(vcpu); - isr.code = IA64_RESERVED_REG_FAULT; - vcpu_set_isr(vcpu, isr.val); - unimpl_daddr(vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - return vmx_vcpu_ptc_l(vcpu,r3,bits(r2,2,7)); -} - -IA64FAULT vmx_emul_ptc_e(VCPU *vcpu, INST64 inst) -{ - u64 r3; - ISR isr; - IA64_PSR vpsr; - - vpsr.val=vmx_vcpu_get_psr(vcpu); -#ifdef VMAL_NO_FAULT_CHECK - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - if(vmx_vcpu_get_gr(vcpu,inst.M47.r3,&r3)){ -#ifdef VMAL_NO_FAULT_CHECK - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif // VMAL_NO_FAULT_CHECK - } 
- return vmx_vcpu_ptc_e(vcpu,r3); -} - -IA64FAULT vmx_emul_ptc_g(VCPU *vcpu, INST64 inst) -{ - return vmx_emul_ptc_l(vcpu, inst); -} - -IA64FAULT vmx_emul_ptc_ga(VCPU *vcpu, INST64 inst) -{ - return vmx_emul_ptc_l(vcpu, inst); -} - -IA64FAULT ptr_fault_check(VCPU *vcpu, INST64 inst, u64 *pr2, u64 *pr3) -{ - ISR isr; - IA64FAULT ret1, ret2; - -#ifdef VMAL_NO_FAULT_CHECK - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r3,pr3); - ret2 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pr2); -#ifdef VMAL_NO_FAULT_CHECK - if ( ret1 != IA64_NO_FAULT || ret2 != IA64_NO_FAULT ) { - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; - } - if (unimplemented_gva(vcpu,r3) ) { - isr.val = set_isr_ei_ni(vcpu); - isr.code = IA64_RESERVED_REG_FAULT; - vcpu_set_isr(vcpu, isr.val); - unimpl_daddr(vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - return IA64_NO_FAULT; -} - -IA64FAULT vmx_emul_ptr_d(VCPU *vcpu, INST64 inst) -{ - u64 r2,r3; - if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT ) - return IA64_FAULT; - return vmx_vcpu_ptr_d(vcpu,r3,bits(r2,2,7)); -} - -IA64FAULT vmx_emul_ptr_i(VCPU *vcpu, INST64 inst) -{ - u64 r2,r3; - if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT ) - return IA64_FAULT; - return vmx_vcpu_ptr_i(vcpu,r3,bits(r2,2,7)); -} - - -IA64FAULT vmx_emul_thash(VCPU *vcpu, INST64 inst) -{ - u64 r1,r3; - ISR visr; - IA64_PSR vpsr; -#ifdef CHECK_FAULT - if(check_target_register(vcpu, inst.M46.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ -#ifdef CHECK_FAULT - vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); - return IA64_NO_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if(unimplemented_gva(vcpu, r3)){ - vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); - return IA64_NO_FAULT; - } -#endif //CHECK_FAULT - vmx_vcpu_thash(vcpu, r3, &r1); - vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); - return(IA64_NO_FAULT); -} - - -IA64FAULT vmx_emul_ttag(VCPU *vcpu, INST64 inst) -{ - u64 r1,r3; - ISR visr; - IA64_PSR vpsr; - #ifdef CHECK_FAULT - if(check_target_register(vcpu, inst.M46.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ -#ifdef CHECK_FAULT - vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); - return IA64_NO_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if(unimplemented_gva(vcpu, r3)){ - vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); - return IA64_NO_FAULT; - } -#endif //CHECK_FAULT - vmx_vcpu_ttag(vcpu, r3, &r1); - vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); - return(IA64_NO_FAULT); -} - - -IA64FAULT vmx_emul_tpa(VCPU *vcpu, INST64 inst) -{ - u64 r1,r3; - ISR visr; -#ifdef CHECK_FAULT - if(check_target_register(vcpu, inst.M46.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if(vpsr.cpl!=0){ - visr.val=0; - vcpu_set_isr(vcpu, visr.val); - return IA64_FAULT; - } -#endif //CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,1); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if (unimplemented_gva(vcpu,r3) ) { - // inject 
unimplemented_data_address_fault - visr.val = set_isr_ei_ni(vcpu); - visr.code = IA64_RESERVED_REG_FAULT; - vcpu_set_isr(vcpu, isr.val); - // FAULT_UNIMPLEMENTED_DATA_ADDRESS. - unimpl_daddr(vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - - if(vmx_vcpu_tpa(vcpu, r3, &r1)){ - return IA64_FAULT; - } - vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); - return(IA64_NO_FAULT); -} - -IA64FAULT vmx_emul_tak(VCPU *vcpu, INST64 inst) -{ - u64 r1,r3; - ISR visr; - IA64_PSR vpsr; - int fault=IA64_NO_FAULT; -#ifdef CHECK_FAULT - visr.val=0; - if(check_target_register(vcpu, inst.M46.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - vpsr.val=vmx_vcpu_get_psr(vcpu); - if(vpsr.cpl!=0){ - vcpu_set_isr(vcpu, visr.val); - return IA64_FAULT; - } -#endif - if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,1); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif - } - if(vmx_vcpu_tak(vcpu, r3, &r1)){ - return IA64_FAULT; - } - vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); - return(IA64_NO_FAULT); -} - - -/************************************ - * Insert translation register/cache -************************************/ - -IA64FAULT vmx_emul_itr_d(VCPU *vcpu, INST64 inst) -{ - UINT64 fault, itir, ifa, pte, slot; - ISR isr; - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.ic ) { - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } -#ifdef VMAL_NO_FAULT_CHECK - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){ -#ifdef VMAL_NO_FAULT_CHECK - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif // VMAL_NO_FAULT_CHECK - } -#ifdef VMAL_NO_FAULT_CHECK - if(is_reserved_rr_register(vcpu, slot)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - - if (vmx_vcpu_get_itir(vcpu,&itir)){ - return(IA64_FAULT); - } - if (vmx_vcpu_get_ifa(vcpu,&ifa)){ - return(IA64_FAULT); - } -#ifdef VMAL_NO_FAULT_CHECK - if (is_reserved_itir_field(vcpu, itir)) { - // TODO - return IA64_FAULT; - } - if (unimplemented_gva(vcpu,ifa) ) { - isr.val = set_isr_ei_ni(vcpu); - isr.code = IA64_RESERVED_REG_FAULT; - vcpu_set_isr(vcpu, isr.val); - unimpl_daddr(vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - - return (vmx_vcpu_itr_d(vcpu,pte,itir,ifa,slot)); -} - -IA64FAULT vmx_emul_itr_i(VCPU *vcpu, INST64 inst) -{ - UINT64 fault, itir, ifa, pte, slot; - ISR isr; - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.ic ) { - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } -#ifdef VMAL_NO_FAULT_CHECK - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){ -#ifdef VMAL_NO_FAULT_CHECK - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif // VMAL_NO_FAULT_CHECK - } -#ifdef VMAL_NO_FAULT_CHECK - if(is_reserved_rr_register(vcpu, slot)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - - if (vmx_vcpu_get_itir(vcpu,&itir)){ - return(IA64_FAULT); - } - if 
(vmx_vcpu_get_ifa(vcpu,&ifa)){ - return(IA64_FAULT); - } -#ifdef VMAL_NO_FAULT_CHECK - if (is_reserved_itir_field(vcpu, itir)) { - // TODO - return IA64_FAULT; - } - if (unimplemented_gva(vcpu,ifa) ) { - isr.val = set_isr_ei_ni(vcpu); - isr.code = IA64_RESERVED_REG_FAULT; - vcpu_set_isr(vcpu, isr.val); - unimpl_daddr(vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - - return (vmx_vcpu_itr_i(vcpu,pte,itir,ifa,slot)); -} - -IA64FAULT itc_fault_check(VCPU *vcpu, INST64 inst, u64 *itir, u64 *ifa,u64 *pte) -{ - UINT64 fault; - ISR isr; - IA64_PSR vpsr; - IA64FAULT ret1; - - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.ic ) { - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - -#ifdef VMAL_NO_FAULT_CHECK - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pte); -#ifdef VMAL_NO_FAULT_CHECK - if( ret1 != IA64_NO_FAULT ){ - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - - if (vmx_vcpu_get_itir(vcpu,itir)){ - return(IA64_FAULT); - } - if (vmx_vcpu_get_ifa(vcpu,ifa)){ - return(IA64_FAULT); - } -#ifdef VMAL_NO_FAULT_CHECK - if (unimplemented_gva(vcpu,ifa) ) { - isr.val = set_isr_ei_ni(vcpu); - isr.code = IA64_RESERVED_REG_FAULT; - vcpu_set_isr(vcpu, isr.val); - unimpl_daddr(vcpu); - return IA64_FAULT; - } -#endif // VMAL_NO_FAULT_CHECK - return IA64_NO_FAULT; -} - -IA64FAULT vmx_emul_itc_d(VCPU *vcpu, INST64 inst) -{ - UINT64 itir, ifa, pte; - - if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) { - return IA64_FAULT; - } - - return (vmx_vcpu_itc_d(vcpu,pte,itir,ifa)); -} - -IA64FAULT vmx_emul_itc_i(VCPU *vcpu, INST64 inst) -{ - UINT64 itir, ifa, pte; - - if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) { - return IA64_FAULT; - } - - return (vmx_vcpu_itc_i(vcpu,pte,itir,ifa)); - -} - -/************************************* - * Moves to semi-privileged registers -*************************************/ - -IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *vcpu, INST64 inst) -{ - // I27 and M30 are identical for these fields - if(inst.M30.ar3!=44){ - panic("Can't support ar register other than itc"); - } -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - UINT64 imm; - if(inst.M30.s){ - imm = -inst.M30.imm; - }else{ - imm = inst.M30.imm; - } - return (vmx_vcpu_set_itc(vcpu, imm)); -} - -IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *vcpu, INST64 inst) -{ - // I26 and M29 are identical for these fields - u64 r2; - if(inst.M29.ar3!=44){ - panic("Can't support ar register other than itc"); - } - if(vmx_vcpu_get_gr(vcpu,inst.M29.r2,&r2)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - return (vmx_vcpu_set_itc(vcpu, r2)); -} - - -IA64FAULT vmx_emul_mov_from_ar_reg(VCPU *vcpu, INST64 inst) -{ - // I27 and M30 are identical for these fields - 
if(inst.M31.ar3!=44){ - panic("Can't support ar register other than itc"); - } -#ifdef CHECK_FAULT - if(check_target_register(vcpu,inst.M31.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.si&& vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - u64 r1; - vmx_vcpu_get_itc(vcpu,&r1); - vmx_vcpu_set_gr(vcpu,inst.M31.r1,r1,0); - return IA64_NO_FAULT; -} - - -/******************************** - * Moves to privileged registers -********************************/ - -IA64FAULT vmx_emul_mov_to_pkr(VCPU *vcpu, INST64 inst) -{ - u64 r3,r2; -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } - return (vmx_vcpu_set_pkr(vcpu,r3,r2)); -} - -IA64FAULT vmx_emul_mov_to_rr(VCPU *vcpu, INST64 inst) -{ - u64 r3,r2; -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } - return (vmx_vcpu_set_rr(vcpu,r3,r2)); -} - -IA64FAULT vmx_emul_mov_to_dbr(VCPU *vcpu, INST64 inst) -{ - u64 r3,r2; -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } - return (vmx_vcpu_set_dbr(vcpu,r3,r2)); -} - -IA64FAULT vmx_emul_mov_to_ibr(VCPU *vcpu, INST64 inst) -{ - u64 r3,r2; -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } - return (vmx_vcpu_set_ibr(vcpu,r3,r2)); -} - -IA64FAULT vmx_emul_mov_to_pmc(VCPU *vcpu, INST64 inst) -{ - u64 r3,r2; -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - 
rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } - return (vmx_vcpu_set_pmc(vcpu,r3,r2)); -} - -IA64FAULT vmx_emul_mov_to_pmd(VCPU *vcpu, INST64 inst) -{ - u64 r3,r2; -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } - return (vmx_vcpu_set_pmd(vcpu,r3,r2)); -} - - -/********************************** - * Moves from privileged registers - **********************************/ - -IA64FAULT vmx_emul_mov_from_rr(VCPU *vcpu, INST64 inst) -{ - u64 r3,r1; -#ifdef CHECK_FAULT - if(check_target_register(vcpu, inst.M43.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } - -#endif //CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if(is_reserved_rr_register(vcpu,r3>>VRN_SHIFT)){ - set_rsv_reg_field_isr(vcpu); - rsv_reg_field(vcpu); - } -#endif //CHECK_FAULT - vmx_vcpu_get_rr(vcpu,r3,&r1); - return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); -} - -IA64FAULT vmx_emul_mov_from_pkr(VCPU *vcpu, INST64 inst) -{ - u64 r3,r1; -#ifdef CHECK_FAULT - if(check_target_register(vcpu, inst.M43.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } - -#endif //CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if(is_reserved_indirect_register(vcpu,r3)){ - set_rsv_reg_field_isr(vcpu); - rsv_reg_field(vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - vmx_vcpu_get_pkr(vcpu,r3,&r1); - return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); -} - -IA64FAULT vmx_emul_mov_from_dbr(VCPU *vcpu, INST64 inst) -{ - u64 r3,r1; -#ifdef CHECK_FAULT - if(check_target_register(vcpu, inst.M43.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } - -#endif //CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if(is_reserved_indirect_register(vcpu,r3)){ - set_rsv_reg_field_isr(vcpu); - rsv_reg_field(vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - vmx_vcpu_get_dbr(vcpu,r3,&r1); - return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); -} - -IA64FAULT vmx_emul_mov_from_ibr(VCPU *vcpu, INST64 inst) -{ - u64 
r3,r1; -#ifdef CHECK_FAULT - if(check_target_register(vcpu, inst.M43.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } - -#endif //CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if(is_reserved_indirect_register(vcpu,r3)){ - set_rsv_reg_field_isr(vcpu); - rsv_reg_field(vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - vmx_vcpu_get_ibr(vcpu,r3,&r1); - return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); -} - -IA64FAULT vmx_emul_mov_from_pmc(VCPU *vcpu, INST64 inst) -{ - u64 r3,r1; -#ifdef CHECK_FAULT - if(check_target_register(vcpu, inst.M43.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if (vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } - -#endif //CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if(is_reserved_indirect_register(vcpu,r3)){ - set_rsv_reg_field_isr(vcpu); - rsv_reg_field(vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - vmx_vcpu_get_pmc(vcpu,r3,&r1); - return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); -} - -IA64FAULT vmx_emul_mov_from_cpuid(VCPU *vcpu, INST64 inst) -{ - u64 r3,r1; -#ifdef CHECK_FAULT - if(check_target_register(vcpu, inst.M43.r1)){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if(is_reserved_indirect_register(vcpu,r3)){ - set_rsv_reg_field_isr(vcpu); - rsv_reg_field(vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - vmx_vcpu_get_cpuid(vcpu,r3,&r1); - return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); -} - -IA64FAULT vmx_emul_mov_to_cr(VCPU *vcpu, INST64 inst) -{ - u64 r2,cr3; -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if(is_reserved_cr(inst.M32.cr3)||(vpsr.ic&&is_interruption_control_cr(inst.M32.cr3))){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - if(vmx_vcpu_get_gr(vcpu, inst.M32.r2, &r2)){ -#ifdef CHECK_FAULT - set_isr_reg_nat_consumption(vcpu,0,0); - rnat_comsumption(vcpu); - return IA64_FAULT; -#endif //CHECK_FAULT - } -#ifdef CHECK_FAULT - if ( check_cr_rsv_fields (inst.M32.cr3, r2)) { - /* Inject Reserved Register/Field fault - * into guest */ - set_rsv_reg_field_isr (vcpu,0); - rsv_reg_field (vcpu); - return IA64_FAULT; - } -#endif //CHECK_FAULT - extern u64 cr_igfld_mask(int index, u64 value); - r2 = cr_igfld_mask(inst.M32.cr3,r2); - VMX_VPD(vcpu, vcr[inst.M32.cr3]) = r2; - switch (inst.M32.cr3) { - case 0: return vmx_vcpu_set_dcr(vcpu,r2); - case 1: return vmx_vcpu_set_itm(vcpu,r2); - case 
2: return vmx_vcpu_set_iva(vcpu,r2); - case 8: return vmx_vcpu_set_pta(vcpu,r2); - case 16:return vmx_vcpu_set_ipsr(vcpu,r2); - case 17:return vmx_vcpu_set_isr(vcpu,r2); - case 19:return vmx_vcpu_set_iip(vcpu,r2); - case 20:return vmx_vcpu_set_ifa(vcpu,r2); - case 21:return vmx_vcpu_set_itir(vcpu,r2); - case 22:return vmx_vcpu_set_iipa(vcpu,r2); - case 23:return vmx_vcpu_set_ifs(vcpu,r2); - case 24:return vmx_vcpu_set_iim(vcpu,r2); - case 25:return vmx_vcpu_set_iha(vcpu,r2); - case 64:printk("SET LID to 0x%lx\n", r2); - return vmx_vcpu_set_lid(vcpu,r2); - case 65:return IA64_NO_FAULT; - case 66:return vmx_vcpu_set_tpr(vcpu,r2); - case 67:return vmx_vcpu_set_eoi(vcpu,r2); - case 68:return IA64_NO_FAULT; - case 69:return IA64_NO_FAULT; - case 70:return IA64_NO_FAULT; - case 71:return IA64_NO_FAULT; - case 72:return vmx_vcpu_set_itv(vcpu,r2); - case 73:return vmx_vcpu_set_pmv(vcpu,r2); - case 74:return vmx_vcpu_set_cmcv(vcpu,r2); - case 80:return vmx_vcpu_set_lrr0(vcpu,r2); - case 81:return vmx_vcpu_set_lrr1(vcpu,r2); - default: return IA64_NO_FAULT; - } -} - - -#define cr_get(cr) \ - ((fault=vmx_vcpu_get_##cr(vcpu,&val))==IA64_NO_FAULT)?\ - vmx_vcpu_set_gr(vcpu, tgt, val,0):fault; - - -IA64FAULT vmx_emul_mov_from_cr(VCPU *vcpu, INST64 inst) -{ - UINT64 tgt = inst.M33.r1; - UINT64 val; - IA64FAULT fault; -#ifdef CHECK_FAULT - IA64_PSR vpsr; - vpsr.val=vmx_vcpu_get_psr(vcpu); - if(is_reserved_cr(inst.M33.cr3)||is_read_only_cr(inst.M33.cr3|| - (vpsr.ic&&is_interruption_control_cr(inst.M33.cr3)))){ - set_illegal_op_isr(vcpu); - illegal_op(vcpu); - return IA64_FAULT; - } - if ( vpsr.cpl != 0) { - /* Inject Privileged Operation fault into guest */ - set_privileged_operation_isr (vcpu, 0); - privilege_op (vcpu); - return IA64_FAULT; - } -#endif // CHECK_FAULT - -// from_cr_cnt[inst.M33.cr3]++; - switch (inst.M33.cr3) { - case 0: return cr_get(dcr); - case 1: return cr_get(itm); - case 2: return cr_get(iva); - case 8: return cr_get(pta); - case 16:return cr_get(ipsr); - case 17:return cr_get(isr); - case 19:return cr_get(iip); - case 20:return cr_get(ifa); - case 21:return cr_get(itir); - case 22:return cr_get(iipa); - case 23:return cr_get(ifs); - case 24:return cr_get(iim); - case 25:return cr_get(iha); -// case 64:val = ia64_getreg(_IA64_REG_CR_LID); -// return vmx_vcpu_set_gr(vcpu,tgt,val,0); - case 64:return cr_get(lid); - case 65: - vmx_vcpu_get_ivr(vcpu,&val); - return vmx_vcpu_set_gr(vcpu,tgt,val,0); - case 66:return cr_get(tpr); - case 67:return vmx_vcpu_set_gr(vcpu,tgt,0L,0); - case 68:return cr_get(irr0); - case 69:return cr_get(irr1); - case 70:return cr_get(irr2); - case 71:return cr_get(irr3); - case 72:return cr_get(itv); - case 73:return cr_get(pmv); - case 74:return cr_get(cmcv); - case 80:return cr_get(lrr0); - case 81:return cr_get(lrr1); - default: - panic("Read reserved cr register"); - } -} - - -static void post_emulation_action(VCPU *vcpu) -{ - if ( vcpu->arch.irq_new_condition ) { - vcpu->arch.irq_new_condition = 0; - vhpi_detection(vcpu); - } -} - -//#define BYPASS_VMAL_OPCODE -extern IA64_SLOT_TYPE slot_types[0x20][3]; -IA64_BUNDLE __vmx_get_domain_bundle(u64 iip) -{ - IA64_BUNDLE bundle; - - fetch_code( current,iip, &bundle.i64[0]); - fetch_code( current,iip+8, &bundle.i64[1]); - return bundle; -} - -/** Emulate a privileged operation. 
- * - * - * @param vcpu virtual cpu - * @cause the reason cause virtualization fault - * @opcode the instruction code which cause virtualization fault - */ - -void -vmx_emulate(VCPU *vcpu, UINT64 cause, UINT64 opcode) -{ - IA64_BUNDLE bundle; - int slot; - IA64_SLOT_TYPE slot_type; - IA64FAULT status; - INST64 inst; - REGS * regs; - UINT64 iip; - regs = vcpu_regs(vcpu); - iip = regs->cr_iip; - IA64_PSR vpsr; -/* - if (privop_trace) { - static long i = 400; - //if (i > 0) printf("privop @%p\n",iip); - if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n", - iip,ia64_get_itc(),ia64_get_itm()); - i--; - } -*/ -#ifdef VTLB_DEBUG - check_vtlb_sanity(vmx_vcpu_get_vtlb(vcpu)); - dump_vtlb(vmx_vcpu_get_vtlb(vcpu)); -#endif -#if 0 -if ( (cause == 0xff && opcode == 0x1e000000000) || cause == 0 ) { - printf ("VMAL decode error: cause - %lx; op - %lx\n", - cause, opcode ); - return; -} -#endif -#ifdef BYPASS_VMAL_OPCODE - // make a local copy of the bundle containing the privop - bundle = __vmx_get_domain_bundle(iip); - slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; - if (!slot) inst.inst = bundle.slot0; - else if (slot == 1) - inst.inst = bundle.slot1a + (bundle.slot1b<<18); - else if (slot == 2) inst.inst = bundle.slot2; - else printf("priv_handle_op: illegal slot: %d\n", slot); - slot_type = slot_types[bundle.template][slot]; - ia64_priv_decoder(slot_type, inst, &cause); - if(cause==0){ - printf("This instruction at 0x%lx slot %d can't be virtualized", iip, slot); - panic("123456\n"); - } -#else - inst.inst=opcode; -#endif /* BYPASS_VMAL_OPCODE */ - - /* - * Switch to actual virtual rid in rr0 and rr4, - * which is required by some tlb related instructions. - */ - prepare_if_physical_mode(vcpu); - - switch(cause) { - case EVENT_RSM: - status=vmx_emul_rsm(vcpu, inst); - break; - case EVENT_SSM: - status=vmx_emul_ssm(vcpu, inst); - break; - case EVENT_MOV_TO_PSR: - status=vmx_emul_mov_to_psr(vcpu, inst); - break; - case EVENT_MOV_FROM_PSR: - status=vmx_emul_mov_from_psr(vcpu, inst); - break; - case EVENT_MOV_FROM_CR: - status=vmx_emul_mov_from_cr(vcpu, inst); - break; - case EVENT_MOV_TO_CR: - status=vmx_emul_mov_to_cr(vcpu, inst); - break; - case EVENT_BSW_0: - status=vmx_emul_bsw0(vcpu, inst); - break; - case EVENT_BSW_1: - status=vmx_emul_bsw1(vcpu, inst); - break; - case EVENT_COVER: - status=vmx_emul_cover(vcpu, inst); - break; - case EVENT_RFI: - status=vmx_emul_rfi(vcpu, inst); - break; - case EVENT_ITR_D: - status=vmx_emul_itr_d(vcpu, inst); - break; - case EVENT_ITR_I: - status=vmx_emul_itr_i(vcpu, inst); - break; - case EVENT_PTR_D: - status=vmx_emul_ptr_d(vcpu, inst); - break; - case EVENT_PTR_I: - status=vmx_emul_ptr_i(vcpu, inst); - break; - case EVENT_ITC_D: - status=vmx_emul_itc_d(vcpu, inst); - break; - case EVENT_ITC_I: - status=vmx_emul_itc_i(vcpu, inst); - break; - case EVENT_PTC_L: - status=vmx_emul_ptc_l(vcpu, inst); - break; - case EVENT_PTC_G: - status=vmx_emul_ptc_g(vcpu, inst); - break; - case EVENT_PTC_GA: - status=vmx_emul_ptc_ga(vcpu, inst); - break; - case EVENT_PTC_E: - status=vmx_emul_ptc_e(vcpu, inst); - break; - case EVENT_MOV_TO_RR: - status=vmx_emul_mov_to_rr(vcpu, inst); - break; - case EVENT_MOV_FROM_RR: - status=vmx_emul_mov_from_rr(vcpu, inst); - break; - case EVENT_THASH: - status=vmx_emul_thash(vcpu, inst); - break; - case EVENT_TTAG: - status=vmx_emul_ttag(vcpu, inst); - break; - case EVENT_TPA: - status=vmx_emul_tpa(vcpu, inst); - break; - case EVENT_TAK: - status=vmx_emul_tak(vcpu, inst); - break; - case EVENT_MOV_TO_AR_IMM: - 
status=vmx_emul_mov_to_ar_imm(vcpu, inst); - break; - case EVENT_MOV_TO_AR: - status=vmx_emul_mov_to_ar_reg(vcpu, inst); - break; - case EVENT_MOV_FROM_AR: - status=vmx_emul_mov_from_ar_reg(vcpu, inst); - break; - case EVENT_MOV_TO_DBR: - status=vmx_emul_mov_to_dbr(vcpu, inst); - break; - case EVENT_MOV_TO_IBR: - status=vmx_emul_mov_to_ibr(vcpu, inst); - break; - case EVENT_MOV_TO_PMC: - status=vmx_emul_mov_to_pmc(vcpu, inst); - break; - case EVENT_MOV_TO_PMD: - status=vmx_emul_mov_to_pmd(vcpu, inst); - break; - case EVENT_MOV_TO_PKR: - status=vmx_emul_mov_to_pkr(vcpu, inst); - break; - case EVENT_MOV_FROM_DBR: - status=vmx_emul_mov_from_dbr(vcpu, inst); - break; - case EVENT_MOV_FROM_IBR: - status=vmx_emul_mov_from_ibr(vcpu, inst); - break; - case EVENT_MOV_FROM_PMC: - status=vmx_emul_mov_from_pmc(vcpu, inst); - break; - case EVENT_MOV_FROM_PKR: - status=vmx_emul_mov_from_pkr(vcpu, inst); - break; - case EVENT_MOV_FROM_CPUID: - status=vmx_emul_mov_from_cpuid(vcpu, inst); - break; - case EVENT_VMSW: - printf ("Unimplemented instruction %d\n", cause); - status=IA64_FAULT; - break; - default: - printf("unknown cause %d, iip: %lx, ipsr: %lx\n", cause,regs->cr_iip,regs->cr_ipsr); - while(1); - /* For unknown cause, let hardware to re-execute */ - status=IA64_RETRY; - break; -// panic("unknown cause in virtualization intercept"); - }; - -#if 0 - if (status == IA64_FAULT) - panic("Emulation failed with cause %d:\n", cause); -#endif - - if ( status == IA64_NO_FAULT && cause !=EVENT_RFI ) { - vmx_vcpu_increment_iip(vcpu); - } - - recover_if_physical_mode(vcpu); - post_emulation_action (vcpu); -//TODO set_irq_check(v); - return; - -} - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vmx_vsa.S --- a/xen/arch/ia64/vmx_vsa.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,84 +0,0 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vmx_vsa.c: Call PAL virtualization services. - * Copyright (c) 2005, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Arun Sharma <arun.sharma@xxxxxxxxx> - * Xuefei Xu (Anthony Xu) (Anthony.xu@xxxxxxxxx) - */ - -#include <asm/asmmacro.h> - - - .text - -/* - * extern UINT64 ia64_call_vsa(UINT64 proc,UINT64 arg1, UINT64 arg2, - * UINT64 arg3, UINT64 arg4, UINT64 arg5, - * UINT64 arg6, UINT64 arg7); - * - * XXX: The currently defined services use only 4 args at the max. The - * rest are not consumed. 
- */ -GLOBAL_ENTRY(ia64_call_vsa) - .regstk 4,4,0,0 - -rpsave = loc0 -pfssave = loc1 -psrsave = loc2 -entry = loc3 -hostret = r24 - - alloc pfssave=ar.pfs,4,4,0,0 - mov rpsave=rp - movl entry=@gprel(__vsa_base) -1: mov hostret=ip - mov r25=in1 // copy arguments - mov r26=in2 - mov r27=in3 - mov psrsave=psr - ;; - add entry=entry,gp - tbit.nz p6,p0=psrsave,14 // IA64_PSR_I - tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC - ;; - ld8 entry=[entry] // read entry point - ;; - add hostret=2f-1b,hostret // calculate return address - add entry=entry,in0 - ;; - rsm psr.i | psr.ic - ;; - srlz.d - mov b6=entry - br.cond.sptk b6 // call the service -2: - // Architectural sequence for enabling interrupts if necessary -(p7) ssm psr.ic - ;; -(p7) srlz.d - ;; -(p6) ssm psr.i - ;; - mov rp=rpsave - mov ar.pfs=pfssave - mov r8=r31 - ;; - srlz.d - br.ret.sptk rp - -END(ia64_call_vsa) - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/vtlb.c --- a/xen/arch/ia64/vtlb.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,1094 +0,0 @@ - -/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ -/* - * vtlb.c: guest virtual tlb handling module. - * Copyright (c) 2004, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Yaozu Dong (Eddie Dong) (Eddie.dong@xxxxxxxxx) - * XiaoYan Feng (Fleming Feng) (Fleming.feng@xxxxxxxxx) - */ - -#include <linux/sched.h> -#include <asm/tlb.h> -#include <asm/mm.h> -#include <asm/vmx_mm_def.h> -#include <asm/gcc_intrin.h> -#include <linux/interrupt.h> -#include <asm/vmx_vcpu.h> -#define MAX_CCH_LENGTH 40 - - -static void cch_mem_init(thash_cb_t *hcb) -{ - thash_cch_mem_t *p, *q; - - hcb->cch_freelist = p = hcb->cch_buf; - - for ( q=p+1; (u64)(q + 1) <= (u64)hcb->cch_buf + hcb->cch_sz; - p++, q++ ) { - p->next = q; - } - p->next = NULL; -} - -static thash_data_t *cch_alloc(thash_cb_t *hcb) -{ - thash_cch_mem_t *p; - - if ( (p = hcb->cch_freelist) != NULL ) { - hcb->cch_freelist = p->next; - } - return &(p->data); -} - -static void cch_free(thash_cb_t *hcb, thash_data_t *cch) -{ - thash_cch_mem_t *p = (thash_cch_mem_t*)cch; - - p->next = hcb->cch_freelist; - hcb->cch_freelist = p; -} - -/* - * Check to see if the address rid:va is translated by the TLB - */ -static int __is_translated(thash_data_t *tlb, u64 rid, u64 va, CACHE_LINE_TYPE cl) -{ - u64 size1,sa1,ea1; - - if ( tlb->rid != rid || tlb->cl != cl ) - return 0; - size1 = PSIZE(tlb->ps); - sa1 = tlb->vadr & ~(size1-1); // mask the low address bits - ea1 = sa1 + size1; - - if ( va >= sa1 && (va < ea1 || ea1 == 0) ) - return 1; - else - return 0; -} - -/* - * Only for TLB format. 
- */ -static int -__is_tlb_overlap(thash_cb_t *hcb,thash_data_t *entry,int rid, char cl, u64 sva, u64 eva) -{ - uint64_t size1,size2,sa1,ea1,ea2; - - if ( entry->invalid || entry->rid != rid || entry->cl != cl ) { - return 0; - } - size1=PSIZE(entry->ps); - sa1 = entry->vadr & ~(size1-1); // mask the low address bits - ea1 = sa1 + size1; - if ( (sva >= ea1 && ea1 != 0) || (eva <= sa1 && eva != 0) ) - return 0; - else - return 1; - -} - -static void __rem_tr (thash_cb_t *hcb, thash_data_t *tr) -{ - if ( hcb->remove_notifier ) { - (hcb->remove_notifier)(hcb,tr); - } - tr->invalid = 1; -} - -static inline void __set_tr (thash_data_t *tr, thash_data_t *data, int idx) -{ - *tr = *data; - tr->tr_idx = idx; -} - - -static void __init_tr(thash_cb_t *hcb) -{ - int i; - thash_data_t *tr; - - for ( i=0, tr = &ITR(hcb,0); i<NITRS; i++ ) { - tr[i].invalid = 1; - } - for ( i=0, tr = &DTR(hcb,0); i<NDTRS; i++ ) { - tr[i].invalid = 1; - } -} - -/* - * Replace TR entry. - */ -static void rep_tr(thash_cb_t *hcb,thash_data_t *insert, int idx) -{ - thash_data_t *tr; - - if ( insert->cl == ISIDE_TLB ) { - tr = &ITR(hcb,idx); - } - else { - tr = &DTR(hcb,idx); - } - if ( !INVALID_TLB(tr) ) { - __rem_tr(hcb, tr); - } - __set_tr (tr, insert, idx); -} - -/* - * remove TR entry. - */ -static void rem_tr(thash_cb_t *hcb,CACHE_LINE_TYPE cl, int idx) -{ - thash_data_t *tr; - - if ( cl == ISIDE_TLB ) { - tr = &ITR(hcb,idx); - } - else { - tr = &DTR(hcb,idx); - } - if ( !INVALID_TLB(tr) ) { - __rem_tr(hcb, tr); - } -} - -/* - * Delete an thash entry in collision chain. - * prev: the previous entry. - * rem: the removed entry. - */ -static void __rem_chain(thash_cb_t *hcb/*, thash_data_t *prev*/, thash_data_t *rem) -{ - //prev->next = rem->next; - if ( hcb->remove_notifier ) { - (hcb->remove_notifier)(hcb,rem); - } - cch_free (hcb, rem); -} - -/* - * Delete an thash entry leading collision chain. - */ -static void __rem_hash_head(thash_cb_t *hcb, thash_data_t *hash) -{ - thash_data_t *next=hash->next; - - if ( hcb->remove_notifier ) { - (hcb->remove_notifier)(hcb,hash); - } - if ( next != NULL ) { - *hash = *next; - cch_free (hcb, next); - } - else { - INVALIDATE_HASH(hcb, hash); - } -} - -thash_data_t *__vtr_lookup(thash_cb_t *hcb, - u64 rid, u64 va, - CACHE_LINE_TYPE cl) -{ - thash_data_t *tr; - int num,i; - - if ( cl == ISIDE_TLB ) { - tr = &ITR(hcb,0); - num = NITRS; - } - else { - tr = &DTR(hcb,0); - num = NDTRS; - } - for ( i=0; i<num; i++ ) { - if ( !INVALID_ENTRY(hcb,&tr[i]) && - __is_translated(&tr[i], rid, va, cl) ) - return &tr[i]; - } - return NULL; -} - - -/* - * Find overlap VHPT entry within current collision chain - * base on internal priv info. - */ -static inline thash_data_t* _vhpt_next_overlap_in_chain(thash_cb_t *hcb) -{ - thash_data_t *cch; - thash_internal_t *priv = &hcb->priv; - - - for (cch=priv->cur_cch; cch; cch = cch->next) { - if ( priv->tag == cch->etag ) { - return cch; - } - } - return NULL; -} - -/* - * Find overlap TLB/VHPT entry within current collision chain - * base on internal priv info. - */ -static thash_data_t *_vtlb_next_overlap_in_chain(thash_cb_t *hcb) -{ - thash_data_t *cch; - thash_internal_t *priv = &hcb->priv; - - /* Find overlap TLB entry */ - for (cch=priv->cur_cch; cch; cch = cch->next) { - if ( ( cch->tc ? priv->s_sect.tc : priv->s_sect.tr ) && - __is_tlb_overlap(hcb, cch, priv->rid, priv->cl, - priv->_curva, priv->_eva) ) { - return cch; - } - } - return NULL; -} - -/* - * Get the machine format of VHPT entry. 
- * PARAS: - * 1: tlb: means the tlb format hash entry converting to VHPT. - * 2: va means the guest virtual address that must be coverd by - * the translated machine VHPT. - * 3: vhpt: means the machine format VHPT converting from tlb. - * NOTES: - * 1: In case of the machine address is discontiguous, - * "tlb" needs to be covered by several machine VHPT. va - * is used to choice one of them. - * 2: Foreign map is supported in this API. - * RETURN: - * 0/1: means successful or fail. - * - */ -int __tlb_to_vhpt(thash_cb_t *hcb, - thash_data_t *tlb, u64 va, - thash_data_t *vhpt) -{ - u64 pages,mfn; - ia64_rr vrr; - - ASSERT ( hcb->ht == THASH_VHPT ); - vrr = (hcb->get_rr_fn)(hcb->vcpu,va); - pages = PSIZE(vrr.ps) >> PAGE_SHIFT; - mfn = (hcb->vs->get_mfn)(DOMID_SELF,tlb->ppn, pages); - if ( mfn == INVALID_MFN ) return 0; - - // TODO with machine discontinuous address space issue. - vhpt->etag = (hcb->vs->tag_func)( hcb->pta, - tlb->vadr, tlb->rid, tlb->ps); - //vhpt->ti = 0; - vhpt->itir = tlb->itir & ~ITIR_RV_MASK; - vhpt->page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK; - vhpt->ppn = mfn; - vhpt->next = 0; - return 1; -} - - -/* - * Insert an entry to hash table. - * NOTES: - * 1: TLB entry may be TR, TC or Foreign Map. For TR entry, - * itr[]/dtr[] need to be updated too. - * 2: Inserting to collision chain may trigger recycling if - * the buffer for collision chain is empty. - * 3: The new entry is inserted at the next of hash table. - * (I.e. head of the collision chain) - * 4: The buffer holding the entry is allocated internally - * from cch_buf or just in the hash table. - * 5: Return the entry in hash table or collision chain. - * 6: Input parameter, entry, should be in TLB format. - * I.e. Has va, rid, ps... - * 7: This API is invoked by emulating ITC/ITR and tlb_miss. - * - */ - -void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx) -{ - if ( hcb->ht != THASH_TLB || entry->tc ) { - panic("wrong parameter\n"); - } - entry->vadr = PAGEALIGN(entry->vadr,entry->ps); - entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12); - rep_tr(hcb, entry, idx); - return ; -} - -thash_data_t *__alloc_chain(thash_cb_t *hcb,thash_data_t *entry) -{ - thash_data_t *cch; - - cch = cch_alloc(hcb); - if(cch == NULL){ - // recycle - if ( hcb->recycle_notifier ) { - hcb->recycle_notifier(hcb,(u64)entry); - } - thash_purge_all(hcb); - cch = cch_alloc(hcb); - } - return cch; -} - -/* - * Insert an entry into hash TLB or VHPT. - * NOTES: - * 1: When inserting VHPT to thash, "va" is a must covered - * address by the inserted machine VHPT entry. - * 2: The format of entry is always in TLB. - * 3: The caller need to make sure the new entry will not overlap - * with any existed entry. - */ -void vtlb_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) -{ - thash_data_t *hash_table, *cch; - int flag; - ia64_rr vrr; - u64 gppn; - u64 ppns, ppne; - - hash_table = (hcb->hash_func)(hcb->pta, - va, entry->rid, entry->ps); - if( INVALID_ENTRY(hcb, hash_table) ) { - *hash_table = *entry; - hash_table->next = 0; - } - else { - // TODO: Add collision chain length limitation. 
- cch = __alloc_chain(hcb,entry); - - *cch = *hash_table; - *hash_table = *entry; - hash_table->next = cch; - } - if(hcb->vcpu->domain->domain_id==0){ - thash_insert(hcb->ts->vhpt, entry, va); - return; - } - flag = 1; - gppn = (POFFSET(va,entry->ps)|PAGEALIGN((entry->ppn<<12),entry->ps))>>PAGE_SHIFT; - ppns = PAGEALIGN((entry->ppn<<12),entry->ps); - ppne = ppns + PSIZE(entry->ps); - if(((ppns<=0xa0000)&&(ppne>0xa0000))||((ppne>0xc0000)&&(ppns<=0xc0000))) - flag = 0; - if((__gpfn_is_mem(hcb->vcpu->domain, gppn)&&flag)) - thash_insert(hcb->ts->vhpt, entry, va); - return ; -} - -static void vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) -{ - thash_data_t *hash_table, *cch; - ia64_rr vrr; - - hash_table = (hcb->hash_func)(hcb->pta, - va, entry->rid, entry->ps); - if( INVALID_ENTRY(hcb, hash_table) ) { - if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) { - panic("Can't convert to machine VHPT entry\n"); - } - hash_table->next = 0; - } - else { - // TODO: Add collision chain length limitation. - cch = __alloc_chain(hcb,entry); - - *cch = *hash_table; - if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) { - panic("Can't convert to machine VHPT entry\n"); - } - hash_table->next = cch; - if(hash_table->tag==hash_table->next->tag) - while(1); - } - return /*hash_table*/; -} - -void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) -{ - thash_data_t *hash_table; - ia64_rr vrr; - - vrr = (hcb->get_rr_fn)(hcb->vcpu,entry->vadr); - if ( entry->ps != vrr.ps && entry->tc ) { - panic("Not support for multiple page size now\n"); - } - entry->vadr = PAGEALIGN(entry->vadr,entry->ps); - entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12); - (hcb->ins_hash)(hcb, entry, va); - -} - -static void rem_thash(thash_cb_t *hcb, thash_data_t *entry) -{ - thash_data_t *hash_table, *p, *q; - thash_internal_t *priv = &hcb->priv; - int idx; - - hash_table = priv->hash_base; - if ( hash_table == entry ) { -// if ( PURGABLE_ENTRY(hcb, entry) ) { - __rem_hash_head (hcb, entry); -// } - return ; - } - // remove from collision chain - p = hash_table; - for ( q=p->next; q; q = p->next ) { - if ( q == entry ){ -// if ( PURGABLE_ENTRY(hcb,q ) ) { - p->next = q->next; - __rem_chain(hcb, entry); -// } - return ; - } - p = q; - } - panic("Entry not existed or bad sequence\n"); -} - -static void rem_vtlb(thash_cb_t *hcb, thash_data_t *entry) -{ - thash_data_t *hash_table, *p, *q; - thash_internal_t *priv = &hcb->priv; - int idx; - - if ( !entry->tc ) { - return rem_tr(hcb, entry->cl, entry->tr_idx); - } - rem_thash(hcb, entry); -} - -int cch_depth=0; -/* - * Purge the collision chain starting from cch. - * NOTE: - * For those UN-Purgable entries(FM), this function will return - * the head of left collision chain. - */ -static thash_data_t *thash_rem_cch(thash_cb_t *hcb, thash_data_t *cch) -{ - thash_data_t *next; - - if ( ++cch_depth > MAX_CCH_LENGTH ) { - printf ("cch length > MAX_CCH_LENGTH, exceed the expected length\n"); - while(1); - } - if ( cch -> next ) { - next = thash_rem_cch(hcb, cch->next); - } - else { - next = NULL; - } - if ( PURGABLE_ENTRY(hcb, cch) ) { - __rem_chain(hcb, cch); - return next; - } - else { - cch->next = next; - return cch; - } -} - -/* - * Purge one hash line (include the entry in hash table). - * Can only be called by thash_purge_all. 
- * Input: - * hash: The head of collision chain (hash table) - * - */ -static void thash_rem_line(thash_cb_t *hcb, thash_data_t *hash) -{ - if ( INVALID_ENTRY(hcb, hash) ) return; - - if ( hash->next ) { - cch_depth = 0; - hash->next = thash_rem_cch(hcb, hash->next); - } - // Then hash table itself. - if ( PURGABLE_ENTRY(hcb, hash) ) { - __rem_hash_head(hcb, hash); - } -} - - -/* - * Find an overlap entry in hash table and its collision chain. - * Refer to SDM2 4.1.1.4 for overlap definition. - * PARAS: - * 1: in: TLB format entry, rid:ps must be same with vrr[]. - * va & ps identify the address space for overlap lookup - * 2: section can be combination of TR, TC and FM. (THASH_SECTION_XX) - * 3: cl means I side or D side. - * RETURNS: - * NULL to indicate the end of findings. - * NOTES: - * - */ -thash_data_t *thash_find_overlap(thash_cb_t *hcb, - thash_data_t *in, search_section_t s_sect) -{ - return (hcb->find_overlap)(hcb, in->vadr, - PSIZE(in->ps), in->rid, in->cl, s_sect); -} - -static thash_data_t *vtlb_find_overlap(thash_cb_t *hcb, - u64 va, u64 size, int rid, char cl, search_section_t s_sect) -{ - thash_data_t *hash_table; - thash_internal_t *priv = &hcb->priv; - u64 tag; - ia64_rr vrr; - - priv->_curva = va & ~(size-1); - priv->_eva = priv->_curva + size; - priv->rid = rid; - vrr = (hcb->get_rr_fn)(hcb->vcpu,va); - priv->ps = vrr.ps; - hash_table = (hcb->hash_func)(hcb->pta, - priv->_curva, rid, priv->ps); - - priv->s_sect = s_sect; - priv->cl = cl; - priv->_tr_idx = 0; - priv->hash_base = hash_table; - priv->cur_cch = hash_table; - return (hcb->next_overlap)(hcb); -} - -static thash_data_t *vhpt_find_overlap(thash_cb_t *hcb, - u64 va, u64 size, int rid, char cl, search_section_t s_sect) -{ - thash_data_t *hash_table; - thash_internal_t *priv = &hcb->priv; - u64 tag; - ia64_rr vrr; - - priv->_curva = va & ~(size-1); - priv->_eva = priv->_curva + size; - priv->rid = rid; - vrr = (hcb->get_rr_fn)(hcb->vcpu,va); - priv->ps = vrr.ps; - hash_table = (hcb->hash_func)( hcb->pta, - priv->_curva, rid, priv->ps); - tag = (hcb->vs->tag_func)( hcb->pta, - priv->_curva, rid, priv->ps); - - priv->tag = tag; - priv->hash_base = hash_table; - priv->cur_cch = hash_table; - return (hcb->next_overlap)(hcb); -} - - -static thash_data_t *vtr_find_next_overlap(thash_cb_t *hcb) -{ - thash_data_t *tr; - thash_internal_t *priv = &hcb->priv; - int num; - - if ( priv->cl == ISIDE_TLB ) { - num = NITRS; - tr = &ITR(hcb,0); - } - else { - num = NDTRS; - tr = &DTR(hcb,0); - } - for (; priv->_tr_idx < num; priv->_tr_idx ++ ) { - if ( __is_tlb_overlap(hcb, &tr[priv->_tr_idx], - priv->rid, priv->cl, - priv->_curva, priv->_eva) ) { - return &tr[priv->_tr_idx++]; - } - } - return NULL; -} - -/* - * Similar with vtlb_next_overlap but find next entry. - * NOTES: - * Intermediate position information is stored in hcb->priv. 
- */ -static thash_data_t *vtlb_next_overlap(thash_cb_t *hcb) -{ - thash_data_t *ovl; - thash_internal_t *priv = &hcb->priv; - u64 addr,rr_psize; - ia64_rr vrr; - - if ( priv->s_sect.tr ) { - ovl = vtr_find_next_overlap (hcb); - if ( ovl ) return ovl; - priv->s_sect.tr = 0; - } - if ( priv->s_sect.v == 0 ) return NULL; - vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva); - rr_psize = PSIZE(vrr.ps); - - while ( priv->_curva < priv->_eva ) { - if ( !INVALID_ENTRY(hcb, priv->hash_base) ) { - ovl = _vtlb_next_overlap_in_chain(hcb); - if ( ovl ) { - priv->cur_cch = ovl->next; - return ovl; - } - } - priv->_curva += rr_psize; - priv->hash_base = (hcb->hash_func)( hcb->pta, - priv->_curva, priv->rid, priv->ps); - priv->cur_cch = priv->hash_base; - } - return NULL; -} - -static thash_data_t *vhpt_next_overlap(thash_cb_t *hcb) -{ - thash_data_t *ovl; - thash_internal_t *priv = &hcb->priv; - u64 addr,rr_psize; - ia64_rr vrr; - - vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva); - rr_psize = PSIZE(vrr.ps); - - while ( priv->_curva < priv->_eva ) { - if ( !INVALID_ENTRY(hcb, priv->hash_base) ) { - ovl = _vhpt_next_overlap_in_chain(hcb); - if ( ovl ) { - priv->cur_cch = ovl->next; - return ovl; - } - } - priv->_curva += rr_psize; - priv->hash_base = (hcb->hash_func)( hcb->pta, - priv->_curva, priv->rid, priv->ps); - priv->tag = (hcb->vs->tag_func)( hcb->pta, - priv->_curva, priv->rid, priv->ps); - priv->cur_cch = priv->hash_base; - } - return NULL; -} - - -/* - * Find and purge overlap entries in hash table and its collision chain. - * PARAS: - * 1: in: TLB format entry, rid:ps must be same with vrr[]. - * rid, va & ps identify the address space for purge - * 2: section can be combination of TR, TC and FM. (thash_SECTION_XX) - * 3: cl means I side or D side. - * NOTES: - * - */ -void thash_purge_entries(thash_cb_t *hcb, - thash_data_t *in, search_section_t p_sect) -{ - return thash_purge_entries_ex(hcb, in->rid, in->vadr, - in->ps, p_sect, in->cl); -} - -void thash_purge_entries_ex(thash_cb_t *hcb, - u64 rid, u64 va, u64 ps, - search_section_t p_sect, - CACHE_LINE_TYPE cl) -{ - thash_data_t *ovl; - - ovl = (hcb->find_overlap)(hcb, va, PSIZE(ps), rid, cl, p_sect); - while ( ovl != NULL ) { - (hcb->rem_hash)(hcb, ovl); - ovl = (hcb->next_overlap)(hcb); - }; -} - -/* - * Purge overlap TCs and then insert the new entry to emulate itc ops. - * Notes: Only TC entry can purge and insert. - */ -void thash_purge_and_insert(thash_cb_t *hcb, thash_data_t *in) -{ - thash_data_t *ovl; - search_section_t sections; - -#ifdef XEN_DEBUGGER - vrr = (hcb->get_rr_fn)(hcb->vcpu,in->vadr); - if ( in->ps != vrr.ps || hcb->ht != THASH_TLB || !in->tc ) { - panic ("Oops, wrong call for purge_and_insert\n"); - return; - } -#endif - in->vadr = PAGEALIGN(in->vadr,in->ps); - in->ppn = PAGEALIGN(in->ppn, in->ps-12); - sections.tr = 0; - sections.tc = 1; - ovl = (hcb->find_overlap)(hcb, in->vadr, PSIZE(in->ps), - in->rid, in->cl, sections); - if(ovl) - (hcb->rem_hash)(hcb, ovl); -#ifdef XEN_DEBUGGER - ovl = (hcb->next_overlap)(hcb); - if ( ovl ) { - panic ("Oops, 2+ overlaps for purge_and_insert\n"); - return; - } -#endif - (hcb->ins_hash)(hcb, in, in->vadr); -} - -/* - * Purge all TCs or VHPT entries including those in Hash table. - * - */ - -// TODO: add sections. 
-void thash_purge_all(thash_cb_t *hcb) -{ - thash_data_t *hash_table; - -#ifdef VTLB_DEBUG - extern u64 sanity_check; - static u64 statistics_before_purge_all=0; - if ( statistics_before_purge_all ) { - sanity_check = 1; - check_vtlb_sanity(hcb); - } -#endif - - hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz); - - for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) { - thash_rem_line(hcb, hash_table); - } -} - - -/* - * Lookup the hash table and its collision chain to find an entry - * covering this address rid:va or the entry. - * - * INPUT: - * in: TLB format for both VHPT & TLB. - */ -thash_data_t *vtlb_lookup(thash_cb_t *hcb, - thash_data_t *in) -{ - return vtlb_lookup_ex(hcb, in->rid, in->vadr, in->cl); -} - -thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb, - u64 rid, u64 va, - CACHE_LINE_TYPE cl) -{ - thash_data_t *hash_table, *cch; - u64 tag; - ia64_rr vrr; - - ASSERT ( hcb->ht == THASH_VTLB ); - - cch = __vtr_lookup(hcb, rid, va, cl);; - if ( cch ) return cch; - - vrr = (hcb->get_rr_fn)(hcb->vcpu,va); - hash_table = (hcb->hash_func)( hcb->pta,va, rid, vrr.ps); - - if ( INVALID_ENTRY(hcb, hash_table ) ) - return NULL; - - - for (cch=hash_table; cch; cch = cch->next) { - if ( __is_translated(cch, rid, va, cl) ) - return cch; - } - return NULL; -} - -/* - * Lock/Unlock TC if found. - * NOTES: Only the page in prefered size can be handled. - * return: - * 1: failure - * 0: success - */ -int thash_lock_tc(thash_cb_t *hcb, u64 va, u64 size, int rid, char cl, int lock) -{ - thash_data_t *ovl; - search_section_t sections; - - sections.tr = 1; - sections.tc = 1; - ovl = (hcb->find_overlap)(hcb, va, size, rid, cl, sections); - if ( ovl ) { - if ( !ovl->tc ) { -// panic("Oops, TR for lock\n"); - return 0; - } - else if ( lock ) { - if ( ovl->locked ) { - DPRINTK("Oops, already locked entry\n"); - } - ovl->locked = 1; - } - else if ( !lock ) { - if ( !ovl->locked ) { - DPRINTK("Oops, already unlocked entry\n"); - } - ovl->locked = 0; - } - return 0; - } - return 1; -} - -/* - * Notifier when TLB is deleted from hash table and its collision chain. - * NOTES: - * The typical situation is that TLB remove needs to inform - * VHPT to remove too. - * PARAS: - * 1: hcb is TLB object. - * 2: The format of entry is always in TLB. - * - */ -void tlb_remove_notifier(thash_cb_t *hcb, thash_data_t *entry) -{ - thash_cb_t *vhpt; - search_section_t s_sect; - - s_sect.v = 0; - thash_purge_entries(hcb->ts->vhpt, entry, s_sect); - machine_tlb_purge(entry->rid, entry->vadr, entry->ps); -} - -/* - * Initialize internal control data before service. 
- */ -void thash_init(thash_cb_t *hcb, u64 sz) -{ - thash_data_t *hash_table; - - cch_mem_init (hcb); - hcb->magic = THASH_CB_MAGIC; - hcb->pta.val = hcb->hash; - hcb->pta.vf = 1; - hcb->pta.ve = 1; - hcb->pta.size = sz; - hcb->get_rr_fn = vmmu_get_rr; - ASSERT ( hcb->hash_sz % sizeof(thash_data_t) == 0 ); - if ( hcb->ht == THASH_TLB ) { - hcb->remove_notifier = tlb_remove_notifier; - hcb->find_overlap = vtlb_find_overlap; - hcb->next_overlap = vtlb_next_overlap; - hcb->rem_hash = rem_vtlb; - hcb->ins_hash = vtlb_insert; - __init_tr(hcb); - } - else { - hcb->remove_notifier = NULL; - hcb->find_overlap = vhpt_find_overlap; - hcb->next_overlap = vhpt_next_overlap; - hcb->rem_hash = rem_thash; - hcb->ins_hash = vhpt_insert; - } - hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz); - - for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) { - INVALIDATE_HASH(hcb,hash_table); - } -} - -#ifdef VTLB_DEBUG -static u64 cch_length_statistics[MAX_CCH_LENGTH+1]; -u64 sanity_check=0; -u64 vtlb_chain_sanity(thash_cb_t *vtlb, thash_cb_t *vhpt, thash_data_t *hash) -{ - thash_data_t *cch; - thash_data_t *ovl; - search_section_t s_sect; - u64 num=0; - - s_sect.v = 0; - for (cch=hash; cch; cch=cch->next) { - ovl = thash_find_overlap(vhpt, cch, s_sect); - while ( ovl != NULL ) { - ovl->checked = 1; - ovl = (vhpt->next_overlap)(vhpt); - }; - num ++; - } - if ( num >= MAX_CCH_LENGTH ) { - cch_length_statistics[MAX_CCH_LENGTH] ++; - } - else { - cch_length_statistics[num] ++; - } - return num; -} - -void check_vtlb_sanity(thash_cb_t *vtlb) -{ -// struct pfn_info *page; - u64 hash_num, i, psr; - static u64 check_ok_num, check_fail_num,check_invalid; -// void *vb1, *vb2; - thash_data_t *hash, *cch; - thash_data_t *ovl; - search_section_t s_sect; - thash_cb_t *vhpt = vtlb->ts->vhpt; - u64 invalid_ratio; - - if ( sanity_check == 0 ) return; - sanity_check --; - s_sect.v = 0; -// page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER, 0); -// if ( page == NULL ) { -// panic("No enough contiguous memory for init_domain_mm\n"); -// }; -// vb1 = page_to_virt(page); -// printf("Allocated page=%lp vbase=%lp\n", page, vb1); -// vb2 = vb1 + vtlb->hash_sz; - hash_num = vhpt->hash_sz / sizeof(thash_data_t); -// printf("vb2=%lp, size=%lx hash_num=%lx\n", vb2, vhpt->hash_sz, hash_num); - printf("vtlb=%lp, hash=%lp size=0x%lx; vhpt=%lp, hash=%lp size=0x%lx\n", - vtlb, vtlb->hash,vtlb->hash_sz, - vhpt, vhpt->hash, vhpt->hash_sz); - //memcpy(vb1, vtlb->hash, vtlb->hash_sz); - //memcpy(vb2, vhpt->hash, vhpt->hash_sz); - for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) { - cch_length_statistics[i] = 0; - } - - local_irq_save(psr); - - hash = vhpt->hash; - for (i=0; i < hash_num; i++) { - if ( !INVALID_ENTRY(vhpt, hash) ) { - for ( cch= hash; cch; cch=cch->next) { - cch->checked = 0; - } - } - hash ++; - } - printf("Done vhpt clear checked flag, hash_num=0x%lx\n", hash_num); - check_invalid = 0; - check_ok_num=0; - hash = vtlb->hash; - for ( i=0; i< hash_num; i++ ) { - if ( !INVALID_ENTRY(vtlb, hash) ) { - check_ok_num += vtlb_chain_sanity(vtlb, vhpt, hash); - } - else { - check_invalid++; - } - hash ++; - } - printf("Done vtlb entry check, hash=%lp\n", hash); - printf("check_ok_num = 0x%lx check_invalid=0x%lx\n", check_ok_num,check_invalid); - invalid_ratio = 1000*check_invalid / hash_num; - printf("%02ld.%01ld%% entries are invalid\n", - invalid_ratio/10, invalid_ratio % 10 ); - for (i=0; i<NDTRS; i++) { - ovl = thash_find_overlap(vhpt, &vtlb->ts->dtr[i], s_sect); - while ( ovl != 
NULL ) { - ovl->checked = 1; - ovl = (vhpt->next_overlap)(vhpt); - }; - } - printf("Done dTR\n"); - for (i=0; i<NITRS; i++) { - ovl = thash_find_overlap(vhpt, &vtlb->ts->itr[i], s_sect); - while ( ovl != NULL ) { - ovl->checked = 1; - ovl = (vhpt->next_overlap)(vhpt); - }; - } - printf("Done iTR\n"); - check_fail_num = 0; - check_invalid = 0; - check_ok_num=0; - hash = vhpt->hash; - for (i=0; i < hash_num; i++) { - if ( !INVALID_ENTRY(vhpt, hash) ) { - for ( cch= hash; cch; cch=cch->next) { - if ( !cch->checked ) { - printf ("!!!Hash=%lp cch=%lp not within vtlb\n", hash, cch); - check_fail_num ++; - } - else { - check_ok_num++; - } - } - } - else { - check_invalid ++; - } - hash ++; - } - local_irq_restore(psr); - printf("check_ok_num=0x%lx check_fail_num=0x%lx check_invalid=0x%lx\n", - check_ok_num, check_fail_num, check_invalid); - //memcpy(vtlb->hash, vb1, vtlb->hash_sz); - //memcpy(vhpt->hash, vb2, vhpt->hash_sz); - printf("The statistics of collision chain length is listed\n"); - for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) { - printf("CCH length=%02ld, chain number=%ld\n", i, cch_length_statistics[i]); - } -// free_domheap_pages(page, VCPU_TLB_ORDER); - printf("Done check_vtlb\n"); -} - -void dump_vtlb(thash_cb_t *vtlb) -{ - static u64 dump_vtlb=0; - thash_data_t *hash, *cch, *tr; - u64 hash_num,i; - - if ( dump_vtlb == 0 ) return; - dump_vtlb --; - hash_num = vtlb->hash_sz / sizeof(thash_data_t); - hash = vtlb->hash; - - printf("Dump vTC\n"); - for ( i = 0; i < hash_num; i++ ) { - if ( !INVALID_ENTRY(vtlb, hash) ) { - printf("VTLB at hash=%lp\n", hash); - for (cch=hash; cch; cch=cch->next) { - printf("Entry %lp va=%lx ps=%lx rid=%lx\n", - cch, cch->vadr, cch->ps, cch->rid); - } - } - hash ++; - } - printf("Dump vDTR\n"); - for (i=0; i<NDTRS; i++) { - tr = &DTR(vtlb,i); - printf("Entry %lp va=%lx ps=%lx rid=%lx\n", - tr, tr->vadr, tr->ps, tr->rid); - } - printf("Dump vITR\n"); - for (i=0; i<NITRS; i++) { - tr = &ITR(vtlb,i); - printf("Entry %lp va=%lx ps=%lx rid=%lx\n", - tr, tr->vadr, tr->ps, tr->rid); - } - printf("End of vTLB dump\n"); -} -#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xen.lds.S --- a/xen/arch/ia64/xen.lds.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,251 +0,0 @@ -#include <linux/config.h> - -#include <asm/cache.h> -#include <asm/ptrace.h> -#include <asm/system.h> -#include <asm/pgtable.h> - -#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) -#include <asm-generic/vmlinux.lds.h> - -OUTPUT_FORMAT("elf64-ia64-little") -OUTPUT_ARCH(ia64) -ENTRY(phys_start) -jiffies = jiffies_64; -PHDRS { - code PT_LOAD; - percpu PT_LOAD; - data PT_LOAD; -} -SECTIONS -{ - /* Sections to be discarded */ - /DISCARD/ : { - *(.exit.text) - *(.exit.data) - *(.exitcall.exit) - *(.IA_64.unwind.exit.text) - *(.IA_64.unwind_info.exit.text) - } - - v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ - phys_start = _start - LOAD_OFFSET; - - code : { } :code - . = KERNEL_START; - - _text = .; - _stext = .; - - .text : AT(ADDR(.text) - LOAD_OFFSET) - { - *(.text.ivt) - *(.text) - SCHED_TEXT - LOCK_TEXT - *(.gnu.linkonce.t*) - } - .text2 : AT(ADDR(.text2) - LOAD_OFFSET) - { *(.text2) } -#ifdef CONFIG_SMP - .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) - { *(.text.lock) } -#endif - _etext = .; - - /* Read-only data */ - - /* Exception table */ - . 
= ALIGN(16); - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) - { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - - .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET) - { - __start___vtop_patchlist = .; - *(.data.patch.vtop) - __end___vtop_patchlist = .; - } - - .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET) - { - __start___mckinley_e9_bundles = .; - *(.data.patch.mckinley_e9) - __end___mckinley_e9_bundles = .; - } - - /* Global data */ - _data = .; - -#if defined(CONFIG_IA64_GENERIC) - /* Machine Vector */ - . = ALIGN(16); - .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) - { - machvec_start = .; - *(.machvec) - machvec_end = .; - } -#endif - - /* Unwind info & table: */ - . = ALIGN(8); - .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) - { *(.IA_64.unwind_info*) } - .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) - { - __start_unwind = .; - *(.IA_64.unwind*) - __end_unwind = .; - } - - RODATA - - .opd : AT(ADDR(.opd) - LOAD_OFFSET) - { *(.opd) } - - /* Initialization code and data: */ - - . = ALIGN(PAGE_SIZE); - __init_begin = .; - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) - { - _sinittext = .; - *(.init.text) - _einittext = .; - } - - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) - { *(.init.data) } - - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) - { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } - - . = ALIGN(16); - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) - { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) - { - __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) - __initcall_end = .; - } - __con_initcall_start = .; - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) - { *(.con_initcall.init) } - __con_initcall_end = .; - __security_initcall_start = .; - .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) - { *(.security_initcall.init) } - __security_initcall_end = .; - . = ALIGN(PAGE_SIZE); - __init_end = .; - - /* The initial task and kernel stack */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) - { *(.data.init_task) } - - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) - { *(__special_page_section) - __start_gate_section = .; - *(.data.gate) - __stop_gate_section = .; - } - . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */ - - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) - { *(.data.cacheline_aligned) } - - /* Per-cpu data: */ - percpu : { } :percpu - . = ALIGN(PERCPU_PAGE_SIZE); - __phys_per_cpu_start = .; - .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) - { - __per_cpu_start = .; - *(.data.percpu) - __per_cpu_end = .; - } - . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */ - - data : { } :data - .data : AT(ADDR(.data) - LOAD_OFFSET) - { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } - - . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ - .got : AT(ADDR(.got) - LOAD_OFFSET) - { *(.got.plt) *(.got) } - __gp = ADDR(.got) + 0x200000; - /* We want the small data sections together, so single-instruction offsets - can access them all, and initialized data all before uninitialized, so - we can shorten the on-disk segment size. 
*/ - .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) - { *(.sdata) *(.sdata1) *(.srdata) } - _edata = .; - _bss = .; - .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) - { *(.sbss) *(.scommon) } - .bss : AT(ADDR(.bss) - LOAD_OFFSET) - { *(.bss) *(COMMON) } - - _end = .; - - code : { } :code - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - /* DWARF debug sections. - Symbols in the DWARF debugging sections are relative to the beginning - of the section so we begin them at 0. */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* These must appear regardless of . */ - /* Discard them for now since Intel SoftSDV cannot handle them. - .comment 0 : { *(.comment) } - .note 0 : { *(.note) } - */ - /DISCARD/ : { *(.comment) } - /DISCARD/ : { *(.note) } -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xenasm.S --- a/xen/arch/ia64/xenasm.S Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,501 +0,0 @@ -/* - * Assembly support routines for Xen/ia64 - * - * Copyright (C) 2004 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@xxxxxx> - */ - -#include <linux/config.h> -#include <asm/asmmacro.h> -#include <asm/processor.h> -#include <asm/pgtable.h> -#include <asm/vhpt.h> - -#if 0 -// FIXME: there's gotta be a better way... -// ski and spaski are different... moved to xenmisc.c -#define RunningOnHpSki(rx,ry,pn) \ - addl rx = 2, r0; \ - addl ry = 3, r0; \ - ;; \ - mov rx = cpuid[rx]; \ - mov ry = cpuid[ry]; \ - ;; \ - cmp.eq pn,p0 = 0, rx; \ - ;; \ - (pn) movl rx = 0x7000004 ; \ - ;; \ - (pn) cmp.ge pn,p0 = ry, rx; \ - ;; - -//int platform_is_hp_ski(void) -GLOBAL_ENTRY(platform_is_hp_ski) - mov r8 = 0 - RunningOnHpSki(r3,r9,p8) -(p8) mov r8 = 1 - br.ret.sptk.many b0 -END(platform_is_hp_ski) -#endif - -// Change rr7 to the passed value while ensuring -// Xen is mapped into the new region. -// in0: new rr7 value -// in1: Xen virtual address of shared info (to be pinned) -#define PSR_BITS_TO_CLEAR \ - (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ - IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ - IA64_PSR_DFL | IA64_PSR_DFH) -// FIXME? Note that this turns off the DB bit (debug) -#define PSR_BITS_TO_SET IA64_PSR_BN - -//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info); -GLOBAL_ENTRY(ia64_new_rr7) - // not sure this unwind statement is correct... 
- .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1) - alloc loc1 = ar.pfs, 3, 8, 0, 0 -1: { - mov r28 = in0 // copy procedure index - mov r8 = ip // save ip to compute branch - mov loc0 = rp // save rp - };; - .body - movl loc2=PERCPU_ADDR - ;; - tpa loc2=loc2 // grab this BEFORE changing rr7 - ;; -#if VHPT_ENABLED - movl loc6=VHPT_ADDR - ;; - tpa loc6=loc6 // grab this BEFORE changing rr7 - ;; -#endif - mov loc5=in1 - ;; - tpa loc5=loc5 // grab this BEFORE changing rr7 - ;; - mov loc7=in2 // arch_vcpu_info_t - ;; - tpa loc7=loc7 // grab this BEFORE changing rr7 - ;; - mov loc3 = psr // save psr - adds r8 = 1f-1b,r8 // calculate return address for call - ;; - tpa r8=r8 // convert rp to physical - ;; - mov loc4=ar.rsc // save RSE configuration - ;; - mov ar.rsc=0 // put RSE in enforced lazy, LE mode - movl r16=PSR_BITS_TO_CLEAR - movl r17=PSR_BITS_TO_SET - ;; - or loc3=loc3,r17 // add in psr the bits to set - ;; - andcm r16=loc3,r16 // removes bits to clear from psr - br.call.sptk.many rp=ia64_switch_mode_phys -1: - // now in physical mode with psr.i/ic off so do rr7 switch - dep r16=-1,r0,61,3 - ;; - mov rr[r16]=in0 - srlz.d - ;; - - // re-pin mappings for kernel text and data - mov r18=KERNEL_TR_PAGE_SHIFT<<2 - movl r17=KERNEL_START - ;; - rsm psr.i | psr.ic - ;; - srlz.i - ;; - ptr.i r17,r18 - ptr.d r17,r18 - ;; - mov cr.itir=r18 - mov cr.ifa=r17 - mov r16=IA64_TR_KERNEL - //mov r3=ip - movl r18=PAGE_KERNEL - ;; - dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT - ;; - or r18=r2,r18 - ;; - srlz.i - ;; - itr.i itr[r16]=r18 - ;; - itr.d dtr[r16]=r18 - ;; - - // re-pin mappings for stack (current), per-cpu, vhpt, and shared info - - // unless overlaps with KERNEL_TR - dep r18=0,r13,0,KERNEL_TR_PAGE_SHIFT - ;; - cmp.eq p7,p0=r17,r18 -(p7) br.cond.sptk .stack_overlaps - ;; - movl r25=PAGE_KERNEL - dep r21=0,r13,60,4 // physical address of "current" - ;; - or r23=r25,r21 // construct PA | page properties - mov r25=IA64_GRANULE_SHIFT<<2 - ;; - ptr.d r13,r25 - ;; - mov cr.itir=r25 - mov cr.ifa=r13 // VA of next task... - ;; - mov r25=IA64_TR_CURRENT_STACK - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - ;; -.stack_overlaps: - - movl r22=PERCPU_ADDR - ;; - movl r25=PAGE_KERNEL - ;; - mov r21=loc2 // saved percpu physical address - ;; - or r23=r25,r21 // construct PA | page properties - mov r24=PERCPU_PAGE_SHIFT<<2 - ;; - ptr.d r22,r24 - ;; - mov cr.itir=r24 - mov cr.ifa=r22 - ;; - mov r25=IA64_TR_PERCPU_DATA - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - ;; - -#if VHPT_ENABLED - movl r22=VHPT_ADDR - ;; - movl r25=PAGE_KERNEL - ;; - mov r21=loc6 // saved vhpt physical address - ;; - or r23=r25,r21 // construct PA | page properties - mov r24=VHPT_PAGE_SHIFT<<2 - ;; - ptr.d r22,r24 - ;; - mov cr.itir=r24 - mov cr.ifa=r22 - ;; - mov r25=IA64_TR_VHPT - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - ;; -#endif - - movl r22=SHAREDINFO_ADDR - ;; - movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW) - ;; - mov r21=loc5 // saved sharedinfo physical address - ;; - or r23=r25,r21 // construct PA | page properties - mov r24=PAGE_SHIFT<<2 - ;; - ptr.d r22,r24 - ;; - mov cr.itir=r24 - mov cr.ifa=r22 - ;; - mov r25=IA64_TR_SHARED_INFO - ;; - itr.d dtr[r25]=r23 // wire in new mapping... 
- ;; - // Map for arch_vcpu_info_t - movl r22=SHARED_ARCHINFO_ADDR - ;; - movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW) - ;; - mov r21=loc7 // saved sharedinfo physical address - ;; - or r23=r25,r21 // construct PA | page properties - mov r24=PAGE_SHIFT<<2 - ;; - ptr.d r22,r24 - ;; - mov cr.itir=r24 - mov cr.ifa=r22 - ;; - mov r25=IA64_TR_ARCH_INFO - ;; - itr.d dtr[r25]=r23 // wire in new mapping... - ;; - - // done, switch back to virtual and return - mov r16=loc3 // r16= original psr - br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode - mov psr.l = loc3 // restore init PSR - - mov ar.pfs = loc1 - mov rp = loc0 - ;; - mov ar.rsc=loc4 // restore RSE configuration - srlz.d // seralize restoration of psr.l - br.ret.sptk.many rp -END(ia64_new_rr7) - -#include "minstate.h" - -GLOBAL_ENTRY(ia64_prepare_handle_privop) - .prologue - /* - * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 - */ - mov r16=r0 - DO_SAVE_SWITCH_STACK - br.call.sptk.many rp=ia64_handle_privop // stack frame setup in ivt -.ret22: .body - DO_LOAD_SWITCH_STACK - br.cond.sptk.many rp // goes to ia64_leave_kernel -END(ia64_prepare_handle_privop) - -GLOBAL_ENTRY(ia64_prepare_handle_break) - .prologue - /* - * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 - */ - mov r16=r0 - DO_SAVE_SWITCH_STACK - br.call.sptk.many rp=ia64_handle_break // stack frame setup in ivt -.ret23: .body - DO_LOAD_SWITCH_STACK - br.cond.sptk.many rp // goes to ia64_leave_kernel -END(ia64_prepare_handle_break) - -GLOBAL_ENTRY(ia64_prepare_handle_reflection) - .prologue - /* - * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 - */ - mov r16=r0 - DO_SAVE_SWITCH_STACK - br.call.sptk.many rp=ia64_handle_reflection // stack frame setup in ivt -.ret24: .body - DO_LOAD_SWITCH_STACK - br.cond.sptk.many rp // goes to ia64_leave_kernel -END(ia64_prepare_handle_reflection) - -GLOBAL_ENTRY(__get_domain_bundle) - EX(.failure_in_get_bundle,ld8 r8=[r32],8) - ;; - EX(.failure_in_get_bundle,ld8 r9=[r32]) - ;; - br.ret.sptk.many rp - ;; -.failure_in_get_bundle: - mov r8=0 - ;; - mov r9=0 - ;; - br.ret.sptk.many rp - ;; -END(__get_domain_bundle) - -GLOBAL_ENTRY(dorfirfi) - movl r16 = XSI_IIP - movl r17 = XSI_IPSR - movl r18 = XSI_IFS - ;; - ld8 r16 = [r16] - ld8 r17 = [r17] - ld8 r18 = [r18] - ;; - mov cr.iip=r16 - mov cr.ipsr=r17 - mov cr.ifs=r18 - ;; - // fall through -END(dorfirfi) - -GLOBAL_ENTRY(dorfi) - rfi - ;; -END(dorfirfi) - -// -// Long's Peak UART Offsets -// -#define COM_TOP 0xff5e0000 -#define COM_BOT 0xff5e2000 - -// UART offsets -#define UART_TX 0 /* Out: Transmit buffer (DLAB=0) */ -#define UART_INT_ENB 1 /* interrupt enable (DLAB=0) */ -#define UART_INT_ID 2 /* Interrupt ID register */ -#define UART_LINE_CTL 3 /* Line control register */ -#define UART_MODEM_CTL 4 /* Modem Control Register */ -#define UART_LSR 5 /* In: Line Status Register */ -#define UART_MSR 6 /* Modem status register */ -#define UART_DLATCH_LOW UART_TX -#define UART_DLATCH_HIGH UART_INT_ENB -#define COM1 0x3f8 -#define COM2 0x2F8 -#define COM3 0x3E8 - -/* interrupt enable bits (offset 1) */ -#define DATA_AVAIL_INT 1 -#define XMIT_HOLD_EMPTY_INT 2 -#define LINE_STAT_INT 4 -#define MODEM_STAT_INT 8 - -/* line status bits (offset 5) */ -#define REC_DATA_READY 1 -#define OVERRUN 2 -#define PARITY_ERROR 4 -#define FRAMING_ERROR 8 -#define BREAK_INTERRUPT 0x10 -#define XMIT_HOLD_EMPTY 0x20 -#define XMIT_SHIFT_EMPTY 0x40 - -// Write a single character -// input: r32 = character to be written -// 
output: none -GLOBAL_ENTRY(longs_peak_putc) - rsm psr.dt - movl r16 = 0x8000000000000000 + COM_TOP + UART_LSR - ;; - srlz.i - ;; - -.Chk_THRE_p: - ld1.acq r18=[r16] - ;; - - and r18 = XMIT_HOLD_EMPTY, r18 - ;; - cmp4.eq p6,p0=0,r18 - ;; - -(p6) br .Chk_THRE_p - ;; - movl r16 = 0x8000000000000000 + COM_TOP + UART_TX - ;; - st1.rel [r16]=r32 - ;; - ssm psr.dt - ;; - srlz.i - ;; - br.ret.sptk.many b0 -END(longs_peak_putc) - -/* derived from linux/arch/ia64/hp/sim/boot/boot_head.S */ -GLOBAL_ENTRY(pal_emulator_static) - mov r8=-1 - mov r9=256 - ;; - cmp.gtu p7,p8=r9,r32 /* r32 <= 255? */ -(p7) br.cond.sptk.few static - ;; - mov r9=512 - ;; - cmp.gtu p7,p8=r9,r32 -(p7) br.cond.sptk.few stacked - ;; -static: cmp.eq p7,p8=6,r32 /* PAL_PTCE_INFO */ -(p8) br.cond.sptk.few 1f - ;; - mov r8=0 /* status = 0 */ - movl r9=0x100000000 /* tc.base */ - movl r10=0x0000000200000003 /* count[0], count[1] */ - movl r11=0x1000000000002000 /* stride[0], stride[1] */ - br.ret.sptk.few rp -1: cmp.eq p7,p8=14,r32 /* PAL_FREQ_RATIOS */ -(p8) br.cond.sptk.few 1f - mov r8=0 /* status = 0 */ - movl r9 =0x900000002 /* proc_ratio (1/100) */ - movl r10=0x100000100 /* bus_ratio<<32 (1/256) */ - movl r11=0x900000002 /* itc_ratio<<32 (1/100) */ - ;; -1: cmp.eq p7,p8=19,r32 /* PAL_RSE_INFO */ -(p8) br.cond.sptk.few 1f - mov r8=0 /* status = 0 */ - mov r9=96 /* num phys stacked */ - mov r10=0 /* hints */ - mov r11=0 - br.ret.sptk.few rp -1: cmp.eq p7,p8=1,r32 /* PAL_CACHE_FLUSH */ -(p8) br.cond.sptk.few 1f -#if 0 - mov r9=ar.lc - movl r8=524288 /* flush 512k million cache lines (16MB) */ - ;; - mov ar.lc=r8 - movl r8=0xe000000000000000 - ;; -.loop: fc r8 - add r8=32,r8 - br.cloop.sptk.few .loop - sync.i - ;; - srlz.i - ;; - mov ar.lc=r9 - mov r8=r0 - ;; -1: cmp.eq p7,p8=15,r32 /* PAL_PERF_MON_INFO */ -(p8) br.cond.sptk.few 1f - mov r8=0 /* status = 0 */ - movl r9 =0x08122f04 /* generic=4 width=47 retired=8 cycles=18 */ - mov r10=0 /* reserved */ - mov r11=0 /* reserved */ - mov r16=0xffff /* implemented PMC */ - mov r17=0x3ffff /* implemented PMD */ - add r18=8,r29 /* second index */ - ;; - st8 [r29]=r16,16 /* store implemented PMC */ - st8 [r18]=r0,16 /* clear remaining bits */ - ;; - st8 [r29]=r0,16 /* clear remaining bits */ - st8 [r18]=r0,16 /* clear remaining bits */ - ;; - st8 [r29]=r17,16 /* store implemented PMD */ - st8 [r18]=r0,16 /* clear remaining bits */ - mov r16=0xf0 /* cycles count capable PMC */ - ;; - st8 [r29]=r0,16 /* clear remaining bits */ - st8 [r18]=r0,16 /* clear remaining bits */ - mov r17=0xf0 /* retired bundles capable PMC */ - ;; - st8 [r29]=r16,16 /* store cycles capable */ - st8 [r18]=r0,16 /* clear remaining bits */ - ;; - st8 [r29]=r0,16 /* clear remaining bits */ - st8 [r18]=r0,16 /* clear remaining bits */ - ;; - st8 [r29]=r17,16 /* store retired bundle capable */ - st8 [r18]=r0,16 /* clear remaining bits */ - ;; - st8 [r29]=r0,16 /* clear remaining bits */ - st8 [r18]=r0,16 /* clear remaining bits */ - ;; -1: br.cond.sptk.few rp -#else -1: -#endif -stacked: - br.ret.sptk.few rp -END(pal_emulator_static) - -GLOBAL_ENTRY(vhpt_insert) -// alloc loc0 = ar.pfs, 3, 1, 0, 0 - mov r16=r32 - mov r26=r33 - mov r27=r34 - ;; - VHPT_INSERT() -// VHPT_INSERT1() ... add collision chains later -// mov ar.pfs = loc0 - br.ret.sptk.few rp - ;; -END(vhpt_insert) diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xenirq.c --- a/xen/arch/ia64/xenirq.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,77 +0,0 @@ -/* - * Xen irq routines - * - * Copyright (C) 2005 Hewlett-Packard Co. 
- * Dan Magenheimer (dan.magenheimer@xxxxxx) - * - */ - -#include <asm/ptrace.h> -#include <asm/hw_irq.h> - - -void -xen_debug_irq(ia64_vector vector, struct pt_regs *regs) -{ -//FIXME: For debug only, can be removed - static char firstirq = 1; - static char firsttime[256]; - static char firstpend[256]; - if (firstirq) { - int i; - for (i=0;i<256;i++) firsttime[i] = 1; - for (i=0;i<256;i++) firstpend[i] = 1; - firstirq = 0; - } - if (firsttime[vector]) { - printf("**** (entry) First received int on vector=%d,itc=%lx\n", - (unsigned long) vector, ia64_get_itc()); - firsttime[vector] = 0; - } -} - - -int -xen_do_IRQ(ia64_vector vector) -{ - if (vector != 0xef) { - extern void vcpu_pend_interrupt(void *, int); -#if 0 - if (firsttime[vector]) { - printf("**** (iterate) First received int on vector=%d,itc=%lx\n", - (unsigned long) vector, ia64_get_itc()); - firsttime[vector] = 0; - } - if (firstpend[vector]) { - printf("**** First pended int on vector=%d,itc=%lx\n", - (unsigned long) vector,ia64_get_itc()); - firstpend[vector] = 0; - } -#endif - //FIXME: TEMPORARY HACK!!!! - vcpu_pend_interrupt(dom0->vcpu[0],vector); - vcpu_wake(dom0->vcpu[0]); - return(1); - } - return(0); -} - -/* From linux/kernel/softirq.c */ -#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED -# define invoke_softirq() __do_softirq() -#else -# define invoke_softirq() do_softirq() -#endif - -/* - * Exit an interrupt context. Process softirqs if needed and possible: - */ -void irq_exit(void) -{ - //account_system_vtime(current); - //sub_preempt_count(IRQ_EXIT_OFFSET); - if (!in_interrupt() && local_softirq_pending()) - invoke_softirq(); - //preempt_enable_no_resched(); -} -/* end from linux/kernel/softirq.c */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xenmem.c --- a/xen/arch/ia64/xenmem.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,86 +0,0 @@ -/* - * Xen memory allocator routines - * - * Copyright (C) 2005 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@xxxxxx> - * Copyright (C) 2005 Intel Corp. - * - * Routines used by ia64 machines with contiguous (or virtually contiguous) - * memory. - */ - -#include <linux/config.h> -#include <asm/pgtable.h> -#include <xen/mm.h> - -extern struct page *zero_page_memmap_ptr; -struct pfn_info *frame_table; -unsigned long frame_table_size; -unsigned long max_page; - -struct page *mem_map; -#define MAX_DMA_ADDRESS ~0UL // FIXME??? - -#ifdef CONFIG_VIRTUAL_MEM_MAP -static unsigned long num_dma_physpages; -#endif - -/* - * Set up the page tables. - */ -#ifdef CONFIG_VTI -unsigned long *mpt_table; -unsigned long mpt_table_size; -#endif // CONFIG_VTI - -void -paging_init (void) -{ - struct pfn_info *pg; - -#ifdef CONFIG_VTI - unsigned int mpt_order; - /* Create machine to physical mapping table - * NOTE: similar to frame table, later we may need virtually - * mapped mpt table if large hole exists. Also MAX_ORDER needs - * to be changed in common code, which only support 16M by far - */ - mpt_table_size = max_page * sizeof(unsigned long); - mpt_order = get_order(mpt_table_size); - ASSERT(mpt_order <= MAX_ORDER); - if ((mpt_table = alloc_xenheap_pages(mpt_order)) == NULL) - panic("Not enough memory to bootstrap Xen.\n"); - - printk("machine to physical table: 0x%lx\n", (u64)mpt_table); - memset(mpt_table, INVALID_M2P_ENTRY, mpt_table_size); -#endif // CONFIG_VTI - - /* Other mapping setup */ - - zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); -} - -/* FIXME: postpone support to machines with big holes between physical memorys. 
- * Current hack allows only efi memdesc upto 4G place. (See efi.c) - */ -#ifndef CONFIG_VIRTUAL_MEM_MAP -#define FT_ALIGN_SIZE (16UL << 20) -void __init init_frametable(void) -{ - unsigned long i, pfn; - frame_table_size = max_page * sizeof(struct pfn_info); - frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK; - - /* Request continuous trunk from boot allocator, since HV - * address is identity mapped */ - pfn = alloc_boot_pages( - frame_table_size >> PAGE_SHIFT, FT_ALIGN_SIZE >> PAGE_SHIFT); - if (pfn == 0) - panic("Not enough memory for frame table.\n"); - - frame_table = __va(pfn << PAGE_SHIFT); - memset(frame_table, 0, frame_table_size); - printk("size of frame_table: %lukB\n", - frame_table_size >> 10); -} -#endif diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xenmisc.c --- a/xen/arch/ia64/xenmisc.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,391 +0,0 @@ -/* - * Xen misc - * - * Functions/decls that are/may be needed to link with Xen because - * of x86 dependencies - * - * Copyright (C) 2004 Hewlett-Packard Co. - * Dan Magenheimer (dan.magenheimer@xxxxxx) - * - */ - -#include <linux/config.h> -#include <xen/sched.h> -#include <linux/efi.h> -#include <asm/processor.h> -#include <xen/serial.h> -#include <asm/io.h> -#include <xen/softirq.h> - -efi_memory_desc_t ia64_efi_io_md; -EXPORT_SYMBOL(ia64_efi_io_md); -unsigned long wait_init_idle; -int phys_proc_id[NR_CPUS]; -unsigned long loops_per_jiffy = (1<<12); // from linux/init/main.c - -void unw_init(void) { printf("unw_init() skipped (NEED FOR KERNEL UNWIND)\n"); } -void ia64_mca_init(void) { printf("ia64_mca_init() skipped (Machine check abort handling)\n"); } -void ia64_mca_cpu_init(void *x) { } -void ia64_patch_mckinley_e9(unsigned long a, unsigned long b) { } -void ia64_patch_vtop(unsigned long a, unsigned long b) { } -void hpsim_setup(char **x) -{ -#ifdef CONFIG_SMP - init_smp_config(); -#endif -} - -// called from mem_init... don't think s/w I/O tlb is needed in Xen -//void swiotlb_init(void) { } ...looks like it IS needed - -long -is_platform_hp_ski(void) -{ - int i; - long cpuid[6]; - - for (i = 0; i < 5; ++i) - cpuid[i] = ia64_get_cpuid(i); - if ((cpuid[0] & 0xff) != 'H') return 0; - if ((cpuid[3] & 0xff) != 0x4) return 0; - if (((cpuid[3] >> 8) & 0xff) != 0x0) return 0; - if (((cpuid[3] >> 16) & 0xff) != 0x0) return 0; - if (((cpuid[3] >> 24) & 0x7) != 0x7) return 0; - return 1; -} - -long -platform_is_hp_ski(void) -{ - extern long running_on_sim; - return running_on_sim; -} - -/* calls in xen/common code that are unused on ia64 */ - -void sync_lazy_execstate_cpu(unsigned int cpu) {} - -#ifdef CONFIG_VTI -int grant_table_create(struct domain *d) { return 0; } -void grant_table_destroy(struct domain *d) { return; } -#endif - -struct pt_regs *guest_cpu_user_regs(void) { return ia64_task_regs(current); } - -void raise_actimer_softirq(void) -{ - raise_softirq(AC_TIMER_SOFTIRQ); -} - -#ifndef CONFIG_VTI -unsigned long -__gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) -{ - if (d == dom0) - return(gpfn); - else { - unsigned long pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT); - if (!pte) { -printk("__gpfn_to_mfn_foreign: bad gpfn. spinning...\n"); -while(1); - return 0; - } - return ((pte & _PFN_MASK) >> PAGE_SHIFT); - } -} - -u32 -__mfn_to_gpfn(struct domain *d, unsigned long frame) -{ - // FIXME: is this right? -if ((frame << PAGE_SHIFT) & _PAGE_PPN_MASK) { -printk("__mfn_to_gpfn: bad frame. 
spinning...\n"); -while(1); -} - return frame; -} -#endif - -#ifndef CONFIG_VTI -unsigned long __hypercall_create_continuation( - unsigned int op, unsigned int nr_args, ...) -{ - printf("__hypercall_create_continuation: not implemented!!!\n"); -} -#endif - -/////////////////////////////// - -/////////////////////////////// -// from arch/x86/apic.c -/////////////////////////////// - -extern unsigned long domain0_ready; - -int reprogram_ac_timer(s_time_t timeout) -{ - struct vcpu *v = current; - -#ifdef CONFIG_VTI -// if(VMX_DOMAIN(v)) - return 1; -#endif // CONFIG_VTI - if (!domain0_ready) return 1; - local_cpu_data->itm_next = timeout; - if (is_idle_task(v->domain)) vcpu_safe_set_itm(timeout); - else vcpu_set_next_timer(current); - return 1; -} - -/////////////////////////////// -// from arch/ia64/page_alloc.c -/////////////////////////////// -DEFINE_PER_CPU(struct page_state, page_states) = {0}; -unsigned long totalram_pages; - -void __mod_page_state(unsigned offset, unsigned long delta) -{ - unsigned long flags; - void* ptr; - - local_irq_save(flags); - ptr = &__get_cpu_var(page_states); - *(unsigned long*)(ptr + offset) += delta; - local_irq_restore(flags); -} - -/////////////////////////////// -// from arch/x86/flushtlb.c -/////////////////////////////// - -u32 tlbflush_clock; -u32 tlbflush_time[NR_CPUS]; - -/////////////////////////////// -// from arch/x86/memory.c -/////////////////////////////// - -void init_percpu_info(void) -{ - dummy(); - //memset(percpu_info, 0, sizeof(percpu_info)); -} - -void free_page_type(struct pfn_info *page, unsigned int type) -{ - dummy(); -} - -/////////////////////////////// -//// misc memory stuff -/////////////////////////////// - -unsigned long __get_free_pages(unsigned int mask, unsigned int order) -{ - void *p = alloc_xenheap_pages(order); - - memset(p,0,PAGE_SIZE<<order); - return (unsigned long)p; -} - -void __free_pages(struct page *page, unsigned int order) -{ - if (order) BUG(); - free_xenheap_page(page); -} - -void *pgtable_quicklist_alloc(void) -{ - return alloc_xenheap_pages(0); -} - -void pgtable_quicklist_free(void *pgtable_entry) -{ - free_xenheap_page(pgtable_entry); -} - -/////////////////////////////// -// from arch/ia64/traps.c -/////////////////////////////// - -void show_registers(struct pt_regs *regs) -{ - printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n"); -} - -int is_kernel_text(unsigned long addr) -{ - extern char _stext[], _etext[]; - if (addr >= (unsigned long) _stext && - addr <= (unsigned long) _etext) - return 1; - - return 0; -} - -unsigned long kernel_text_end(void) -{ - extern char _etext[]; - return (unsigned long) _etext; -} - -/////////////////////////////// -// from common/keyhandler.c -/////////////////////////////// -void dump_pageframe_info(struct domain *d) -{ - printk("dump_pageframe_info not implemented\n"); -} - -/////////////////////////////// -// called from arch/ia64/head.S -/////////////////////////////// - -void console_print(char *msg) -{ - printk("console_print called, how did start_kernel return???\n"); -} - -void kernel_thread_helper(void) -{ - printk("kernel_thread_helper not implemented\n"); - dummy(); -} - -void sys_exit(void) -{ - printk("sys_exit not implemented\n"); - dummy(); -} - -//////////////////////////////////// -// called from unaligned.c -//////////////////////////////////// - -void die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */ -{ - printk("die_if_kernel: called, not implemented\n"); -} - -long -ia64_peek (struct task_struct *child, 
struct switch_stack *child_stack, - unsigned long user_rbs_end, unsigned long addr, long *val) -{ - printk("ia64_peek: called, not implemented\n"); -} - -long -ia64_poke (struct task_struct *child, struct switch_stack *child_stack, - unsigned long user_rbs_end, unsigned long addr, long val) -{ - printk("ia64_poke: called, not implemented\n"); -} - -void -ia64_sync_fph (struct task_struct *task) -{ - printk("ia64_sync_fph: called, not implemented\n"); -} - -void -ia64_flush_fph (struct task_struct *task) -{ - printk("ia64_flush_fph: called, not implemented\n"); -} - -//////////////////////////////////// -// called from irq_ia64.c:init_IRQ() -// (because CONFIG_IA64_HP_SIM is specified) -//////////////////////////////////// -void hpsim_irq_init(void) { } - - -// accomodate linux extable.c -//const struct exception_table_entry * -void *search_module_extables(unsigned long addr) { return NULL; } -void *__module_text_address(unsigned long addr) { return NULL; } -void *module_text_address(unsigned long addr) { return NULL; } - -void cs10foo(void) {} -void cs01foo(void) {} - -unsigned long context_switch_count = 0; - -void context_switch(struct vcpu *prev, struct vcpu *next) -{ -//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); -//printk("@@@@@@ context switch from domain %d (%x) to domain %d (%x)\n", -//prev->domain->domain_id,(long)prev&0xffffff,next->domain->domain_id,(long)next&0xffffff); -//if (prev->domain->domain_id == 1 && next->domain->domain_id == 0) cs10foo(); -//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo(); -//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id); -#ifdef CONFIG_VTI - vtm_domain_out(prev); -#endif - context_switch_count++; - switch_to(prev,next,prev); -#ifdef CONFIG_VTI - vtm_domain_in(current); -#endif - -// leave this debug for now: it acts as a heartbeat when more than -// one domain is active -{ -static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50}; -static int i = 100; -int id = ((struct vcpu *)current)->domain->domain_id & 0xf; -if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; } -if (!i--) { printk("+",id); i = 1000000; } -} - -#ifdef CONFIG_VTI - if (VMX_DOMAIN(current)) - vmx_load_all_rr(current); -#else - if (!is_idle_task(current->domain)) { - load_region_regs(current); - if (vcpu_timer_expired(current)) vcpu_pend_timer(current); - } - if (vcpu_timer_expired(current)) vcpu_pend_timer(current); -#endif -} - -void context_switch_finalise(struct vcpu *next) -{ - /* nothing to do */ -} - -void continue_running(struct vcpu *same) -{ - /* nothing to do */ -} - -void panic_domain(struct pt_regs *regs, const char *fmt, ...) 
-{ - va_list args; - char buf[128]; - struct vcpu *v = current; - static volatile int test = 1; // so can continue easily in debug - extern spinlock_t console_lock; - unsigned long flags; - -loop: - printf("$$$$$ PANIC in domain %d (k6=%p): ", - v->domain->domain_id, - __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT]); - va_start(args, fmt); - (void)vsnprintf(buf, sizeof(buf), fmt, args); - va_end(args); - printf(buf); - if (regs) show_registers(regs); - domain_pause_by_systemcontroller(current->domain); - v->domain->shutdown_code = SHUTDOWN_crash; - set_bit(_DOMF_shutdown, v->domain->domain_flags); - if (v->domain->domain_id == 0) { - int i = 1000000000L; - // if domain0 crashes, just periodically print out panic - // message to make post-mortem easier - while(i--); - goto loop; - } -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xensetup.c --- a/xen/arch/ia64/xensetup.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,389 +0,0 @@ -/****************************************************************************** - * xensetup.c - * Copyright (c) 2004-2005 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@xxxxxx> - */ - -#include <xen/config.h> -#include <xen/lib.h> -#include <xen/errno.h> -//#include <xen/spinlock.h> -#include <xen/multiboot.h> -#include <xen/sched.h> -#include <xen/mm.h> -//#include <xen/delay.h> -#include <xen/compile.h> -//#include <xen/console.h> -#include <xen/serial.h> -#include <xen/trace.h> -#include <asm/meminit.h> -#include <asm/page.h> -#include <asm/setup.h> -#include <xen/string.h> - -unsigned long xenheap_phys_end; - -char saved_command_line[COMMAND_LINE_SIZE]; - -struct vcpu *idle_task[NR_CPUS] = { &idle0_vcpu }; - -cpumask_t cpu_present_map; - -#ifdef CLONE_DOMAIN0 -struct domain *clones[CLONE_DOMAIN0]; -#endif -extern unsigned long domain0_ready; - -int find_max_pfn (unsigned long, unsigned long, void *); -void start_of_day(void); - -/* opt_nosmp: If true, secondary processors are ignored. */ -static int opt_nosmp = 0; -boolean_param("nosmp", opt_nosmp); - -/* maxcpus: maximum number of CPUs to activate. */ -static unsigned int max_cpus = NR_CPUS; -integer_param("maxcpus", max_cpus); - -/* - * opt_xenheap_megabytes: Size of Xen heap in megabytes, including: - * xen image - * bootmap bits - * xen heap - * Note: To allow xenheap size configurable, the prerequisite is - * to configure elilo allowing relocation defaultly. Then since - * elilo chooses 256M as alignment when relocating, alignment issue - * on IPF can be addressed. - */ -unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB; -unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE; -extern long running_on_sim; -unsigned long xen_pstart; - -static int -xen_count_pages(u64 start, u64 end, void *arg) -{ - unsigned long *count = arg; - - /* FIXME: do we need consider difference between DMA-usable memory and - * normal memory? Seems that HV has no requirement to operate DMA which - * is owned by Dom0? */ - *count += (end - start) >> PAGE_SHIFT; - return 0; -} - -/* Find first hole after trunk for xen image */ -static int -xen_find_first_hole(u64 start, u64 end, void *arg) -{ - unsigned long *first_hole = arg; - - if ((*first_hole) == 0) { - if ((start <= KERNEL_START) && (KERNEL_START < end)) - *first_hole = __pa(end); - } - - return 0; -} - -static void __init do_initcalls(void) -{ - initcall_t *call; - for ( call = &__initcall_start; call < &__initcall_end; call++ ) - (*call)(); -} - -/* - * IPF loader only supports one commaind line currently, for - * both xen and guest kernel. 
This function provides pre-parse - * to mixed command line, to split it into two parts. - * - * User should split the parameters by "--", with strings after - * spliter for guest kernel. Missing "--" means whole line belongs - * to guest. Example: - * "com2=57600,8n1 console=com2 -- console=ttyS1 console=tty - * root=/dev/sda3 ro" - */ -static char null[4] = { 0 }; - -void early_cmdline_parse(char **cmdline_p) -{ - char *guest_cmd; - char *split = "--"; - - if (*cmdline_p == NULL) { - *cmdline_p = &null[0]; - saved_command_line[0] = '\0'; - return; - } - - guest_cmd = strstr(*cmdline_p, split); - /* If no spliter, whole line is for guest */ - if (guest_cmd == NULL) { - guest_cmd = *cmdline_p; - *cmdline_p = &null[0]; - } else { - *guest_cmd = '\0'; /* Split boot parameters for xen and guest */ - guest_cmd += strlen(split); - while (*guest_cmd == ' ') guest_cmd++; - } - - strlcpy(saved_command_line, guest_cmd, COMMAND_LINE_SIZE); - return; -} - -struct ns16550_defaults ns16550_com1 = { - .baud = BAUD_AUTO, - .data_bits = 8, - .parity = 'n', - .stop_bits = 1 -}; - -struct ns16550_defaults ns16550_com2 = { - .baud = BAUD_AUTO, - .data_bits = 8, - .parity = 'n', - .stop_bits = 1 -}; - -void start_kernel(void) -{ - unsigned char *cmdline; - void *heap_start; - int i; - unsigned long max_mem, nr_pages, firsthole_start; - unsigned long dom0_memory_start, dom0_memory_end; - unsigned long initial_images_start, initial_images_end; - - running_on_sim = is_platform_hp_ski(); - /* Kernel may be relocated by EFI loader */ - xen_pstart = ia64_tpa(KERNEL_START); - - /* Must do this early -- e.g., spinlocks rely on get_current(). */ - //set_current(&idle0_vcpu); - ia64_r13 = (void *)&idle0_vcpu; - idle0_vcpu.domain = &idle0_domain; - - early_setup_arch(&cmdline); - - /* We initialise the serial devices very early so we can get debugging. */ - if (running_on_sim) hpsim_serial_init(); - else { - ns16550_init(0, &ns16550_com1); - /* Also init com2 for Tiger4. */ - ns16550_com2.io_base = 0x2f8; - ns16550_com2.irq = 3; - ns16550_init(1, &ns16550_com2); - } - serial_init_preirq(); - - init_console(); - set_printk_prefix("(XEN) "); - - /* xenheap should be in same TR-covered range with xen image */ - xenheap_phys_end = xen_pstart + xenheap_size; - printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n", - xen_pstart, xenheap_phys_end); - - /* Find next hole */ - firsthole_start = 0; - efi_memmap_walk(xen_find_first_hole, &firsthole_start); - - initial_images_start = xenheap_phys_end; - initial_images_end = initial_images_start + ia64_boot_param->initrd_size; - - /* Later may find another memory trunk, even away from xen image... 
*/ - if (initial_images_end > firsthole_start) { - printk("Not enough memory to stash the DOM0 kernel image.\n"); - printk("First hole:0x%lx, relocation end: 0x%lx\n", - firsthole_start, initial_images_end); - for ( ; ; ); - } - - /* This copy is time consuming, but elilo may load Dom0 image - * within xenheap range */ - printk("ready to move Dom0 to 0x%lx...", initial_images_start); - memmove(__va(initial_images_start), - __va(ia64_boot_param->initrd_start), - ia64_boot_param->initrd_size); - ia64_boot_param->initrd_start = initial_images_start; - printk("Done\n"); - - /* first find highest page frame number */ - max_page = 0; - efi_memmap_walk(find_max_pfn, &max_page); - printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page); - - heap_start = memguard_init(ia64_imva(&_end)); - printf("Before heap_start: 0x%lx\n", heap_start); - heap_start = __va(init_boot_allocator(__pa(heap_start))); - printf("After heap_start: 0x%lx\n", heap_start); - - reserve_memory(); - - efi_memmap_walk(filter_rsvd_memory, init_boot_pages); - efi_memmap_walk(xen_count_pages, &nr_pages); - - printk("System RAM: %luMB (%lukB)\n", - nr_pages >> (20 - PAGE_SHIFT), - nr_pages << (PAGE_SHIFT - 10)); - - init_frametable(); - - ia64_fph_enable(); - __ia64_init_fpu(); - - alloc_dom0(); -#ifdef DOMU_BUILD_STAGING - alloc_domU_staging(); -#endif - - end_boot_allocator(); - - init_xenheap_pages(__pa(heap_start), xenheap_phys_end); - printk("Xen heap: %luMB (%lukB)\n", - (xenheap_phys_end-__pa(heap_start)) >> 20, - (xenheap_phys_end-__pa(heap_start)) >> 10); - - late_setup_arch(&cmdline); - setup_per_cpu_areas(); - mem_init(); - -printk("About to call scheduler_init()\n"); - scheduler_init(); - local_irq_disable(); -printk("About to call xen_time_init()\n"); - xen_time_init(); -#ifdef CONFIG_VTI - init_xen_time(); /* initialise the time */ -#endif // CONFIG_VTI -printk("About to call ac_timer_init()\n"); - ac_timer_init(); -// init_xen_time(); ??? - -#ifdef CONFIG_SMP - if ( opt_nosmp ) - { - max_cpus = 0; - smp_num_siblings = 1; - //boot_cpu_data.x86_num_cores = 1; - } - - smp_prepare_cpus(max_cpus); - - /* We aren't hotplug-capable yet. */ - //BUG_ON(!cpus_empty(cpu_present_map)); - for_each_cpu ( i ) - cpu_set(i, cpu_present_map); - - //BUG_ON(!local_irq_is_enabled()); - -printk("num_online_cpus=%d, max_cpus=%d\n",num_online_cpus(),max_cpus); - for_each_present_cpu ( i ) - { - if ( num_online_cpus() >= max_cpus ) - break; - if ( !cpu_online(i) ) { -printk("About to call __cpu_up(%d)\n",i); - __cpu_up(i); - } - } - - printk("Brought up %ld CPUs\n", (long)num_online_cpus()); - smp_cpus_done(max_cpus); -#endif - - - // FIXME: Should the following be swapped and moved later? - schedulers_start(); - do_initcalls(); -printk("About to call sort_main_extable()\n"); - sort_main_extable(); - - /* surrender usage of kernel registers to domain, use percpu area instead */ - __get_cpu_var(cpu_kr)._kr[IA64_KR_IO_BASE] = ia64_get_kr(IA64_KR_IO_BASE); - __get_cpu_var(cpu_kr)._kr[IA64_KR_PER_CPU_DATA] = ia64_get_kr(IA64_KR_PER_CPU_DATA); - __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT_STACK] = ia64_get_kr(IA64_KR_CURRENT_STACK); - __get_cpu_var(cpu_kr)._kr[IA64_KR_FPU_OWNER] = ia64_get_kr(IA64_KR_FPU_OWNER); - __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT] = ia64_get_kr(IA64_KR_CURRENT); - __get_cpu_var(cpu_kr)._kr[IA64_KR_PT_BASE] = ia64_get_kr(IA64_KR_PT_BASE); - - /* Create initial domain 0. 
*/ -printk("About to call do_createdomain()\n"); - dom0 = do_createdomain(0, 0); - init_task.domain = &idle0_domain; - init_task.processor = 0; -// init_task.mm = &init_mm; - init_task.domain->arch.mm = &init_mm; -// init_task.thread = INIT_THREAD; - //arch_do_createdomain(current); -#ifdef CLONE_DOMAIN0 - { - int i; - for (i = 0; i < CLONE_DOMAIN0; i++) { - clones[i] = do_createdomain(i+1, 0); - if ( clones[i] == NULL ) - panic("Error creating domain0 clone %d\n",i); - } - } -#endif - if ( dom0 == NULL ) - panic("Error creating domain 0\n"); - - set_bit(_DOMF_privileged, &dom0->domain_flags); - - /* - * We're going to setup domain0 using the module(s) that we stashed safely - * above our heap. The second module, if present, is an initrd ramdisk. - */ -printk("About to call construct_dom0()\n"); - dom0_memory_start = __va(ia64_boot_param->initrd_start); - dom0_memory_end = ia64_boot_param->initrd_size; - if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end, - 0, - 0, - 0) != 0) - panic("Could not set up DOM0 guest OS\n"); -#ifdef CLONE_DOMAIN0 - { - int i; - dom0_memory_start = __va(ia64_boot_param->initrd_start); - dom0_memory_end = ia64_boot_param->initrd_size; - for (i = 0; i < CLONE_DOMAIN0; i++) { -printk("CONSTRUCTING DOMAIN0 CLONE #%d\n",i+1); - if ( construct_domU(clones[i], dom0_memory_start, dom0_memory_end, - 0, - 0, - 0) != 0) - panic("Could not set up DOM0 clone %d\n",i); - } - } -#endif - - /* The stash space for the initial kernel image can now be freed up. */ - init_domheap_pages(ia64_boot_param->initrd_start, - ia64_boot_param->initrd_start + ia64_boot_param->initrd_size); - if (!running_on_sim) // slow on ski and pages are pre-initialized to zero - scrub_heap_pages(); - -printk("About to call init_trace_bufs()\n"); - init_trace_bufs(); - - /* Give up the VGA console if DOM0 is configured to grab it. 
*/ -#ifndef IA64 - console_endboot(cmdline && strstr(cmdline, "tty0")); -#endif - -#ifdef CLONE_DOMAIN0 - { - int i; - for (i = 0; i < CLONE_DOMAIN0; i++) - domain_unpause_by_systemcontroller(clones[i]); - } -#endif - domain_unpause_by_systemcontroller(dom0); - domain0_ready = 1; - local_irq_enable(); -printk("About to call startup_cpu_idle_loop()\n"); - startup_cpu_idle_loop(); -} diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/arch/ia64/xentime.c --- a/xen/arch/ia64/xentime.c Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,382 +0,0 @@ -/* - * xen/arch/ia64/time.c - * - * Copyright (C) 2005 Hewlett-Packard Co - * Dan Magenheimer <dan.magenheimer@xxxxxx> - */ - -#include <linux/config.h> - -#include <linux/cpu.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/profile.h> -#include <linux/sched.h> -#include <linux/time.h> -#include <linux/interrupt.h> -#include <linux/efi.h> -#include <linux/profile.h> -#include <linux/timex.h> - -#include <asm/machvec.h> -#include <asm/delay.h> -#include <asm/hw_irq.h> -#include <asm/ptrace.h> -#include <asm/sal.h> -#include <asm/sections.h> -#include <asm/system.h> -#ifdef XEN -#include <asm/vcpu.h> -#include <linux/jiffies.h> // not included by xen/sched.h -#endif -#include <xen/softirq.h> - -#ifdef XEN -seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; -#endif - -#define TIME_KEEPER_ID 0 -extern unsigned long wall_jiffies; - -static s_time_t stime_irq; /* System time at last 'time update' */ - -unsigned long domain0_ready = 0; - -#ifndef CONFIG_VTI -static inline u64 get_time_delta(void) -{ - return ia64_get_itc(); -} -#else // CONFIG_VTI -static s_time_t stime_irq = 0x0; /* System time at last 'time update' */ -unsigned long itc_scale; -unsigned long itc_at_irq; -static unsigned long wc_sec, wc_nsec; /* UTC time at last 'time update'. */ -//static rwlock_t time_lock = RW_LOCK_UNLOCKED; -static irqreturn_t vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs); - -static inline u64 get_time_delta(void) -{ - s64 delta_itc; - u64 delta, cur_itc; - - cur_itc = ia64_get_itc(); - - delta_itc = (s64)(cur_itc - itc_at_irq); - if ( unlikely(delta_itc < 0) ) delta_itc = 0; - delta = ((u64)delta_itc) * itc_scale; - delta = delta >> 32; - - return delta; -} - -u64 tick_to_ns(u64 tick) -{ - return (tick * itc_scale) >> 32; -} -#endif // CONFIG_VTI - -s_time_t get_s_time(void) -{ - s_time_t now; - unsigned long flags; - - read_lock_irqsave(&xtime_lock, flags); - - now = stime_irq + get_time_delta(); - - /* Ensure that the returned system time is monotonically increasing. */ - { - static s_time_t prev_now = 0; - if ( unlikely(now < prev_now) ) - now = prev_now; - prev_now = now; - } - - read_unlock_irqrestore(&xtime_lock, flags); - - return now; -} - -void update_dom_time(struct vcpu *v) -{ -// FIXME: implement this? -// printf("update_dom_time: called, not implemented, skipping\n"); - return; -} - -/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */ -void do_settime(unsigned long secs, unsigned long nsecs, u64 system_time_base) -{ -#ifdef CONFIG_VTI - u64 _nsecs; - - write_lock_irq(&xtime_lock); - - _nsecs = (u64)nsecs + (s64)(stime_irq - system_time_base); - while ( _nsecs >= 1000000000 ) - { - _nsecs -= 1000000000; - secs++; - } - - wc_sec = secs; - wc_nsec = (unsigned long)_nsecs; - - write_unlock_irq(&xtime_lock); - - update_dom_time(current->domain); -#else -// FIXME: Should this be do_settimeofday (from linux)??? 
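The removed xentime.c code above converts ITC cycle deltas to nanoseconds with a 32.32 fixed-point multiplier (itc_scale, initialised later in the same file as (10^9 << 32) / itc_freq) and clamps get_s_time() so the reported system time never runs backwards. The following is a minimal stand-alone sketch of that arithmetic, not the hypervisor code itself; the 1 GHz ITC frequency is made up and stands in for local_cpu_data->itc_freq.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ITC_FREQ_HZ 1000000000ULL      /* hypothetical 1 GHz ITC */

static uint64_t itc_scale;             /* 32.32 fixed-point ns-per-tick */

static void init_scale(void)
{
    /* Same formula as init_xen_time(): (10^9 << 32) / itc_freq. */
    itc_scale = (1000000000ULL << 32) / ITC_FREQ_HZ;
}

/* Mirrors tick_to_ns()/get_time_delta(): multiply, then drop the 32-bit fraction.
 * Like the original, it assumes the tick count is a short delta so the product fits in 64 bits. */
static uint64_t tick_to_ns(uint64_t tick)
{
    return (tick * itc_scale) >> 32;
}

/* Mirrors the clamp in get_s_time(): never report time going backwards. */
static uint64_t monotonic(uint64_t now)
{
    static uint64_t prev;
    if (now < prev)
        now = prev;
    prev = now;
    return now;
}

int main(void)
{
    init_scale();
    assert(tick_to_ns(ITC_FREQ_HZ / 1000) == 1000000);   /* 1 ms of ticks -> 1e6 ns */
    printf("scale=%#llx, 12345 ticks = %llu ns, clamped now = %llu\n",
           (unsigned long long)itc_scale,
           (unsigned long long)tick_to_ns(12345),
           (unsigned long long)monotonic(42));
    return 0;
}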
- printf("do_settime: called, not implemented, stopping\n"); - dummy(); -#endif -} - -irqreturn_t -xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) -{ - unsigned long new_itm; - -#define HEARTBEAT_FREQ 16 // period in seconds -#ifdef HEARTBEAT_FREQ - static long count = 0; - if (!(++count & ((HEARTBEAT_FREQ*1024)-1))) { - printf("Heartbeat... iip=%p,psr.i=%d,pend=%d\n", - regs->cr_iip, - VCPU(current,interrupt_delivery_enabled), - VCPU(current,pending_interruption)); - count = 0; - } -#endif -#ifndef XEN - if (unlikely(cpu_is_offline(smp_processor_id()))) { - return IRQ_HANDLED; - } -#endif -#ifdef XEN - if (current->domain == dom0) { - // FIXME: there's gotta be a better way of doing this... - // We have to ensure that domain0 is launched before we - // call vcpu_timer_expired on it - //domain0_ready = 1; // moved to xensetup.c - VCPU(current,pending_interruption) = 1; - } - if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) { - vcpu_pend_timer(dom0->vcpu[0]); - //vcpu_set_next_timer(dom0->vcpu[0]); - vcpu_wake(dom0->vcpu[0]); - } - if (!is_idle_task(current->domain) && current->domain != dom0) { - if (vcpu_timer_expired(current)) { - vcpu_pend_timer(current); - // ensure another timer interrupt happens even if domain doesn't - vcpu_set_next_timer(current); - vcpu_wake(current); - } - } - raise_actimer_softirq(); -#endif - -#ifndef XEN - platform_timer_interrupt(irq, dev_id, regs); -#endif - - new_itm = local_cpu_data->itm_next; - - if (!time_after(ia64_get_itc(), new_itm)) -#ifdef XEN - return; -#else - printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", - ia64_get_itc(), new_itm); -#endif - -#ifdef XEN -// printf("GOT TO HERE!!!!!!!!!!!\n"); - //while(1); -#else - profile_tick(CPU_PROFILING, regs); -#endif - - while (1) { -#ifndef XEN - update_process_times(user_mode(regs)); -#endif - - new_itm += local_cpu_data->itm_delta; - - if (smp_processor_id() == TIME_KEEPER_ID) { - /* - * Here we are in the timer irq handler. We have irqs locally - * disabled, but we don't know if the timer_bh is running on - * another CPU. We need to avoid to SMP race by acquiring the - * xtime_lock. - */ -#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN - write_seqlock(&xtime_lock); -#endif -#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN - do_timer(regs); -#endif - local_cpu_data->itm_next = new_itm; -#ifdef TURN_ME_OFF_FOR_NOW_IA64_XEN - write_sequnlock(&xtime_lock); -#endif - } else - local_cpu_data->itm_next = new_itm; - - if (time_after(new_itm, ia64_get_itc())) - break; - } - - do { - /* - * If we're too close to the next clock tick for - * comfort, we increase the safety margin by - * intentionally dropping the next tick(s). We do NOT - * update itm.next because that would force us to call - * do_timer() which in turn would let our clock run - * too fast (with the potentially devastating effect - * of losing monotony of time). 
- */ - while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) - new_itm += local_cpu_data->itm_delta; -//#ifdef XEN -// vcpu_set_next_timer(current); -//#else -//printf("***** timer_interrupt: Setting itm to %lx\n",new_itm); - ia64_set_itm(new_itm); -//#endif - /* double check, in case we got hit by a (slow) PMI: */ - } while (time_after_eq(ia64_get_itc(), new_itm)); - return IRQ_HANDLED; -} - -static struct irqaction xen_timer_irqaction = { -#ifdef CONFIG_VTI - .handler = vmx_timer_interrupt, -#else // CONFIG_VTI - .handler = xen_timer_interrupt, -#endif // CONFIG_VTI -#ifndef XEN - .flags = SA_INTERRUPT, -#endif - .name = "timer" -}; - -void __init -xen_time_init (void) -{ - register_percpu_irq(IA64_TIMER_VECTOR, &xen_timer_irqaction); - ia64_init_itm(); -} - - -#ifdef CONFIG_VTI - -/* Late init function (after all CPUs are booted). */ -int __init init_xen_time() -{ - struct timespec tm; - - itc_scale = 1000000000UL << 32 ; - itc_scale /= local_cpu_data->itc_freq; - - /* System time ticks from zero. */ - stime_irq = (s_time_t)0; - itc_at_irq = ia64_get_itc(); - - /* Wallclock time starts as the initial RTC time. */ - efi_gettimeofday(&tm); - wc_sec = tm.tv_sec; - wc_nsec = tm.tv_nsec; - - - printk("Time init:\n"); - printk(".... System Time: %ldns\n", NOW()); - printk(".... scale: %16lX\n", itc_scale); - printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_nsec/1000); - - return 0; -} - -static irqreturn_t -vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) -{ - unsigned long new_itm; - struct vcpu *v = current; - - - new_itm = local_cpu_data->itm_next; - - if (!time_after(ia64_get_itc(), new_itm)) - return; - - while (1) { -#ifdef CONFIG_SMP - /* - * For UP, this is done in do_timer(). Weird, but - * fixing that would require updates to all - * platforms. - */ - update_process_times(user_mode(v, regs)); -#endif - new_itm += local_cpu_data->itm_delta; - - if (smp_processor_id() == TIME_KEEPER_ID) { - /* - * Here we are in the timer irq handler. We have irqs locally - * disabled, but we don't know if the timer_bh is running on - * another CPU. We need to avoid to SMP race by acquiring the - * xtime_lock. - */ - local_cpu_data->itm_next = new_itm; - - write_lock_irq(&xtime_lock); - /* Update jiffies counter. */ - (*(unsigned long *)&jiffies_64)++; - - /* Update wall time. */ - wc_nsec += 1000000000/HZ; - if ( wc_nsec >= 1000000000 ) - { - wc_nsec -= 1000000000; - wc_sec++; - } - - /* Updates system time (nanoseconds since boot). */ - stime_irq += MILLISECS(1000/HZ); - itc_at_irq = ia64_get_itc(); - - write_unlock_irq(&xtime_lock); - - } else - local_cpu_data->itm_next = new_itm; - - if (time_after(new_itm, ia64_get_itc())) - break; - } - - do { - /* - * If we're too close to the next clock tick for - * comfort, we increase the safety margin by - * intentionally dropping the next tick(s). We do NOT - * update itm.next because that would force us to call - * do_timer() which in turn would let our clock run - * too fast (with the potentially devastating effect - * of losing monotony of time). 
- */ - while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) - new_itm += local_cpu_data->itm_delta; - ia64_set_itm(new_itm); - /* double check, in case we got hit by a (slow) PMI: */ - } while (time_after_eq(ia64_get_itc(), new_itm)); - raise_softirq(AC_TIMER_SOFTIRQ); - - return IRQ_HANDLED; -} -#endif // CONFIG_VTI - diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/asm/pgtable.h --- a/xen/include/asm-ia64/linux/asm/pgtable.h Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,567 +0,0 @@ -#ifndef _ASM_IA64_PGTABLE_H -#define _ASM_IA64_PGTABLE_H - -/* - * This file contains the functions and defines necessary to modify and use - * the IA-64 page table tree. - * - * This hopefully works with any (fixed) IA-64 page-size, as defined - * in <asm/page.h>. - * - * Copyright (C) 1998-2004 Hewlett-Packard Co - * David Mosberger-Tang <davidm@xxxxxxxxxx> - */ - -#include <linux/config.h> - -#include <asm/mman.h> -#include <asm/page.h> -#include <asm/processor.h> -#include <asm/system.h> -#include <asm/types.h> - -#define IA64_MAX_PHYS_BITS 50 /* max. number of physical address bits (architected) */ - -/* - * First, define the various bits in a PTE. Note that the PTE format - * matches the VHPT short format, the firt doubleword of the VHPD long - * format, and the first doubleword of the TLB insertion format. - */ -#define _PAGE_P_BIT 0 -#define _PAGE_A_BIT 5 -#define _PAGE_D_BIT 6 - -#define _PAGE_P (1 << _PAGE_P_BIT) /* page present bit */ -#define _PAGE_MA_WB (0x0 << 2) /* write back memory attribute */ -#define _PAGE_MA_UC (0x4 << 2) /* uncacheable memory attribute */ -#define _PAGE_MA_UCE (0x5 << 2) /* UC exported attribute */ -#define _PAGE_MA_WC (0x6 << 2) /* write coalescing memory attribute */ -#define _PAGE_MA_NAT (0x7 << 2) /* not-a-thing attribute */ -#define _PAGE_MA_MASK (0x7 << 2) -#define _PAGE_PL_0 (0 << 7) /* privilege level 0 (kernel) */ -#define _PAGE_PL_1 (1 << 7) /* privilege level 1 (unused) */ -#define _PAGE_PL_2 (2 << 7) /* privilege level 2 (unused) */ -#define _PAGE_PL_3 (3 << 7) /* privilege level 3 (user) */ -#define _PAGE_PL_MASK (3 << 7) -#define _PAGE_AR_R (0 << 9) /* read only */ -#define _PAGE_AR_RX (1 << 9) /* read & execute */ -#define _PAGE_AR_RW (2 << 9) /* read & write */ -#define _PAGE_AR_RWX (3 << 9) /* read, write & execute */ -#define _PAGE_AR_R_RW (4 << 9) /* read / read & write */ -#define _PAGE_AR_RX_RWX (5 << 9) /* read & exec / read, write & exec */ -#define _PAGE_AR_RWX_RW (6 << 9) /* read, write & exec / read & write */ -#define _PAGE_AR_X_RX (7 << 9) /* exec & promote / read & exec */ -#define _PAGE_AR_MASK (7 << 9) -#define _PAGE_AR_SHIFT 9 -#define _PAGE_A (1 << _PAGE_A_BIT) /* page accessed bit */ -#define _PAGE_D (1 << _PAGE_D_BIT) /* page dirty bit */ -#define _PAGE_PPN_MASK (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL) -#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */ -#define _PAGE_PROTNONE (__IA64_UL(1) << 63) - -/* Valid only for a PTE with the present bit cleared: */ -#define _PAGE_FILE (1 << 1) /* see swap & file pte remarks below */ - -#define _PFN_MASK _PAGE_PPN_MASK -/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */ -#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED) - -#define _PAGE_SIZE_4K 12 -#define _PAGE_SIZE_8K 13 -#define _PAGE_SIZE_16K 14 -#define _PAGE_SIZE_64K 16 -#define _PAGE_SIZE_256K 18 -#define _PAGE_SIZE_1M 20 -#define _PAGE_SIZE_4M 22 -#define 
_PAGE_SIZE_16M 24 -#define _PAGE_SIZE_64M 26 -#define _PAGE_SIZE_256M 28 -#define _PAGE_SIZE_1G 30 -#define _PAGE_SIZE_4G 32 - -#define __ACCESS_BITS _PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB -#define __DIRTY_BITS_NO_ED _PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB -#define __DIRTY_BITS _PAGE_ED | __DIRTY_BITS_NO_ED - -/* - * Definitions for first level: - * - * PGDIR_SHIFT determines what a first-level page table entry can map. - */ -#define PGDIR_SHIFT (PAGE_SHIFT + 2*(PAGE_SHIFT-3)) -#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3)) -#define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */ -#define FIRST_USER_PGD_NR 0 - -/* - * Definitions for second level: - * - * PMD_SHIFT determines the size of the area a second-level page table - * can map. - */ -#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) -#define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3)) - -/* - * Definitions for third level: - */ -#define PTRS_PER_PTE (__IA64_UL(1) << (PAGE_SHIFT-3)) - -/* - * All the normal masks have the "page accessed" bits on, as any time - * they are used, the page is accessed. They are cleared only by the - * page-out routines. - */ -#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_A) -#define PAGE_SHARED __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) -#define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) -#define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) -#define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) -#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) -#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) -#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) - -# ifndef __ASSEMBLY__ - -#include <asm/bitops.h> -#include <asm/cacheflush.h> -#include <asm/mmu_context.h> -#include <asm/processor.h> - -/* - * Next come the mappings that determine how mmap() protection bits - * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented. The - * _P version gets used for a private shared memory segment, the _S - * version gets used for a shared memory segment with MAP_SHARED on. - * In a private shared memory segment, we do a copy-on-write if a task - * attempts to write to the page. 
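Each level of the three-level tree defined earlier in this header holds one page of 8-byte entries, so every level contributes PAGE_SHIFT-3 bits of virtual address; PMD_SHIFT and PGDIR_SHIFT simply stack those contributions on top of PAGE_SHIFT. A quick sketch that evaluates the same formulas for a hypothetical 16KB page size (the real PAGE_SHIFT comes from <asm/page.h>):

#include <stdio.h>

#define PAGE_SHIFT    14                                    /* hypothetical: 16KB pages */
#define PMD_SHIFT     (PAGE_SHIFT + (PAGE_SHIFT - 3))       /* same formula as the header */
#define PGDIR_SHIFT   (PAGE_SHIFT + 2 * (PAGE_SHIFT - 3))
#define PTRS_PER_PGD  (1ULL << (PAGE_SHIFT - 3))

int main(void)
{
    printf("entries per level: %llu (each level consumes %d VA bits)\n",
           PTRS_PER_PGD, PAGE_SHIFT - 3);
    printf("PMD_SHIFT=%d: one pmd maps %llu MB\n",
           PMD_SHIFT, (1ULL << PMD_SHIFT) >> 20);
    printf("PGDIR_SHIFT=%d: one pgd entry maps %llu GB\n",
           PGDIR_SHIFT, (1ULL << PGDIR_SHIFT) >> 30);
    return 0;
}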
- */ - /* xwr */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_READONLY /* write to priv pg -> copy & make writable */ -#define __P011 PAGE_READONLY /* ditto */ -#define __P100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX) -#define __P101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED /* we don't have (and don't need) write-only */ -#define __S011 PAGE_SHARED -#define __S100 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_X_RX) -#define __S101 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) -#define __S110 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX) -#define __S111 __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RWX) - -#define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) -#define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) -#define pte_ERROR(e) printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) - - -/* - * Some definitions to translate between mem_map, PTEs, and page addresses: - */ - - -/* Quick test to see if ADDR is a (potentially) valid physical address. */ -static inline long -ia64_phys_addr_valid (unsigned long addr) -{ - return (addr & (local_cpu_data->unimpl_pa_mask)) == 0; -} - -/* - * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel - * memory. For the return value to be meaningful, ADDR must be >= - * PAGE_OFFSET. This operation can be relatively expensive (e.g., - * require a hash-, or multi-level tree-lookup or something of that - * sort) but it guarantees to return TRUE only if accessing the page - * at that address does not cause an error. Note that there may be - * addresses for which kern_addr_valid() returns FALSE even though an - * access would not cause an error (e.g., this is typically true for - * memory mapped I/O regions. - * - * XXX Need to implement this for IA-64. - */ -#define kern_addr_valid(addr) (1) - - -/* - * Now come the defines and routines to manage and access the three-level - * page table. - */ - -/* - * On some architectures, special things need to be done when setting - * the PTE in a page table. Nothing special needs to be on IA-64. - */ -#define set_pte(ptep, pteval) (*(ptep) = (pteval)) - -#define RGN_SIZE (1UL << 61) -#define RGN_KERNEL 7 - -#define VMALLOC_START 0xa000000200000000UL -#ifdef CONFIG_VIRTUAL_MEM_MAP -# define VMALLOC_END_INIT (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9))) -# define VMALLOC_END vmalloc_end - extern unsigned long vmalloc_end; -#else -# define VMALLOC_END (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9))) -#endif - -/* fs/proc/kcore.c */ -#define kc_vaddr_to_offset(v) ((v) - 0xa000000000000000UL) -#define kc_offset_to_vaddr(o) ((o) + 0xa000000000000000UL) - -/* - * Conversion functions: convert page frame number (pfn) and a protection value to a page - * table entry (pte). - */ -#define pfn_pte(pfn, pgprot) \ -({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; }) - -/* Extract pfn from pte. 
*/ -#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT) - -#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) - -/* This takes a physical page address that is used by the remapping functions */ -#define mk_pte_phys(physpage, pgprot) \ -({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; }) - -#define pte_modify(_pte, newprot) \ - (__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK))) - -#define page_pte_prot(page,prot) mk_pte(page, prot) -#define page_pte(page) page_pte_prot(page, __pgprot(0)) - -#define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE)) -#define pte_clear(pte) (pte_val(*(pte)) = 0UL) -/* pte_page() returns the "struct page *" corresponding to the PTE: */ -#define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET)) - -#define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd))) -#define pmd_present(pmd) (pmd_val(pmd) != 0UL) -#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) -#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) -#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET)) - -#define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud))) -#define pud_present(pud) (pud_val(pud) != 0UL) -#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) - -#define pud_page(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) - -/* - * The following have defined behavior only work if pte_present() is true. - */ -#define pte_user(pte) ((pte_val(pte) & _PAGE_PL_MASK) == _PAGE_PL_3) -#define pte_read(pte) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) < 6) -#define pte_write(pte) ((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4) -#define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) -#define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) -#define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) -#define pte_file(pte) ((pte_val(pte) & _PAGE_FILE) != 0) -/* - * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the - * access rights: - */ -#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW)) -#define pte_mkwrite(pte) (__pte(pte_val(pte) | _PAGE_AR_RW)) -#define pte_mkexec(pte) (__pte(pte_val(pte) | _PAGE_AR_RX)) -#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A)) -#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) -#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) -#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) - -/* - * Macro to a page protection value as "uncacheable". Note that "protection" is really a - * misnomer here as the protection value contains the memory attribute bits, dirty bits, - * and various other bits as well. - */ -#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) - -/* - * Macro to make mark a page protection value as "write-combining". - * Note that "protection" is really a misnomer here as the protection - * value contains the memory attribute bits, dirty bits, and various - * other bits as well. Accesses through a write-combining translation - * works bypasses the caches, but does allow for consecutive writes to - * be combined into single (but larger) write transactions. 
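The pfn_pte()/pte_pfn() conversions above are plain shift-and-mask: the page frame number sits in the PTE above PAGE_SHIFT and below IA64_MAX_PHYS_BITS, and everything outside that window is attribute or state bits that _PFN_MASK strips. A small round-trip check of that packing, again assuming a hypothetical 16KB PAGE_SHIFT:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define IA64_MAX_PHYS_BITS 50
#define PAGE_SHIFT         14          /* hypothetical 16KB pages */
#define PFN_MASK  ((((uint64_t)1 << IA64_MAX_PHYS_BITS) - 1) & ~(uint64_t)0xfff)

/* Mirrors pfn_pte(): frame number above PAGE_SHIFT, protection bits below. */
static uint64_t pfn_pte(uint64_t pfn, uint64_t pgprot)
{
    return (pfn << PAGE_SHIFT) | pgprot;
}

/* Mirrors pte_pfn(): mask out the attribute bits, shift the frame number back down. */
static uint64_t pte_pfn(uint64_t pte)
{
    return (pte & PFN_MASK) >> PAGE_SHIFT;
}

int main(void)
{
    uint64_t prot = 0x2e1;             /* an arbitrary mix of low attribute bits */
    uint64_t pte  = pfn_pte(0x12345, prot);

    assert(pte_pfn(pte) == 0x12345);   /* attribute bits never leak into the pfn */
    printf("pte=%#llx pfn=%#llx\n",
           (unsigned long long)pte, (unsigned long long)pte_pfn(pte));
    return 0;
}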
- */ -#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC) - -static inline unsigned long -pgd_index (unsigned long address) -{ - unsigned long region = address >> 61; - unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1); - - return (region << (PAGE_SHIFT - 6)) | l1index; -} - -/* The offset in the 1-level directory is given by the 3 region bits - (61..63) and the level-1 bits. */ -static inline pgd_t* -pgd_offset (struct mm_struct *mm, unsigned long address) -{ - return mm->pgd + pgd_index(address); -} - -/* In the kernel's mapped region we completely ignore the region number - (since we know it's in region number 5). */ -#define pgd_offset_k(addr) \ - (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) - -/* Look up a pgd entry in the gate area. On IA-64, the gate-area - resides in the kernel-mapped segment, hence we use pgd_offset_k() - here. */ -#define pgd_offset_gate(mm, addr) pgd_offset_k(addr) - -/* Find an entry in the second-level page table.. */ -#define pmd_offset(dir,addr) \ - ((pmd_t *) pud_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) - -/* - * Find an entry in the third-level page table. This looks more complicated than it - * should be because some platforms place page tables in high memory. - */ -#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -#define pte_offset_kernel(dir,addr) ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(addr)) -#define pte_offset_map(dir,addr) pte_offset_kernel(dir, addr) -#define pte_offset_map_nested(dir,addr) pte_offset_map(dir, addr) -#define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) - -/* atomic versions of the some PTE manipulations: */ - -static inline int -ptep_test_and_clear_young (pte_t *ptep) -{ -#ifdef CONFIG_SMP - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_A_BIT, ptep); -#else - pte_t pte = *ptep; - if (!pte_young(pte)) - return 0; - set_pte(ptep, pte_mkold(pte)); - return 1; -#endif -} - -static inline int -ptep_test_and_clear_dirty (pte_t *ptep) -{ -#ifdef CONFIG_SMP - if (!pte_dirty(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_D_BIT, ptep); -#else - pte_t pte = *ptep; - if (!pte_dirty(pte)) - return 0; - set_pte(ptep, pte_mkclean(pte)); - return 1; -#endif -} - -static inline pte_t -ptep_get_and_clear (pte_t *ptep) -{ -#ifdef CONFIG_SMP - return __pte(xchg((long *) ptep, 0)); -#else - pte_t pte = *ptep; - pte_clear(ptep); - return pte; -#endif -} - -static inline void -ptep_set_wrprotect (pte_t *ptep) -{ -#ifdef CONFIG_SMP - unsigned long new, old; - - do { - old = pte_val(*ptep); - new = pte_val(pte_wrprotect(__pte (old))); - } while (cmpxchg((unsigned long *) ptep, old, new) != old); -#else - pte_t old_pte = *ptep; - set_pte(ptep, pte_wrprotect(old_pte)); -#endif -} - -static inline void -ptep_mkdirty (pte_t *ptep) -{ -#ifdef CONFIG_SMP - set_bit(_PAGE_D_BIT, ptep); -#else - pte_t old_pte = *ptep; - set_pte(ptep, pte_mkdirty(old_pte)); -#endif -} - -static inline int -pte_same (pte_t a, pte_t b) -{ - return pte_val(a) == pte_val(b); -} - -extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -extern void paging_init (void); - -/* - * Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of - * bits in the swap-type field of the swap pte. It would be nice to - * enforce that, but we can't easily include <linux/swap.h> here. - * (Of course, better still would be to define MAX_SWAPFILES_SHIFT here...). 
- * - * Format of swap pte: - * bit 0 : present bit (must be zero) - * bit 1 : _PAGE_FILE (must be zero) - * bits 2- 8: swap-type - * bits 9-62: swap offset - * bit 63 : _PAGE_PROTNONE bit - * - * Format of file pte: - * bit 0 : present bit (must be zero) - * bit 1 : _PAGE_FILE (must be one) - * bits 2-62: file_offset/PAGE_SIZE - * bit 63 : _PAGE_PROTNONE bit - */ -#define __swp_type(entry) (((entry).val >> 2) & 0x7f) -#define __swp_offset(entry) (((entry).val << 1) >> 10) -#define __swp_entry(type,offset) ((swp_entry_t) { ((type) << 2) | ((long) (offset) << 9) }) -#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) - -#define PTE_FILE_MAX_BITS 61 -#define pte_to_pgoff(pte) ((pte_val(pte) << 1) >> 3) -#define pgoff_to_pte(off) ((pte_t) { ((off) << 2) | _PAGE_FILE }) - -/* XXX is this right? */ -#define io_remap_page_range(vma, vaddr, paddr, size, prot) \ - remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot) - -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; -extern struct page *zero_page_memmap_ptr; -#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr) - -/* We provide our own get_unmapped_area to cope with VA holes for userland */ -#define HAVE_ARCH_UNMAPPED_AREA - -#ifdef CONFIG_HUGETLB_PAGE -#define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3)) -#define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT) -#define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1)) -struct mmu_gather; -extern void hugetlb_free_pgtables(struct mmu_gather *tlb, - struct vm_area_struct * prev, unsigned long start, unsigned long end); -#endif - -/* - * IA-64 doesn't have any external MMU info: the page tables contain all the necessary - * information. However, we use this routine to take care of any (delayed) i-cache - * flushing that may be necessary. - */ -extern void update_mmu_cache (struct vm_area_struct *vma, unsigned long vaddr, pte_t pte); - -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -/* - * Update PTEP with ENTRY, which is guaranteed to be a less - * restrictive PTE. That is, ENTRY may have the ACCESSED, DIRTY, and - * WRITABLE bits turned on, when the value at PTEP did not. The - * WRITABLE bit may only be turned if SAFELY_WRITABLE is TRUE. - * - * SAFELY_WRITABLE is TRUE if we can update the value at PTEP without - * having to worry about races. On SMP machines, there are only two - * cases where this is true: - * - * (1) *PTEP has the PRESENT bit turned OFF - * (2) ENTRY has the DIRTY bit turned ON - * - * On ia64, we could implement this routine with a cmpxchg()-loop - * which ORs in the _PAGE_A/_PAGE_D bit if they're set in ENTRY. - * However, like on x86, we can get a more streamlined version by - * observing that it is OK to drop ACCESSED bit updates when - * SAFELY_WRITABLE is FALSE. Besides being rare, all that would do is - * result in an extra Access-bit fault, which would then turn on the - * ACCESSED bit in the low-level fault handler (iaccess_bit or - * daccess_bit in ivt.S). 
- */ -#ifdef CONFIG_SMP -# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ -do { \ - if (__safely_writable) { \ - set_pte(__ptep, __entry); \ - flush_tlb_page(__vma, __addr); \ - } \ -} while (0) -#else -# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ - ptep_establish(__vma, __addr, __ptep, __entry) -#endif - -# ifdef CONFIG_VIRTUAL_MEM_MAP - /* arch mem_map init routine is needed due to holes in a virtual mem_map */ -# define __HAVE_ARCH_MEMMAP_INIT - extern void memmap_init (unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn); -# endif /* CONFIG_VIRTUAL_MEM_MAP */ -# endif /* !__ASSEMBLY__ */ - -/* - * Identity-mapped regions use a large page size. We'll call such large pages - * "granules". If you can think of a better name that's unambiguous, let me - * know... - */ -#if defined(CONFIG_IA64_GRANULE_64MB) -# define IA64_GRANULE_SHIFT _PAGE_SIZE_64M -#elif defined(CONFIG_IA64_GRANULE_16MB) -# define IA64_GRANULE_SHIFT _PAGE_SIZE_16M -#endif -#define IA64_GRANULE_SIZE (1 << IA64_GRANULE_SHIFT) -/* - * log2() of the page size we use to map the kernel image (IA64_TR_KERNEL): - */ -#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M -#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT) - -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - -/* These tell get_user_pages() that the first gate page is accessible from user-level. */ -#define FIXADDR_USER_START GATE_ADDR -#define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE) - -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define __HAVE_ARCH_PTEP_MKDIRTY -#define __HAVE_ARCH_PTE_SAME -#define __HAVE_ARCH_PGD_OFFSET_GATE -#include <asm-generic/pgtable.h> -#include <asm-generic/pgtable-nopud.h> - -#endif /* _ASM_IA64_PGTABLE_H */ diff -r d34925e4144b -r 3ca4ca7a9cc2 xen/include/asm-ia64/linux/linuxtime.h --- a/xen/include/asm-ia64/linux/linuxtime.h Thu Sep 1 17:09:27 2005 +++ /dev/null Thu Sep 1 18:46:28 2005 @@ -1,181 +0,0 @@ -#ifndef _LINUX_TIME_H -#define _LINUX_TIME_H - -#include <linux/types.h> - -#ifdef __KERNEL__ -#include <linux/seqlock.h> -#endif - -#ifndef _STRUCT_TIMESPEC -#define _STRUCT_TIMESPEC -struct timespec { - time_t tv_sec; /* seconds */ - long tv_nsec; /* nanoseconds */ -}; -#endif /* _STRUCT_TIMESPEC */ - -struct timeval { - time_t tv_sec; /* seconds */ - suseconds_t tv_usec; /* microseconds */ -}; - -struct timezone { - int tz_minuteswest; /* minutes west of Greenwich */ - int tz_dsttime; /* type of dst correction */ -}; - -#ifdef __KERNEL__ - -/* Parameters used to convert the timespec values */ -#ifndef USEC_PER_SEC -#define USEC_PER_SEC (1000000L) -#endif - -#ifndef NSEC_PER_SEC -#define NSEC_PER_SEC (1000000000L) -#endif - -#ifndef NSEC_PER_USEC -#define NSEC_PER_USEC (1000L) -#endif - -static __inline__ int timespec_equal(struct timespec *a, struct timespec *b) -{ - return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); -} - -/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. - * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 - * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. - * - * [For the Julian calendar (which was used in Russia before 1917, - * Britain & colonies before 1752, anywhere else before 1582, - * and is still in use by some communities) leave out the - * -year/100+year/400 terms, and add 10.] 
- * - * This algorithm was first published by Gauss (I think). - * - * WARNING: this function will overflow on 2106-02-07 06:28:16 on - * machines were long is 32-bit! (However, as time_t is signed, we - * will already get problems at other places on 2038-01-19 03:14:08) - */ -static inline unsigned long -mktime (unsigned int year, unsigned int mon, - unsigned int day, unsigned int hour, - unsigned int min, unsigned int sec) -{ - if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ - mon += 12; /* Puts Feb last since it has leap day */ - year -= 1; - } - - return ((( - (unsigned long) (year/4 - year/100 + year/400 + 367*mon/12 + day) + - year*365 - 719499 - )*24 + hour /* now have hours */ - )*60 + min /* now have minutes */ - )*60 + sec; /* finally seconds */ -} - -extern struct timespec xtime; -extern struct timespec wall_to_monotonic; -extern seqlock_t xtime_lock; - -static inline unsigned long get_seconds(void) -{ - return xtime.tv_sec; -} - -struct timespec current_kernel_time(void); - -#define CURRENT_TIME (current_kernel_time()) -#define CURRENT_TIME_SEC ((struct timespec) { xtime.tv_sec, 0 }) - -extern void do_gettimeofday(struct timeval *tv); -extern int do_settimeofday(struct timespec *tv); -extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); -extern void clock_was_set(void); // call when ever the clock is set -extern int do_posix_clock_monotonic_gettime(struct timespec *tp); -extern long do_nanosleep(struct timespec *t); -extern long do_utimes(char __user * filename, struct timeval * times); -struct itimerval; -extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); -extern int do_getitimer(int which, struct itimerval *value); -extern void getnstimeofday (struct timespec *tv); - -extern struct timespec timespec_trunc(struct timespec t, unsigned gran); - -static inline void -set_normalized_timespec (struct timespec *ts, time_t sec, long nsec) -{ - while (nsec > NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - ++sec; - } - while (nsec < 0) { - nsec += NSEC_PER_SEC; - --sec; - } - ts->tv_sec = sec; - ts->tv_nsec = nsec; -} - -#endif /* __KERNEL__ */ - -#define NFDBITS __NFDBITS - -#define FD_SETSIZE __FD_SETSIZE -#define FD_SET(fd,fdsetp) __FD_SET(fd,fdsetp) -#define FD_CLR(fd,fdsetp) __FD_CLR(fd,fdsetp) -#define FD_ISSET(fd,fdsetp) __FD_ISSET(fd,fdsetp) -#define FD_ZERO(fdsetp) __FD_ZERO(fdsetp) - -/* - * Names of the interval timers, and structure - * defining a timer setting. - */ -#define ITIMER_REAL 0 -#define ITIMER_VIRTUAL 1 -#define ITIMER_PROF 2 - -struct itimerspec { - struct timespec it_interval; /* timer period */ - struct timespec it_value; /* timer expiration */ -}; - -struct itimerval { - struct timeval it_interval; /* timer interval */ - struct timeval it_value; /* current value */ -}; - - -/* - * The IDs of the various system clocks (for POSIX.1b interval timers). - */ -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_REALTIME_HR 4 -#define CLOCK_MONOTONIC_HR 5 - -/* - * The IDs of various hardware clocks - */ - - -#define CLOCK_SGI_CYCLE 10 -#define MAX_CLOCKS 16 -#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC | \ - CLOCK_REALTIME_HR | CLOCK_MONOTONIC_HR) -#define CLOCKS_MONO (CLOCK_MONOTONIC & CLOCK_MONOTONIC_HR) - -/* - * The various flags for setting POSIX.1b interval timers. 
- */ - -#define TIMER_ABSTIME 0x01 - - -#endif _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog