[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [IA64] more cleanup
# HG changeset patch # User awilliam@xxxxxxxxxxx # Node ID f662f98d594b65e1698c5da241312f1ae136b975 # Parent 279628dc2d6fda9a0fd790cce3e04a2ff1570c0d [IA64] more cleanup Clean-up: fw_emul.c created. More definitions moved to mm.c process.c is lighter and renamed to faults.c Signed-off-by: Tristan Gingold <tristan.gingold@xxxxxxxx> --- xen/arch/ia64/xen/process.c | 949 ------------------------------------------ xen/arch/ia64/xen/Makefile | 3 xen/arch/ia64/xen/dom_fw.c | 330 -------------- xen/arch/ia64/xen/domain.c | 114 ++++- xen/arch/ia64/xen/faults.c | 662 +++++++++++++++++++++++++++++ xen/arch/ia64/xen/fw_emul.c | 453 ++++++++++++++++++++ xen/arch/ia64/xen/hypercall.c | 52 ++ xen/arch/ia64/xen/mm.c | 337 ++++++++++++++ xen/arch/ia64/xen/xenmisc.c | 351 --------------- xen/include/asm-ia64/domain.h | 3 10 files changed, 1609 insertions(+), 1645 deletions(-) diff -r 279628dc2d6f -r f662f98d594b xen/arch/ia64/xen/Makefile --- a/xen/arch/ia64/xen/Makefile Mon Jun 05 14:23:57 2006 -0600 +++ b/xen/arch/ia64/xen/Makefile Mon Jun 05 14:28:39 2006 -0600 @@ -3,6 +3,7 @@ obj-y += domain.o obj-y += domain.o obj-y += dom_fw.o obj-y += efi_emul.o +obj-y += fw_emul.o obj-y += hpsimserial.o obj-y += hypercall.o obj-y += hyperprivop.o @@ -13,7 +14,7 @@ obj-y += mm_init.o obj-y += mm_init.o obj-y += pcdp.o obj-y += privop.o -obj-y += process.o +obj-y += faults.o obj-y += regionreg.o obj-y += sn_console.o obj-y += vcpu.o diff -r 279628dc2d6f -r f662f98d594b xen/arch/ia64/xen/dom_fw.c --- a/xen/arch/ia64/xen/dom_fw.c Mon Jun 05 14:23:57 2006 -0600 +++ b/xen/arch/ia64/xen/dom_fw.c Mon Jun 05 14:28:39 2006 -0600 @@ -23,7 +23,6 @@ #include <xen/acpi.h> #include <asm/dom_fw.h> -#include <public/sched.h> static struct ia64_boot_param *dom_fw_init(struct domain *, const char *,int,char *,int); extern unsigned long domain_mpa_to_imva(struct domain *,unsigned long mpaddr); @@ -139,334 +138,6 @@ unsigned long dom_fw_setup(struct domain /* the following heavily leveraged from 
linux/arch/ia64/hp/sim/fw-emu.c */ -struct sal_ret_values -sal_emulator (long index, unsigned long in1, unsigned long in2, - unsigned long in3, unsigned long in4, unsigned long in5, - unsigned long in6, unsigned long in7) -{ - unsigned long r9 = 0; - unsigned long r10 = 0; - long r11 = 0; - long status; - - status = 0; - switch (index) { - case SAL_FREQ_BASE: - if (!running_on_sim) - status = ia64_sal_freq_base(in1,&r9,&r10); - else switch (in1) { - case SAL_FREQ_BASE_PLATFORM: - r9 = 200000000; - break; - - case SAL_FREQ_BASE_INTERVAL_TIMER: - r9 = 700000000; - break; - - case SAL_FREQ_BASE_REALTIME_CLOCK: - r9 = 1; - break; - - default: - status = -1; - break; - } - break; - case SAL_PCI_CONFIG_READ: - if (current->domain == dom0) { - u64 value; - // note that args 2&3 are swapped!! - status = ia64_sal_pci_config_read(in1,in3,in2,&value); - r9 = value; - } - else - printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_READ\n"); - break; - case SAL_PCI_CONFIG_WRITE: - if (current->domain == dom0) { - if (((in1 & ~0xffffffffUL) && (in4 == 0)) || - (in4 > 1) || - (in2 > 8) || (in2 & (in2-1))) - printf("*** SAL_PCI_CONF_WRITE?!?(adr=0x%lx,typ=0x%lx,sz=0x%lx,val=0x%lx)\n", - in1,in4,in2,in3); - // note that args are in a different order!! - status = ia64_sal_pci_config_write(in1,in4,in2,in3); - } - else - printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_WRITE\n"); - break; - case SAL_SET_VECTORS: - if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { - if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { - /* Sanity check: cs_length1 must be 0, - second vector is reserved. */ - status = -2; - } - else { - struct domain *d = current->domain; - d->arch.boot_rdv_ip = in2; - d->arch.boot_rdv_r1 = in3; - } - } - else - printf("*** CALLED SAL_SET_VECTORS %lu. IGNORED...\n", - in1); - break; - case SAL_GET_STATE_INFO: - /* No more info. */ - status = -5; - r9 = 0; - break; - case SAL_GET_STATE_INFO_SIZE: - /* Return a dummy size. 
*/ - status = 0; - r9 = 128; - break; - case SAL_CLEAR_STATE_INFO: - /* Noop. */ - break; - case SAL_MC_RENDEZ: - printf("*** CALLED SAL_MC_RENDEZ. IGNORED...\n"); - break; - case SAL_MC_SET_PARAMS: - printf("*** CALLED SAL_MC_SET_PARAMS. IGNORED...\n"); - break; - case SAL_CACHE_FLUSH: - if (1) { - /* Flush using SAL. - This method is faster but has a side effect on - other vcpu running on this cpu. */ - status = ia64_sal_cache_flush (in1); - } - else { - /* Flush with fc all the domain. - This method is slower but has no side effects. */ - domain_cache_flush (current->domain, in1 == 4 ? 1 : 0); - status = 0; - } - break; - case SAL_CACHE_INIT: - printf("*** CALLED SAL_CACHE_INIT. IGNORED...\n"); - break; - case SAL_UPDATE_PAL: - printf("*** CALLED SAL_UPDATE_PAL. IGNORED...\n"); - break; - default: - printf("*** CALLED SAL_ WITH UNKNOWN INDEX. IGNORED...\n"); - status = -1; - break; - } - return ((struct sal_ret_values) {status, r9, r10, r11}); -} - -struct ia64_pal_retval -xen_pal_emulator(unsigned long index, u64 in1, u64 in2, u64 in3) -{ - unsigned long r9 = 0; - unsigned long r10 = 0; - unsigned long r11 = 0; - long status = PAL_STATUS_UNIMPLEMENTED; - - if (running_on_sim) - return pal_emulator_static(index); - - // pal code must be mapped by a TR when pal is called, however - // calls are rare enough that we will map it lazily rather than - // at every context switch - //efi_map_pal_code(); - switch (index) { - case PAL_MEM_ATTRIB: - status = ia64_pal_mem_attrib(&r9); - break; - case PAL_FREQ_BASE: - status = ia64_pal_freq_base(&r9); - break; - case PAL_PROC_GET_FEATURES: - status = ia64_pal_proc_get_features(&r9,&r10,&r11); - break; - case PAL_BUS_GET_FEATURES: - status = ia64_pal_bus_get_features( - (pal_bus_features_u_t *) &r9, - (pal_bus_features_u_t *) &r10, - (pal_bus_features_u_t *) &r11); - break; - case PAL_FREQ_RATIOS: - status = ia64_pal_freq_ratios( - (struct pal_freq_ratio *) &r9, - (struct pal_freq_ratio *) &r10, - (struct pal_freq_ratio *) 
&r11); - break; - case PAL_PTCE_INFO: - { - // return hard-coded xen-specific values because ptc.e - // is emulated on xen to always flush everything - // these values result in only one ptc.e instruction - status = 0; r9 = 0; r10 = (1L << 32) | 1L; r11 = 0; - } - break; - case PAL_VERSION: - status = ia64_pal_version( - (pal_version_u_t *) &r9, - (pal_version_u_t *) &r10); - break; - case PAL_VM_PAGE_SIZE: - status = ia64_pal_vm_page_size(&r9,&r10); - break; - case PAL_DEBUG_INFO: - status = ia64_pal_debug_info(&r9,&r10); - break; - case PAL_CACHE_SUMMARY: - status = ia64_pal_cache_summary(&r9,&r10); - break; - case PAL_VM_SUMMARY: - { - /* Use xen-specific values. - hash_tag_id is somewhat random! */ - const pal_vm_info_1_u_t v1 = - {.pal_vm_info_1_s = - { .vw = 1, - .phys_add_size = 44, - .key_size = 16, - .max_pkr = 15, - .hash_tag_id = 0x30, - .max_dtr_entry = NDTRS - 1, - .max_itr_entry = NITRS - 1, -#ifdef VHPT_GLOBAL - .max_unique_tcs = 3, - .num_tc_levels = 2 -#else - .max_unique_tcs = 2, - .num_tc_levels = 1 -#endif - }}; - const pal_vm_info_2_u_t v2 = - { .pal_vm_info_2_s = - { .impl_va_msb = 50, - .rid_size = current->domain->arch.rid_bits, - .reserved = 0 }}; - r9 = v1.pvi1_val; - r10 = v2.pvi2_val; - status = PAL_STATUS_SUCCESS; - } - break; - case PAL_VM_INFO: -#ifdef VHPT_GLOBAL - if (in1 == 0 && in2 == 2) { - /* Level 1: VHPT */ - const pal_tc_info_u_t v = - { .pal_tc_info_s = {.num_sets = 128, - .associativity = 1, - .num_entries = 128, - .pf = 1, - .unified = 1, - .reduce_tr = 0, - .reserved = 0}}; - r9 = v.pti_val; - /* Only support PAGE_SIZE tc. */ - r10 = PAGE_SIZE; - status = PAL_STATUS_SUCCESS; - } -#endif - else if ( -#ifdef VHPT_GLOBAL - in1 == 1 /* Level 2. */ -#else - in1 == 0 /* Level 1. */ -#endif - && (in2 == 1 || in2 == 2)) - { - /* itlb/dtlb, 1 entry. 
*/ - const pal_tc_info_u_t v = - { .pal_tc_info_s = {.num_sets = 1, - .associativity = 1, - .num_entries = 1, - .pf = 1, - .unified = 0, - .reduce_tr = 0, - .reserved = 0}}; - r9 = v.pti_val; - /* Only support PAGE_SIZE tc. */ - r10 = PAGE_SIZE; - status = PAL_STATUS_SUCCESS; - } - else - status = PAL_STATUS_EINVAL; - break; - case PAL_RSE_INFO: - status = ia64_pal_rse_info( - &r9, - (pal_hints_u_t *) &r10); - break; - case PAL_REGISTER_INFO: - status = ia64_pal_register_info(in1, &r9, &r10); - break; - case PAL_CACHE_FLUSH: - /* FIXME */ - printk("PAL_CACHE_FLUSH NOT IMPLEMENTED!\n"); - BUG(); - break; - case PAL_PERF_MON_INFO: - { - unsigned long pm_buffer[16]; - status = ia64_pal_perf_mon_info( - pm_buffer, - (pal_perf_mon_info_u_t *) &r9); - if (status != 0) { - while(1) - printk("PAL_PERF_MON_INFO fails ret=%ld\n", status); - break; - } - if (copy_to_user((void __user *)in1,pm_buffer,128)) { - while(1) - printk("xen_pal_emulator: PAL_PERF_MON_INFO " - "can't copy to user!!!!\n"); - status = PAL_STATUS_UNIMPLEMENTED; - break; - } - } - break; - case PAL_CACHE_INFO: - { - pal_cache_config_info_t ci; - status = ia64_pal_cache_config_info(in1,in2,&ci); - if (status != 0) break; - r9 = ci.pcci_info_1.pcci1_data; - r10 = ci.pcci_info_2.pcci2_data; - } - break; - case PAL_VM_TR_READ: /* FIXME: vcpu_get_tr?? */ - printk("PAL_VM_TR_READ NOT IMPLEMENTED, IGNORED!\n"); - break; - case PAL_HALT_INFO: - { - /* 1000 cycles to enter/leave low power state, - consumes 10 mW, implemented and cache/TLB coherent. 
*/ - unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32) - | (1UL << 61) | (1UL << 60); - if (copy_to_user ((void *)in1, &res, sizeof (res))) - status = PAL_STATUS_EINVAL; - else - status = PAL_STATUS_SUCCESS; - } - break; - case PAL_HALT: - if (current->domain == dom0) { - printf ("Domain0 halts the machine\n"); - (*efi.reset_system)(EFI_RESET_SHUTDOWN,0,0,NULL); - } - else - domain_shutdown (current->domain, - SHUTDOWN_poweroff); - break; - default: - printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %lu!!!!\n", - index); - break; - } - return ((struct ia64_pal_retval) {status, r9, r10, r11}); -} - - #define NFUNCPTRS 20 static void print_md(efi_memory_desc_t *md) @@ -478,7 +149,6 @@ static void print_md(efi_memory_desc_t * md->num_pages >> (20 - EFI_PAGE_SHIFT)); #endif } - static u32 lsapic_nbr; diff -r 279628dc2d6f -r f662f98d594b xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Mon Jun 05 14:23:57 2006 -0600 +++ b/xen/arch/ia64/xen/domain.c Mon Jun 05 14:28:39 2006 -0600 @@ -78,21 +78,96 @@ extern char dom0_command_line[]; #define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend)) /* FIXME: where these declarations should be there ? 
*/ -extern long platform_is_hp_ski(void); extern void serial_input_init(void); static void init_switch_stack(struct vcpu *v); +extern void vmx_do_launch(struct vcpu *); void build_physmap_table(struct domain *d); /* this belongs in include/asm, but there doesn't seem to be a suitable place */ -void arch_domain_destroy(struct domain *d) -{ - BUG_ON(d->arch.mm.pgd != NULL); - if (d->shared_info != NULL) - free_xenheap_page(d->shared_info); - - domain_flush_destroy (d); - - deallocate_rid_range(d); +unsigned long context_switch_count = 0; + +extern struct vcpu *ia64_switch_to (struct vcpu *next_task); + +#include <xen/sched-if.h> + +void schedule_tail(struct vcpu *prev) +{ + extern char ia64_ivt; + context_saved(prev); + + if (VMX_DOMAIN(current)) { + vmx_do_launch(current); + } else { + ia64_set_iva(&ia64_ivt); + ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | + VHPT_ENABLED); + load_region_regs(current); + vcpu_load_kernel_regs(current); + } +} + +void context_switch(struct vcpu *prev, struct vcpu *next) +{ + uint64_t spsr; + uint64_t pta; + + local_irq_save(spsr); + context_switch_count++; + + __ia64_save_fpu(prev->arch._thread.fph); + __ia64_load_fpu(next->arch._thread.fph); + if (VMX_DOMAIN(prev)) + vmx_save_state(prev); + if (VMX_DOMAIN(next)) + vmx_load_state(next); + /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/ + prev = ia64_switch_to(next); + + //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask); + + if (!VMX_DOMAIN(current)){ + vcpu_set_next_timer(current); + } + + +// leave this debug for now: it acts as a heartbeat when more than +// one domain is active +{ +static long cnt[16] = { 50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50}; +static int i = 100; +int id = ((struct vcpu *)current)->domain->domain_id & 0xf; +if (!cnt[id]--) { cnt[id] = 500000; printk("%x",id); } +if (!i--) { i = 1000000; printk("+"); } +} + + if (VMX_DOMAIN(current)){ + vmx_load_all_rr(current); + }else{ + extern char ia64_ivt; 
+ ia64_set_iva(&ia64_ivt); + if (!is_idle_domain(current->domain)) { + ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | + VHPT_ENABLED); + load_region_regs(current); + vcpu_load_kernel_regs(current); + if (vcpu_timer_expired(current)) + vcpu_pend_timer(current); + }else { + /* When switching to idle domain, only need to disable vhpt + * walker. Then all accesses happen within idle context will + * be handled by TR mapping and identity mapping. + */ + pta = ia64_get_pta(); + ia64_set_pta(pta & ~VHPT_ENABLED); + } + } + local_irq_restore(spsr); + context_saved(prev); +} + +void continue_running(struct vcpu *same) +{ + /* nothing to do */ } static void default_idle(void) @@ -257,6 +332,17 @@ fail_nomem: if (d->shared_info != NULL) free_xenheap_page(d->shared_info); return -ENOMEM; +} + +void arch_domain_destroy(struct domain *d) +{ + BUG_ON(d->arch.mm.pgd != NULL); + if (d->shared_info != NULL) + free_xenheap_page(d->shared_info); + + domain_flush_destroy (d); + + deallocate_rid_range(d); } void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) @@ -543,7 +629,7 @@ static void loaddomainelfimage(struct do void alloc_dom0(void) { - if (platform_is_hp_ski()) { + if (running_on_sim) { dom0_size = 128*1024*1024; //FIXME: Should be configurable } #ifdef CONFIG_DOMAIN0_CONTIGUOUS @@ -798,21 +884,21 @@ int construct_dom0(struct domain *d, void machine_restart(char * __unused) { - if (platform_is_hp_ski()) dummy(); + if (running_on_sim) dummy(); printf("machine_restart called: spinning....\n"); while(1); } void machine_halt(void) { - if (platform_is_hp_ski()) dummy(); + if (running_on_sim) dummy(); printf("machine_halt called: spinning....\n"); while(1); } void dummy_called(char *function) { - if (platform_is_hp_ski()) asm("break 0;;"); + if (running_on_sim) asm("break 0;;"); printf("dummy called in %s: spinning....\n", function); while(1); } diff -r 279628dc2d6f -r f662f98d594b xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c 
Mon Jun 05 14:23:57 2006 -0600 +++ b/xen/arch/ia64/xen/hypercall.c Mon Jun 05 14:28:39 2006 -0600 @@ -334,6 +334,58 @@ ia64_hypercall (struct pt_regs *regs) return xen_hypercall (regs); } +unsigned long hypercall_create_continuation( + unsigned int op, const char *format, ...) +{ + struct mc_state *mcs = &mc_state[smp_processor_id()]; + struct vcpu *v = current; + const char *p = format; + unsigned long arg; + unsigned int i; + va_list args; + + va_start(args, format); + if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) { + panic("PREEMPT happen in multicall\n"); // Not support yet + } else { + vcpu_set_gr(v, 2, op, 0); + for ( i = 0; *p != '\0'; i++) { + switch ( *p++ ) + { + case 'i': + arg = (unsigned long)va_arg(args, unsigned int); + break; + case 'l': + arg = (unsigned long)va_arg(args, unsigned long); + break; + case 'h': + arg = (unsigned long)va_arg(args, void *); + break; + default: + arg = 0; + BUG(); + } + switch (i) { + case 0: vcpu_set_gr(v, 14, arg, 0); + break; + case 1: vcpu_set_gr(v, 15, arg, 0); + break; + case 2: vcpu_set_gr(v, 16, arg, 0); + break; + case 3: vcpu_set_gr(v, 17, arg, 0); + break; + case 4: vcpu_set_gr(v, 18, arg, 0); + break; + default: panic("Too many args for hypercall continuation\n"); + break; + } + } + } + v->arch.hypercall_continuation = 1; + va_end(args); + return op; +} + /* Need make this function common */ extern int iosapic_guest_read( diff -r 279628dc2d6f -r f662f98d594b xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Mon Jun 05 14:23:57 2006 -0600 +++ b/xen/arch/ia64/xen/mm.c Mon Jun 05 14:28:39 2006 -0600 @@ -14,6 +14,7 @@ #include <asm/mm.h> #include <asm/pgalloc.h> #include <asm/vhpt.h> +#include <asm/vcpu.h> #include <linux/efi.h> #ifndef CONFIG_XEN_IA64_DOM0_VP @@ -246,6 +247,110 @@ share_xen_page_with_privileged_guests(st share_xen_page_with_privileged_guests(struct page_info *page, int readonly) { share_xen_page_with_guest(page, dom_xen, readonly); +} + +unsigned long +gmfn_to_mfn_foreign(struct domain *d, 
unsigned long gpfn) +{ + unsigned long pte; + +#ifndef CONFIG_XEN_IA64_DOM0_VP + if (d == dom0) + return(gpfn); +#endif + pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT); + if (!pte) { + panic("gmfn_to_mfn_foreign: bad gpfn. spinning...\n"); + } + return ((pte & _PFN_MASK) >> PAGE_SHIFT); +} + +// given a domain virtual address, pte and pagesize, extract the metaphysical +// address, convert the pte for a physical address for (possibly different) +// Xen PAGE_SIZE and return modified pte. (NOTE: TLB insert should use +// PAGE_SIZE!) +u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* logps) +{ + struct domain *d = current->domain; + ia64_itir_t itir = {.itir = itir__}; + u64 mask, mpaddr, pteval2; + u64 arflags; + u64 arflags2; + + pteval &= ((1UL << 53) - 1);// ignore [63:53] bits + + // FIXME address had better be pre-validated on insert + mask = ~itir_mask(itir.itir); + mpaddr = (((pteval & ~_PAGE_ED) & _PAGE_PPN_MASK) & ~mask) | + (address & mask); +#ifdef CONFIG_XEN_IA64_DOM0_VP + if (itir.ps > PAGE_SHIFT) { + itir.ps = PAGE_SHIFT; + } +#endif + *logps = itir.ps; +#ifndef CONFIG_XEN_IA64_DOM0_VP + if (d == dom0) { + if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { + /* + printk("translate_domain_pte: out-of-bounds dom0 mpaddr 0x%lx! itc=%lx...\n", + mpaddr, ia64_get_itc()); + */ + tdpfoo(); + } + } + else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) { + /* Address beyond the limit. However the grant table is + also beyond the limit. Display a message if not in the + grant table. 
*/ + if (mpaddr >= IA64_GRANT_TABLE_PADDR + && mpaddr < (IA64_GRANT_TABLE_PADDR + + (ORDER_GRANT_FRAMES << PAGE_SHIFT))) + printf("translate_domain_pte: bad mpa=0x%lx (> 0x%lx)," + "vadr=0x%lx,pteval=0x%lx,itir=0x%lx\n", + mpaddr, (unsigned long)d->max_pages<<PAGE_SHIFT, + address, pteval, itir.itir); + tdpfoo(); + } +#endif + pteval2 = lookup_domain_mpa(d,mpaddr); + arflags = pteval & _PAGE_AR_MASK; + arflags2 = pteval2 & _PAGE_AR_MASK; + if (arflags != _PAGE_AR_R && arflags2 == _PAGE_AR_R) { +#if 0 + DPRINTK("%s:%d " + "pteval 0x%lx arflag 0x%lx address 0x%lx itir 0x%lx " + "pteval2 0x%lx arflags2 0x%lx mpaddr 0x%lx\n", + __func__, __LINE__, + pteval, arflags, address, itir__, + pteval2, arflags2, mpaddr); +#endif + pteval = (pteval & ~_PAGE_AR_MASK) | _PAGE_AR_R; +} + + pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits + pteval2 |= (pteval & _PAGE_ED); + pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected) + pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2; + return pteval2; +} + +// given a current domain metaphysical address, return the physical address +unsigned long translate_domain_mpaddr(unsigned long mpaddr) +{ + unsigned long pteval; + +#ifndef CONFIG_XEN_IA64_DOM0_VP + if (current->domain == dom0) { + if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { + printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr 0x%lx! continuing...\n", + mpaddr); + tdpfoo(); + } + } +#endif + pteval = lookup_domain_mpa(current->domain,mpaddr); + return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK)); } //XXX !xxx_present() should be used instread of !xxx_none()? @@ -1036,6 +1141,238 @@ void domain_cache_flush (struct domain * //printf ("domain_cache_flush: %d %d pages\n", d->domain_id, nbr_page); } +#ifdef VERBOSE +#define MEM_LOG(_f, _a...) \ + printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \ + current->domain->domain_id , __LINE__ , ## _a ) +#else +#define MEM_LOG(_f, _a...) 
((void)0) +#endif + +static void free_page_type(struct page_info *page, u32 type) +{ +} + +static int alloc_page_type(struct page_info *page, u32 type) +{ + return 1; +} + +unsigned long __get_free_pages(unsigned int mask, unsigned int order) +{ + void *p = alloc_xenheap_pages(order); + + memset(p,0,PAGE_SIZE<<order); + return (unsigned long)p; +} + +void __free_pages(struct page_info *page, unsigned int order) +{ + if (order) BUG(); + free_xenheap_page(page); +} + +void *pgtable_quicklist_alloc(void) +{ + void *p; + p = alloc_xenheap_pages(0); + if (p) + clear_page(p); + return p; +} + +void pgtable_quicklist_free(void *pgtable_entry) +{ + free_xenheap_page(pgtable_entry); +} + +void cleanup_writable_pagetable(struct domain *d) +{ + return; +} + +void put_page_type(struct page_info *page) +{ + u32 nx, x, y = page->u.inuse.type_info; + + again: + do { + x = y; + nx = x - 1; + + ASSERT((x & PGT_count_mask) != 0); + + /* + * The page should always be validated while a reference is held. The + * exception is during domain destruction, when we forcibly invalidate + * page-table pages if we detect a referential loop. + * See domain.c:relinquish_list(). + */ + ASSERT((x & PGT_validated) || + test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags)); + + if ( unlikely((nx & PGT_count_mask) == 0) ) + { + /* Record TLB information for flush later. Races are harmless. */ + page->tlbflush_timestamp = tlbflush_current_time(); + + if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && + likely(nx & PGT_validated) ) + { + /* + * Page-table pages must be unvalidated when count is zero. The + * 'free' is safe because the refcnt is non-zero and validated + * bit is clear => other ops will spin or fail. + */ + if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, + x & ~PGT_validated)) != x) ) + goto again; + /* We cleared the 'valid bit' so we do the clean up. */ + free_page_type(page, x); + /* Carry on, but with the 'valid bit' now clear. 
*/ + x &= ~PGT_validated; + nx &= ~PGT_validated; + } + } + else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) == + (PGT_pinned | 1)) && + ((nx & PGT_type_mask) != PGT_writable_page)) ) + { + /* Page is now only pinned. Make the back pointer mutable again. */ + nx |= PGT_va_mutable; + } + } + while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); +} + + +int get_page_type(struct page_info *page, u32 type) +{ + u32 nx, x, y = page->u.inuse.type_info; + + again: + do { + x = y; + nx = x + 1; + if ( unlikely((nx & PGT_count_mask) == 0) ) + { + MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page)); + return 0; + } + else if ( unlikely((x & PGT_count_mask) == 0) ) + { + if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) + { + if ( (x & PGT_type_mask) != (type & PGT_type_mask) ) + { + /* + * On type change we check to flush stale TLB + * entries. This may be unnecessary (e.g., page + * was GDT/LDT) but those circumstances should be + * very rare. + */ + cpumask_t mask = + page_get_owner(page)->domain_dirty_cpumask; + tlbflush_filter(mask, page->tlbflush_timestamp); + + if ( unlikely(!cpus_empty(mask)) ) + { + perfc_incrc(need_flush_tlb_flush); + flush_tlb_mask(mask); + } + } + + /* We lose existing type, back pointer, and validity. */ + nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); + nx |= type; + + /* No special validation needed for writable pages. */ + /* Page tables and GDT/LDT need to be scanned for validity. */ + if ( type == PGT_writable_page ) + nx |= PGT_validated; + } + } + else + { + if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) + { + if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) + { + if ( current->domain == page_get_owner(page) ) + { + /* + * This ensures functions like set_gdt() see up-to-date + * type info without needing to clean up writable p.t. + * state on the fast path. 
+ */ + LOCK_BIGLOCK(current->domain); + cleanup_writable_pagetable(current->domain); + y = page->u.inuse.type_info; + UNLOCK_BIGLOCK(current->domain); + /* Can we make progress now? */ + if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) || + ((y & PGT_count_mask) == 0) ) + goto again; + } + if ( ((x & PGT_type_mask) != PGT_l2_page_table) || + ((type & PGT_type_mask) != PGT_l1_page_table) ) + MEM_LOG("Bad type (saw %08x != exp %08x) " + "for mfn %016lx (pfn %016lx)", + x, type, page_to_mfn(page), + get_gpfn_from_mfn(page_to_mfn(page))); + return 0; + } + else if ( (x & PGT_va_mask) == PGT_va_mutable ) + { + /* The va backpointer is mutable, hence we update it. */ + nx &= ~PGT_va_mask; + nx |= type; /* we know the actual type is correct */ + } + else if ( ((type & PGT_va_mask) != PGT_va_mutable) && + ((type & PGT_va_mask) != (x & PGT_va_mask)) ) + { +#ifdef CONFIG_X86_PAE + /* We use backptr as extra typing. Cannot be unknown. */ + if ( (type & PGT_type_mask) == PGT_l2_page_table ) + return 0; +#endif + /* This table is possibly mapped at multiple locations. */ + nx &= ~PGT_va_mask; + nx |= PGT_va_unknown; + } + } + if ( unlikely(!(x & PGT_validated)) ) + { + /* Someone else is updating validation of this page. Wait... */ + while ( (y = page->u.inuse.type_info) == x ) + cpu_relax(); + goto again; + } + } + } + while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); + + if ( unlikely(!(nx & PGT_validated)) ) + { + /* Try to validate page type; drop the new reference on failure. */ + if ( unlikely(!alloc_page_type(page, type)) ) + { + MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %08x" + ": caf=%08x taf=%" PRtype_info, + page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), + type, page->count_info, page->u.inuse.type_info); + /* Noone else can get a reference. We hold the only ref. */ + page->u.inuse.type_info = 0; + return 0; + } + + /* Noone else is updating simultaneously. 
*/ + __set_bit(_PGT_validated, &page->u.inuse.type_info); + } + + return 1; +} + /* * Local variables: * mode: C diff -r 279628dc2d6f -r f662f98d594b xen/arch/ia64/xen/xenmisc.c --- a/xen/arch/ia64/xen/xenmisc.c Mon Jun 05 14:23:57 2006 -0600 +++ b/xen/arch/ia64/xen/xenmisc.c Mon Jun 05 14:28:39 2006 -0600 @@ -19,7 +19,6 @@ #include <public/sched.h> #include <asm/vhpt.h> #include <asm/debugger.h> -#include <asm/vmx.h> #include <asm/vmx_vcpu.h> #include <asm/vcpu.h> @@ -56,90 +55,7 @@ is_platform_hp_ski(void) return 1; } -long -platform_is_hp_ski(void) -{ - extern long running_on_sim; - return running_on_sim; -} - - struct pt_regs *guest_cpu_user_regs(void) { return vcpu_regs(current); } - -unsigned long -gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) -{ - unsigned long pte; - -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (d == dom0) - return(gpfn); -#endif - pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT); - if (!pte) { - panic("gmfn_to_mfn_foreign: bad gpfn. spinning...\n"); - } - return ((pte & _PFN_MASK) >> PAGE_SHIFT); -} - -#if 0 -u32 -mfn_to_gmfn(struct domain *d, unsigned long frame) -{ - // FIXME: is this right? -if ((frame << PAGE_SHIFT) & _PAGE_PPN_MASK) { -printk("mfn_to_gmfn: bad frame. 
spinning...\n"); -while(1); -} - return frame; -} -#endif - -/////////////////////////////// -// from arch/x86/memory.c -/////////////////////////////// - - -static void free_page_type(struct page_info *page, u32 type) -{ -} - -static int alloc_page_type(struct page_info *page, u32 type) -{ - return 1; -} - -/////////////////////////////// -//// misc memory stuff -/////////////////////////////// - -unsigned long __get_free_pages(unsigned int mask, unsigned int order) -{ - void *p = alloc_xenheap_pages(order); - - memset(p,0,PAGE_SIZE<<order); - return (unsigned long)p; -} - -void __free_pages(struct page_info *page, unsigned int order) -{ - if (order) BUG(); - free_xenheap_page(page); -} - -void *pgtable_quicklist_alloc(void) -{ - void *p; - p = alloc_xenheap_pages(0); - if (p) - clear_page(p); - return p; -} - -void pgtable_quicklist_free(void *pgtable_entry) -{ - free_xenheap_page(pgtable_entry); -} /////////////////////////////// // from arch/ia64/traps.c @@ -246,74 +162,6 @@ void *__module_text_address(unsigned lon void *__module_text_address(unsigned long addr) { return NULL; } void *module_text_address(unsigned long addr) { return NULL; } -unsigned long context_switch_count = 0; - -extern struct vcpu *ia64_switch_to (struct vcpu *next_task); - - -void context_switch(struct vcpu *prev, struct vcpu *next) -{ - uint64_t spsr; - uint64_t pta; - - local_irq_save(spsr); - context_switch_count++; - - __ia64_save_fpu(prev->arch._thread.fph); - __ia64_load_fpu(next->arch._thread.fph); - if (VMX_DOMAIN(prev)) - vmx_save_state(prev); - if (VMX_DOMAIN(next)) - vmx_load_state(next); - /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/ - prev = ia64_switch_to(next); - - //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask); - - if (!VMX_DOMAIN(current)){ - vcpu_set_next_timer(current); - } - - -// leave this debug for now: it acts as a heartbeat when more than -// one domain is active -{ -static long cnt[16] = { 
50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50}; -static int i = 100; -int id = ((struct vcpu *)current)->domain->domain_id & 0xf; -if (!cnt[id]--) { cnt[id] = 500000; printk("%x",id); } -if (!i--) { i = 1000000; printk("+"); } -} - - if (VMX_DOMAIN(current)){ - vmx_load_all_rr(current); - }else{ - extern char ia64_ivt; - ia64_set_iva(&ia64_ivt); - if (!is_idle_domain(current->domain)) { - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); - load_region_regs(current); - vcpu_load_kernel_regs(current); - if (vcpu_timer_expired(current)) - vcpu_pend_timer(current); - }else { - /* When switching to idle domain, only need to disable vhpt - * walker. Then all accesses happen within idle context will - * be handled by TR mapping and identity mapping. - */ - pta = ia64_get_pta(); - ia64_set_pta(pta & ~VHPT_ENABLED); - } - } - local_irq_restore(spsr); - context_saved(prev); -} - -void continue_running(struct vcpu *same) -{ - /* nothing to do */ -} void arch_dump_domain_info(struct domain *d) { @@ -340,202 +188,3 @@ void panic_domain(struct pt_regs *regs, } domain_crash_synchronous (); } - -/////////////////////////////// -// from arch/x86/mm.c -/////////////////////////////// - -#ifdef VERBOSE -#define MEM_LOG(_f, _a...) \ - printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \ - current->domain->domain_id , __LINE__ , ## _a ) -#else -#define MEM_LOG(_f, _a...) ((void)0) -#endif - -void cleanup_writable_pagetable(struct domain *d) -{ - return; -} - -void put_page_type(struct page_info *page) -{ - u32 nx, x, y = page->u.inuse.type_info; - - again: - do { - x = y; - nx = x - 1; - - ASSERT((x & PGT_count_mask) != 0); - - /* - * The page should always be validated while a reference is held. The - * exception is during domain destruction, when we forcibly invalidate - * page-table pages if we detect a referential loop. - * See domain.c:relinquish_list(). 
- */ - ASSERT((x & PGT_validated) || - test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags)); - - if ( unlikely((nx & PGT_count_mask) == 0) ) - { - /* Record TLB information for flush later. Races are harmless. */ - page->tlbflush_timestamp = tlbflush_current_time(); - - if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && - likely(nx & PGT_validated) ) - { - /* - * Page-table pages must be unvalidated when count is zero. The - * 'free' is safe because the refcnt is non-zero and validated - * bit is clear => other ops will spin or fail. - */ - if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, - x & ~PGT_validated)) != x) ) - goto again; - /* We cleared the 'valid bit' so we do the clean up. */ - free_page_type(page, x); - /* Carry on, but with the 'valid bit' now clear. */ - x &= ~PGT_validated; - nx &= ~PGT_validated; - } - } - else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) == - (PGT_pinned | 1)) && - ((nx & PGT_type_mask) != PGT_writable_page)) ) - { - /* Page is now only pinned. Make the back pointer mutable again. */ - nx |= PGT_va_mutable; - } - } - while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); -} - - -int get_page_type(struct page_info *page, u32 type) -{ - u32 nx, x, y = page->u.inuse.type_info; - - again: - do { - x = y; - nx = x + 1; - if ( unlikely((nx & PGT_count_mask) == 0) ) - { - MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page)); - return 0; - } - else if ( unlikely((x & PGT_count_mask) == 0) ) - { - if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) - { - if ( (x & PGT_type_mask) != (type & PGT_type_mask) ) - { - /* - * On type change we check to flush stale TLB - * entries. This may be unnecessary (e.g., page - * was GDT/LDT) but those circumstances should be - * very rare. 
- */ - cpumask_t mask = - page_get_owner(page)->domain_dirty_cpumask; - tlbflush_filter(mask, page->tlbflush_timestamp); - - if ( unlikely(!cpus_empty(mask)) ) - { - perfc_incrc(need_flush_tlb_flush); - flush_tlb_mask(mask); - } - } - - /* We lose existing type, back pointer, and validity. */ - nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); - nx |= type; - - /* No special validation needed for writable pages. */ - /* Page tables and GDT/LDT need to be scanned for validity. */ - if ( type == PGT_writable_page ) - nx |= PGT_validated; - } - } - else - { - if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) - { - if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) - { - if ( current->domain == page_get_owner(page) ) - { - /* - * This ensures functions like set_gdt() see up-to-date - * type info without needing to clean up writable p.t. - * state on the fast path. - */ - LOCK_BIGLOCK(current->domain); - cleanup_writable_pagetable(current->domain); - y = page->u.inuse.type_info; - UNLOCK_BIGLOCK(current->domain); - /* Can we make progress now? */ - if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) || - ((y & PGT_count_mask) == 0) ) - goto again; - } - if ( ((x & PGT_type_mask) != PGT_l2_page_table) || - ((type & PGT_type_mask) != PGT_l1_page_table) ) - MEM_LOG("Bad type (saw %08x != exp %08x) " - "for mfn %016lx (pfn %016lx)", - x, type, page_to_mfn(page), - get_gpfn_from_mfn(page_to_mfn(page))); - return 0; - } - else if ( (x & PGT_va_mask) == PGT_va_mutable ) - { - /* The va backpointer is mutable, hence we update it. */ - nx &= ~PGT_va_mask; - nx |= type; /* we know the actual type is correct */ - } - else if ( ((type & PGT_va_mask) != PGT_va_mutable) && - ((type & PGT_va_mask) != (x & PGT_va_mask)) ) - { -#ifdef CONFIG_X86_PAE - /* We use backptr as extra typing. Cannot be unknown. */ - if ( (type & PGT_type_mask) == PGT_l2_page_table ) - return 0; -#endif - /* This table is possibly mapped at multiple locations. 
*/ - nx &= ~PGT_va_mask; - nx |= PGT_va_unknown; - } - } - if ( unlikely(!(x & PGT_validated)) ) - { - /* Someone else is updating validation of this page. Wait... */ - while ( (y = page->u.inuse.type_info) == x ) - cpu_relax(); - goto again; - } - } - } - while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); - - if ( unlikely(!(nx & PGT_validated)) ) - { - /* Try to validate page type; drop the new reference on failure. */ - if ( unlikely(!alloc_page_type(page, type)) ) - { - MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %08x" - ": caf=%08x taf=%" PRtype_info, - page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), - type, page->count_info, page->u.inuse.type_info); - /* Noone else can get a reference. We hold the only ref. */ - page->u.inuse.type_info = 0; - return 0; - } - - /* Noone else is updating simultaneously. */ - __set_bit(_PGT_validated, &page->u.inuse.type_info); - } - - return 1; -} diff -r 279628dc2d6f -r f662f98d594b xen/include/asm-ia64/domain.h --- a/xen/include/asm-ia64/domain.h Mon Jun 05 14:23:57 2006 -0600 +++ b/xen/include/asm-ia64/domain.h Mon Jun 05 14:28:39 2006 -0600 @@ -13,6 +13,9 @@ #include <asm/fpswa.h> extern void domain_relinquish_resources(struct domain *); + +/* given a current domain metaphysical address, return the physical address */ +extern unsigned long translate_domain_mpaddr(unsigned long mpaddr); /* Flush cache of domain d. If sync_only is true, only synchronize I&D caches, diff -r 279628dc2d6f -r f662f98d594b xen/arch/ia64/xen/faults.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/xen/faults.c Mon Jun 05 14:28:39 2006 -0600 @@ -0,0 +1,662 @@ + +/* + * Miscellaneous process/domain related routines + * + * Copyright (C) 2004 Hewlett-Packard Co. 
+ * Dan Magenheimer (dan.magenheimer@xxxxxx) + * + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/errno.h> +#include <xen/sched.h> +#include <xen/smp.h> +#include <asm/ptrace.h> +#include <xen/delay.h> + +#include <asm/system.h> +#include <asm/processor.h> +#include <xen/irq.h> +#include <xen/event.h> +#include <asm/privop.h> +#include <asm/vcpu.h> +#include <asm/ia64_int.h> +#include <asm/dom_fw.h> +#include <asm/vhpt.h> +#include <asm/debugger.h> +#include <asm/fpswa.h> + +extern void die_if_kernel(char *str, struct pt_regs *regs, long err); +/* FIXME: where these declarations shold be there ? */ +extern int ia64_hyperprivop(unsigned long, REGS *); +extern IA64FAULT ia64_hypercall(struct pt_regs *regs); + +#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT) +// note IA64_PSR_PK removed from following, why is this necessary? +#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \ + IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \ + IA64_PSR_IT | IA64_PSR_BN) + +#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \ + IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \ + IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \ + IA64_PSR_CPL | IA64_PSR_MC | IA64_PSR_IS | \ + IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ + IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA) + + +extern void do_ssc(unsigned long ssc, struct pt_regs *regs); + +unsigned long slow_reflect_count[0x80] = { 0 }; +unsigned long fast_reflect_count[0x80] = { 0 }; + +#define inc_slow_reflect_count(vec) slow_reflect_count[vec>>8]++; + +void zero_reflect_counts(void) +{ + int i; + for (i=0; i<0x80; i++) slow_reflect_count[i] = 0; + for (i=0; i<0x80; i++) fast_reflect_count[i] = 0; +} + +int dump_reflect_counts(char *buf) +{ + int i,j,cnt; + char *s = buf; + + s += sprintf(s,"Slow reflections by vector:\n"); + for (i = 0, j = 0; i < 0x80; i++) { + if ( (cnt = slow_reflect_count[i]) != 0 ) { + s += sprintf(s,"0x%02x00:%10d, ",i,cnt); + if ((j++ & 3) == 3) s += sprintf(s,"\n"); + 
} + } + if (j & 3) s += sprintf(s,"\n"); + s += sprintf(s,"Fast reflections by vector:\n"); + for (i = 0, j = 0; i < 0x80; i++) { + if ( (cnt = fast_reflect_count[i]) != 0 ) { + s += sprintf(s,"0x%02x00:%10d, ",i,cnt); + if ((j++ & 3) == 3) s += sprintf(s,"\n"); + } + } + if (j & 3) s += sprintf(s,"\n"); + return s - buf; +} + +// should never panic domain... if it does, stack may have been overrun +void check_bad_nested_interruption(unsigned long isr, struct pt_regs *regs, unsigned long vector) +{ + struct vcpu *v = current; + + if (!(PSCB(v,ipsr) & IA64_PSR_DT)) { + panic_domain(regs,"psr.dt off, trying to deliver nested dtlb!\n"); + } + vector &= ~0xf; + if (vector != IA64_DATA_TLB_VECTOR && + vector != IA64_ALT_DATA_TLB_VECTOR && + vector != IA64_VHPT_TRANS_VECTOR) { + panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%lx,iip=%lx,ifa=%lx,isr=%lx,PSCB.iip=%lx\n", + vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip)); + } +} + +void reflect_interruption(unsigned long isr, struct pt_regs *regs, unsigned long vector) +{ + struct vcpu *v = current; + + if (!PSCB(v,interrupt_collection_enabled)) + check_bad_nested_interruption(isr,regs,vector); + PSCB(v,unat) = regs->ar_unat; // not sure if this is really needed? 
+ PSCB(v,precover_ifs) = regs->cr_ifs; + vcpu_bsw0(v); + PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr); + PSCB(v,isr) = isr; + PSCB(v,iip) = regs->cr_iip; + PSCB(v,ifs) = 0; + PSCB(v,incomplete_regframe) = 0; + + regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL; + regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; + regs->r31 = XSI_IPSR; + + v->vcpu_info->evtchn_upcall_mask = 1; + PSCB(v,interrupt_collection_enabled) = 0; + + inc_slow_reflect_count(vector); +} + +static unsigned long pending_false_positive = 0; + +void reflect_extint(struct pt_regs *regs) +{ + unsigned long isr = regs->cr_ipsr & IA64_PSR_RI; + struct vcpu *v = current; + static int first_extint = 1; + + if (first_extint) { + printf("Delivering first extint to domain: isr=0x%lx, iip=0x%lx\n", isr, regs->cr_iip); + first_extint = 0; + } + if (vcpu_timer_pending_early(v)) +printf("*#*#*#* about to deliver early timer to domain %d!!!\n",v->domain->domain_id); + PSCB(current,itir) = 0; + reflect_interruption(isr,regs,IA64_EXTINT_VECTOR); +} + +void reflect_event(struct pt_regs *regs) +{ + unsigned long isr = regs->cr_ipsr & IA64_PSR_RI; + struct vcpu *v = current; + + /* Sanity check */ + if (is_idle_vcpu(v) || !user_mode(regs)) { + //printk("WARN: invocation to reflect_event in nested xen\n"); + return; + } + + if (!event_pending(v)) + return; + + if (!PSCB(v,interrupt_collection_enabled)) + printf("psr.ic off, delivering event, ipsr=%lx,iip=%lx,isr=%lx,viip=0x%lx\n", + regs->cr_ipsr, regs->cr_iip, isr, PSCB(v, iip)); + PSCB(v,unat) = regs->ar_unat; // not sure if this is really needed? 
+ PSCB(v,precover_ifs) = regs->cr_ifs; + vcpu_bsw0(v); + PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr); + PSCB(v,isr) = isr; + PSCB(v,iip) = regs->cr_iip; + PSCB(v,ifs) = 0; + PSCB(v,incomplete_regframe) = 0; + + regs->cr_iip = v->arch.event_callback_ip; + regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; + regs->r31 = XSI_IPSR; + + v->vcpu_info->evtchn_upcall_mask = 1; + PSCB(v,interrupt_collection_enabled) = 0; +} + +// ONLY gets called from ia64_leave_kernel +// ONLY call with interrupts disabled?? (else might miss one?) +// NEVER successful if already reflecting a trap/fault because psr.i==0 +void deliver_pending_interrupt(struct pt_regs *regs) +{ + struct domain *d = current->domain; + struct vcpu *v = current; + // FIXME: Will this work properly if doing an RFI??? + if (!is_idle_domain(d) && user_mode(regs)) { + if (vcpu_deliverable_interrupts(v)) + reflect_extint(regs); + else if (PSCB(v,pending_interruption)) + ++pending_false_positive; + } +} +unsigned long lazy_cover_count = 0; + +static int +handle_lazy_cover(struct vcpu *v, struct pt_regs *regs) +{ + if (!PSCB(v,interrupt_collection_enabled)) { + PSCB(v,ifs) = regs->cr_ifs; + PSCB(v,incomplete_regframe) = 1; + regs->cr_ifs = 0; + lazy_cover_count++; + return(1); // retry same instruction with cr.ifs off + } + return(0); +} + +void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir) +{ + unsigned long iip = regs->cr_iip, iha; + // FIXME should validate address here + unsigned long pteval; + unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL); + IA64FAULT fault; + + if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, regs)) return; + if ((isr & IA64_ISR_SP) + || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) + { + /* + * This fault was due to a speculative load or lfetch.fault, set the "ed" + * bit in the psr to ensure forward progress. 
(Target register will get a + * NaT for ld.s, lfetch will be canceled.) + */ + ia64_psr(regs)->ed = 1; + return; + } + + again: + fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha); + if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) { + u64 logps; + pteval = translate_domain_pte(pteval, address, itir, &logps); + vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,logps); + if (fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) { + /* dtlb has been purged in-between. This dtlb was + matching. Undo the work. */ + vcpu_flush_tlb_vhpt_range (address, 1); + goto again; + } + return; + } + + if (!user_mode (regs)) { + /* The fault occurs inside Xen. */ + if (!ia64_done_with_exception(regs)) { + // should never happen. If it does, region 0 addr may + // indicate a bad xen pointer + printk("*** xen_handle_domain_access: exception table" + " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", + iip, address); + panic_domain(regs,"*** xen_handle_domain_access: exception table" + " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", + iip, address); + } + return; + } + if (!PSCB(current,interrupt_collection_enabled)) { + check_bad_nested_interruption(isr,regs,fault); + //printf("Delivering NESTED DATA TLB fault\n"); + fault = IA64_DATA_NESTED_TLB_VECTOR; + regs->cr_iip = ((unsigned long) PSCBX(current,iva) + fault) & ~0xffUL; + regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; + // NOTE: nested trap must NOT pass PSCB address + //regs->r31 = (unsigned long) &PSCB(current); + inc_slow_reflect_count(fault); + return; + } + + PSCB(current,itir) = itir; + PSCB(current,iha) = iha; + PSCB(current,ifa) = address; + reflect_interruption(isr, regs, fault); +} + +fpswa_interface_t *fpswa_interface = 0; + +void trap_init (void) +{ + if (ia64_boot_param->fpswa) + /* FPSWA fixup: make the interface pointer a virtual address: */ + fpswa_interface = __va(ia64_boot_param->fpswa); + else + printk("No FPSWA supported.\n"); +} + +static 
fpswa_ret_t +fp_emulate (int fp_fault, void *bundle, unsigned long *ipsr, + unsigned long *fpsr, unsigned long *isr, unsigned long *pr, + unsigned long *ifs, struct pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; + + if (!fpswa_interface) + return ((fpswa_ret_t) {-1, 0, 0, 0}); + + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * compute fp_state. only FP registers f6 - f11 are used by the + * kernel, so set those bits in the mask and set the low volatile + * pointer to point to these registers. + */ + fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ + + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6; + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + ret = (*fpswa_interface->fpswa)(fp_fault, bundle, + ipsr, fpsr, isr, pr, ifs, &fp_state); + + return ret; +} + +/* + * Handle floating-point assist faults and traps for domain. + */ +unsigned long +handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) +{ + struct vcpu *v = current; + IA64_BUNDLE bundle; + IA64_BUNDLE __get_domain_bundle(UINT64); + unsigned long fault_ip; + fpswa_ret_t ret; + + fault_ip = regs->cr_iip; + /* + * When the FP trap occurs, the trapping instruction is completed. + * If ipsr.ri == 0, there is the trapping instruction in previous bundle. 
+ */ + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + bundle = __get_domain_bundle(fault_ip); + if (!bundle.i64[0] && !bundle.i64[1]) { + printk("%s: floating-point bundle at 0x%lx not mapped\n", + __FUNCTION__, fault_ip); + return -1; + } + + ret = fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr, + &isr, &regs->pr, &regs->cr_ifs, regs); + + if (ret.status) { + PSCBX(v, fpswa_ret) = ret; + printk("%s(%s): fp_emulate() returned %ld\n", + __FUNCTION__, fp_fault?"fault":"trap", ret.status); + } + + return ret.status; +} + +void +ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + unsigned long iim, unsigned long itir, unsigned long arg5, + unsigned long arg6, unsigned long arg7, unsigned long stack) +{ + struct pt_regs *regs = (struct pt_regs *) &stack; + unsigned long code; + static const char *reason[] = { + "IA-64 Illegal Operation fault", + "IA-64 Privileged Operation fault", + "IA-64 Privileged Register fault", + "IA-64 Reserved Register/Field fault", + "Disabled Instruction Set Transition fault", + "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", + "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", + "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" + }; + + printf("ia64_fault, vector=0x%lx, ifa=0x%016lx, iip=0x%016lx, ipsr=0x%016lx, isr=0x%016lx\n", + vector, ifa, regs->cr_iip, regs->cr_ipsr, isr); + + + if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { + /* + * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel + * the lfetch. 
+ */ + ia64_psr(regs)->ed = 1; + printf("ia64_fault: handled lfetch.fault\n"); + return; + } + + switch (vector) { + case 0: + printk("VHPT Translation.\n"); + break; + + case 4: + printk("Alt DTLB.\n"); + break; + + case 6: + printk("Instruction Key Miss.\n"); + break; + + case 7: + printk("Data Key Miss.\n"); + break; + + case 8: + printk("Dirty-bit.\n"); + break; + + case 20: + printk("Page Not Found.\n"); + break; + + case 21: + printk("Key Permission.\n"); + break; + + case 22: + printk("Instruction Access Rights.\n"); + break; + + case 24: /* General Exception */ + code = (isr >> 4) & 0xf; + printk("General Exception: %s%s.\n", reason[code], + (code == 3) ? ((isr & (1UL << 37)) ? " (RSE access)" : + " (data access)") : ""); + if (code == 8) { +# ifdef CONFIG_IA64_PRINT_HAZARDS + printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", + current->comm, current->pid, + regs->cr_iip + ia64_psr(regs)->ri, + regs->pr); +# endif + printf("ia64_fault: returning on hazard\n"); + return; + } + break; + + case 25: + printk("Disabled FP-Register.\n"); + break; + + case 26: + printk("NaT consumption.\n"); + break; + + case 29: + printk("Debug.\n"); + break; + + case 30: + printk("Unaligned Reference.\n"); + break; + + case 31: + printk("Unsupported data reference.\n"); + break; + + case 32: + printk("Floating-Point Fault.\n"); + break; + + case 33: + printk("Floating-Point Trap.\n"); + break; + + case 34: + printk("Lower Privilege Transfer Trap.\n"); + break; + + case 35: + printk("Taken Branch Trap.\n"); + break; + + case 36: + printk("Single Step Trap.\n"); + break; + + case 45: + printk("IA-32 Exception.\n"); + break; + + case 46: + printk("IA-32 Intercept.\n"); + break; + + case 47: + printk("IA-32 Interrupt.\n"); + break; + + default: + printk("Fault %lu\n", vector); + break; + } + + show_registers(regs); + panic("Fault in Xen.\n"); +} + +unsigned long running_on_sim = 0; + + +/* Also read in hyperprivop.S */ +int first_break = 0; + +void +ia64_handle_break 
(unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim) +{ + struct domain *d = current->domain; + struct vcpu *v = current; + IA64FAULT vector; + + if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant + do_ssc(vcpu_get_gr(current,36), regs); + } +#ifdef CRASH_DEBUG + else if ((iim == 0 || iim == CDB_BREAK_NUM) && !user_mode(regs)) { + if (iim == 0) + show_registers(regs); + debugger_trap_fatal(0 /* don't care */, regs); + } +#endif + else if (iim == d->arch.breakimm) { + /* by default, do not continue */ + v->arch.hypercall_continuation = 0; + + if ((vector = ia64_hypercall(regs)) == IA64_NO_FAULT) { + if (!PSCBX(v, hypercall_continuation)) + vcpu_increment_iip(current); + } + else reflect_interruption(isr, regs, vector); + } + else if (!PSCB(v,interrupt_collection_enabled)) { + if (ia64_hyperprivop(iim,regs)) + vcpu_increment_iip(current); + } + else { + if (iim == 0) + die_if_kernel("bug check", regs, iim); + PSCB(v,iim) = iim; + reflect_interruption(isr,regs,IA64_BREAK_VECTOR); + } +} + +void +ia64_handle_privop (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long itir) +{ + IA64FAULT vector; + + vector = priv_emulate(current,regs,isr); + if (vector != IA64_NO_FAULT && vector != IA64_RFI_IN_PROGRESS) { + // Note: if a path results in a vector to reflect that requires + // iha/itir (e.g. vcpu_force_data_miss), they must be set there + reflect_interruption(isr,regs,vector); + } +} + +/* Used in vhpt.h. 
*/ +#define INTR_TYPE_MAX 10 +UINT64 int_counts[INTR_TYPE_MAX]; + +void +ia64_handle_reflection (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim, unsigned long vector) +{ + struct vcpu *v = current; + unsigned long check_lazy_cover = 0; + unsigned long psr = regs->cr_ipsr; + + /* Following faults shouldn'g be seen from Xen itself */ + if (!(psr & IA64_PSR_CPL)) BUG(); + + switch(vector) { + case 8: + vector = IA64_DIRTY_BIT_VECTOR; break; + case 9: + vector = IA64_INST_ACCESS_BIT_VECTOR; break; + case 10: + check_lazy_cover = 1; + vector = IA64_DATA_ACCESS_BIT_VECTOR; break; + case 20: + check_lazy_cover = 1; + vector = IA64_PAGE_NOT_PRESENT_VECTOR; break; + case 22: + vector = IA64_INST_ACCESS_RIGHTS_VECTOR; break; + case 23: + check_lazy_cover = 1; + vector = IA64_DATA_ACCESS_RIGHTS_VECTOR; break; + case 25: + vector = IA64_DISABLED_FPREG_VECTOR; + break; + case 26: + if (((isr >> 4L) & 0xfL) == 1) { + //regs->eml_unat = 0; FIXME: DO WE NEED THIS?? + printf("ia64_handle_reflection: handling regNaT fault\n"); + vector = IA64_NAT_CONSUMPTION_VECTOR; break; + } +#if 1 + // pass null pointer dereferences through with no error + // but retain debug output for non-zero ifa + if (!ifa) { + vector = IA64_NAT_CONSUMPTION_VECTOR; break; + } +#endif + printf("*** NaT fault... attempting to handle as privop\n"); + printf("isr=%016lx, ifa=%016lx, iip=%016lx, ipsr=%016lx\n", + isr, ifa, regs->cr_iip, psr); + //regs->eml_unat = 0; FIXME: DO WE NEED THIS??? + // certain NaT faults are higher priority than privop faults + vector = priv_emulate(v,regs,isr); + if (vector == IA64_NO_FAULT) { + printf("*** Handled privop masquerading as NaT fault\n"); + return; + } + vector = IA64_NAT_CONSUMPTION_VECTOR; break; + case 27: + //printf("*** Handled speculation vector, itc=%lx!\n",ia64_get_itc()); + PSCB(current,iim) = iim; + vector = IA64_SPECULATION_VECTOR; break; + case 30: + // FIXME: Should we handle unaligned refs in Xen?? 
+ vector = IA64_UNALIGNED_REF_VECTOR; break; + case 32: + if (!(handle_fpu_swa(1, regs, isr))) { + vcpu_increment_iip(v); + return; + } + printf("ia64_handle_reflection: handling FP fault\n"); + vector = IA64_FP_FAULT_VECTOR; break; + case 33: + if (!(handle_fpu_swa(0, regs, isr))) return; + printf("ia64_handle_reflection: handling FP trap\n"); + vector = IA64_FP_TRAP_VECTOR; break; + case 34: + printf("ia64_handle_reflection: handling lowerpriv trap\n"); + vector = IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR; break; + case 35: + printf("ia64_handle_reflection: handling taken branch trap\n"); + vector = IA64_TAKEN_BRANCH_TRAP_VECTOR; break; + case 36: + printf("ia64_handle_reflection: handling single step trap\n"); + vector = IA64_SINGLE_STEP_TRAP_VECTOR; break; + + default: + printf("ia64_handle_reflection: unhandled vector=0x%lx\n",vector); + while(vector); + return; + } + if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, regs)) return; + PSCB(current,ifa) = ifa; + PSCB(current,itir) = vcpu_get_itir_on_fault(v,ifa); + reflect_interruption(isr,regs,vector); +} + diff -r 279628dc2d6f -r f662f98d594b xen/arch/ia64/xen/fw_emul.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/ia64/xen/fw_emul.c Mon Jun 05 14:28:39 2006 -0600 @@ -0,0 +1,453 @@ +#include <xen/config.h> +#include <asm/system.h> +#include <asm/pgalloc.h> + +#include <linux/efi.h> +#include <asm/pal.h> +#include <asm/sal.h> + +#include <public/sched.h> +#include "hpsim_ssc.h" +#include <asm/vcpu.h> +#include <asm/dom_fw.h> + +extern unsigned long running_on_sim; + +struct sal_ret_values +sal_emulator (long index, unsigned long in1, unsigned long in2, + unsigned long in3, unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + unsigned long r9 = 0; + unsigned long r10 = 0; + long r11 = 0; + long status; + + status = 0; + switch (index) { + case SAL_FREQ_BASE: + if (!running_on_sim) + status = ia64_sal_freq_base(in1,&r9,&r10); + else switch (in1) { + case 
SAL_FREQ_BASE_PLATFORM: + r9 = 200000000; + break; + + case SAL_FREQ_BASE_INTERVAL_TIMER: + r9 = 700000000; + break; + + case SAL_FREQ_BASE_REALTIME_CLOCK: + r9 = 1; + break; + + default: + status = -1; + break; + } + break; + case SAL_PCI_CONFIG_READ: + if (current->domain == dom0) { + u64 value; + // note that args 2&3 are swapped!! + status = ia64_sal_pci_config_read(in1,in3,in2,&value); + r9 = value; + } + else + printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_READ\n"); + break; + case SAL_PCI_CONFIG_WRITE: + if (current->domain == dom0) { + if (((in1 & ~0xffffffffUL) && (in4 == 0)) || + (in4 > 1) || + (in2 > 8) || (in2 & (in2-1))) + printf("*** SAL_PCI_CONF_WRITE?!?(adr=0x%lx,typ=0x%lx,sz=0x%lx,val=0x%lx)\n", + in1,in4,in2,in3); + // note that args are in a different order!! + status = ia64_sal_pci_config_write(in1,in4,in2,in3); + } + else + printf("NON-PRIV DOMAIN CALLED SAL_PCI_CONFIG_WRITE\n"); + break; + case SAL_SET_VECTORS: + if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { + if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { + /* Sanity check: cs_length1 must be 0, + second vector is reserved. */ + status = -2; + } + else { + struct domain *d = current->domain; + d->arch.boot_rdv_ip = in2; + d->arch.boot_rdv_r1 = in3; + } + } + else + printf("*** CALLED SAL_SET_VECTORS %lu. IGNORED...\n", + in1); + break; + case SAL_GET_STATE_INFO: + /* No more info. */ + status = -5; + r9 = 0; + break; + case SAL_GET_STATE_INFO_SIZE: + /* Return a dummy size. */ + status = 0; + r9 = 128; + break; + case SAL_CLEAR_STATE_INFO: + /* Noop. */ + break; + case SAL_MC_RENDEZ: + printf("*** CALLED SAL_MC_RENDEZ. IGNORED...\n"); + break; + case SAL_MC_SET_PARAMS: + printf("*** CALLED SAL_MC_SET_PARAMS. IGNORED...\n"); + break; + case SAL_CACHE_FLUSH: + if (1) { + /* Flush using SAL. + This method is faster but has a side effect on + other vcpu running on this cpu. */ + status = ia64_sal_cache_flush (in1); + } + else { + /* Flush with fc all the domain. 
+ This method is slower but has no side effects. */ + domain_cache_flush (current->domain, in1 == 4 ? 1 : 0); + status = 0; + } + break; + case SAL_CACHE_INIT: + printf("*** CALLED SAL_CACHE_INIT. IGNORED...\n"); + break; + case SAL_UPDATE_PAL: + printf("*** CALLED SAL_UPDATE_PAL. IGNORED...\n"); + break; + default: + printf("*** CALLED SAL_ WITH UNKNOWN INDEX. IGNORED...\n"); + status = -1; + break; + } + return ((struct sal_ret_values) {status, r9, r10, r11}); +} + +struct ia64_pal_retval +xen_pal_emulator(unsigned long index, u64 in1, u64 in2, u64 in3) +{ + unsigned long r9 = 0; + unsigned long r10 = 0; + unsigned long r11 = 0; + long status = PAL_STATUS_UNIMPLEMENTED; + + if (running_on_sim) + return pal_emulator_static(index); + + // pal code must be mapped by a TR when pal is called, however + // calls are rare enough that we will map it lazily rather than + // at every context switch + //efi_map_pal_code(); + switch (index) { + case PAL_MEM_ATTRIB: + status = ia64_pal_mem_attrib(&r9); + break; + case PAL_FREQ_BASE: + status = ia64_pal_freq_base(&r9); + break; + case PAL_PROC_GET_FEATURES: + status = ia64_pal_proc_get_features(&r9,&r10,&r11); + break; + case PAL_BUS_GET_FEATURES: + status = ia64_pal_bus_get_features( + (pal_bus_features_u_t *) &r9, + (pal_bus_features_u_t *) &r10, + (pal_bus_features_u_t *) &r11); + break; + case PAL_FREQ_RATIOS: + status = ia64_pal_freq_ratios( + (struct pal_freq_ratio *) &r9, + (struct pal_freq_ratio *) &r10, + (struct pal_freq_ratio *) &r11); + break; + case PAL_PTCE_INFO: + { + // return hard-coded xen-specific values because ptc.e + // is emulated on xen to always flush everything + // these values result in only one ptc.e instruction + status = 0; r9 = 0; r10 = (1L << 32) | 1L; r11 = 0; + } + break; + case PAL_VERSION: + status = ia64_pal_version( + (pal_version_u_t *) &r9, + (pal_version_u_t *) &r10); + break; + case PAL_VM_PAGE_SIZE: + status = ia64_pal_vm_page_size(&r9,&r10); + break; + case PAL_DEBUG_INFO: + status 
= ia64_pal_debug_info(&r9,&r10); + break; + case PAL_CACHE_SUMMARY: + status = ia64_pal_cache_summary(&r9,&r10); + break; + case PAL_VM_SUMMARY: + { + /* Use xen-specific values. + hash_tag_id is somewhat random! */ + const pal_vm_info_1_u_t v1 = + {.pal_vm_info_1_s = + { .vw = 1, + .phys_add_size = 44, + .key_size = 16, + .max_pkr = 15, + .hash_tag_id = 0x30, + .max_dtr_entry = NDTRS - 1, + .max_itr_entry = NITRS - 1, +#ifdef VHPT_GLOBAL + .max_unique_tcs = 3, + .num_tc_levels = 2 +#else + .max_unique_tcs = 2, + .num_tc_levels = 1 +#endif + }}; + const pal_vm_info_2_u_t v2 = + { .pal_vm_info_2_s = + { .impl_va_msb = 50, + .rid_size = current->domain->arch.rid_bits, + .reserved = 0 }}; + r9 = v1.pvi1_val; + r10 = v2.pvi2_val; + status = PAL_STATUS_SUCCESS; + } + break; + case PAL_VM_INFO: +#ifdef VHPT_GLOBAL + if (in1 == 0 && in2 == 2) { + /* Level 1: VHPT */ + const pal_tc_info_u_t v = + { .pal_tc_info_s = {.num_sets = 128, + .associativity = 1, + .num_entries = 128, + .pf = 1, + .unified = 1, + .reduce_tr = 0, + .reserved = 0}}; + r9 = v.pti_val; + /* Only support PAGE_SIZE tc. */ + r10 = PAGE_SIZE; + status = PAL_STATUS_SUCCESS; + } +#endif + else if ( +#ifdef VHPT_GLOBAL + in1 == 1 /* Level 2. */ +#else + in1 == 0 /* Level 1. */ +#endif + && (in2 == 1 || in2 == 2)) + { + /* itlb/dtlb, 1 entry. */ + const pal_tc_info_u_t v = + { .pal_tc_info_s = {.num_sets = 1, + .associativity = 1, + .num_entries = 1, + .pf = 1, + .unified = 0, + .reduce_tr = 0, + .reserved = 0}}; + r9 = v.pti_val; + /* Only support PAGE_SIZE tc. 
*/ + r10 = PAGE_SIZE; + status = PAL_STATUS_SUCCESS; + } + else + status = PAL_STATUS_EINVAL; + break; + case PAL_RSE_INFO: + status = ia64_pal_rse_info( + &r9, + (pal_hints_u_t *) &r10); + break; + case PAL_REGISTER_INFO: + status = ia64_pal_register_info(in1, &r9, &r10); + break; + case PAL_CACHE_FLUSH: + /* FIXME */ + printk("PAL_CACHE_FLUSH NOT IMPLEMENTED!\n"); + BUG(); + break; + case PAL_PERF_MON_INFO: + { + unsigned long pm_buffer[16]; + status = ia64_pal_perf_mon_info( + pm_buffer, + (pal_perf_mon_info_u_t *) &r9); + if (status != 0) { + while(1) + printk("PAL_PERF_MON_INFO fails ret=%ld\n", status); + break; + } + if (copy_to_user((void __user *)in1,pm_buffer,128)) { + while(1) + printk("xen_pal_emulator: PAL_PERF_MON_INFO " + "can't copy to user!!!!\n"); + status = PAL_STATUS_UNIMPLEMENTED; + break; + } + } + break; + case PAL_CACHE_INFO: + { + pal_cache_config_info_t ci; + status = ia64_pal_cache_config_info(in1,in2,&ci); + if (status != 0) break; + r9 = ci.pcci_info_1.pcci1_data; + r10 = ci.pcci_info_2.pcci2_data; + } + break; + case PAL_VM_TR_READ: /* FIXME: vcpu_get_tr?? */ + printk("PAL_VM_TR_READ NOT IMPLEMENTED, IGNORED!\n"); + break; + case PAL_HALT_INFO: + { + /* 1000 cycles to enter/leave low power state, + consumes 10 mW, implemented and cache/TLB coherent. 
*/ + unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32) + | (1UL << 61) | (1UL << 60); + if (copy_to_user ((void *)in1, &res, sizeof (res))) + status = PAL_STATUS_EINVAL; + else + status = PAL_STATUS_SUCCESS; + } + break; + case PAL_HALT: + if (current->domain == dom0) { + printf ("Domain0 halts the machine\n"); + (*efi.reset_system)(EFI_RESET_SHUTDOWN,0,0,NULL); + } + else + domain_shutdown (current->domain, + SHUTDOWN_poweroff); + break; + default: + printk("xen_pal_emulator: UNIMPLEMENTED PAL CALL %lu!!!!\n", + index); + break; + } + return ((struct ia64_pal_retval) {status, r9, r10, r11}); +} + +void +do_ssc(unsigned long ssc, struct pt_regs *regs) +{ + unsigned long arg0, arg1, arg2, arg3, retval; + char buf[2]; +/**/ static int last_fd, last_count; // FIXME FIXME FIXME +/**/ // BROKEN FOR MULTIPLE DOMAINS & SMP +/**/ struct ssc_disk_stat { int fd; unsigned count;} *stat, last_stat; + + arg0 = vcpu_get_gr(current,32); + switch(ssc) { + case SSC_PUTCHAR: + buf[0] = arg0; + buf[1] = '\0'; + printf(buf); + break; + case SSC_GETCHAR: + retval = ia64_ssc(0,0,0,0,ssc); + vcpu_set_gr(current,8,retval,0); + break; + case SSC_WAIT_COMPLETION: + if (arg0) { // metaphysical address + + arg0 = translate_domain_mpaddr(arg0); +/**/ stat = (struct ssc_disk_stat *)__va(arg0); +///**/ if (stat->fd == last_fd) stat->count = last_count; +/**/ stat->count = last_count; +//if (last_count >= PAGE_SIZE) printf("ssc_wait: stat->fd=%d,last_fd=%d,last_count=%d\n",stat->fd,last_fd,last_count); +///**/ retval = ia64_ssc(arg0,0,0,0,ssc); +/**/ retval = 0; + } + else retval = -1L; + vcpu_set_gr(current,8,retval,0); + break; + case SSC_OPEN: + arg1 = vcpu_get_gr(current,33); // access rights +if (!running_on_sim) { printf("SSC_OPEN, not implemented on hardware. 
(ignoring...)\n"); arg0 = 0; } + if (arg0) { // metaphysical address + arg0 = translate_domain_mpaddr(arg0); + retval = ia64_ssc(arg0,arg1,0,0,ssc); + } + else retval = -1L; + vcpu_set_gr(current,8,retval,0); + break; + case SSC_WRITE: + case SSC_READ: +//if (ssc == SSC_WRITE) printf("DOING AN SSC_WRITE\n"); + arg1 = vcpu_get_gr(current,33); + arg2 = vcpu_get_gr(current,34); + arg3 = vcpu_get_gr(current,35); + if (arg2) { // metaphysical address of descriptor + struct ssc_disk_req *req; + unsigned long mpaddr; + long len; + + arg2 = translate_domain_mpaddr(arg2); + req = (struct ssc_disk_req *) __va(arg2); + req->len &= 0xffffffffL; // avoid strange bug + len = req->len; +/**/ last_fd = arg1; +/**/ last_count = len; + mpaddr = req->addr; +//if (last_count >= PAGE_SIZE) printf("do_ssc: read fd=%d, addr=%p, len=%lx ",last_fd,mpaddr,len); + retval = 0; + if ((mpaddr & PAGE_MASK) != ((mpaddr+len-1) & PAGE_MASK)) { + // do partial page first + req->addr = translate_domain_mpaddr(mpaddr); + req->len = PAGE_SIZE - (req->addr & ~PAGE_MASK); + len -= req->len; mpaddr += req->len; + retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); + arg3 += req->len; // file offset +/**/ last_stat.fd = last_fd; +/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION); +//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)[part]=%x ",req->addr,req->len,retval); + } + if (retval >= 0) while (len > 0) { + req->addr = translate_domain_mpaddr(mpaddr); + req->len = (len > PAGE_SIZE) ? 
PAGE_SIZE : len; + len -= PAGE_SIZE; mpaddr += PAGE_SIZE; + retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); + arg3 += req->len; // file offset +// TEMP REMOVED AGAIN arg3 += req->len; // file offset +/**/ last_stat.fd = last_fd; +/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION); +//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)=%x ",req->addr,req->len,retval); + } + // set it back to the original value + req->len = last_count; + } + else retval = -1L; + vcpu_set_gr(current,8,retval,0); +//if (last_count >= PAGE_SIZE) printf("retval=%x\n",retval); + break; + case SSC_CONNECT_INTERRUPT: + arg1 = vcpu_get_gr(current,33); + arg2 = vcpu_get_gr(current,34); + arg3 = vcpu_get_gr(current,35); + if (!running_on_sim) { printf("SSC_CONNECT_INTERRUPT, not implemented on hardware. (ignoring...)\n"); break; } + (void)ia64_ssc(arg0,arg1,arg2,arg3,ssc); + break; + case SSC_NETDEV_PROBE: + vcpu_set_gr(current,8,-1L,0); + break; + default: + printf("ia64_handle_break: bad ssc code %lx, iip=0x%lx, b0=0x%lx... spinning\n", + ssc, regs->cr_iip, regs->b0); + while(1); + break; + } + vcpu_increment_iip(current); +} diff -r 279628dc2d6f -r f662f98d594b xen/arch/ia64/xen/process.c --- a/xen/arch/ia64/xen/process.c Mon Jun 05 14:23:57 2006 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,949 +0,0 @@ - -/* - * Miscellaneous process/domain related routines - * - * Copyright (C) 2004 Hewlett-Packard Co. 
- * Dan Magenheimer (dan.magenheimer@xxxxxx) - * - */ - -#include <xen/config.h> -#include <xen/lib.h> -#include <xen/errno.h> -#include <xen/sched.h> -#include <xen/smp.h> -#include <asm/ptrace.h> -#include <xen/delay.h> - -#include <asm/sal.h> /* FOR struct ia64_sal_retval */ - -#include <asm/system.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/desc.h> -//#include <asm/ldt.h> -#include <xen/irq.h> -#include <xen/event.h> -#include <asm/regionreg.h> -#include <asm/privop.h> -#include <asm/vcpu.h> -#include <asm/ia64_int.h> -#include <asm/dom_fw.h> -#include <asm/vhpt.h> -#include "hpsim_ssc.h" -#include <xen/multicall.h> -#include <asm/debugger.h> -#include <asm/fpswa.h> - -extern void die_if_kernel(char *str, struct pt_regs *regs, long err); -/* FIXME: where these declarations shold be there ? */ -extern void panic_domain(struct pt_regs *, const char *, ...); -extern long platform_is_hp_ski(void); -extern int ia64_hyperprivop(unsigned long, REGS *); -extern IA64FAULT ia64_hypercall(struct pt_regs *regs); -extern void vmx_do_launch(struct vcpu *); -extern unsigned long lookup_domain_mpa(struct domain *,unsigned long); - -#define IA64_PSR_CPL1 (__IA64_UL(1) << IA64_PSR_CPL1_BIT) -// note IA64_PSR_PK removed from following, why is this necessary? 
-#define DELIVER_PSR_SET (IA64_PSR_IC | IA64_PSR_I | \ - IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_CPL1 | \ - IA64_PSR_IT | IA64_PSR_BN) - -#define DELIVER_PSR_CLR (IA64_PSR_AC | IA64_PSR_DFL | IA64_PSR_DFH | \ - IA64_PSR_SP | IA64_PSR_DI | IA64_PSR_SI | \ - IA64_PSR_DB | IA64_PSR_LP | IA64_PSR_TB | \ - IA64_PSR_CPL | IA64_PSR_MC | IA64_PSR_IS | \ - IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ - IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA) - -#include <xen/sched-if.h> - -void schedule_tail(struct vcpu *prev) -{ - extern char ia64_ivt; - context_saved(prev); - - if (VMX_DOMAIN(current)) { - vmx_do_launch(current); - } else { - ia64_set_iva(&ia64_ivt); - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); - load_region_regs(current); - vcpu_load_kernel_regs(current); - } -} - -void tdpfoo(void) { } - -// given a domain virtual address, pte and pagesize, extract the metaphysical -// address, convert the pte for a physical address for (possibly different) -// Xen PAGE_SIZE and return modified pte. (NOTE: TLB insert should use -// PAGE_SIZE!) -u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* logps) -{ - struct domain *d = current->domain; - ia64_itir_t itir = {.itir = itir__}; - u64 mask, mpaddr, pteval2; - u64 arflags; - u64 arflags2; - - pteval &= ((1UL << 53) - 1);// ignore [63:53] bits - - // FIXME address had better be pre-validated on insert - mask = ~itir_mask(itir.itir); - mpaddr = (((pteval & ~_PAGE_ED) & _PAGE_PPN_MASK) & ~mask) | - (address & mask); -#ifdef CONFIG_XEN_IA64_DOM0_VP - if (itir.ps > PAGE_SHIFT) { - itir.ps = PAGE_SHIFT; - } -#endif - *logps = itir.ps; -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (d == dom0) { - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - /* - printk("translate_domain_pte: out-of-bounds dom0 mpaddr 0x%lx! itc=%lx...\n", - mpaddr, ia64_get_itc()); - */ - tdpfoo(); - } - } - else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) { - /* Address beyond the limit. 
However the grant table is - also beyond the limit. Display a message if not in the - grant table. */ - if (mpaddr >= IA64_GRANT_TABLE_PADDR - && mpaddr < (IA64_GRANT_TABLE_PADDR - + (ORDER_GRANT_FRAMES << PAGE_SHIFT))) - printf("translate_domain_pte: bad mpa=0x%lx (> 0x%lx)," - "vadr=0x%lx,pteval=0x%lx,itir=0x%lx\n", - mpaddr, (unsigned long)d->max_pages<<PAGE_SHIFT, - address, pteval, itir.itir); - tdpfoo(); - } -#endif - pteval2 = lookup_domain_mpa(d,mpaddr); - arflags = pteval & _PAGE_AR_MASK; - arflags2 = pteval2 & _PAGE_AR_MASK; - if (arflags != _PAGE_AR_R && arflags2 == _PAGE_AR_R) { -#if 0 - DPRINTK("%s:%d " - "pteval 0x%lx arflag 0x%lx address 0x%lx itir 0x%lx " - "pteval2 0x%lx arflags2 0x%lx mpaddr 0x%lx\n", - __func__, __LINE__, - pteval, arflags, address, itir__, - pteval2, arflags2, mpaddr); -#endif - pteval = (pteval & ~_PAGE_AR_MASK) | _PAGE_AR_R; -} - - pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits - pteval2 |= (pteval & _PAGE_ED); - pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected) - pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2; - return pteval2; -} - -// given a current domain metaphysical address, return the physical address -unsigned long translate_domain_mpaddr(unsigned long mpaddr) -{ - unsigned long pteval; - -#ifndef CONFIG_XEN_IA64_DOM0_VP - if (current->domain == dom0) { - if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) { - printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr 0x%lx! 
continuing...\n", - mpaddr); - tdpfoo(); - } - } -#endif - pteval = lookup_domain_mpa(current->domain,mpaddr); - return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK)); -} - -unsigned long slow_reflect_count[0x80] = { 0 }; -unsigned long fast_reflect_count[0x80] = { 0 }; - -#define inc_slow_reflect_count(vec) slow_reflect_count[vec>>8]++; - -void zero_reflect_counts(void) -{ - int i; - for (i=0; i<0x80; i++) slow_reflect_count[i] = 0; - for (i=0; i<0x80; i++) fast_reflect_count[i] = 0; -} - -int dump_reflect_counts(char *buf) -{ - int i,j,cnt; - char *s = buf; - - s += sprintf(s,"Slow reflections by vector:\n"); - for (i = 0, j = 0; i < 0x80; i++) { - if ( (cnt = slow_reflect_count[i]) != 0 ) { - s += sprintf(s,"0x%02x00:%10d, ",i,cnt); - if ((j++ & 3) == 3) s += sprintf(s,"\n"); - } - } - if (j & 3) s += sprintf(s,"\n"); - s += sprintf(s,"Fast reflections by vector:\n"); - for (i = 0, j = 0; i < 0x80; i++) { - if ( (cnt = fast_reflect_count[i]) != 0 ) { - s += sprintf(s,"0x%02x00:%10d, ",i,cnt); - if ((j++ & 3) == 3) s += sprintf(s,"\n"); - } - } - if (j & 3) s += sprintf(s,"\n"); - return s - buf; -} - -// should never panic domain... 
if it does, stack may have been overrun -void check_bad_nested_interruption(unsigned long isr, struct pt_regs *regs, unsigned long vector) -{ - struct vcpu *v = current; - - if (!(PSCB(v,ipsr) & IA64_PSR_DT)) { - panic_domain(regs,"psr.dt off, trying to deliver nested dtlb!\n"); - } - vector &= ~0xf; - if (vector != IA64_DATA_TLB_VECTOR && - vector != IA64_ALT_DATA_TLB_VECTOR && - vector != IA64_VHPT_TRANS_VECTOR) { - panic_domain(regs,"psr.ic off, delivering fault=%lx,ipsr=%lx,iip=%lx,ifa=%lx,isr=%lx,PSCB.iip=%lx\n", - vector,regs->cr_ipsr,regs->cr_iip,PSCB(v,ifa),isr,PSCB(v,iip)); - } -} - -void reflect_interruption(unsigned long isr, struct pt_regs *regs, unsigned long vector) -{ - struct vcpu *v = current; - - if (!PSCB(v,interrupt_collection_enabled)) - check_bad_nested_interruption(isr,regs,vector); - PSCB(v,unat) = regs->ar_unat; // not sure if this is really needed? - PSCB(v,precover_ifs) = regs->cr_ifs; - vcpu_bsw0(v); - PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr); - PSCB(v,isr) = isr; - PSCB(v,iip) = regs->cr_iip; - PSCB(v,ifs) = 0; - PSCB(v,incomplete_regframe) = 0; - - regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL; - regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; - regs->r31 = XSI_IPSR; - - v->vcpu_info->evtchn_upcall_mask = 1; - PSCB(v,interrupt_collection_enabled) = 0; - - inc_slow_reflect_count(vector); -} - -void foodpi(void) {} - -static unsigned long pending_false_positive = 0; - -void reflect_extint(struct pt_regs *regs) -{ - unsigned long isr = regs->cr_ipsr & IA64_PSR_RI; - struct vcpu *v = current; - static int first_extint = 1; - - if (first_extint) { - printf("Delivering first extint to domain: isr=0x%lx, iip=0x%lx\n", isr, regs->cr_iip); - first_extint = 0; - } - if (vcpu_timer_pending_early(v)) -printf("*#*#*#* about to deliver early timer to domain %d!!!\n",v->domain->domain_id); - PSCB(current,itir) = 0; - reflect_interruption(isr,regs,IA64_EXTINT_VECTOR); -} - -void 
reflect_event(struct pt_regs *regs) -{ - unsigned long isr = regs->cr_ipsr & IA64_PSR_RI; - struct vcpu *v = current; - - /* Sanity check */ - if (is_idle_vcpu(v) || !user_mode(regs)) { - //printk("WARN: invocation to reflect_event in nested xen\n"); - return; - } - - if (!event_pending(v)) - return; - - if (!PSCB(v,interrupt_collection_enabled)) - printf("psr.ic off, delivering event, ipsr=%lx,iip=%lx,isr=%lx,viip=0x%lx\n", - regs->cr_ipsr, regs->cr_iip, isr, PSCB(v, iip)); - PSCB(v,unat) = regs->ar_unat; // not sure if this is really needed? - PSCB(v,precover_ifs) = regs->cr_ifs; - vcpu_bsw0(v); - PSCB(v,ipsr) = vcpu_get_ipsr_int_state(v,regs->cr_ipsr); - PSCB(v,isr) = isr; - PSCB(v,iip) = regs->cr_iip; - PSCB(v,ifs) = 0; - PSCB(v,incomplete_regframe) = 0; - - regs->cr_iip = v->arch.event_callback_ip; - regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; - regs->r31 = XSI_IPSR; - - v->vcpu_info->evtchn_upcall_mask = 1; - PSCB(v,interrupt_collection_enabled) = 0; -} - -// ONLY gets called from ia64_leave_kernel -// ONLY call with interrupts disabled?? (else might miss one?) -// NEVER successful if already reflecting a trap/fault because psr.i==0 -void deliver_pending_interrupt(struct pt_regs *regs) -{ - struct domain *d = current->domain; - struct vcpu *v = current; - // FIXME: Will this work properly if doing an RFI??? 
- if (!is_idle_domain(d) && user_mode(regs)) { - if (vcpu_deliverable_interrupts(v)) - reflect_extint(regs); - else if (PSCB(v,pending_interruption)) - ++pending_false_positive; - } -} -unsigned long lazy_cover_count = 0; - -static int -handle_lazy_cover(struct vcpu *v, struct pt_regs *regs) -{ - if (!PSCB(v,interrupt_collection_enabled)) { - PSCB(v,ifs) = regs->cr_ifs; - PSCB(v,incomplete_regframe) = 1; - regs->cr_ifs = 0; - lazy_cover_count++; - return(1); // retry same instruction with cr.ifs off - } - return(0); -} - -void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs, unsigned long itir) -{ - unsigned long iip = regs->cr_iip, iha; - // FIXME should validate address here - unsigned long pteval; - unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL); - IA64FAULT fault; - - if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, regs)) return; - if ((isr & IA64_ISR_SP) - || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) - { - /* - * This fault was due to a speculative load or lfetch.fault, set the "ed" - * bit in the psr to ensure forward progress. (Target register will get a - * NaT for ld.s, lfetch will be canceled.) - */ - ia64_psr(regs)->ed = 1; - return; - } - - again: - fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha); - if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) { - u64 logps; - pteval = translate_domain_pte(pteval, address, itir, &logps); - vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,logps); - if (fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) { - /* dtlb has been purged in-between. This dtlb was - matching. Undo the work. */ - vcpu_flush_tlb_vhpt_range (address, 1); - goto again; - } - return; - } - - if (!user_mode (regs)) { - /* The fault occurs inside Xen. */ - if (!ia64_done_with_exception(regs)) { - // should never happen. 
If it does, region 0 addr may - // indicate a bad xen pointer - printk("*** xen_handle_domain_access: exception table" - " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", - iip, address); - panic_domain(regs,"*** xen_handle_domain_access: exception table" - " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", - iip, address); - } - return; - } - if (!PSCB(current,interrupt_collection_enabled)) { - check_bad_nested_interruption(isr,regs,fault); - //printf("Delivering NESTED DATA TLB fault\n"); - fault = IA64_DATA_NESTED_TLB_VECTOR; - regs->cr_iip = ((unsigned long) PSCBX(current,iva) + fault) & ~0xffUL; - regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; - // NOTE: nested trap must NOT pass PSCB address - //regs->r31 = (unsigned long) &PSCB(current); - inc_slow_reflect_count(fault); - return; - } - - PSCB(current,itir) = itir; - PSCB(current,iha) = iha; - PSCB(current,ifa) = address; - reflect_interruption(isr, regs, fault); -} - -fpswa_interface_t *fpswa_interface = 0; - -void trap_init (void) -{ - if (ia64_boot_param->fpswa) - /* FPSWA fixup: make the interface pointer a virtual address: */ - fpswa_interface = __va(ia64_boot_param->fpswa); - else - printk("No FPSWA supported.\n"); -} - -static fpswa_ret_t -fp_emulate (int fp_fault, void *bundle, unsigned long *ipsr, - unsigned long *fpsr, unsigned long *isr, unsigned long *pr, - unsigned long *ifs, struct pt_regs *regs) -{ - fp_state_t fp_state; - fpswa_ret_t ret; - - if (!fpswa_interface) - return ((fpswa_ret_t) {-1, 0, 0, 0}); - - memset(&fp_state, 0, sizeof(fp_state_t)); - - /* - * compute fp_state. only FP registers f6 - f11 are used by the - * kernel, so set those bits in the mask and set the low volatile - * pointer to point to these registers. 
- */ - fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ - - fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6; - /* - * unsigned long (*EFI_FPSWA) ( - * unsigned long trap_type, - * void *Bundle, - * unsigned long *pipsr, - * unsigned long *pfsr, - * unsigned long *pisr, - * unsigned long *ppreds, - * unsigned long *pifs, - * void *fp_state); - */ - ret = (*fpswa_interface->fpswa)(fp_fault, bundle, - ipsr, fpsr, isr, pr, ifs, &fp_state); - - return ret; -} - -/* - * Handle floating-point assist faults and traps for domain. - */ -unsigned long -handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) -{ - struct vcpu *v = current; - IA64_BUNDLE bundle; - IA64_BUNDLE __get_domain_bundle(UINT64); - unsigned long fault_ip; - fpswa_ret_t ret; - - fault_ip = regs->cr_iip; - /* - * When the FP trap occurs, the trapping instruction is completed. - * If ipsr.ri == 0, there is the trapping instruction in previous bundle. - */ - if (!fp_fault && (ia64_psr(regs)->ri == 0)) - fault_ip -= 16; - bundle = __get_domain_bundle(fault_ip); - if (!bundle.i64[0] && !bundle.i64[1]) { - printk("%s: floating-point bundle at 0x%lx not mapped\n", - __FUNCTION__, fault_ip); - return -1; - } - - ret = fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr, - &isr, &regs->pr, &regs->cr_ifs, regs); - - if (ret.status) { - PSCBX(v, fpswa_ret) = ret; - printk("%s(%s): fp_emulate() returned %ld\n", - __FUNCTION__, fp_fault?"fault":"trap", ret.status); - } - - return ret.status; -} - -void -ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, - unsigned long iim, unsigned long itir, unsigned long arg5, - unsigned long arg6, unsigned long arg7, unsigned long stack) -{ - struct pt_regs *regs = (struct pt_regs *) &stack; - unsigned long code; - static const char *reason[] = { - "IA-64 Illegal Operation fault", - "IA-64 Privileged Operation fault", - "IA-64 Privileged Register fault", - "IA-64 Reserved Register/Field fault", - "Disabled Instruction Set 
Transition fault", - "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", - "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", - "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" - }; - - printf("ia64_fault, vector=0x%lx, ifa=0x%016lx, iip=0x%016lx, ipsr=0x%016lx, isr=0x%016lx\n", - vector, ifa, regs->cr_iip, regs->cr_ipsr, isr); - - - if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { - /* - * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel - * the lfetch. - */ - ia64_psr(regs)->ed = 1; - printf("ia64_fault: handled lfetch.fault\n"); - return; - } - - switch (vector) { - case 0: - printk("VHPT Translation.\n"); - break; - - case 4: - printk("Alt DTLB.\n"); - break; - - case 6: - printk("Instruction Key Miss.\n"); - break; - - case 7: - printk("Data Key Miss.\n"); - break; - - case 8: - printk("Dirty-bit.\n"); - break; - - case 20: - printk("Page Not Found.\n"); - break; - - case 21: - printk("Key Permission.\n"); - break; - - case 22: - printk("Instruction Access Rights.\n"); - break; - - case 24: /* General Exception */ - code = (isr >> 4) & 0xf; - printk("General Exception: %s%s.\n", reason[code], - (code == 3) ? ((isr & (1UL << 37)) ? 
" (RSE access)" : - " (data access)") : ""); - if (code == 8) { -# ifdef CONFIG_IA64_PRINT_HAZARDS - printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", - current->comm, current->pid, - regs->cr_iip + ia64_psr(regs)->ri, - regs->pr); -# endif - printf("ia64_fault: returning on hazard\n"); - return; - } - break; - - case 25: - printk("Disabled FP-Register.\n"); - break; - - case 26: - printk("NaT consumption.\n"); - break; - - case 29: - printk("Debug.\n"); - break; - - case 30: - printk("Unaligned Reference.\n"); - break; - - case 31: - printk("Unsupported data reference.\n"); - break; - - case 32: - printk("Floating-Point Fault.\n"); - break; - - case 33: - printk("Floating-Point Trap.\n"); - break; - - case 34: - printk("Lower Privilege Transfer Trap.\n"); - break; - - case 35: - printk("Taken Branch Trap.\n"); - break; - - case 36: - printk("Single Step Trap.\n"); - break; - - case 45: - printk("IA-32 Exception.\n"); - break; - - case 46: - printk("IA-32 Intercept.\n"); - break; - - case 47: - printk("IA-32 Interrupt.\n"); - break; - - default: - printk("Fault %lu\n", vector); - break; - } - - show_registers(regs); - panic("Fault in Xen.\n"); -} - -unsigned long running_on_sim = 0; - -void -do_ssc(unsigned long ssc, struct pt_regs *regs) -{ - unsigned long arg0, arg1, arg2, arg3, retval; - char buf[2]; -/**/ static int last_fd, last_count; // FIXME FIXME FIXME -/**/ // BROKEN FOR MULTIPLE DOMAINS & SMP -/**/ struct ssc_disk_stat { int fd; unsigned count;} *stat, last_stat; - - arg0 = vcpu_get_gr(current,32); - switch(ssc) { - case SSC_PUTCHAR: - buf[0] = arg0; - buf[1] = '\0'; - printf(buf); - break; - case SSC_GETCHAR: - retval = ia64_ssc(0,0,0,0,ssc); - vcpu_set_gr(current,8,retval,0); - break; - case SSC_WAIT_COMPLETION: - if (arg0) { // metaphysical address - - arg0 = translate_domain_mpaddr(arg0); -/**/ stat = (struct ssc_disk_stat *)__va(arg0); -///**/ if (stat->fd == last_fd) stat->count = last_count; -/**/ stat->count = last_count; -//if 
(last_count >= PAGE_SIZE) printf("ssc_wait: stat->fd=%d,last_fd=%d,last_count=%d\n",stat->fd,last_fd,last_count); -///**/ retval = ia64_ssc(arg0,0,0,0,ssc); -/**/ retval = 0; - } - else retval = -1L; - vcpu_set_gr(current,8,retval,0); - break; - case SSC_OPEN: - arg1 = vcpu_get_gr(current,33); // access rights -if (!running_on_sim) { printf("SSC_OPEN, not implemented on hardware. (ignoring...)\n"); arg0 = 0; } - if (arg0) { // metaphysical address - arg0 = translate_domain_mpaddr(arg0); - retval = ia64_ssc(arg0,arg1,0,0,ssc); - } - else retval = -1L; - vcpu_set_gr(current,8,retval,0); - break; - case SSC_WRITE: - case SSC_READ: -//if (ssc == SSC_WRITE) printf("DOING AN SSC_WRITE\n"); - arg1 = vcpu_get_gr(current,33); - arg2 = vcpu_get_gr(current,34); - arg3 = vcpu_get_gr(current,35); - if (arg2) { // metaphysical address of descriptor - struct ssc_disk_req *req; - unsigned long mpaddr; - long len; - - arg2 = translate_domain_mpaddr(arg2); - req = (struct ssc_disk_req *) __va(arg2); - req->len &= 0xffffffffL; // avoid strange bug - len = req->len; -/**/ last_fd = arg1; -/**/ last_count = len; - mpaddr = req->addr; -//if (last_count >= PAGE_SIZE) printf("do_ssc: read fd=%d, addr=%p, len=%lx ",last_fd,mpaddr,len); - retval = 0; - if ((mpaddr & PAGE_MASK) != ((mpaddr+len-1) & PAGE_MASK)) { - // do partial page first - req->addr = translate_domain_mpaddr(mpaddr); - req->len = PAGE_SIZE - (req->addr & ~PAGE_MASK); - len -= req->len; mpaddr += req->len; - retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); - arg3 += req->len; // file offset -/**/ last_stat.fd = last_fd; -/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION); -//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)[part]=%x ",req->addr,req->len,retval); - } - if (retval >= 0) while (len > 0) { - req->addr = translate_domain_mpaddr(mpaddr); - req->len = (len > PAGE_SIZE) ? 
PAGE_SIZE : len; - len -= PAGE_SIZE; mpaddr += PAGE_SIZE; - retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); - arg3 += req->len; // file offset -// TEMP REMOVED AGAIN arg3 += req->len; // file offset -/**/ last_stat.fd = last_fd; -/**/ (void)ia64_ssc(__pa(&last_stat),0,0,0,SSC_WAIT_COMPLETION); -//if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)=%x ",req->addr,req->len,retval); - } - // set it back to the original value - req->len = last_count; - } - else retval = -1L; - vcpu_set_gr(current,8,retval,0); -//if (last_count >= PAGE_SIZE) printf("retval=%x\n",retval); - break; - case SSC_CONNECT_INTERRUPT: - arg1 = vcpu_get_gr(current,33); - arg2 = vcpu_get_gr(current,34); - arg3 = vcpu_get_gr(current,35); - if (!running_on_sim) { printf("SSC_CONNECT_INTERRUPT, not implemented on hardware. (ignoring...)\n"); break; } - (void)ia64_ssc(arg0,arg1,arg2,arg3,ssc); - break; - case SSC_NETDEV_PROBE: - vcpu_set_gr(current,8,-1L,0); - break; - default: - printf("ia64_handle_break: bad ssc code %lx, iip=0x%lx, b0=0x%lx... 
spinning\n", - ssc, regs->cr_iip, regs->b0); - while(1); - break; - } - vcpu_increment_iip(current); -} - -/* Also read in hyperprivop.S */ -int first_break = 1; - -void -ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim) -{ - struct domain *d = current->domain; - struct vcpu *v = current; - IA64FAULT vector; - - if (first_break) { - if (platform_is_hp_ski()) running_on_sim = 1; - else running_on_sim = 0; - first_break = 0; - } - if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant - do_ssc(vcpu_get_gr(current,36), regs); - } -#ifdef CRASH_DEBUG - else if ((iim == 0 || iim == CDB_BREAK_NUM) && !user_mode(regs)) { - if (iim == 0) - show_registers(regs); - debugger_trap_fatal(0 /* don't care */, regs); - } -#endif - else if (iim == d->arch.breakimm) { - /* by default, do not continue */ - v->arch.hypercall_continuation = 0; - - if ((vector = ia64_hypercall(regs)) == IA64_NO_FAULT) { - if (!PSCBX(v, hypercall_continuation)) - vcpu_increment_iip(current); - } - else reflect_interruption(isr, regs, vector); - } - else if (!PSCB(v,interrupt_collection_enabled)) { - if (ia64_hyperprivop(iim,regs)) - vcpu_increment_iip(current); - } - else { - if (iim == 0) - die_if_kernel("bug check", regs, iim); - PSCB(v,iim) = iim; - reflect_interruption(isr,regs,IA64_BREAK_VECTOR); - } -} - -void -ia64_handle_privop (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long itir) -{ - IA64FAULT vector; - - vector = priv_emulate(current,regs,isr); - if (vector != IA64_NO_FAULT && vector != IA64_RFI_IN_PROGRESS) { - // Note: if a path results in a vector to reflect that requires - // iha/itir (e.g. vcpu_force_data_miss), they must be set there - reflect_interruption(isr,regs,vector); - } -} - -/* Used in vhpt.h. 
*/ -#define INTR_TYPE_MAX 10 -UINT64 int_counts[INTR_TYPE_MAX]; - -void -ia64_handle_reflection (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim, unsigned long vector) -{ - struct vcpu *v = current; - unsigned long check_lazy_cover = 0; - unsigned long psr = regs->cr_ipsr; - - /* Following faults shouldn'g be seen from Xen itself */ - if (!(psr & IA64_PSR_CPL)) BUG(); - - switch(vector) { - case 8: - vector = IA64_DIRTY_BIT_VECTOR; break; - case 9: - vector = IA64_INST_ACCESS_BIT_VECTOR; break; - case 10: - check_lazy_cover = 1; - vector = IA64_DATA_ACCESS_BIT_VECTOR; break; - case 20: - check_lazy_cover = 1; - vector = IA64_PAGE_NOT_PRESENT_VECTOR; break; - case 22: - vector = IA64_INST_ACCESS_RIGHTS_VECTOR; break; - case 23: - check_lazy_cover = 1; - vector = IA64_DATA_ACCESS_RIGHTS_VECTOR; break; - case 25: - vector = IA64_DISABLED_FPREG_VECTOR; - break; - case 26: - if (((isr >> 4L) & 0xfL) == 1) { - //regs->eml_unat = 0; FIXME: DO WE NEED THIS?? - printf("ia64_handle_reflection: handling regNaT fault\n"); - vector = IA64_NAT_CONSUMPTION_VECTOR; break; - } -#if 1 - // pass null pointer dereferences through with no error - // but retain debug output for non-zero ifa - if (!ifa) { - vector = IA64_NAT_CONSUMPTION_VECTOR; break; - } -#endif - printf("*** NaT fault... attempting to handle as privop\n"); - printf("isr=%016lx, ifa=%016lx, iip=%016lx, ipsr=%016lx\n", - isr, ifa, regs->cr_iip, psr); - //regs->eml_unat = 0; FIXME: DO WE NEED THIS??? - // certain NaT faults are higher priority than privop faults - vector = priv_emulate(v,regs,isr); - if (vector == IA64_NO_FAULT) { - printf("*** Handled privop masquerading as NaT fault\n"); - return; - } - vector = IA64_NAT_CONSUMPTION_VECTOR; break; - case 27: - //printf("*** Handled speculation vector, itc=%lx!\n",ia64_get_itc()); - PSCB(current,iim) = iim; - vector = IA64_SPECULATION_VECTOR; break; - case 30: - // FIXME: Should we handle unaligned refs in Xen?? 
- vector = IA64_UNALIGNED_REF_VECTOR; break; - case 32: - if (!(handle_fpu_swa(1, regs, isr))) { - vcpu_increment_iip(v); - return; - } - printf("ia64_handle_reflection: handling FP fault\n"); - vector = IA64_FP_FAULT_VECTOR; break; - case 33: - if (!(handle_fpu_swa(0, regs, isr))) return; - printf("ia64_handle_reflection: handling FP trap\n"); - vector = IA64_FP_TRAP_VECTOR; break; - case 34: - printf("ia64_handle_reflection: handling lowerpriv trap\n"); - vector = IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR; break; - case 35: - printf("ia64_handle_reflection: handling taken branch trap\n"); - vector = IA64_TAKEN_BRANCH_TRAP_VECTOR; break; - case 36: - printf("ia64_handle_reflection: handling single step trap\n"); - vector = IA64_SINGLE_STEP_TRAP_VECTOR; break; - - default: - printf("ia64_handle_reflection: unhandled vector=0x%lx\n",vector); - while(vector); - return; - } - if (check_lazy_cover && (isr & IA64_ISR_IR) && handle_lazy_cover(v, regs)) return; - PSCB(current,ifa) = ifa; - PSCB(current,itir) = vcpu_get_itir_on_fault(v,ifa); - reflect_interruption(isr,regs,vector); -} - -unsigned long hypercall_create_continuation( - unsigned int op, const char *format, ...) 
-{ - struct mc_state *mcs = &mc_state[smp_processor_id()]; - struct vcpu *v = current; - const char *p = format; - unsigned long arg; - unsigned int i; - va_list args; - - va_start(args, format); - if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) { - panic("PREEMPT happen in multicall\n"); // Not support yet - } else { - vcpu_set_gr(v, 2, op, 0); - for ( i = 0; *p != '\0'; i++) { - switch ( *p++ ) - { - case 'i': - arg = (unsigned long)va_arg(args, unsigned int); - break; - case 'l': - arg = (unsigned long)va_arg(args, unsigned long); - break; - case 'h': - arg = (unsigned long)va_arg(args, void *); - break; - default: - arg = 0; - BUG(); - } - switch (i) { - case 0: vcpu_set_gr(v, 14, arg, 0); - break; - case 1: vcpu_set_gr(v, 15, arg, 0); - break; - case 2: vcpu_set_gr(v, 16, arg, 0); - break; - case 3: vcpu_set_gr(v, 17, arg, 0); - break; - case 4: vcpu_set_gr(v, 18, arg, 0); - break; - default: panic("Too many args for hypercall continuation\n"); - break; - } - } - } - v->arch.hypercall_continuation = 1; - va_end(args); - return op; -} - _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |