[Xen-devel] [PATCH 2/2] Improve hpet accuracy
This patch performs the bulk of the changes described in the 0/2 description above, to improve HPET accuracy.

Signed-off-by: Dave Winchell <dwinchell@xxxxxxxxxxxxxxx>
Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>

diff -r ec3493b63170 xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -24,14 +24,11 @@
 #include <xen/sched.h>
 #include <xen/event.h>
+
 #define HPET_BASE_ADDRESS 0xfed00000ULL
 #define HPET_MMAP_SIZE 1024
 #define S_TO_NS 1000000000ULL /* 1s = 10^9 ns */
 #define S_TO_FS 1000000000000000ULL /* 1s = 10^15 fs */
-
-/* Frequency_of_Xen_systeme_time / frequency_of_HPET = 16 */
-#define STIME_PER_HPET_TICK 16
-#define guest_time_hpet(v) (hvm_get_guest_time(v) / STIME_PER_HPET_TICK)
 
 #define HPET_ID 0x000
 #define HPET_PERIOD 0x004
@@ -72,8 +69,9 @@
                 << HPET_TN_INT_ROUTE_CAP_SHIFT)
 
 #define hpet_tick_to_ns(h, tick) \
-    ((s_time_t)((((tick) > (h)->hpet_to_ns_limit) ? \
-        ~0ULL : (tick) * (h)->hpet_to_ns_scale) >> 10))
+    (s_time_t)hpet_mult_div(tick, h->hpet.phys_period, 1000000UL)
+
+#define hpet_phys_ns_to_ticks(ns, period) hpet_mult_div(ns, 1000000UL, period)
 
 #define timer_config(h, n)      (h->hpet.timers[n].config)
 #define timer_is_periodic(h, n) (timer_config(h, n) & HPET_TN_PERIODIC)
@@ -139,15 +137,34 @@
     return 0;
 }
 
-
 static inline uint64_t hpet_read_maincounter(HPETState *h)
 {
-    ASSERT(spin_is_locked(&h->lock));
+    uint64_t mc;
 
-    if ( hpet_enabled(h) )
-        return guest_time_hpet(h->vcpu) + h->mc_offset;
-    else
-        return h->hpet.mc64;
+    mc = read_64_main_counter() + h->mc_offset;
+    return mc;
+}
+static inline uint64_t hpet_compute_diff(HPETState *h, int tn)
+{
+    if ( timer_is_32bit(h, tn) ) {
+        uint32_t tn_cmp, diff, mc;
+
+        tn_cmp = (uint32_t)h->hpet.timers[tn].cmp;
+        mc = (uint32_t)hpet_read_maincounter(h);
+        diff = tn_cmp - mc;
+        diff = (int32_t)diff > 0 ? diff : (uint32_t)0;
+        return (uint64_t)diff;
+    }
+    else {
+        uint64_t tn_cmp, diff, mc;
+
+        mc = hpet_read_maincounter(h);
+        tn_cmp = h->hpet.timers[tn].cmp;
+        diff = tn_cmp - mc;
+        diff = (int64_t)diff > 0 ? diff : (uint64_t)0;
+        return diff;
+    }
 }
 
 static int hpet_read(
@@ -190,13 +207,9 @@
     stop_timer(&h->timers[tn]);
 }
 
-/* the number of HPET tick that stands for
- * 1/(2^10) second, namely, 0.9765625 milliseconds */
-#define HPET_TINY_TIME_SPAN ((h->stime_freq >> 10) / STIME_PER_HPET_TICK)
-
 static void hpet_set_timer(HPETState *h, unsigned int tn)
 {
-    uint64_t tn_cmp, cur_tick, diff;
+    uint64_t diff;
 
     ASSERT(tn < HPET_TIMER_NUM);
     ASSERT(spin_is_locked(&h->lock));
@@ -209,25 +222,7 @@
         pit_stop_channel0_irq(pit);
     }
 
-    tn_cmp   = h->hpet.timers[tn].cmp;
-    cur_tick = hpet_read_maincounter(h);
-    if ( timer_is_32bit(h, tn) )
-    {
-        tn_cmp   = (uint32_t)tn_cmp;
-        cur_tick = (uint32_t)cur_tick;
-    }
-
-    diff = tn_cmp - cur_tick;
-
-    /*
-     * Detect time values set in the past. This is hard to do for 32-bit
-     * comparators as the timer does not have to be set that far in the future
-     * for the counter difference to wrap a 32-bit signed integer. We fudge
-     * by looking for a 'small' time value in the past.
-     */
-    if ( (int64_t)diff < 0 )
-        diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
-            ? (uint32_t)diff : 0;
+    diff = hpet_compute_diff(h, tn);
 
     set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
 }
@@ -273,14 +268,15 @@
         if ( !(old_val & HPET_CFG_ENABLE) && (new_val & HPET_CFG_ENABLE) )
         {
             /* Enable main counter and interrupt generation. */
-            h->mc_offset = h->hpet.mc64 - guest_time_hpet(h->vcpu);
+
+            h->mc_offset = h->hpet.mc64 - read_64_main_counter();
+
             for ( i = 0; i < HPET_TIMER_NUM; i++ )
                 hpet_set_timer(h, i);
         }
         else if ( (old_val & HPET_CFG_ENABLE) && !(new_val & HPET_CFG_ENABLE) )
         {
             /* Halt main counter and disable interrupt generation. */
-            h->hpet.mc64 = h->mc_offset + guest_time_hpet(h->vcpu);
             for ( i = 0; i < HPET_TIMER_NUM; i++ )
                 hpet_stop_timer(h, i);
         }
@@ -291,6 +287,9 @@
             gdprintk(XENLOG_WARNING,
                      "HPET: writing main counter but it's not halted!\n");
         h->hpet.mc64 = new_val;
+
+        h->mc_offset = h->hpet.mc64 - read_64_main_counter();
+
         break;
 
     case HPET_T0_CFG:
@@ -333,7 +332,7 @@
              * - maximum is to prevent overflow in time_after() calculations
              */
             if ( hpet_tick_to_ns(h, new_val) < MICROSECS(900) )
-                new_val = (MICROSECS(900) << 10) / h->hpet_to_ns_scale;
+                new_val = hpet_phys_ns_to_ticks(MICROSECS(900), h->hpet.phys_period);
             new_val &= (timer_is_32bit(h, tn) ? ~0u : ~0ull) >> 1;
             h->hpet.period[tn] = new_val;
         }
@@ -373,10 +372,216 @@
     .write_handler = hpet_write
 };
 
+static void hpet_stats_dump_dom(struct domain *d)
+{
+    struct HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+    unsigned long mc, s;
+    int i;
+
+    printk("domain %d\n", d->domain_id);
+    mc = hpet_read_maincounter(h);
+    s = hpet_tick_to_ns(h, mc);
+    s = s / 1000000000UL;
+
+    printk("cur index %ld\n", s % INTR_CNT_BUCKETS);
+    for(i = 0; i < INTR_CNT_BUCKETS; i++) {
+        if(!(i%10))
+            printk("\n");
+        printk("%ld ", h->hpet.intr_counts[i]);
+    }
+    printk("\n");
+}
+static void hpet_state_dump(struct domain *d)
+{
+    struct HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+
+    printk("timers.config: 0x%lx 0x%lx 0x%lx\n", h->hpet.timers[0].config, h->hpet.timers[1].config, h->hpet.timers[2].config);
+    printk("timers.cmp: 0x%lx 0x%lx 0x%lx\n", h->hpet.timers[0].cmp, h->hpet.timers[1].cmp, h->hpet.timers[2].cmp);
+    printk("current mc: 0x%lx\n", hpet_read_maincounter(h));
+    printk("period: %lx %lx %lx\n", h->hpet.period[0], h->hpet.period[1], h->hpet.period[2]);
+    printk("mc_offset 0x%lx\n", h->mc_offset);
+    printk("phys_period 0x%lx\n", h->hpet.phys_period);
+    printk("last_end_of_intr_mc 0x%lx\n", h->hpet.last_end_of_intr_mc);
+    printk("end_of_intr_mc 0x%lx\n", h->hpet.end_of_intr_mc);
+    printk("cpu_khz 0x%lx\n", h->hpet.cpu_khz);
+    printk("migr_local_tsc 0x%lx\n", h->hpet.migr_local_tsc);
+    printk("intr_pending_nr 0x%lx\n", h->hpet.intr_pending_nr);
+    printk("pending_mask 0x%lx\n", h->hpet.pending_mask);
+    printk("delivery_policy %d\n", h->hpet.delivery_policy);
+    printk("vector 0x%x 0x%x 0x%x\n", h->hpet.vector[0], h->hpet.vector[1], h->hpet.vector[2]);
+}
+static void hpet_stats_dump(unsigned char c)
+{
+    struct domain *d;
+
+    for_each_domain(d) {
+        if(d->domain_id) {
+            hpet_stats_dump_dom(d);
+            hpet_state_dump(d);
+        }
+    }
+}
+#include <xen/keyhandler.h>
+static __init int hpet_stats_dump_keyhandler_init(void)
+{
+    register_keyhandler('Z', hpet_stats_dump, "hpet_stats_dump");
+    return 0;
+}
+__initcall(hpet_stats_dump_keyhandler_init);
+static void hpet_stats(struct vcpu *v)
+{
+    struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+    unsigned long mc, s, u;
+
+    mc = hpet_read_maincounter(h);
+    s = hpet_tick_to_ns(h, mc);
+    s = s / 1000000000UL;
+    if(h->hpet.intr_counts_last_s && (s > h->hpet.intr_counts_last_s)) {
+        for(u = (h->hpet.intr_counts_last_s + 1); u <= s; u++)
+            h->hpet.intr_counts[u % INTR_CNT_BUCKETS] = 0;
+    }
+    h->hpet.intr_counts_last_s = s;
+    h->hpet.intr_counts[s % INTR_CNT_BUCKETS]++;
+}
+
+static void hpet_vioapic_del_cb(uint64_t arg, uint32_t intrs_delivered)
+{
+    HPETState *h = (HPETState *)arg;
+
+    h->hpet.pending_mask = intrs_delivered;
+}
+void hpet_intr_en_fn_missed(struct vcpu *v, unsigned int vector, unsigned int post)
+{
+    struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+
+    spin_lock(&h->lock);
+    if(post)
+        hpet_stats(v);
+    else {
+        clear_bit(v->vcpu_id, &h->hpet.pending_mask);
+        if(!(h->hpet.pending_mask))
+            h->hpet.end_of_intr_mc = hpet_read_maincounter(h);
+    }
+    spin_unlock(&h->lock);
+}
+void hpet_intr_en_fn_no_missed(struct vcpu *v, unsigned int vector, unsigned int post)
+{
+    struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+
+    spin_lock(&h->lock);
+    if(post) {
+        hpet_stats(v);
+        if(!h->hpet.intr_pending_nr) {
+            // probably should kill domain here
+            printk("hpet_intr_en_fn: unexpected cleared intr_pending_nr pending_mask 0x%lx\n", h->hpet.pending_mask);
+            spin_unlock(&h->lock);
+            return;
+        }
+        clear_bit(v->vcpu_id, &h->hpet.pending_mask);
+
+        if(!(h->hpet.pending_mask)) {
+            h->hpet.intr_pending_nr--;
+            if(h->hpet.intr_pending_nr) {
+                hvm_isa_irq_deassert(v->domain, 0);
+                hvm_isa_irq_assert_cb(v->domain, 0, hpet_vioapic_del_cb, (uint64_t)h);
+            }
+        }
+    }
+    spin_unlock(&h->lock);
+}
+
+/* For the guest-computes-missed-ticks policy,
+ * we will only route the interrupt if a) the last interrupt routed has been processed by the guest
+ * and b) it's been more than a (clock) period's worth of main counter ticks since that interrupt
+ * was processed.
+ *
+ * It was found, through experimentation, that Linux guests keep very accurate time for hpet with
+ * this logic, even if it means we are only delivering every 2*period. This is because the Linux logic
+ * for missed ticks is very good for hpet. On the other hand, delivering the interrupt just slightly
+ * early causes poor timekeeping.
+ *
+ * It was also found that time stamping at the end of interrupt processing improved accuracy over
+ * time stamping at injection time. This is probably due to the delay that can happen in the Linux
+ * interrupt handler if it has to wait for a lock.
+ */
+
+static void hpet_route_decision_missed_ticks(HPETState *h, unsigned int tn, int isa_irq, unsigned int *route, unsigned int *cb_expected)
+{
+    uint64_t mc;
+    struct domain *d = h->vcpu->domain;
+    unsigned int vector;
+
+    *route = 0;
+    *cb_expected = 0;
+    if(!vioapic_get_vector(d, isa_irq, &vector)) {
+        mc = hpet_read_maincounter(h);
+        if(h->hpet.vector[tn] == ~0U) {
+            h->hpet.vector[tn] = vector;
+            if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_MISSED))
+                panic(__FUNCTION__);
+            h->hpet.last_end_of_intr_mc = h->hpet.end_of_intr_mc = 0;
+            *cb_expected = 1;
+            *route = 1;
+        }
+        else if((h->hpet.end_of_intr_mc != h->hpet.last_end_of_intr_mc) &&
+                ((mc - h->hpet.end_of_intr_mc) > h->hpet.period[tn])) {
+            if(vector != h->hpet.vector[tn]) {
+                hvm_unregister_intr_en_notif(d, h->hpet.vector[tn]);
+                h->hpet.vector[tn] = vector;
+                if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_MISSED))
+                    panic(__FUNCTION__);
+            }
+            h->hpet.last_end_of_intr_mc = h->hpet.end_of_intr_mc;
+            *cb_expected = 1;
+            *route = 1;
+        }
+    }
+    else
+        *route = 1;
+}
+static void hpet_route_decision_not_missed_ticks(HPETState *h, unsigned int tn, int isa_irq, unsigned int *route, unsigned int *cb_expected)
+{
+    struct domain *d = h->vcpu->domain;
+    unsigned int vector;
+
+    *route = 0;
+    *cb_expected = 0;
+    if(!vioapic_get_vector(d, isa_irq, &vector)) {
+        if(h->hpet.vector[tn] == ~0U) {
+            h->hpet.vector[tn] = vector;
+            if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_NO_MISSED))
+                panic(__FUNCTION__);
+        }
+        if(h->hpet.intr_pending_nr++)
+            return;
+
+        if(vector != h->hpet.vector[tn]) {
+            hvm_unregister_intr_en_notif(d, h->hpet.vector[tn]);
+            h->hpet.vector[tn] = vector;
+            if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_NO_MISSED))
+                panic(__FUNCTION__);
+        }
+        *cb_expected = 1;
+        *route = 1;
+    }
+    else
+        *route = 1;
+}
+
+typedef void (*hpet_route_fn_t)(HPETState *h, unsigned int tn, int isa_irq, unsigned int *route, unsigned int *cb_expected);
+
+static hpet_route_fn_t hpet_determine_route_params[HPET_DEL_POLICY_NUMS] = {hpet_route_decision_missed_ticks,
+                                                                            hpet_route_decision_not_missed_ticks};
+
 static void hpet_route_interrupt(HPETState *h, unsigned int tn)
 {
     unsigned int tn_int_route = timer_int_route(h, tn);
     struct domain *d = h->vcpu->domain;
+    unsigned int route = 1;
+    unsigned int cb_expected = 0;
 
     ASSERT(spin_is_locked(&h->lock));
 
@@ -386,8 +591,18 @@
        timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
        timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
         int isa_irq = (tn == 0) ? 0 : 8;
-        hvm_isa_irq_deassert(d, isa_irq);
-        hvm_isa_irq_assert(d, isa_irq);
+
+        if(!tn)
+            (*hpet_determine_route_params[h->hpet.delivery_policy])(h, tn, isa_irq, &route, &cb_expected);
+
+        if(route) {
+            hvm_isa_irq_deassert(d, isa_irq);
+            if(cb_expected) {
+                hvm_isa_irq_assert_cb(d, isa_irq, hpet_vioapic_del_cb, (uint64_t)h);
+            }
+            else
+                hvm_isa_irq_assert(d, isa_irq);
+        }
         return;
     }
@@ -405,6 +620,46 @@
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
+
+static void hpet_timer0_timeout_missed_ticks(HPETState *h)
+{
+    uint64_t mc = hpet_read_maincounter(h);
+    unsigned int tn = 0;
+    uint64_t period = h->hpet.period[tn];
+
+    if ( timer_is_32bit(h, tn) )
+    {
+        while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
+            h->hpet.timers[tn].cmp = (uint32_t)(h->hpet.timers[tn].cmp + period);
+    }
+    else
+    {
+        while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
+            h->hpet.timers[tn].cmp += period;
+    }
+    set_timer(&h->timers[tn],
+              NOW() + hpet_tick_to_ns(h, period));
+}
+static void hpet_timer0_timeout_not_missed_ticks(HPETState *h)
+{
+    unsigned int tn = 0;
+    uint64_t diff;
+    uint64_t period = h->hpet.period[tn];
+
+    if ( timer_is_32bit(h, tn) )
+        h->hpet.timers[tn].cmp = (uint32_t)(h->hpet.timers[tn].cmp + period);
+    else
+        h->hpet.timers[tn].cmp += period;
+
+    diff = hpet_compute_diff(h, tn);
+    set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
+}
+
+typedef void (*hpet_timer0_timeout_fn_t)(HPETState *h);
+
+static hpet_timer0_timeout_fn_t hpet_timer0_timeout[HPET_DEL_POLICY_NUMS] = {hpet_timer0_timeout_missed_ticks,
+                                                                             hpet_timer0_timeout_not_missed_ticks};
 static void hpet_timer_fn(void *opaque)
 {
     struct HPET_timer_fn_info *htfi = opaque;
@@ -424,19 +679,25 @@
     if ( timer_is_periodic(h, tn) && (h->hpet.period[tn] != 0) )
     {
-        uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
-        if ( timer_is_32bit(h, tn) )
-        {
-            while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
-                h->hpet.timers[tn].cmp = (uint32_t)(
-                    h->hpet.timers[tn].cmp + period);
-        }
-        else
-        {
-            while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
-                h->hpet.timers[tn].cmp += period;
-        }
-        set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
+        if(!tn)
+            (*hpet_timer0_timeout[h->hpet.delivery_policy])(h);
+
+        else
+        {
+            uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
+            if ( timer_is_32bit(h, tn) )
+            {
+                while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
+                    h->hpet.timers[tn].cmp = (uint32_t)(
+                        h->hpet.timers[tn].cmp + period);
+            }
+            else
+            {
+                while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
+                    h->hpet.timers[tn].cmp += period;
+            }
+            set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
+        }
     }
 
     spin_unlock(&h->lock);
@@ -462,7 +723,10 @@
     spin_lock(&hp->lock);
 
     /* Write the proper value into the main counter */
-    hp->hpet.mc64 = hp->mc_offset + guest_time_hpet(hp->vcpu);
+
+    hp->hpet.mc64 = hpet_read_maincounter(hp);
+    rdtscll(hp->hpet.migr_local_tsc);
+    hp->hpet.cpu_khz = cpu_khz;
 
     /* Save the HPET registers */
     rc = _hvm_init_entry(h, HVM_SAVE_CODE(HPET), 0, HVM_SAVE_LENGTH(HPET));
@@ -488,19 +752,70 @@
         C(period[0]);
         C(period[1]);
         C(period[2]);
+        C(vector[0]);
+        C(vector[1]);
+        C(vector[2]);
+        C(last_end_of_intr_mc);
+        C(end_of_intr_mc);
+        C(intr_pending_nr);
+        C(pending_mask);
+        C(delivery_policy);
+        C(phys_period);
+        C(cpu_khz);
+        C(migr_local_tsc);
+        C(intr_counts_last_s);
 #undef C
+        memcpy(rec->intr_counts, hp->hpet.intr_counts, sizeof(hp->hpet.intr_counts));
     }
 
     spin_unlock(&hp->lock);
 
     return rc;
 }
+
+static int hpet_debug_migr_check_period(struct domain *d, HPETState *hp)
+{
+    unsigned long period, m_period, delta;
+
+    period = read_hpet_period();
+    m_period = (hp->hpet.capability >> 32) & 0xffffffffUL;
+    delta = (period > m_period) ? (period - m_period) : (m_period - period);
+    if(delta) {
+        /* Some hpets report small differences in period. A difference of 1 has been seen.
+         * Allow 100 as that is still 0.00014%, which is small enough.
+         */
+        printk("hpet.capability 0x%lx ((hp->hpet.capability >> 32) & 0xffffffffUL) 0x%lx period %lx\n",
+               hp->hpet.capability,
+               ((hp->hpet.capability >> 32) & 0xffffffffUL),
+               period);
+        if(delta > 100) {
+            printk("hpet period difference %ld too large\n", delta);
+            return 1;
+        }
+    }
+    return 0;
+}
+#define HPET_MIGR_TICK_ADJUSTMENT 1
+/*
+ * HPET_MIGR_TICK_ADJUSTMENT -
+ * This corrects for some of the time between hpet save on the sending node
+ * and hpet load on the receiving node. The correction has been found to be quite small,
+ * 300-400 usec. This adjustment is based on sending a final message in migrate with the
+ * tsc at send time (last_tsc_sender) in the message. Upon reception the tsc (first_tsc_receiver)
+ * is recorded. So the only time we are not taking into account is the time the message is in
+ * transit.
+ * The reason this adjustment is in here, given how small it is, is that there may be circumstances,
+ * for example a node heavily loaded with other guests, where the adjustment would be significant.
+ */
 static int hpet_load(struct domain *d, hvm_domain_context_t *h)
 {
     HPETState *hp = &d->arch.hvm_domain.pl_time.vhpet;
     struct hvm_hw_hpet *rec;
     int i;
+#ifdef HPET_MIGR_TICK_ADJUSTMENT
+    unsigned long now, dt1, dt2, dt1ticks, dt2ticks, period;
+#endif
 
     spin_lock(&hp->lock);
@@ -531,11 +846,50 @@
     C(period[0]);
     C(period[1]);
     C(period[2]);
+    C(vector[0]);
+    C(vector[1]);
+    C(vector[2]);
+    C(last_end_of_intr_mc);
+    C(end_of_intr_mc);
+    C(intr_pending_nr);
+    C(pending_mask);
+    C(delivery_policy);
+    C(phys_period);
+    C(cpu_khz);
+    C(migr_local_tsc);
+    C(intr_counts_last_s);
 #undef C
+
+    memcpy(hp->hpet.intr_counts, rec->intr_counts, sizeof(hp->hpet.intr_counts));
 
-    /* Recalculate the offset between the main counter and guest time */
-    hp->mc_offset = hp->hpet.mc64 - guest_time_hpet(hp->vcpu);
-
+    /* Recalculate the offset between the main counter and guest time */
+
+    if(hpet_debug_migr_check_period(d, hp)) {
+        spin_unlock(&hp->lock);
+        return -EINVAL;
+    }
+
+#ifdef HPET_MIGR_TICK_ADJUSTMENT
+    period = read_hpet_period();
+    rdtscll(now);
+    /* dt1 is the time delta on the sending node between the sending of the last migrate message and the call to hpet_save. */
+    dt1 = ((d->last_tsc_sender - hp->hpet.migr_local_tsc) * 1000UL) / hp->hpet.cpu_khz;
+    dt1 = dt1 * 1000UL;
+    dt1ticks = hpet_phys_ns_to_ticks(dt1, period);
+
+    /* dt2 is the time delta on the receiving node between now (hpet_load) and the reception of the last migrate message. */
+    dt2 = ((now - d->first_tsc_receiver) * 1000UL) / cpu_khz;
+    dt2 = dt2 * 1000UL;
+    dt2ticks = hpet_phys_ns_to_ticks(dt2, period);
+    hp->mc_offset = hp->hpet.mc64 + dt1ticks + dt2ticks - read_64_main_counter();
+#else
+    hp->mc_offset = hp->hpet.mc64 - read_64_main_counter();
+#endif
+
+    if(hp->hpet.delivery_policy == HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS)
+        hvm_register_intr_en_notif(d, hp->hpet.vector[0], HVM_INTR_EN_NOTIF_HPET_MISSED);
+    else if(hp->hpet.delivery_policy == HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS)
+        hvm_register_intr_en_notif(d, hp->hpet.vector[0], HVM_INTR_EN_NOTIF_HPET_NO_MISSED);
+
     /* Restart the timers */
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
         if ( hpet_enabled(hp) )
@@ -548,6 +902,17 @@
 
 HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, hpet_load, 1, HVMSR_PER_DOM);
 
+void hpet_notify_timer_mode(struct domain *d, uint64_t value)
+{
+    HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+
+    if(value == HVM_HPET_guest_computes_missed_ticks)
+        h->hpet.delivery_policy = HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS;
+    else if(value == HVM_HPET_guest_does_not_compute_missed_ticks)
+        h->hpet.delivery_policy = HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS;
+}
+
+
 void hpet_init(struct vcpu *v)
 {
     HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
@@ -557,18 +922,22 @@
 
     spin_lock_init(&h->lock);
 
+    if(hpet_physical_inited)
+        printk("virtual hpet_init: using physical hpet\n");
+    else
+        printk("virtual hpet_init: using simulated hpet\n");
+
     h->vcpu = v;
     h->stime_freq = S_TO_NS;
-
-    h->hpet_to_ns_scale = ((S_TO_NS * STIME_PER_HPET_TICK) << 10) / h->stime_freq;
-    h->hpet_to_ns_limit = ~0ULL / h->hpet_to_ns_scale;
+    h->hpet.phys_period = read_hpet_period();
 
     /* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */
     h->hpet.capability = 0x8086A201ULL;
 
     /* This is the number of femptoseconds per HPET tick. */
     /* Here we define HPET's frequency to be 1/16 of Xen system time */
-    h->hpet.capability |= ((S_TO_FS*STIME_PER_HPET_TICK/h->stime_freq) << 32);
+
+    h->hpet.capability |= read_hpet_period() << 32;
 
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
     {
@@ -577,6 +946,7 @@
         h->hpet.timers[i].cmp = ~0ULL;
         h->timer_fn_info[i].hs = h;
         h->timer_fn_info[i].tn = i;
+        h->hpet.vector[i] = ~0U;
         init_timer(&h->timers[i], hpet_timer_fn,
                    &h->timer_fn_info[i], v->processor);
     }
@@ -590,7 +960,6 @@
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
         kill_timer(&h->timers[i]);
 }
-
 void hpet_reset(struct domain *d)
 {
     hpet_deinit(d);
diff -r ec3493b63170 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -458,6 +458,8 @@
         ctxt.dr6 = vc->debugreg[6];
         ctxt.dr7 = vc->debugreg[7];
 
+        ctxt.int_notif = v->int_notif;
+
         if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
             return 1;
     }
@@ -612,6 +614,9 @@
     vc->flags = VGCF_online;
     v->fpu_initialised = 1;
 
+    v->int_notif = ctxt.int_notif;
+
     /* Auxiliary processors should be woken immediately. */
     if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
@@ -2382,8 +2387,9 @@
             hvm_latch_shinfo_size(d);
             break;
         case HVM_PARAM_TIMER_MODE:
-            if ( a.value > HVMPTM_one_missed_tick_pending )
+            if ( a.value > HVM_HPET_guest_does_not_compute_missed_ticks )
                 rc = -EINVAL;
+            hpet_notify_timer_mode(d, a.value);
             break;
         case HVM_PARAM_MIG_LAST_TSC:
             d->last_tsc_sender = a.value;
@@ -2574,7 +2580,98 @@
     return rc;
 }
 
+/* Interrupt inject and completion notification facility.
+ * Register 'notif_fn', to be called whenever 'vector' is injected (post = 1) or
+ * completed (post = 0). Here 'completed' means the point at which the guest
+ * re-enables interrupts.
+ */
+
+notif_fn_t hvm_intr_en_notif_fn[HVM_INTR_EN_NOTIF_MAX+1] = {(notif_fn_t)0, hpet_intr_en_fn_missed, hpet_intr_en_fn_no_missed};
+int hvm_register_intr_en_notif(struct domain *d, unsigned int notif_vector, int notif_fn_index)
+{
+    intr_en_notif_t *entry;
+    struct list_head *cur;
+    intr_en_notif_t *cur_entry;
+
+    entry = xmalloc(struct intr_en_notif);
+    if(!entry) {
+        printk("hvm_register_intr_en_notif: xmalloc failed\n");
+        return 1;
+    }
+    entry->notif_vector = notif_vector;
+    entry->notif_fn = notif_fn_index;
+    spin_lock(&d->intr_en_notif_lock);
+    list_for_each(cur, &d->intr_en_notif_list) {
+        cur_entry = list_entry(cur, struct intr_en_notif, links);
+        if(cur_entry->notif_vector == notif_vector) {
+            list_del(cur);
+            xfree(cur_entry);
+            break;
+        }
+    }
+    list_add_tail(&entry->links, &d->intr_en_notif_list);
+    set_bit(notif_vector, &d->intr_en_notif_bitmap);
+    spin_unlock(&d->intr_en_notif_lock);
+    return 0;
+}
+int hvm_unregister_intr_en_notif(struct domain *d, unsigned int notif_vector)
+{
+    struct list_head *cur;
+    intr_en_notif_t *cur_entry;
+
+    spin_lock(&d->intr_en_notif_lock);
+    clear_bit(notif_vector, &d->intr_en_notif_bitmap);
+    list_for_each(cur, &d->intr_en_notif_list) {
+        cur_entry = list_entry(cur, struct intr_en_notif, links);
+        if(cur_entry->notif_vector == notif_vector) {
+            list_del(cur);
+            xfree(cur_entry);
+            break;
+        }
+    }
+    spin_unlock(&d->intr_en_notif_lock);
+    return 0;
+}
+void hvm_intr_en_notif_arm(struct vcpu *v, unsigned int vector)
+{
+    struct list_head *cur;
+    intr_en_notif_t *cur_entry;
+    struct domain *d = v->domain;
+
+    if(test_bit(vector, &d->intr_en_notif_bitmap)) {
+        vcpu_intr_en_notif_t *vi = &v->int_notif;
+
+        spin_lock(&d->intr_en_notif_lock);
+        list_for_each(cur, &d->intr_en_notif_list) {
+            cur_entry = list_entry(cur, struct intr_en_notif, links);
+            if(cur_entry->notif_vector == vector) {
+                vi->intr_en_notif_fn = cur_entry->notif_fn;
+                vi->intr_en_notif_vec = vector;
+                vi->intr_en_notif_state = 0;
+                hvm_intr_en_notif_fn[vi->intr_en_notif_fn](v, vi->intr_en_notif_vec, 1);
+                break;
+            }
+        }
+        spin_unlock(&d->intr_en_notif_lock);
+    }
+    return;
+}
+
+void hvm_intr_en_notif_disarm(struct vcpu *v, int irq_masked)
+{
+    if(v->int_notif.intr_en_notif_fn) {
+        vcpu_intr_en_notif_t *vi = &v->int_notif;
+
+        if(vi->intr_en_notif_state && !(irq_masked || vlapic_tpr_gte_vec(v, vi->intr_en_notif_vec))) {
+            hvm_intr_en_notif_fn[vi->intr_en_notif_fn](v, vi->intr_en_notif_vec, 0);
+            vi->intr_en_notif_fn = 0;
+            vi->intr_en_notif_state = 0;
+        }
+        else if (irq_masked || vlapic_tpr_gte_vec(v, vi->intr_en_notif_vec))
+            vi->intr_en_notif_state = 1;
+    }
+}
 /*
  * Local variables:
  * mode: C
diff -r ec3493b63170 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -88,11 +88,35 @@
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
+void hvm_isa_irq_assert_cb(
+    struct domain *d, unsigned int isa_irq,
+    void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+    uint64_t cb_arg)
+{
+    struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+    unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
+    ASSERT(isa_irq <= 15);
+
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+
+    if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) &&
+         (hvm_irq->gsi_assert_count[gsi]++ == 0) )
+    {
+        vioapic_register_delivered_cb(d, intrs_delivered_cb, cb_arg);
+        vioapic_irq_positive_edge(d, gsi);
+        vpic_irq_positive_edge(d, isa_irq);
+    }
+
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+}
 void hvm_isa_irq_assert(
     struct domain *d, unsigned int isa_irq)
 {
     struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
     ASSERT(isa_irq <= 15);
diff -r ec3493b63170 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c
+++ b/xen/arch/x86/hvm/svm/intr.c
@@ -148,7 +148,10 @@
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     struct hvm_intack intack;
 
+    hvm_intr_en_notif_disarm(v, irq_masked(vmcb->rflags));
+
     /* Crank the handle on interrupt state. */
+
     pt_update_irq(v);
     svm_dirq_assist(v);
@@ -177,7 +180,6 @@
             enable_intr_window(v, intack);
             return;
         }
-
         intack = hvm_vcpu_ack_pending_irq(v, intack);
     } while ( intack.source == hvm_intsrc_none );
@@ -189,6 +191,7 @@
     {
         HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
         svm_inject_extint(v, intack.vector);
+        hvm_intr_en_notif_arm(v, intack.vector);
         pt_intr_post(v, intack);
     }
diff -r ec3493b63170 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -306,7 +306,18 @@
     return pt_active(&pit->pt0);
 }
 
-static void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq)
+void vioapic_register_delivered_cb(struct domain *d, void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+                                   uint64_t cb_arg)
+{
+    struct hvm_hw_vioapic *vioapic = domain_vioapic(d);
+
+    ASSERT(spin_is_locked(&vioapic_domain(vioapic)->arch.hvm_domain.irq_lock));
+
+    vioapic->intrs_delivered_cb = intrs_delivered_cb;
+    vioapic->cb_arg = cb_arg;
+}
+
+void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq)
 {
     uint16_t dest = vioapic->redirtbl[irq].fields.dest_id;
     uint8_t dest_mode = vioapic->redirtbl[irq].fields.dest_mode;
@@ -314,6 +325,7 @@
     uint8_t vector = vioapic->redirtbl[irq].fields.vector;
     uint8_t trig_mode = vioapic->redirtbl[irq].fields.trig_mode;
     uint32_t deliver_bitmask;
+    uint32_t deliver_bitmask_final = (uint32_t)0;
     struct vlapic *target;
     struct vcpu *v;
 
@@ -348,6 +360,11 @@
                     vector, deliver_bitmask);
         if ( target != NULL )
         {
+            set_bit(vlapic_vcpu(target)->vcpu_id, &deliver_bitmask_final);
+            if(vioapic->intrs_delivered_cb) {
+                (*vioapic->intrs_delivered_cb)(vioapic->cb_arg, deliver_bitmask_final);
+                vioapic->intrs_delivered_cb = (void *)0;
+            }
             ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
         }
         else
@@ -362,25 +379,23 @@
     case dest_Fixed:
     {
         uint8_t bit;
+        if(vioapic->intrs_delivered_cb) {
+            (*vioapic->intrs_delivered_cb)(vioapic->cb_arg, deliver_bitmask);
+            vioapic->intrs_delivered_cb = 0;
+        }
         for ( bit = 0; deliver_bitmask != 0; bit++ )
         {
             if ( !(deliver_bitmask & (1 << bit)) )
                 continue;
             deliver_bitmask &= ~(1 << bit);
-#ifdef IRQ0_SPECIAL_ROUTING
-            /* Do not deliver timer interrupts to VCPU != 0 */
-            if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() )
-                v = vioapic_domain(vioapic)->vcpu[0];
-            else
-#endif
-            v = vioapic_domain(vioapic)->vcpu[bit];
-            if ( v != NULL )
-            {
-                target = vcpu_vlapic(v);
-                ioapic_inj_irq(vioapic, target, vector,
-                               trig_mode, delivery_mode);
-            }
-        }
+            v = vioapic_domain(vioapic)->vcpu[bit];
+            /* ioapic_get_delivery_bitmask guarantees that v is never NULL. */
+            if( v != NULL ) {
+                target = vcpu_vlapic(v);
+                ioapic_inj_irq(vioapic, target, vector,
+                               trig_mode, delivery_mode);
+            }
+        }
         break;
     }
@@ -404,6 +419,23 @@
                        delivery_mode);
         break;
     }
+}
+
+int vioapic_get_vector(struct domain *d, unsigned int isa_irq, unsigned int *vector)
+{
+    struct hvm_hw_vioapic *vioapic = domain_vioapic(d);
+    union vioapic_redir_entry *ent;
+    int ret = 1;
+    unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+    ent = &vioapic->redirtbl[gsi];
+    if ( !ent->fields.mask ) {
+        *vector = ent->fields.vector;
+        ret = 0;
+    }
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+    return ret;
 }
 
 void vioapic_irq_positive_edge(struct domain *d, unsigned int irq)
diff -r ec3493b63170 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -113,7 +113,6 @@
 /*
  * IRR-specific bitmap update & search routines.
  */
-
 static int vlapic_test_and_set_irr(int vector, struct vlapic *vlapic)
 {
     return vlapic_test_and_set_vector(vector, &vlapic->regs->data[APIC_IRR]);
@@ -165,6 +164,12 @@
                 vlapic, ppr, isr, isrv);
 
     return ppr;
+}
+bool_t vlapic_tpr_gte_vec(struct vcpu *v, int vector)
+{
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    return ((vector & 0xf0) <= vlapic_get_ppr(vlapic));
 }
 
 int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda)
diff -r ec3493b63170 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -163,6 +163,10 @@
     struct vcpu *v = current;
     unsigned int tpr_threshold = 0;
     enum hvm_intblk intblk;
+    unsigned long eflags;
+
+    eflags = __vmread(GUEST_RFLAGS);
+    hvm_intr_en_notif_disarm(v, irq_masked(eflags));
 
     /* Crank the handle on interrupt state. */
     pt_update_irq(v);
@@ -200,6 +204,7 @@
     {
         HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
         vmx_inject_extint(v, intack.vector);
+        hvm_intr_en_notif_arm(v, intack.vector);
         pt_intr_post(v, intack);
     }
diff -r ec3493b63170 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -36,6 +36,7 @@
 string_param("clocksource", opt_clocksource);
 
 #define EPOCH MILLISECS(1000)
+#define HPET_PERIOD_SIMULATED 0x429b17fUL
 
 unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
 DEFINE_SPINLOCK(rtc_lock);
@@ -348,6 +349,7 @@
     return hpet_read32(HPET_COUNTER);
 }
 
+int hpet_physical_inited = 0;
 static int init_hpet(struct platform_timesource *pts)
 {
     u64 hpet_rate = hpet_setup();
@@ -359,6 +361,8 @@
     pts->frequency = hpet_rate;
     pts->read_counter = read_hpet_count;
     pts->counter_bits = 32;
+
+    hpet_physical_inited = 1;
 
     return 1;
 }
@@ -500,6 +504,64 @@
     return stime;
 }
 
+static int hpet_main_counter_phys_avoid_hdw = 0;
+boolean_param("hpet_avoid", hpet_main_counter_phys_avoid_hdw);
+static unsigned long hpet_main_counter_phys_avoid_hdw_period;
+#define hpet_phys_ns_to_ticks(ns, period) hpet_mult_div(ns, 1000000UL, period)
+#define hpet_tick_to_ns(tick, period) hpet_mult_div(tick, period, 1000000UL)
+#define TSC_TO_NSEC(tsc) hpet_mult_div(tsc, 1000000UL, cpu_khz)
+
+typedef struct {
+    spinlock_t lock;
+    s_time_t last_ret;
+} get_s_time_mono_t;
+
+static get_s_time_mono_t get_s_time_mon;
+
+static void get_s_time_mono_init(void)
+{
+    spin_lock_init(&get_s_time_mon.lock);
+}
+
+u64 read_64_main_counter(void)
+{
+    u64 count;
+    unsigned long flags;
+    struct cpu_time *t = &this_cpu(cpu_time);
+    u64 tsc, delta;
+    s_time_t now;
+
+    if(hpet_main_counter_phys_avoid_hdw || !hpet_physical_inited) {
+        spin_lock(&get_s_time_mon.lock);
+        rdtscll(tsc);
+        delta = tsc - t->local_tsc_stamp;
+        now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+        if(now > get_s_time_mon.last_ret)
+            get_s_time_mon.last_ret = now;
+        else
+            now = get_s_time_mon.last_ret;
+        spin_unlock(&get_s_time_mon.lock);
+        if(!hpet_main_counter_phys_avoid_hdw_period)
+            hpet_main_counter_phys_avoid_hdw_period = read_hpet_period();
+        count = hpet_phys_ns_to_ticks(now, hpet_main_counter_phys_avoid_hdw_period);
+    }
+    else {
+        spin_lock_irqsave(&platform_timer_lock, flags);
+        count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
+        spin_unlock_irqrestore(&platform_timer_lock, flags);
+    }
+    return count;
+}
+u64 read_hpet_period(void)
+{
+    unsigned long period;
+
+    if(hpet_physical_inited)
+        period = (unsigned long)hpet_read32(HPET_PERIOD);
+    else
+        period = HPET_PERIOD_SIMULATED;
+    return period;
+}
 
 static void platform_time_calibration(void)
 {
@@ -559,6 +621,7 @@
 
     plt_overflow(NULL);
     platform_timer_stamp = plt_stamp64;
+    get_s_time_mono_init();
 
     printk("Platform timer is %s %s\n",
            freq_string(pts->frequency), pts->name);
diff -r ec3493b63170 xen/common/domain.c
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -96,6 +96,8 @@
     spin_lock_init(&d->hypercall_deadlock_mutex);
     INIT_LIST_HEAD(&d->page_list);
     INIT_LIST_HEAD(&d->xenpage_list);
+    spin_lock_init(&d->intr_en_notif_lock);
+    INIT_LIST_HEAD(&d->intr_en_notif_list);
 
     return d;
 }
diff -r ec3493b63170 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -23,6 +23,7 @@
 
 #include <asm/current.h>
 #include <asm/x86_emulate.h>
+#include <asm/processor.h>
 #include <public/domctl.h>
 #include <public/hvm/save.h>
 
@@ -218,10 +219,20 @@
     hvm_funcs.set_segment_register(v, seg, reg);
 }
 
+static inline int irq_masked(unsigned long eflags)
+{
+    return ((eflags & X86_EFLAGS_IF) == 0);
+}
+
 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                unsigned int *ecx, unsigned int *edx);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
+
+int hvm_register_intr_en_notif(struct domain *d, unsigned int notif_vector, int notif_fn_index);
+int hvm_unregister_intr_en_notif(struct domain *d, unsigned int notif_vector);
+void hvm_intr_en_notif_arm(struct vcpu *v, unsigned int vector);
+void hvm_intr_en_notif_disarm(struct vcpu *v, int irq_masked);
 
 static inline void
 hvm_inject_exception(unsigned int trapnr, int errcode, unsigned long cr2)
diff -r ec3493b63170 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -160,6 +160,10 @@
     struct domain *d, unsigned int isa_irq);
 void hvm_isa_irq_deassert(
     struct domain *d, unsigned int isa_irq);
+void hvm_isa_irq_assert_cb(
+    struct domain *d, unsigned int isa_irq,
+    void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+    uint64_t cb_arg);
 
 void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
diff -r ec3493b63170 xen/include/asm-x86/hvm/vioapic.h
--- a/xen/include/asm-x86/hvm/vioapic.h
+++ b/xen/include/asm-x86/hvm/vioapic.h
@@ -66,5 +66,7 @@
 void vioapic_reset(struct domain *d);
 void vioapic_irq_positive_edge(struct domain *d, unsigned int irq);
 void vioapic_update_EOI(struct domain *d, int vector);
-
+int vioapic_get_vector(struct domain *d, unsigned int isa_irq, unsigned int *vector);
+void vioapic_register_delivered_cb(struct domain *d, void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+                                   uint64_t cb_arg);
 #endif /* __ASM_X86_HVM_VIOAPIC_H__ */
diff -r ec3493b63170 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -98,4 +98,6 @@
 
 int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda);
 
+bool_t vlapic_tpr_gte_vec(struct vcpu *v, int vector);
+
 #endif /* __ASM_X86_HVM_VLAPIC_H__ */
diff -r ec3493b63170 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -50,23 +50,45 @@
         uint64_t fsb;    /* FSB route, not supported now */
     } timers[HPET_TIMER_NUM];
 
-    /* Hidden register state */
+    /* The rest of this struct is hidden register state */
+
+    /* Per timer state */
+
     uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+    uint32_t vector[HPET_TIMER_NUM];
+
+    /* Timer 0 (clock) specific state */
+
+    uint64_t last_end_of_intr_mc;
+    uint64_t end_of_intr_mc;
+    uint64_t intr_pending_nr;
+    uint64_t pending_mask;
+    uint32_t delivery_policy;
+
+    /* Global state */
+
+    uint64_t phys_period;
+    uint64_t cpu_khz;
+    uint64_t migr_local_tsc;
+
+    /* Debug */
+
+    uint64_t intr_counts[INTR_CNT_BUCKETS];
+    uint64_t intr_counts_last_s;
 };
 
 typedef struct HPETState {
     struct hpet_registers hpet;
     struct vcpu *vcpu;
     uint64_t stime_freq;
-    uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
-    uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns */
     uint64_t mc_offset;
     struct timer timers[HPET_TIMER_NUM];
    struct HPET_timer_fn_info timer_fn_info[HPET_TIMER_NUM];
     spinlock_t lock;
 } HPETState;
-
+void hpet_intr_en_fn_missed(struct vcpu *v, unsigned int vector, unsigned int post);
+void hpet_intr_en_fn_no_missed(struct vcpu *v, unsigned int vector, unsigned int post);
+void hpet_notify_timer_mode(struct domain *d, uint64_t value);
 /*
  * Abstract layer of periodic time, one short time.
  */
diff -r ec3493b63170 xen/include/public/arch-x86/hvm/save.h
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -47,6 +47,12 @@
 /*
  * Processor
 */
+
+typedef struct vcpu_intr_en_notif {
+    int intr_en_notif_fn;
+    int intr_en_notif_state;
+    unsigned int intr_en_notif_vec;
+} vcpu_intr_en_notif_t;
 
 struct hvm_hw_cpu {
     uint8_t  fpu_regs[512];
@@ -156,6 +162,7 @@
     };
     /* error code for pending event */
     uint32_t error_code;
+    vcpu_intr_en_notif_t int_notif;
 };
 
 DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu);
@@ -253,6 +260,8 @@
 #endif
         } fields;
     } redirtbl[VIOAPIC_NUM_PINS];
+    void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered);
+    uint64_t cb_arg;
 };
 
 DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
@@ -366,6 +375,11 @@
  * HPET
 */
 
+#define HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS 0         /* Linux */
+#define HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS 1 /* Windows */
+#define HPET_DEL_POLICY_NUMS 2
+#define INTR_CNT_BUCKETS 20
+
 #define HPET_TIMER_NUM     3    /* 3 timers supported now */
 struct hvm_hw_hpet {
     /* Memory-mapped, software visible registers */
@@ -385,8 +399,30 @@
     } timers[HPET_TIMER_NUM];
     uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */
 
-    /* Hidden register state */
+    /* The rest of this struct is hidden register state */
+
+    /* Per timer state */
+
     uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+    uint32_t vector[HPET_TIMER_NUM];
+
+    /* Timer 0 (clock) specific state */
+
+    uint64_t last_end_of_intr_mc;
+    uint64_t end_of_intr_mc;
+    uint64_t intr_pending_nr;
+    uint64_t pending_mask;
+    uint32_t delivery_policy;
+
+    /* Global state */
+
+    uint64_t phys_period;
+    uint64_t cpu_khz;
+    uint64_t migr_local_tsc;
+
+    /* Debug */
+
+    uint64_t intr_counts[INTR_CNT_BUCKETS];
+    uint64_t intr_counts_last_s;
 };
 
 DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet);
diff -r ec3493b63170 xen/include/public/hvm/params.h
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -80,6 +80,8 @@
 #define HVMPTM_no_delay_for_missed_ticks    1
 #define HVMPTM_no_missed_ticks_pending      2
 #define HVMPTM_one_missed_tick_pending      3
+#define HVM_HPET_guest_computes_missed_ticks 4
+#define HVM_HPET_guest_does_not_compute_missed_ticks 5
 
 /* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
 #define HVM_PARAM_HPET_ENABLED 11
diff -r ec3493b63170 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -69,6 +69,19 @@
 int  evtchn_init(struct domain *d);
 void evtchn_destroy(struct domain *d);
 
+typedef void (*notif_fn_t)(struct vcpu *v, unsigned int vector, unsigned int post);
+
+#define HVM_INTR_EN_NOTIF_UNUSED 0
+#define HVM_INTR_EN_NOTIF_HPET_MISSED 1
+#define HVM_INTR_EN_NOTIF_HPET_NO_MISSED 2
+#define HVM_INTR_EN_NOTIF_MAX 2
+
+typedef struct intr_en_notif {
+    struct list_head links;
+    unsigned int notif_vector;
+    int notif_fn;
+} intr_en_notif_t;
+
 struct vcpu
 {
     int              vcpu_id;
@@ -135,6 +148,7 @@
     cpumask_t        vcpu_dirty_cpumask;
 
     struct arch_vcpu arch;
+    vcpu_intr_en_notif_t int_notif;
 };
 
 /* Per-domain lock can be recursively acquired in fault handlers. */
@@ -232,6 +246,10 @@
     int32_t time_offset_seconds;
 
     struct rcu_head rcu;
+
+    spinlock_t intr_en_notif_lock;
+    unsigned long intr_en_notif_bitmap[(MAX_VECTOR/sizeof(unsigned long))+1];
+    struct list_head intr_en_notif_list;
 
     unsigned long last_tsc_sender;
     unsigned long first_tsc_receiver;
@@ -508,6 +526,18 @@
     if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
         vcpu_wake(v);
 }
+/*
+ * compute (var*num)/den where var*num may overflow 64 bits
+ */
+static inline uint64_t hpet_mult_div(uint64_t var, uint64_t num, uint64_t den)
+{
+    uint64_t result, q, r;
+
+    q = var / den;
+    r = var % den;
+    result = (q * num) + (r * num) / den;
+    return result;
+}
 
 #define IS_PRIV(_d) ((_d)->is_privileged)
 #define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
diff -r ec3493b63170 xen/include/xen/time.h
--- a/xen/include/xen/time.h
+++ b/xen/include/xen/time.h
@@ -61,6 +61,11 @@
 
 extern void send_timer_event(struct vcpu *v);
 
+u64 read_64_main_counter(void);
+u64 read_hpet_period(void);
+
+extern int hpet_physical_inited;
+
 #endif /* __XEN_TIME_H__ */
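
Every tick/ns conversion in this patch funnels through hpet_mult_div(), which computes (var*num)/den without ever forming the 128-bit intermediate product. As an illustration only (the harness below is not part of the patch; it borrows hpet_mult_div() and HPET_PERIOD_SIMULATED from it), here is a standalone program showing what the hpet_tick_to_ns() and hpet_phys_ns_to_ticks() macros compute:

#include <stdio.h>
#include <stdint.h>

/* Copied from the patch: exact floor of (var*num)/den, computed as
 * quotient*num plus the remainder's scaled contribution, so the full
 * 128-bit product is never needed. */
static inline uint64_t hpet_mult_div(uint64_t var, uint64_t num, uint64_t den)
{
    uint64_t q = var / den;
    uint64_t r = var % den;
    return (q * num) + (r * num) / den;
}

#define HPET_PERIOD_SIMULATED 0x429b17fUL /* 69841279 fs/tick, ~14.318 MHz */

int main(void)
{
    uint64_t period = HPET_PERIOD_SIMULATED;
    /* hpet_phys_ns_to_ticks(ns, period): nanoseconds -> main counter ticks */
    uint64_t ticks = hpet_mult_div(1000000000ULL, 1000000UL, period);
    /* hpet_tick_to_ns(tick, period): main counter ticks -> nanoseconds */
    uint64_t ns = hpet_mult_div(ticks, period, 1000000UL);

    printf("ticks in 1s: %llu\n", (unsigned long long)ticks); /* 14318179 */
    printf("round trip:  %llu ns\n", (unsigned long long)ns); /* 999999934 */
    return 0;
}

Both helpers floor their result, so a round trip can lose a few tens of microseconds; presumably this is why the patch keeps mc_offset and the comparators in ticks and converts to nanoseconds only at the set_timer() boundary.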
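The delivery gate described in the guest-computes-missed-ticks comment can also be read in isolation. A minimal sketch (struct gate and should_route() are hypothetical names; only the condition is taken from hpet_route_decision_missed_ticks()):

#include <stdio.h>
#include <stdint.h>

struct gate {
    uint64_t last_end_of_intr_mc; /* stamp at the last interrupt we routed */
    uint64_t end_of_intr_mc;      /* stamp when the guest last finished one */
};

/* Route the next timer-0 interrupt only if the guest completed the previous
 * one (the end-of-interrupt stamp moved) and more than one period's worth of
 * main counter ticks has passed since that completion. */
static int should_route(const struct gate *g, uint64_t mc, uint64_t period)
{
    return (g->end_of_intr_mc != g->last_end_of_intr_mc) &&
           ((mc - g->end_of_intr_mc) > period);
}

int main(void)
{
    struct gate g = { .last_end_of_intr_mc = 0, .end_of_intr_mc = 1000 };

    printf("%d\n", should_route(&g, 1400, 500)); /* 0: period not yet elapsed */
    printf("%d\n", should_route(&g, 1600, 500)); /* 1: done and period elapsed */
    g.last_end_of_intr_mc = g.end_of_intr_mc;    /* as done when routing */
    printf("%d\n", should_route(&g, 2200, 500)); /* 0: no new completion yet */
    return 0;
}

A guest that completes its tick handler late therefore sees deliveries spaced up to 2*period apart, which matches the comment's observation that Linux's missed-tick accounting copes well with this.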