
[Xen-devel] [PATCH 2/2] Improve hpet accuracy



This patch performs the bulk of the changes described in the 0/2 description, improving HPET accuracy.

Signed-off-by: Dave Winchell <dwinchell@xxxxxxxxxxxxxxx>
Signed-off-by: Ben Guthro <bguthro@xxxxxxxxxxxxxxx>
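
For reference, below is a minimal standalone sketch (not part of the patch) of
the scaled arithmetic the patch relies on. hpet_mult_div() computes
(var * num) / den without overflowing 64 bits, and because the HPET period is
carried in femtoseconds per tick (1 ns = 10^6 fs), ticks and nanoseconds
convert through a factor of 10^6. The main() below and its sample inputs are
illustrative only; tsc_delta_to_ticks() mirrors the dt1/dt2 steps in
hpet_load() under the same assumptions.

#include <stdint.h>
#include <stdio.h>

/* Compute (var * num) / den without 64-bit overflow when var * num would
 * exceed 2^64: split var into quotient and remainder by den. */
static uint64_t hpet_mult_div(uint64_t var, uint64_t num, uint64_t den)
{
    uint64_t q = var / den;
    uint64_t r = var % den;
    return (q * num) + (r * num) / den;
}

/* The period is in femtoseconds per tick; 1 ns = 10^6 fs. */
#define hpet_tick_to_ns(tick, period)     hpet_mult_div(tick, period, 1000000UL)
#define hpet_phys_ns_to_ticks(ns, period) hpet_mult_div(ns, 1000000UL, period)

/* Migration-style step: TSC delta -> ns via cpu_khz (ns = tsc * 10^6 / khz),
 * then ns -> HPET ticks, as hpet_load() does for dt1 and dt2. */
static uint64_t tsc_delta_to_ticks(uint64_t tsc_delta, uint64_t khz,
                                   uint64_t period_fs)
{
    uint64_t ns = hpet_mult_div(tsc_delta, 1000000UL, khz);
    return hpet_phys_ns_to_ticks(ns, period_fs);
}

int main(void)
{
    /* HPET_PERIOD_SIMULATED from the patch: ~69.84 ns/tick, ~14.318 MHz. */
    uint64_t period_fs = 0x429b17fULL;
    uint64_t ticks = hpet_phys_ns_to_ticks(1000000000ULL, period_fs);

    printf("ticks per second : %llu\n", (unsigned long long)ticks);
    printf("round trip in ns : %llu\n",
           (unsigned long long)hpet_tick_to_ns(ticks, period_fs));
    /* 3M TSC cycles on a hypothetical 3 GHz cpu = 1 ms ~= 14318 ticks. */
    printf("1 ms of tsc      : %llu ticks\n",
           (unsigned long long)tsc_delta_to_ticks(3000000ULL, 3000000ULL,
                                                  period_fs));
    return 0;
}

Note that (q * num) + (r * num) / den equals (var * num) / den exactly under
truncating division, provided r * num itself fits in 64 bits; since r < den,
that holds for the 10^6 and period divisors used here.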
diff -r ec3493b63170 xen/arch/x86/hvm/hpet.c
--- a/xen/arch/x86/hvm/hpet.c
+++ b/xen/arch/x86/hvm/hpet.c
@@ -24,14 +24,11 @@
 #include <xen/sched.h>
 #include <xen/event.h>
 
+
 #define HPET_BASE_ADDRESS   0xfed00000ULL
 #define HPET_MMAP_SIZE      1024
 #define S_TO_NS  1000000000ULL           /* 1s  = 10^9  ns */
 #define S_TO_FS  1000000000000000ULL     /* 1s  = 10^15 fs */
-
-/* Frequency_of_Xen_systeme_time / frequency_of_HPET = 16 */
-#define STIME_PER_HPET_TICK 16
-#define guest_time_hpet(v) (hvm_get_guest_time(v) / STIME_PER_HPET_TICK)
 
 #define HPET_ID         0x000
 #define HPET_PERIOD     0x004
@@ -72,8 +69,9 @@
                     << HPET_TN_INT_ROUTE_CAP_SHIFT)
 
 #define hpet_tick_to_ns(h, tick)                        \
-    ((s_time_t)((((tick) > (h)->hpet_to_ns_limit) ?     \
-        ~0ULL : (tick) * (h)->hpet_to_ns_scale) >> 10))
+    (s_time_t)hpet_mult_div(tick, h->hpet.phys_period, 1000000UL)
+
+#define hpet_phys_ns_to_ticks(ns, period) hpet_mult_div(ns, 1000000UL, period)
 
 #define timer_config(h, n)       (h->hpet.timers[n].config)
 #define timer_is_periodic(h, n)  (timer_config(h, n) & HPET_TN_PERIODIC)
@@ -139,15 +137,34 @@
 
     return 0;
 }
-
 static inline uint64_t hpet_read_maincounter(HPETState *h)
 {
-    ASSERT(spin_is_locked(&h->lock));
+    uint64_t mc;
 
-    if ( hpet_enabled(h) )
-        return guest_time_hpet(h->vcpu) + h->mc_offset;
-    else 
-        return h->hpet.mc64;
+    mc = read_64_main_counter() + h->mc_offset;
+    return mc;
+}
+static inline uint64_t hpet_compute_diff(HPETState *h, int tn)
+{
+
+    if ( timer_is_32bit(h, tn) ) {
+       uint32_t tn_cmp, diff, mc;
+
+       tn_cmp = (uint32_t)h->hpet.timers[tn].cmp;
+       mc = (uint32_t)hpet_read_maincounter(h);
+       diff = tn_cmp - mc;
+       diff = (int32_t)diff > 0 ? diff : (uint32_t)0;
+       return (uint64_t)diff;
+    }
+    else {
+       uint64_t tn_cmp, diff, mc;
+       
+       mc = hpet_read_maincounter(h);
+       tn_cmp = h->hpet.timers[tn].cmp;
+       diff = tn_cmp - mc;
+       diff = (int64_t)diff > 0 ? diff : (uint64_t)0;
+       return diff;
+    }
 }
 
 static int hpet_read(
@@ -190,13 +207,9 @@
     stop_timer(&h->timers[tn]);
 }
 
-/* the number of HPET tick that stands for
- * 1/(2^10) second, namely, 0.9765625 milliseconds */
-#define  HPET_TINY_TIME_SPAN  ((h->stime_freq >> 10) / STIME_PER_HPET_TICK)
-
 static void hpet_set_timer(HPETState *h, unsigned int tn)
 {
-    uint64_t tn_cmp, cur_tick, diff;
+    uint64_t diff;
 
     ASSERT(tn < HPET_TIMER_NUM);
     ASSERT(spin_is_locked(&h->lock));
@@ -209,25 +222,7 @@
         pit_stop_channel0_irq(pit);
     }
 
-    tn_cmp   = h->hpet.timers[tn].cmp;
-    cur_tick = hpet_read_maincounter(h);
-    if ( timer_is_32bit(h, tn) )
-    {
-        tn_cmp   = (uint32_t)tn_cmp;
-        cur_tick = (uint32_t)cur_tick;
-    }
-
-    diff = tn_cmp - cur_tick;
-
-    /*
-     * Detect time values set in the past. This is hard to do for 32-bit
-     * comparators as the timer does not have to be set that far in the future
-     * for the counter difference to wrap a 32-bit signed integer. We fudge
-     * by looking for a 'small' time value in the past.
-     */
-    if ( (int64_t)diff < 0 )
-        diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
-            ? (uint32_t)diff : 0;
+    diff = hpet_compute_diff(h, tn);
 
     set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
 }
@@ -273,14 +268,15 @@
         if ( !(old_val & HPET_CFG_ENABLE) && (new_val & HPET_CFG_ENABLE) )
         {
             /* Enable main counter and interrupt generation. */
-            h->mc_offset = h->hpet.mc64 - guest_time_hpet(h->vcpu);
+
+           h->mc_offset = h->hpet.mc64 - read_64_main_counter();
+           
             for ( i = 0; i < HPET_TIMER_NUM; i++ )
                 hpet_set_timer(h, i); 
         }
         else if ( (old_val & HPET_CFG_ENABLE) && !(new_val & HPET_CFG_ENABLE) )
         {
             /* Halt main counter and disable interrupt generation. */
-            h->hpet.mc64 = h->mc_offset + guest_time_hpet(h->vcpu);
             for ( i = 0; i < HPET_TIMER_NUM; i++ )
                 hpet_stop_timer(h, i);
         }
@@ -291,6 +287,9 @@
             gdprintk(XENLOG_WARNING, 
                      "HPET: writing main counter but it's not halted!\n");
         h->hpet.mc64 = new_val;
+
+       h->mc_offset = h->hpet.mc64 - read_64_main_counter();
+
         break;
 
     case HPET_T0_CFG:
@@ -333,7 +332,7 @@
              *  - maximum is to prevent overflow in time_after() calculations
              */
             if ( hpet_tick_to_ns(h, new_val) < MICROSECS(900) )
-                new_val = (MICROSECS(900) << 10) / h->hpet_to_ns_scale;
+               new_val = hpet_phys_ns_to_ticks(MICROSECS(900), h->hpet.phys_period);
             new_val &= (timer_is_32bit(h, tn) ? ~0u : ~0ull) >> 1;
             h->hpet.period[tn] = new_val;
         }
@@ -373,10 +372,216 @@
     .write_handler = hpet_write
 };
 
+static void hpet_stats_dump_dom(struct domain *d)
+{
+    struct HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+    unsigned long mc, s;
+    int i;
+
+    printk("domain %d\n", d->domain_id);
+    mc = hpet_read_maincounter(h);
+    s = hpet_tick_to_ns(h, mc);
+    s = s / 1000000000UL;
+
+    printk("cur index %ld\n", s % INTR_CNT_BUCKETS);
+    for(i = 0; i < INTR_CNT_BUCKETS; i++) {
+       if(!(i%10))
+           printk("\n");
+       printk("%ld ", h->hpet.intr_counts[i]);
+    }
+    printk("\n");
+}
+static void hpet_state_dump(struct domain *d)
+{
+    struct HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+
+    printk("timers.config: 0x%lx 0x%lx 0x%lx\n", h->hpet.timers[0].config, 
h->hpet.timers[1].config, h->hpet.timers[2].config);
+    printk("timers.cmp: 0x%lx 0x%lx 0x%lx\n", h->hpet.timers[0].cmp, 
h->hpet.timers[1].cmp, h->hpet.timers[2].cmp);
+    printk("current mc: 0x%lx\n", hpet_read_maincounter(h));
+    printk("period: %lx %lx %lx\n", h->hpet.period[0], h->hpet.period[1], 
h->hpet.period[1]);
+    printk("mc_offset 0x%lx\n",h->mc_offset);
+    printk("phys_period 0x%lx\n",h->hpet.phys_period);
+    printk("last_end_of_intr_mc 0x%lx\n",h->hpet.last_end_of_intr_mc);
+    printk("end_of_intr_mc 0x%lx\n",h->hpet.end_of_intr_mc);
+    printk("cpu_khz 0x%lx\n",h->hpet.cpu_khz);
+    printk("migr_local_tsc 0x%lx\n",h->hpet.migr_local_tsc);
+    printk("intr_pending_nr 0x%lx\n",h->hpet.intr_pending_nr);
+    printk("pending_mask 0x%lx\n",h->hpet.pending_mask);
+    printk("delivery_policy %d\n",h->hpet.delivery_policy);
+    printk("vector 0x%x 0x%x 
0x%x\n",h->hpet.vector[0],h->hpet.vector[1],h->hpet.vector[2]);
+}
+static void hpet_stats_dump(unsigned char c)
+{
+    struct domain *d;
+
+    for_each_domain(d) {
+       if(d->domain_id) {
+           hpet_stats_dump_dom(d);
+           hpet_state_dump(d);
+       }
+    }
+}
+#include <xen/keyhandler.h>
+static __init int hpet_stats_dump_keyhandler_init(void)
+{
+    register_keyhandler('Z', hpet_stats_dump,"hpet_stats_dump");
+    return 0;
+}
+__initcall(hpet_stats_dump_keyhandler_init);
+static void hpet_stats(struct vcpu *v)
+{
+    struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+    unsigned long mc, s, u;
+
+    mc = hpet_read_maincounter(h);
+    s = hpet_tick_to_ns(h, mc);
+    s = s / 1000000000UL;
+    if(h->hpet.intr_counts_last_s && (s > h->hpet.intr_counts_last_s)) {
+       for(u = (h->hpet.intr_counts_last_s + 1); u <= s; u++)
+           h->hpet.intr_counts[u % INTR_CNT_BUCKETS] = 0;
+    }
+    h->hpet.intr_counts_last_s = s;
+    h->hpet.intr_counts[s % INTR_CNT_BUCKETS]++;
+}
+
+
+
+static void hpet_vioapic_del_cb(uint64_t arg, uint32_t intrs_delivered)
+{
+    HPETState *h = (HPETState *)arg;
+
+    h->hpet.pending_mask = intrs_delivered;
+}
+void hpet_intr_en_fn_missed(struct vcpu *v, unsigned int vector, unsigned int post)
+{
+    struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+
+    spin_lock(&h->lock);
+    if(post)
+       hpet_stats(v);
+    else {
+       clear_bit(v->vcpu_id, &h->hpet.pending_mask);
+       if(!(h->hpet.pending_mask))
+           h->hpet.end_of_intr_mc = hpet_read_maincounter(h);
+    }
+    spin_unlock(&h->lock);
+}
+void hpet_intr_en_fn_no_missed(struct vcpu *v, unsigned int vector, unsigned int post)
+{
+    struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
+
+    spin_lock(&h->lock);
+    if(post) {
+       hpet_stats(v);
+       if(!h->hpet.intr_pending_nr) {
+           // probably should kill domain here
+           printk("hpet_intr_en_fn: unexpected cleared intr_pending_nr 
pending_mask 0x%lx\n", h->hpet.pending_mask);
+           spin_unlock(&h->lock);
+           return;
+       }
+       clear_bit(v->vcpu_id, &h->hpet.pending_mask);
+
+       if(!(h->hpet.pending_mask)) {
+           h->hpet.intr_pending_nr--;
+           if(h->hpet.intr_pending_nr) {
+               hvm_isa_irq_deassert(v->domain, 0);
+               hvm_isa_irq_assert_cb(v->domain, 0, hpet_vioapic_del_cb, (uint64_t)h);
+           }
+       }
+    }
+    spin_unlock(&h->lock);
+}
+
+/* For the guest-computes-missed-ticks policy,
+ * we will only route the interrupt if a) the last interrupt routed has been processed by the guest
+ * and b) it's been more than a (clock) period's worth of main counter ticks since that interrupt
+ * was processed.
+ *
+ * It was found, through experimentation, that Linux guests keep very accurate time for hpet with
+ * this logic, even if it means we are only delivering every 2*period. This is because the Linux logic
+ * for missed ticks is very good for hpet. On the other hand, delivering the interrupt just slightly
+ * early causes poor timekeeping.
+ *
+ * It was also found that time stamping at the end of interrupt processing improved accuracy over
+ * time stamping at injection time. This is probably due to the delay that can happen in the Linux
+ * interrupt handler if it has to wait for a lock.
+ *
+ */
+
+static void hpet_route_decision_missed_ticks(HPETState *h, unsigned int tn, int isa_irq, unsigned int *route, unsigned int *cb_expected)
+{
+    uint64_t mc;
+    struct domain *d = h->vcpu->domain;
+    unsigned int vector;
+
+    *route = 0;
+    *cb_expected = 0;
+    if(!vioapic_get_vector(d, isa_irq, &vector)) {
+       mc = hpet_read_maincounter(h);
+       if(h->hpet.vector[tn] == ~0U) {
+           h->hpet.vector[tn] = vector;
+           if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_MISSED))
+               panic(__FUNCTION__);
+           h->hpet.last_end_of_intr_mc = h->hpet.end_of_intr_mc = 0;
+           *cb_expected = 1;
+           *route = 1;
+       }
+       else if((h->hpet.end_of_intr_mc != h->hpet.last_end_of_intr_mc) &&
+               ((mc - h->hpet.end_of_intr_mc) >  h->hpet.period[tn])) {
+           if(vector != h->hpet.vector[tn]) {
+               hvm_unregister_intr_en_notif(d, h->hpet.vector[tn]);
+               h->hpet.vector[tn] = vector;
+               if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_MISSED))
+                   panic(__FUNCTION__);                        
+           }
+           h->hpet.last_end_of_intr_mc = h->hpet.end_of_intr_mc;
+           *cb_expected = 1;
+           *route = 1;
+       }
+    }
+    else
+       *route = 1;
+}
+static void hpet_route_decision_not_missed_ticks(HPETState *h, unsigned int tn, int isa_irq, unsigned int *route, unsigned int *cb_expected)
+{
+    struct domain *d = h->vcpu->domain;
+    unsigned int vector;
+
+    *route = 0;
+    *cb_expected = 0;
+    if(!vioapic_get_vector(d, isa_irq, &vector)) {
+       if(h->hpet.vector[tn] == ~0U) {
+           h->hpet.vector[tn] = vector;
+           if(hvm_register_intr_en_notif(d, vector, 
HVM_INTR_EN_NOTIF_HPET_NO_MISSED))
+               panic(__FUNCTION__);
+       }
+       if(h->hpet.intr_pending_nr++)
+           return;
+
+       if(vector != h->hpet.vector[tn]) {
+           hvm_unregister_intr_en_notif(d, h->hpet.vector[tn]);
+           h->hpet.vector[tn] = vector;
+           if(hvm_register_intr_en_notif(d, vector, HVM_INTR_EN_NOTIF_HPET_NO_MISSED))
+               panic(__FUNCTION__);                    
+       }
+       *cb_expected = 1;
+       *route = 1;
+    }
+    else
+       *route = 1;
+}
+
+typedef void (*hpet_route_fn_t)(HPETState *h, unsigned int tn, int isa_irq, unsigned int *route, unsigned int *cb_expected);
+
+static hpet_route_fn_t hpet_determine_route_params[HPET_DEL_POLICY_NUMS] = {hpet_route_decision_missed_ticks,
+                                                                            hpet_route_decision_not_missed_ticks};
+
 static void hpet_route_interrupt(HPETState *h, unsigned int tn)
 {
     unsigned int tn_int_route = timer_int_route(h, tn);
     struct domain *d = h->vcpu->domain;
+    unsigned int route = 1;
+    unsigned int cb_expected = 0;
 
     ASSERT(spin_is_locked(&h->lock));
 
@@ -386,8 +591,18 @@
            timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
            timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
         int isa_irq = (tn == 0) ? 0 : 8;
-        hvm_isa_irq_deassert(d, isa_irq);
-        hvm_isa_irq_assert(d, isa_irq);
+
+       if(!tn)
+           (*hpet_determine_route_params[h->hpet.delivery_policy])(h, tn, isa_irq, &route, &cb_expected);
+
+       if(route) {
+           hvm_isa_irq_deassert(d, isa_irq);
+           if(cb_expected) {
+               hvm_isa_irq_assert_cb(d, isa_irq, hpet_vioapic_del_cb, (uint64_t)h);
+           }
+           else
+               hvm_isa_irq_assert(d, isa_irq);
+       }
         return;
     }
 
@@ -405,6 +620,46 @@
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
+
+
+static void hpet_timer0_timeout_missed_ticks(HPETState *h)
+{
+    uint64_t mc = hpet_read_maincounter(h);
+    unsigned int tn = 0;
+    uint64_t period = h->hpet.period[tn];
+
+    if ( timer_is_32bit(h, tn) )
+    {
+       while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
+           h->hpet.timers[tn].cmp = (uint32_t)(h->hpet.timers[tn].cmp + period);
+    }
+    else
+    {
+       while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
+           h->hpet.timers[tn].cmp += period;
+    }
+    set_timer(&h->timers[tn], 
+             NOW() + hpet_tick_to_ns(h, period));
+}
+static void hpet_timer0_timeout_not_missed_ticks(HPETState *h)
+{
+    unsigned int tn = 0;
+    uint64_t diff;
+    uint64_t period = h->hpet.period[tn];
+
+    if ( timer_is_32bit(h, tn) )
+       h->hpet.timers[tn].cmp = (uint32_t)(h->hpet.timers[tn].cmp + period);
+    else
+       h->hpet.timers[tn].cmp += period;   
+
+    diff = hpet_compute_diff(h, tn);
+    set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
+}
+
+typedef void (*hpet_timer0_timeout_fn_t)(HPETState *h);
+
+static hpet_timer0_timeout_fn_t hpet_timer0_timeout[HPET_DEL_POLICY_NUMS] = {hpet_timer0_timeout_missed_ticks,
+                                                                             hpet_timer0_timeout_not_missed_ticks};
 static void hpet_timer_fn(void *opaque)
 {
     struct HPET_timer_fn_info *htfi = opaque;
@@ -424,19 +679,25 @@
 
     if ( timer_is_periodic(h, tn) && (h->hpet.period[tn] != 0) )
     {
-        uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
-        if ( timer_is_32bit(h, tn) )
-        {
-            while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
-                h->hpet.timers[tn].cmp = (uint32_t)(
-                    h->hpet.timers[tn].cmp + period);
-        }
-        else
-        {
-            while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
-                h->hpet.timers[tn].cmp += period;
-        }
-        set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
+       if(!tn)
+           (*hpet_timer0_timeout[h->hpet.delivery_policy])(h);
+
+       else
+       {
+           uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
+           if ( timer_is_32bit(h, tn) )
+           {
+               while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
+                   h->hpet.timers[tn].cmp = (uint32_t)(
+                                                       h->hpet.timers[tn].cmp + period);
+           }
+           else
+           {
+               while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
+                   h->hpet.timers[tn].cmp += period;
+           }
+           set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
+       }
     }
 
     spin_unlock(&h->lock);
@@ -462,7 +723,10 @@
     spin_lock(&hp->lock);
 
     /* Write the proper value into the main counter */
-    hp->hpet.mc64 = hp->mc_offset + guest_time_hpet(hp->vcpu);
+
+    hp->hpet.mc64 = hpet_read_maincounter(hp);
+    rdtscll(hp->hpet.migr_local_tsc);
+    hp->hpet.cpu_khz = cpu_khz;
 
     /* Save the HPET registers */
     rc = _hvm_init_entry(h, HVM_SAVE_CODE(HPET), 0, HVM_SAVE_LENGTH(HPET));
@@ -488,19 +752,70 @@
         C(period[0]);
         C(period[1]);
         C(period[2]);
+       C(vector[0]);
+       C(vector[1]);
+       C(vector[2]);
+       C(last_end_of_intr_mc);
+       C(end_of_intr_mc);
+       C(intr_pending_nr);
+       C(pending_mask);
+       C(delivery_policy);
+       C(phys_period);
+       C(cpu_khz);
+       C(migr_local_tsc);
+       C(intr_counts_last_s);
 #undef C
+       memcpy(rec->intr_counts, hp->hpet.intr_counts, sizeof(hp->hpet.intr_counts));
     }
 
     spin_unlock(&hp->lock);
 
     return rc;
 }
+static int hpet_debug_migr_check_period(struct domain *d, HPETState *hp)
+{
+    unsigned long period, m_period, delta;
+
+    period = read_hpet_period();
+    m_period = (hp->hpet.capability >> 32) & 0xffffffffUL;
+    delta = (period > m_period) ? (period - m_period) : (m_period - period);
+    if(delta) {
+       /* Some hpets report small differences in period. A difference of 1 has been seen.
+        * Allow 100 as that is still 0.00014%, which is small enough.
+        */
+       printk("hpet.capability 0x%lx ((hp->hpet.capability >> 32) & 
0xffffffffUL) 0x%lx period %lx\n",
+              hp->hpet.capability,
+              ((hp->hpet.capability >> 32) & 0xffffffffUL),
+              period);
+       if(delta > 100) {
+           printk("hpet period difference %ld too large\n", delta);
+           return 1;
+       }
+    }
+    return 0;
+}
+#define HPET_MIGR_TICK_ADJUSTMENT 1
+/*
+ * HPET_MIGR_TICK_ADJUSTMENT -
+ *   This corrects for some of the time between hpet save on the sending node
+ *   and hpet load on the receiving node. The correction has been found to be quite small,
+ *   300-400 usec. This adjustment is based on sending a final message in migrate with the
+ *   tsc at send time (last_tsc_sender) in the message. Upon reception the tsc (first_tsc_receiver)
+ *   is recorded. So the only time we are not taking into account is the time the message is in
+ *   transit.
+ *   The reason this adjustment is in here, given how small it is, is that there may be circumstances,
+ *   for example a node heavily loaded with other guests, where the adjustment would be significant.
+ */
 
 static int hpet_load(struct domain *d, hvm_domain_context_t *h)
 {
     HPETState *hp = &d->arch.hvm_domain.pl_time.vhpet;
     struct hvm_hw_hpet *rec;
     int i;
+#ifdef HPET_MIGR_TICK_ADJUSTMENT
+    unsigned long now, dt1, dt2, dt1ticks, dt2ticks, period;
+#endif
+  
 
     spin_lock(&hp->lock);
 
@@ -531,11 +846,50 @@
         C(period[0]);
         C(period[1]);
         C(period[2]);
+       C(vector[0]);
+       C(vector[1]);
+       C(vector[2]);
+       C(last_end_of_intr_mc);
+       C(end_of_intr_mc);
+       C(intr_pending_nr);
+       C(pending_mask);
+       C(delivery_policy);
+       C(phys_period);
+       C(cpu_khz);
+       C(migr_local_tsc);
+       C(intr_counts_last_s);
 #undef C
+
+       memcpy(hp->hpet.intr_counts, rec->intr_counts, sizeof(hp->hpet.intr_counts));
     
-    /* Recalculate the offset between the main counter and guest time */
-    hp->mc_offset = hp->hpet.mc64 - guest_time_hpet(hp->vcpu);
-                
+       /* Recalculate the offset between the main counter and guest time */
+
+       if(hpet_debug_migr_check_period(d, hp)) {
+           spin_unlock(&hp->lock);
+           return -EINVAL;
+       }
+
+#ifdef HPET_MIGR_TICK_ADJUSTMENT
+       period = read_hpet_period();
+       rdtscll(now);
+       /*  dt1 is the time delta on the sending node between the sending of the last migrate message and the call to hpet_save. */
+       dt1 = ((d->last_tsc_sender - hp->hpet.migr_local_tsc) * 1000UL) / hp->hpet.cpu_khz;
+       dt1 = dt1 * 1000UL;
+       dt1ticks = hpet_phys_ns_to_ticks(dt1, period);
+
+       /*  dt2 is the time delta on the receiving node between now (hpet_load) and the reception of the last migrate message. */
+       dt2 = ((now - d->first_tsc_receiver) * 1000UL) / cpu_khz;
+       dt2 = dt2 * 1000UL;
+       dt2ticks = hpet_phys_ns_to_ticks(dt2, period);
+       hp->mc_offset = hp->hpet.mc64 + dt1ticks + dt2ticks - read_64_main_counter();
+#else
+       hp->mc_offset = hp->hpet.mc64 - read_64_main_counter();
+#endif
+
+       if(hp->hpet.delivery_policy == HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS)
+           hvm_register_intr_en_notif(d, hp->hpet.vector[0], HVM_INTR_EN_NOTIF_HPET_MISSED);
+       else if(hp->hpet.delivery_policy == HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS)
+           hvm_register_intr_en_notif(d, hp->hpet.vector[0], HVM_INTR_EN_NOTIF_HPET_NO_MISSED);
+
+                    
     /* Restart the timers */
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
         if ( hpet_enabled(hp) )
@@ -548,6 +902,17 @@
 
 HVM_REGISTER_SAVE_RESTORE(HPET, hpet_save, hpet_load, 1, HVMSR_PER_DOM);
 
+void hpet_notify_timer_mode(struct domain *d, uint64_t value)
+{
+    HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
+
+    if(value == HVM_HPET_guest_computes_missed_ticks)
+       h->hpet.delivery_policy = HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS;
+    else if(value == HVM_HPET_guest_does_not_compute_missed_ticks)
+       h->hpet.delivery_policy = HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS;
+}
+
+
 void hpet_init(struct vcpu *v)
 {
     HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
@@ -557,18 +922,22 @@
 
     spin_lock_init(&h->lock);
 
+    if(hpet_physical_inited)
+       printk("virtual hpet_init: using physical hpet\n");
+    else
+        printk("virtual hpet_init: using simulated hpet\n");
+
     h->vcpu = v;
     h->stime_freq = S_TO_NS;
-
-    h->hpet_to_ns_scale = ((S_TO_NS * STIME_PER_HPET_TICK) << 10) / h->stime_freq;
-    h->hpet_to_ns_limit = ~0ULL / h->hpet_to_ns_scale;
+    h->hpet.phys_period = read_hpet_period();
 
     /* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */
     h->hpet.capability = 0x8086A201ULL;
 
     /* This is the number of femptoseconds per HPET tick. */
     /* Here we define HPET's frequency to be 1/16 of Xen system time */
-    h->hpet.capability |= ((S_TO_FS*STIME_PER_HPET_TICK/h->stime_freq) << 32);
+
+    h->hpet.capability |= read_hpet_period() << 32;
 
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
     {
@@ -577,6 +946,7 @@
         h->hpet.timers[i].cmp = ~0ULL;
         h->timer_fn_info[i].hs = h;
         h->timer_fn_info[i].tn = i;
+       h->hpet.vector[i] = ~0U;
         init_timer(&h->timers[i], hpet_timer_fn, &h->timer_fn_info[i],
                    v->processor);
     }
@@ -590,7 +960,6 @@
     for ( i = 0; i < HPET_TIMER_NUM; i++ )
         kill_timer(&h->timers[i]);
 }
-
 void hpet_reset(struct domain *d)
 {
     hpet_deinit(d);
diff -r ec3493b63170 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -458,6 +458,8 @@
         ctxt.dr6 = vc->debugreg[6];
         ctxt.dr7 = vc->debugreg[7];
 
+        ctxt.int_notif = v->int_notif;
+
         if ( hvm_save_entry(CPU, v->vcpu_id, h, &ctxt) != 0 )
             return 1; 
     }
@@ -612,6 +614,9 @@
 
     vc->flags = VGCF_online;
     v->fpu_initialised = 1;
+
+    v->int_notif = ctxt.int_notif;
+
 
     /* Auxiliary processors should be woken immediately. */
     if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
@@ -2382,8 +2387,9 @@
                 hvm_latch_shinfo_size(d);
                 break;
             case HVM_PARAM_TIMER_MODE:
-                if ( a.value > HVMPTM_one_missed_tick_pending )
+                if ( a.value > HVM_HPET_guest_does_not_compute_missed_ticks )
                     rc = -EINVAL;
+                hpet_notify_timer_mode(d, a.value);
                 break;
             case HVM_PARAM_MIG_LAST_TSC:
                 d->last_tsc_sender = a.value;
@@ -2574,7 +2580,98 @@
 
     return rc;
 }
+/* Interrupt inject and completion notification facility.
+ * Register 'notif_fn', to be called whenever 'vector' is injected (post = 1) or
+ * completed (post = 0). Here completed is when the guest re-enables interrupts.
+ */
 
+
+
+notif_fn_t hvm_intr_en_notif_fn[HVM_INTR_EN_NOTIF_MAX+1] = {(notif_fn_t)0, hpet_intr_en_fn_missed, hpet_intr_en_fn_no_missed};
+int hvm_register_intr_en_notif(struct domain *d, unsigned int notif_vector, int notif_fn_index)
+{
+    intr_en_notif_t *entry;
+    struct list_head *cur;
+    intr_en_notif_t *cur_entry;
+
+    entry = xmalloc(struct intr_en_notif);
+    if(!entry) {
+        printk("hvm_register_intr_en_notif: xmalloc failed\n");
+        return 1;
+    }
+    entry->notif_vector = notif_vector;
+    entry->notif_fn = notif_fn_index;
+    spin_lock(&d->intr_en_notif_lock);
+    list_for_each(cur, &d->intr_en_notif_list) {
+        cur_entry = list_entry(cur, struct intr_en_notif, links);
+        if(cur_entry->notif_vector == notif_vector) {
+            list_del(cur);
+            xfree(cur_entry);
+            break;
+        }
+    }
+    list_add_tail(&entry->links, &d->intr_en_notif_list);
+    set_bit(notif_vector, &d->intr_en_notif_bitmap);    
+    spin_unlock(&d->intr_en_notif_lock);
+    return 0;
+}
+int hvm_unregister_intr_en_notif(struct domain *d, unsigned int notif_vector)
+{
+    struct list_head *cur;
+    intr_en_notif_t *cur_entry;
+
+    spin_lock(&d->intr_en_notif_lock);
+    clear_bit(notif_vector, &d->intr_en_notif_bitmap);
+    list_for_each(cur, &d->intr_en_notif_list) {
+        cur_entry = list_entry(cur, struct intr_en_notif, links);
+        if(cur_entry->notif_vector == notif_vector) {
+            list_del(cur);
+            xfree(cur_entry);
+            break;
+        }
+    }
+    spin_unlock(&d->intr_en_notif_lock);
+    return 0;
+}
+void hvm_intr_en_notif_arm(struct vcpu *v, unsigned int vector)
+{
+    struct list_head *cur;
+    intr_en_notif_t *cur_entry;
+    struct domain *d = v->domain;
+
+    if(test_bit(vector, &d->intr_en_notif_bitmap)) {
+        vcpu_intr_en_notif_t *vi = &v->int_notif;
+
+        spin_lock(&d->intr_en_notif_lock);
+        list_for_each(cur, &d->intr_en_notif_list) {
+            cur_entry = list_entry(cur, struct intr_en_notif, links);
+            if(cur_entry->notif_vector == vector) {
+                vi->intr_en_notif_fn = cur_entry->notif_fn;
+                vi->intr_en_notif_vec = vector;
+                vi->intr_en_notif_state = 0;
+                hvm_intr_en_notif_fn[vi->intr_en_notif_fn](v, vi->intr_en_notif_vec, 1);
+                break;
+            }
+        }
+        spin_unlock(&d->intr_en_notif_lock);
+    }
+    return;
+}
+
+void hvm_intr_en_notif_disarm(struct vcpu *v, int irq_masked)
+{
+    if(v->int_notif.intr_en_notif_fn) {
+        vcpu_intr_en_notif_t *vi = &v->int_notif;
+
+        if(vi->intr_en_notif_state && !(irq_masked || vlapic_tpr_gte_vec(v, vi->intr_en_notif_vec))) {
+            hvm_intr_en_notif_fn[vi->intr_en_notif_fn](v, vi->intr_en_notif_vec, 0);
+            vi->intr_en_notif_fn = 0;
+            vi->intr_en_notif_state = 0;
+        }
+        else if (irq_masked || vlapic_tpr_gte_vec(v, vi->intr_en_notif_vec))
+            vi->intr_en_notif_state = 1;
+    }
+}
 /*
  * Local variables:
  * mode: C
diff -r ec3493b63170 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -88,11 +88,35 @@
     spin_unlock(&d->arch.hvm_domain.irq_lock);
 }
 
+void hvm_isa_irq_assert_cb(
+                          struct domain *d, unsigned int isa_irq,
+                          void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+                          uint64_t cb_arg)
+{
+    struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+    unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
+
+    ASSERT(isa_irq <= 15);
+
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+
+    if ( !__test_and_set_bit(isa_irq, &hvm_irq->isa_irq.i) &&
+         (hvm_irq->gsi_assert_count[gsi]++ == 0) )
+    {
+       vioapic_register_delivered_cb(d, intrs_delivered_cb, cb_arg);  
+        vioapic_irq_positive_edge(d, gsi);
+        vpic_irq_positive_edge(d, isa_irq);
+    }
+
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+}
 void hvm_isa_irq_assert(
     struct domain *d, unsigned int isa_irq)
 {
     struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
     unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
 
     ASSERT(isa_irq <= 15);
 
diff -r ec3493b63170 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c
+++ b/xen/arch/x86/hvm/svm/intr.c
@@ -148,7 +148,10 @@
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     struct hvm_intack intack;
 
+    hvm_intr_en_notif_disarm(v, irq_masked(vmcb->rflags));
+
     /* Crank the handle on interrupt state. */
+
     pt_update_irq(v);
     svm_dirq_assist(v);
 
@@ -177,7 +180,6 @@
             enable_intr_window(v, intack);
             return;
         }
-
         intack = hvm_vcpu_ack_pending_irq(v, intack);
     } while ( intack.source == hvm_intsrc_none );
 
@@ -189,6 +191,7 @@
     {
         HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
         svm_inject_extint(v, intack.vector);
+        hvm_intr_en_notif_arm(v, intack.vector);
         pt_intr_post(v, intack);
     }
 
diff -r ec3493b63170 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c
+++ b/xen/arch/x86/hvm/vioapic.c
@@ -306,7 +306,18 @@
     return pt_active(&pit->pt0);
 }
 
-static void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq)
+void vioapic_register_delivered_cb(struct domain *d, void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+                                  uint64_t cb_arg)
+{
+    struct hvm_hw_vioapic *vioapic = domain_vioapic(d);
+
+    ASSERT(spin_is_locked(&vioapic_domain(vioapic)->arch.hvm_domain.irq_lock));
+
+    vioapic->intrs_delivered_cb = intrs_delivered_cb;
+    vioapic->cb_arg = cb_arg;
+}
+
+void vioapic_deliver(struct hvm_hw_vioapic *vioapic, int irq)
 {
     uint16_t dest = vioapic->redirtbl[irq].fields.dest_id;
     uint8_t dest_mode = vioapic->redirtbl[irq].fields.dest_mode;
@@ -314,6 +325,7 @@
     uint8_t vector = vioapic->redirtbl[irq].fields.vector;
     uint8_t trig_mode = vioapic->redirtbl[irq].fields.trig_mode;
     uint32_t deliver_bitmask;
+    uint32_t deliver_bitmask_final = (uint32_t)0;
     struct vlapic *target;
     struct vcpu *v;
 
@@ -348,6 +360,11 @@
                                       vector, deliver_bitmask);
         if ( target != NULL )
         {
+           set_bit(vlapic_vcpu(target)->vcpu_id, &deliver_bitmask_final);
+           if(vioapic->intrs_delivered_cb) {
+               (*vioapic->intrs_delivered_cb)(vioapic->cb_arg, deliver_bitmask_final);
+               vioapic->intrs_delivered_cb = (void *)0;
+           }
             ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
         }
         else
@@ -362,25 +379,23 @@
     case dest_Fixed:
     {
         uint8_t bit;
+       if(vioapic->intrs_delivered_cb) {
+           (*vioapic->intrs_delivered_cb)(vioapic->cb_arg, deliver_bitmask);
+           vioapic->intrs_delivered_cb = 0;
+       }
         for ( bit = 0; deliver_bitmask != 0; bit++ )
         {
             if ( !(deliver_bitmask & (1 << bit)) )
                 continue;
             deliver_bitmask &= ~(1 << bit);
-#ifdef IRQ0_SPECIAL_ROUTING
-            /* Do not deliver timer interrupts to VCPU != 0 */
-            if ( (irq == hvm_isa_irq_to_gsi(0)) && pit_channel0_enabled() )
-                v = vioapic_domain(vioapic)->vcpu[0];
-            else
-#endif
-                v = vioapic_domain(vioapic)->vcpu[bit];
-            if ( v != NULL )
-            {
-                target = vcpu_vlapic(v);
-                ioapic_inj_irq(vioapic, target, vector,
-                               trig_mode, delivery_mode);
-            }
-        }
+           v = vioapic_domain(vioapic)->vcpu[bit];
+           /* ioapic_get_delivery_bitmask guarantees that v is never NULL. */
+           if( v != NULL ) {
+               target = vcpu_vlapic(v);
+               ioapic_inj_irq(vioapic, target, vector,
+                              trig_mode, delivery_mode);
+           }
+       }   
         break;
     }
 
@@ -404,6 +419,23 @@
                  delivery_mode);
         break;
     }
+}
+
+int vioapic_get_vector(struct domain *d, unsigned int isa_irq, unsigned int *vector)
+{
+    struct hvm_hw_vioapic *vioapic = domain_vioapic(d);
+    union vioapic_redir_entry *ent;
+    int ret = 1;
+    unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);
+
+    spin_lock(&d->arch.hvm_domain.irq_lock);
+    ent = &vioapic->redirtbl[gsi];
+    if ( !ent->fields.mask ) {
+       *vector = ent->fields.vector;
+       ret = 0;
+    }
+    spin_unlock(&d->arch.hvm_domain.irq_lock);
+    return ret;
 }
 
 void vioapic_irq_positive_edge(struct domain *d, unsigned int irq)
diff -r ec3493b63170 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -113,7 +113,6 @@
 /*
  * IRR-specific bitmap update & search routines.
  */
-
 static int vlapic_test_and_set_irr(int vector, struct vlapic *vlapic)
 {
     return vlapic_test_and_set_vector(vector, &vlapic->regs->data[APIC_IRR]);
@@ -165,6 +164,12 @@
                 vlapic, ppr, isr, isrv);
 
     return ppr;
+}
+bool_t vlapic_tpr_gte_vec(struct vcpu *v, int vector)
+{
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    
+    return ((vector & 0xf0) <= vlapic_get_ppr(vlapic));
 }
 
 int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda)
diff -r ec3493b63170 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -163,6 +163,10 @@
     struct vcpu *v = current;
     unsigned int tpr_threshold = 0;
     enum hvm_intblk intblk;
+    unsigned long eflags;
+
+    eflags = __vmread(GUEST_RFLAGS);
+    hvm_intr_en_notif_disarm(v, irq_masked(eflags));
 
     /* Crank the handle on interrupt state. */
     pt_update_irq(v);
@@ -200,6 +204,7 @@
     {
         HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
         vmx_inject_extint(v, intack.vector);
+        hvm_intr_en_notif_arm(v, intack.vector);
         pt_intr_post(v, intack);
     }
 
diff -r ec3493b63170 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -36,6 +36,7 @@
 string_param("clocksource", opt_clocksource);
 
 #define EPOCH MILLISECS(1000)
+#define HPET_PERIOD_SIMULATED 0x429b17fUL
 
 unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
 DEFINE_SPINLOCK(rtc_lock);
@@ -348,6 +349,7 @@
     return hpet_read32(HPET_COUNTER);
 }
 
+int hpet_physical_inited = 0;
 static int init_hpet(struct platform_timesource *pts)
 {
     u64 hpet_rate = hpet_setup();
@@ -359,6 +361,8 @@
     pts->frequency = hpet_rate;
     pts->read_counter = read_hpet_count;
     pts->counter_bits = 32;
+
+    hpet_physical_inited = 1;
 
     return 1;
 }
@@ -500,6 +504,64 @@
 
     return stime;
 }
+static int hpet_main_counter_phys_avoid_hdw = 0;
+boolean_param("hpet_avoid", hpet_main_counter_phys_avoid_hdw);
+static unsigned long hpet_main_counter_phys_avoid_hdw_period;
+#define hpet_phys_ns_to_ticks(ns, period) hpet_mult_div(ns, 1000000UL, period)
+#define hpet_tick_to_ns(tick, period) hpet_mult_div(tick, period, 1000000UL)
+#define TSC_TO_NSEC(tsc) hpet_mult_div(tsc, 1000000UL, cpu_khz)
+
+typedef struct {
+    spinlock_t lock;
+    s_time_t last_ret;
+} get_s_time_mono_t;
+
+static get_s_time_mono_t get_s_time_mon;
+
+static void get_s_time_mono_init(void)
+{
+    spin_lock_init(&get_s_time_mon.lock);
+}
+
+u64 read_64_main_counter(void)
+{
+    u64 count;
+    unsigned long flags;
+    struct cpu_time *t = &this_cpu(cpu_time);
+    u64 tsc, delta;
+    s_time_t now;
+
+    if(hpet_main_counter_phys_avoid_hdw || !hpet_physical_inited) {
+        spin_lock(&get_s_time_mon.lock);
+        rdtscll(tsc);
+        delta = tsc - t->local_tsc_stamp;
+        now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
+        if(now > get_s_time_mon.last_ret)
+            get_s_time_mon.last_ret = now;
+        else
+            now = get_s_time_mon.last_ret;
+        spin_unlock(&get_s_time_mon.lock);
+        if(!hpet_main_counter_phys_avoid_hdw_period)
+            hpet_main_counter_phys_avoid_hdw_period = read_hpet_period();
+        count = hpet_phys_ns_to_ticks(now, hpet_main_counter_phys_avoid_hdw_period);
+    }
+    else {
+        spin_lock_irqsave(&platform_timer_lock, flags);
+        count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
+        spin_unlock_irqrestore(&platform_timer_lock, flags);
+    }
+    return count;
+}
+u64 read_hpet_period(void)
+{
+    unsigned long period;
+
+    if(hpet_physical_inited)
+        period = (unsigned long)hpet_read32(HPET_PERIOD);
+    else
+        period = HPET_PERIOD_SIMULATED;
+    return period;
+}
 
 static void platform_time_calibration(void)
 {
@@ -559,6 +621,7 @@
     plt_overflow(NULL);
 
     platform_timer_stamp = plt_stamp64;
+    get_s_time_mono_init();
 
     printk("Platform timer is %s %s\n",
            freq_string(pts->frequency), pts->name);
diff -r ec3493b63170 xen/common/domain.c
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -96,6 +96,8 @@
     spin_lock_init(&d->hypercall_deadlock_mutex);
     INIT_LIST_HEAD(&d->page_list);
     INIT_LIST_HEAD(&d->xenpage_list);
+    spin_lock_init(&d->intr_en_notif_lock);
+    INIT_LIST_HEAD(&d->intr_en_notif_list);
 
     return d;
 }
diff -r ec3493b63170 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -23,6 +23,7 @@
 
 #include <asm/current.h>
 #include <asm/x86_emulate.h>
+#include <asm/processor.h>
 #include <public/domctl.h>
 #include <public/hvm/save.h>
 
@@ -218,10 +219,20 @@
     hvm_funcs.set_segment_register(v, seg, reg);
 }
 
+static inline int irq_masked(unsigned long eflags)
+{
+    return ((eflags & X86_EFLAGS_IF) == 0);
+}
+
 void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                    unsigned int *ecx, unsigned int *edx);
 void hvm_migrate_timers(struct vcpu *v);
 void hvm_do_resume(struct vcpu *v);
+
+int hvm_register_intr_en_notif(struct domain *d, unsigned int notif_vector, int notif_fn_index);
+int hvm_unregister_intr_en_notif(struct domain *d, unsigned int notif_vector);
+void hvm_intr_en_notif_arm(struct vcpu *v, unsigned int vector);
+void hvm_intr_en_notif_disarm(struct vcpu *v, int irq_masked);
 
 static inline void
 hvm_inject_exception(unsigned int trapnr, int errcode, unsigned long cr2)
diff -r ec3493b63170 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -160,6 +160,10 @@
     struct domain *d, unsigned int isa_irq);
 void hvm_isa_irq_deassert(
     struct domain *d, unsigned int isa_irq);
+void hvm_isa_irq_assert_cb(
+                          struct domain *d, unsigned int isa_irq,
+                          void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+                          uint64_t cb_arg);
 
 void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
 
diff -r ec3493b63170 xen/include/asm-x86/hvm/vioapic.h
--- a/xen/include/asm-x86/hvm/vioapic.h
+++ b/xen/include/asm-x86/hvm/vioapic.h
@@ -66,5 +66,7 @@
 void vioapic_reset(struct domain *d);
 void vioapic_irq_positive_edge(struct domain *d, unsigned int irq);
 void vioapic_update_EOI(struct domain *d, int vector);
-
+int vioapic_get_vector(struct domain *d, unsigned int isa_irq, unsigned int *vector);
+void vioapic_register_delivered_cb(struct domain *d, void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered),
+                                  uint64_t cb_arg);
 #endif /* __ASM_X86_HVM_VIOAPIC_H__ */
diff -r ec3493b63170 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -98,4 +98,6 @@
 
 int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda);
 
+bool_t vlapic_tpr_gte_vec(struct vcpu *v, int vector);
+
 #endif /* __ASM_X86_HVM_VLAPIC_H__ */
diff -r ec3493b63170 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -50,23 +50,45 @@
         uint64_t fsb;           /* FSB route, not supported now */
     } timers[HPET_TIMER_NUM];
 
-    /* Hidden register state */
+    /* The rest of this struct is hidden register state */
+
+    /* Per timer state */
     uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+    uint32_t vector[HPET_TIMER_NUM];
+
+    /* Timer 0 (clock) specific state */
+ 
+    uint64_t last_end_of_intr_mc;
+    uint64_t end_of_intr_mc;
+    uint64_t intr_pending_nr;
+    uint64_t pending_mask;
+    uint32_t delivery_policy;
+ 
+    /* Global state */
+ 
+    uint64_t phys_period;
+    uint64_t cpu_khz;
+    uint64_t migr_local_tsc;
+ 
+    /* Debug */
+ 
+    uint64_t intr_counts[INTR_CNT_BUCKETS];
+    uint64_t intr_counts_last_s;
 };
 
 typedef struct HPETState {
     struct hpet_registers hpet;
     struct vcpu *vcpu;
     uint64_t stime_freq;
-    uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
-    uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns      */
     uint64_t mc_offset;
     struct timer timers[HPET_TIMER_NUM];
     struct HPET_timer_fn_info timer_fn_info[HPET_TIMER_NUM]; 
     spinlock_t lock;
 } HPETState;
 
-
+void hpet_intr_en_fn_missed(struct vcpu *v, unsigned int vector, unsigned int post);
+void hpet_intr_en_fn_no_missed(struct vcpu *v, unsigned int vector, unsigned int post);
+void hpet_notify_timer_mode(struct domain *d, uint64_t value);
 /*
  * Abstract layer of periodic time, one short time.
  */
diff -r ec3493b63170 xen/include/public/arch-x86/hvm/save.h
--- a/xen/include/public/arch-x86/hvm/save.h
+++ b/xen/include/public/arch-x86/hvm/save.h
@@ -47,6 +47,12 @@
 /*
  * Processor
  */
+
+typedef struct vcpu_intr_en_notif {
+    int intr_en_notif_fn;
+    int intr_en_notif_state;
+    unsigned int intr_en_notif_vec;
+} vcpu_intr_en_notif_t;
 
 struct hvm_hw_cpu {
     uint8_t  fpu_regs[512];
@@ -156,6 +162,7 @@
     };
     /* error code for pending event */
     uint32_t error_code;
+    vcpu_intr_en_notif_t int_notif;
 };
 
 DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu);
@@ -253,6 +260,8 @@
 #endif
         } fields;
     } redirtbl[VIOAPIC_NUM_PINS];
+    void (*intrs_delivered_cb)(uint64_t cb_arg, uint32_t intrs_delivered);
+    uint64_t cb_arg;
 };
 
 DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
@@ -366,6 +375,11 @@
  * HPET
  */
 
+#define HPET_DEL_POLICY_GUEST_COMPUTES_MISSED_TICKS 0 /* Linux */
+#define HPET_DEL_POLICY_GUEST_DOES_NOT_COMPUTE_MISSED_TICKS 1 /* Windows */
+#define HPET_DEL_POLICY_NUMS 2
+#define INTR_CNT_BUCKETS 20
+
 #define HPET_TIMER_NUM     3    /* 3 timers supported now */
 struct hvm_hw_hpet {
     /* Memory-mapped, software visible registers */
@@ -385,8 +399,30 @@
     } timers[HPET_TIMER_NUM];
     uint64_t res5[4*(24-HPET_TIMER_NUM)];  /* reserved, up to 0x3ff */
 
-    /* Hidden register state */
+    /* The rest of this struct is hidden register state */
+
+    /* Per timer state */
     uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+    uint32_t vector[HPET_TIMER_NUM];
+
+    /* Timer 0 (clock) specific state */
+ 
+    uint64_t last_end_of_intr_mc;
+    uint64_t end_of_intr_mc;
+    uint64_t intr_pending_nr;
+    uint64_t pending_mask;
+    uint32_t delivery_policy;
+ 
+    /* Global state */
+ 
+    uint64_t phys_period;
+    uint64_t cpu_khz;
+    uint64_t migr_local_tsc;
+ 
+    /* Debug */
+ 
+    uint64_t intr_counts[INTR_CNT_BUCKETS];
+    uint64_t intr_counts_last_s;
 };
 
 DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet);
diff -r ec3493b63170 xen/include/public/hvm/params.h
--- a/xen/include/public/hvm/params.h
+++ b/xen/include/public/hvm/params.h
@@ -80,6 +80,8 @@
 #define HVMPTM_no_delay_for_missed_ticks 1
 #define HVMPTM_no_missed_ticks_pending   2
 #define HVMPTM_one_missed_tick_pending   3
+#define HVM_HPET_guest_computes_missed_ticks  4
+#define HVM_HPET_guest_does_not_compute_missed_ticks  5
 
 /* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
 #define HVM_PARAM_HPET_ENABLED 11
diff -r ec3493b63170 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -69,6 +69,19 @@
 int  evtchn_init(struct domain *d);
 void evtchn_destroy(struct domain *d);
 
+typedef void (*notif_fn_t)(struct vcpu *v, unsigned int vector, unsigned int post);
+
+#define HVM_INTR_EN_NOTIF_UNUSED 0
+#define HVM_INTR_EN_NOTIF_HPET_MISSED 1
+#define HVM_INTR_EN_NOTIF_HPET_NO_MISSED 2
+#define HVM_INTR_EN_NOTIF_MAX 2
+
+typedef struct intr_en_notif {
+    struct list_head links;
+    unsigned int notif_vector;
+    int notif_fn;
+} intr_en_notif_t;
+
 struct vcpu 
 {
     int              vcpu_id;
@@ -135,6 +148,7 @@
     cpumask_t        vcpu_dirty_cpumask;
 
     struct arch_vcpu arch;
+    vcpu_intr_en_notif_t int_notif;
 };
 
 /* Per-domain lock can be recursively acquired in fault handlers. */
@@ -232,6 +246,10 @@
     int32_t time_offset_seconds;
 
     struct rcu_head rcu;
+ 
+    spinlock_t intr_en_notif_lock;
+    unsigned long intr_en_notif_bitmap[(MAX_VECTOR/sizeof(unsigned long))+1];
+    struct list_head intr_en_notif_list;
 
     unsigned long last_tsc_sender;
     unsigned long first_tsc_receiver;
@@ -508,6 +526,18 @@
     if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
         vcpu_wake(v);
 }
+/*
+ * compute (var*num)/den where var*num may overflow 64 bits
+ */
+static inline uint64_t hpet_mult_div(uint64_t var, uint64_t num, uint64_t den)
+{
+    uint64_t result, q, r;
+
+    q = var / den;
+    r = var % den;
+    result = (q * num) + (r * num) / den;
+    return result;
+}
 
 #define IS_PRIV(_d) ((_d)->is_privileged)
 #define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
diff -r ec3493b63170 xen/include/xen/time.h
--- a/xen/include/xen/time.h
+++ b/xen/include/xen/time.h
@@ -61,6 +61,11 @@
 
 extern void send_timer_event(struct vcpu *v);
 
+u64 read_64_main_counter(void);
+u64 read_hpet_period(void);
+
+extern int hpet_physical_inited;
+
 #endif /* __XEN_TIME_H__ */
 
 /*