[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [RFC] Interrupt latency measurement technique



Hi Jonatan

I didn't pass xen_timer_val to dom0. I stored time counters in the
static buffer in Xen, and after in dom0, and then compare them using
Exel.
During printing of buffer I can do with counters whatever I want -
convert them to nanosecond, etc

Let me show the code which does this in Xen:


diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 39e0f2f..340dfa5 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -847,12 +847,39 @@ static void do_sgi(struct cpu_user_regs *regs,
int othercpu, enum gic_sgi sgi)
     GICC[GICC_DIR] = sgi;
 }

+
+s_time_t ticks_to_ns(uint64_t ticks);
+
+static const u32 test_irq = 57; <-- IRQ, which I profiled (Display
IRQ on my platform)
+static u32 total_irqs;
+
+#define NUM_TICKS_TO_STORE  100
+struct irq_tick {
+    u64 cnt_tick;
+    u64 cnt_off;
+};
+
+static struct irq_tick irq_ticks[NUM_TICKS_TO_STORE]; <-- buffer to store ticks
+static u32 offs;
+
+static void print_irq_ticks(void)
+{
+    u32 i;
+    for ( i = 0; i < NUM_TICKS_TO_STORE; i++ )
+    {
+        printk("[%02u] irq %u tick -> %llu off -> %llu off_ns -> %llu
dom_tick %llu dom_tick_ns -> %llu cpu_khz %lu total %u\n",
+                i, test_irq, irq_ticks[i].cnt_tick,
irq_ticks[i].cnt_off, ticks_to_ns(irq_ticks[i].cnt_off),
+                irq_ticks[i].cnt_tick - irq_ticks[i].cnt_off,
+                ticks_to_ns(irq_ticks[i].cnt_tick -
irq_ticks[i].cnt_off), cpu_khz, total_irqs);
+    }
+}
+
 /* Accept an interrupt from the GIC and dispatch its handler */
 void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
 {
     uint32_t intack;
     unsigned int irq;
-
+    static u32 step;

     do  {
         intack = GICC[GICC_IAR];
@@ -860,6 +887,22 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)

         if ( likely(irq >= 16 && irq < 1021) )
         {
+            if ( irq == test_irq )
+            {
+                total_irqs++;
+                irq_ticks[step].cnt_tick = READ_SYSREG64(CNTPCT_EL0);
 <-- Timer base value
+                irq_ticks[step].cnt_off = READ_SYSREG64(CNTVOFF_EL2);
 <--Time offset for domain, dom0 in my case
+
+                step++;
+                if ( step % NUM_TICKS_TO_STORE == 0 )
+                {
+                    print_irq_ticks();
+                    step = 0;
+                }
+            }

CNTVOFF_EL2 register contains constant value, it is a difference
between tick in Xen and tick in dom0. If you subtract base tick value
and this offset you'll get tick value in dom0.
After, in dom0 you check CNTPCT_EL0 and that's it. You have 2 tick
values for dom0 - one from Xen, second from dom0 itself.

My diff for dom0 contains some overhead - I copy pasted some Xen code there:

diff --git a/drivers/video/omap2/dss/dispc-compat.c
b/drivers/video/omap2/dss/dispc-compat.c
index 4b31058..71c807e 100644
--- a/drivers/video/omap2/dss/dispc-compat.c
+++ b/drivers/video/omap2/dss/dispc-compat.c
@@ -255,6 +255,63 @@ static void print_irq_status(u32 status)
 #undef PIS
 }

+
+#define CNTPCT          p15,0,c14       /* Time counter value */
+#define CNTPCT_EL0      CNTPCT
+
+#define __CP64(r1, r2, coproc, opc, crm) coproc, opc, r1, r2, crm
+#define CP64(r, name...) __CP64(r, name)
+#define LOAD_CP64(r, name...)  "mrrc " __stringify(CP64(%r, %H##r, name)) ";"
+#define READ_CP64(name...) ({                                   \
+     register uint64_t _r;                                       \
+     asm volatile(LOAD_CP64(0, name) : "=r" (_r));               \
+     _r; })
+#define READ_SYSREG64(R...)     READ_CP64(R)
+
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
+       union {
+               u64 ll;
+               struct {
+                       u32 low, high;
+               } l;
+       } u, res;
+       u64 rl, rh;
+
+       u.ll = a;
+       rl = (u64)u.l.low * (u64)b;
+       rh = (u64)u.l.high * (u64)b;
+       rh += (rl >> 32);
+       res.l.high = div64_u64(rh, c);
+       res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+       return res.ll;
+}
+
+static const u32 cpu_khz = 6144;
+static inline s64 ticks_to_ns(uint64_t ticks)
+{
+#define SECONDS(_s)     ((s64)((_s)  * 1000000000ULL))
+
+    return muldiv64(ticks, SECONDS(1), 1000 * cpu_khz);
+}
+
+static const u32 test_irq = 57;
+static u32 total_irqs;
+
+#define NUM_TICKS_TO_STORE  100
+static u64 irq_ticks[NUM_TICKS_TO_STORE];
+
+static void print_irq_ticks(void)
+{
+    u32 i;
+    for ( i = 0; i < NUM_TICKS_TO_STORE; i++ )
+    {
+        printk("(Dom0)[%02u] irq %u tick -> %lld tick_ns %lld cpu_khz
%u total %u\n",
+                i, test_irq, irq_ticks[i], ticks_to_ns(irq_ticks[i]),
cpu_khz, total_irqs);
+    }
+}
+
 /* Called from dss.c. Note that we don't touch clocks here,
  * but we presume they are on because we got an IRQ. However,
  * an irq handler may turn the clocks off, so we may not have
@@ -267,6 +324,17 @@ static irqreturn_t omap_dispc_irq_handler(int
irq, void *arg)
        u32 unhandled_errors;
        struct omap_dispc_isr_data *isr_data;
        struct omap_dispc_isr_data registered_isr[DISPC_MAX_NR_ISRS];
+    static u32 step;
+
+    total_irqs++;
+    irq_ticks[step] = READ_SYSREG64(CNTPCT_EL0);
+
+    step++;
+    if ( step % NUM_TICKS_TO_STORE == 0 )
+    {
+        print_irq_ticks();
+        step = 0;
+    }


It would be nice to know whether this works for you.

Regards,
Andrii

On Fri, Aug 29, 2014 at 9:07 PM, Jonathan Fraser <jfraser@xxxxxxxxxxxx> wrote:
> I need to make the same measurements.
> How did you pass the value of 'xen_timer_val' to the dom0 kernel?  I can
> store it in a scratch hardware register in the SoC, but how did you do
> it?
>
> Thanks,
> Jon
>
>
> On Wed, 2014-08-27 at 13:07 +0300, Andrii Tseglytskyi wrote:
>> Test setup is the following:
>>
>> Platform - Jacinto6 (OMAP5) with two ARMv7 cores (Cortex A15), 1.5 Gb of RAM
>> Hypervisor - Xen 4.4 stable
>> Dom0 - Linux 3.8 running with 2 vcpus, 128 Mb RAM
>> DomU was not launched during the test
>>
>> Regards,
>> Andrii
>>
>>
>> On Wed, Aug 27, 2014 at 12:14 PM, Andrii Tseglytskyi
>> <andrii.tseglytskyi@xxxxxxxxxxxxxxx> wrote:
>> > Hi,
>> >
>> > I need to measure IRQ latency introduced by Xen: (GIC -> Xen IRQ
>> > handler -> Dom0 IRQ handler)
>> > I need to know how many time I spend in Xen IRQ handler.
>> >
>> > Can someone comment - is the following algorithm works:
>> >
>> > - in function xen/arch/arm/gic.c: gic_interrupt() store timer counter 
>> > value:
>> > xen_timer_val = READ_SYSREG64(CNTPCT_EL0) - READ_SYSREG64(CNTVOFF_EL2)
>> >
>> > - in any IRQ handler in dom0 store timer counter value:
>> > dom0_timer_val = READ_SYSREG64(CNTPCT_EL0)
>> >
>> > - calculate time diff in nanoseconds:
>> > time_diff_ns = ticks_to_ns(dom0_timer_val - xen_timer_val)
>> >
>> > Using this technique I measured display IRQ latency and got about
>> > - 20 to 30 usec latency on 1 GHz MPU frequency
>> > - 10 to 20 usec latencyon 1.5 GHz MPU frequency
>> >
>> > Are these numbers expectable? Can this technique be used?
>> >
>> > Regards,
>> > Andrii
>> >
>> > --
>> >
>> > Andrii Tseglytskyi | Embedded Dev
>> > GlobalLogic
>> > www.globallogic.com
>>
>>
>>
>



-- 

Andrii Tseglytskyi | Embedded Dev
GlobalLogic
www.globallogic.com

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.