[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] Re: [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
On Fri, 2011-05-06 at 19:25 +0100, Olaf Hering wrote: > # HG changeset patch > # User Olaf Hering <olaf@xxxxxxxxx> > # Date 1304700881 -7200 > # Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162 > # Parent 1631b61acaa8e88437d0f1861409ab1824de2721 > Update __insert_record() to copy the trace record to individual mfns. > This is a prereq before changing the per-cpu allocation from contiguous > to non-contiguous allocation. > > Signed-off-by: Olaf Hering <olaf@xxxxxxxxx> > > diff -r 1631b61acaa8 -r 1c5da4d9e33c xen/common/trace.c > --- a/xen/common/trace.c Fri May 06 17:56:35 2011 +0200 > +++ b/xen/common/trace.c Fri May 06 18:54:41 2011 +0200 > @@ -52,7 +52,6 @@ static struct t_info *t_info; > static unsigned int t_info_pages; > > static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs); > -static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data); > static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock); > static u32 data_size __read_mostly; > > @@ -193,7 +192,6 @@ static int alloc_trace_bufs(unsigned int > > per_cpu(t_bufs, cpu) = buf = rawbuf; > buf->cons = buf->prod = 0; > - per_cpu(t_data, cpu) = (unsigned char *)(buf + 1); > } > > offset = t_info_first_offset; > @@ -457,10 +455,16 @@ static inline u32 calc_bytes_avail(const > return data_size - calc_unconsumed_bytes(buf); > } > > -static inline struct t_rec *next_record(const struct t_buf *buf, > - uint32_t *next) > +static unsigned char *next_record(const struct t_buf *buf, uint32_t *next, > + unsigned char **next_page, > + uint32_t *offset_in_page) > { > u32 x = buf->prod, cons = buf->cons; > + uint32_t per_cpu_mfn_offset; > + uint32_t per_cpu_mfn_nr; > + uint32_t *mfn_list; > + uint32_t mfn; > + unsigned char *this_page; > > barrier(); /* must read buf->prod and buf->cons only once */ > *next = x; > @@ -472,7 +476,27 @@ static inline struct t_rec *next_record( > > ASSERT(x < data_size); > > - return (struct t_rec *)&this_cpu(t_data)[x]; > + /* add leading header to get total offset of next record */ > + x += 
sizeof(struct t_buf); > + *offset_in_page = x % PAGE_SIZE; > + > + /* offset into array of mfns */ > + per_cpu_mfn_nr = x / PAGE_SIZE; > + per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()]; > + mfn_list = (uint32_t *)t_info; > + mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr]; > + this_page = mfn_to_virt(mfn); > + if (per_cpu_mfn_nr + 1 >= opt_tbuf_size) > + { > + /* reached end of buffer? */ > + *next_page = NULL; > + } > + else > + { > + mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1]; > + *next_page = mfn_to_virt(mfn); > + } > + return this_page; > } General approach here looks good, but I'm wondering if there's a way to reduce the math here. The amount of work done for each trace record posted is really getting pretty big. I guess first of all the '%' and '/' should be &(PAGE_SIZE-1) and >>(PAGE_SHIFT), respectively. Would it make sense to pre-cache the virtual address of the various MFNs (i.e., keep a per-cpu virtual address list) rather than doing the calculation each time? That might reduce the number of instructions to find the appropriate virtual addresses. -George > > static inline void __insert_record(struct t_buf *buf, > @@ -482,28 +506,37 @@ static inline void __insert_record(struc > unsigned int rec_size, > const void *extra_data) > { > - struct t_rec *rec; > + struct t_rec split_rec, *rec; > uint32_t *dst; > + unsigned char *this_page, *next_page; > unsigned int extra_word = extra / sizeof(u32); > unsigned int local_rec_size = calc_rec_size(cycles, extra); > uint32_t next; > + uint32_t offset; > + uint32_t remaining; > > BUG_ON(local_rec_size != rec_size); > BUG_ON(extra & 3); > > - rec = next_record(buf, &next); > - if ( !rec ) > + this_page = next_record(buf, &next, &next_page, &offset); > + if ( !this_page ) > return; > - /* Double-check once more that we have enough space. > - * Don't bugcheck here, in case the userland tool is doing > - * something stupid. 
*/ > - if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size ) > + > + remaining = PAGE_SIZE - offset; > + > + if ( unlikely(rec_size > remaining) ) > { > - if ( printk_ratelimit() ) > + if ( next_page == NULL ) > + { > + /* access beyond end of buffer */ > printk(XENLOG_WARNING > - "%s: size=%08x prod=%08x cons=%08x rec=%u\n", > - __func__, data_size, next, buf->cons, rec_size); > - return; > + "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n", > + __func__, data_size, next, buf->cons, rec_size, > remaining); > + return; > + } > + rec = &split_rec; > + } else { > + rec = (struct t_rec*)(this_page + offset); > } > > rec->event = event; > @@ -520,6 +553,12 @@ static inline void __insert_record(struc > if ( extra_data && extra ) > memcpy(dst, extra_data, extra); > > + if ( unlikely(rec_size > remaining) ) > + { > + memcpy(this_page + offset, rec, remaining); > + memcpy(next_page, (char *)rec + remaining, rec_size - remaining); > + } > + > wmb(); > > next += rec_size; _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |