[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH] trace security fixes


  • To: Jan Beulich <JBeulich@xxxxxxxxxx>
  • From: George Dunlap <dunlapg@xxxxxxxxx>
  • Date: Tue, 29 Jun 2010 15:21:33 +0100
  • Cc: xen-devel@xxxxxxxxxxxxxxxxxxx
  • Delivery-date: Tue, 29 Jun 2010 07:22:21 -0700
  • Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=mime-version:sender:in-reply-to:references:date :x-google-sender-auth:message-id:subject:from:to:cc:content-type :content-transfer-encoding; b=xlb2Cqi775w6S7MyLLyA39TP2M8eZsdLyM+iEp69WvXU8YK/6XBG+L9nlZCD5/3aad l6PYkTe39Aup1o7bAtG5JCYyjz6ohtVZni7mpLqVu9zuGpHtoZtm2TmObTWQWLQzAcWf LNT+SudmmZhMJ669C7+pZ4Y53KSYY3e2rZvGU=
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

Jan,

Thanks for your work on this.  Could you please break this patch down
into its component changes (described in the list below) so that I can
review, discuss, and ack/nack components individually?  (And so they
show up as individual changesets when someone's doing archeology...)

So far I agree with the following changes:
> - there's no need to share writably the t_info pages (Dom0 only wants
>  [and needs] to read it)
> - T_INFO_FIRST_OFFSET wasn't defined correctly, thus allowing in the
>  num_online_cpus() == NR_CPUS case to pass a corrupted MFN to Dom0
> - printk()-s should be lower level or rate limited

But probably disagree with this one:
> - the hypervisor should not rely on any specific state of the actual
>  trace buffer (as it is shared writable with Dom0)

And would like to be able to see the impact of the following more
clearly (i.e., in separate patches):
> - check_tbuf_size() didn't consider the case of the incoming size not
>  allowing for the 2*data_size range for t_buf->{prod,cons}

> To make clear what purpose specific variables have and/or where they
> got loaded from, the patch also changes the type of some of them to
> be explicitly u32/s32, and removes pointless assertions (like checking
> an unsigned variable to be >= 0).
>
> I also took the prototype adjustment of __trace_var() as an opportunity
> to simplify the TRACE_xD() macros. Similar simplification could be done
> on the (quite numerous) direct callers of the function.

Thanks again,
 -George

>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
>
> --- 2010-06-15.orig/tools/xenmon/xenbaked.c     2010-06-01 13:39:57.000000000 
> +0200
> +++ 2010-06-15/tools/xenmon/xenbaked.c  2010-06-28 10:23:01.000000000 +0200
> @@ -84,7 +84,7 @@ typedef struct settings_st {
>  } settings_t;
>
>  struct t_struct {
> -    struct t_info *t_info;  /* Structure with information about individual 
> buffers */
> +    const struct t_info *t_info; /* Structure with information about 
> individual buffers */
>     struct t_buf **meta;    /* Pointers to trace buffer metadata */
>     unsigned char **data;   /* Pointers to trace buffer data areas */
>  };
> @@ -376,9 +376,8 @@ static struct t_struct *map_tbufs(unsign
>     }
>
>     /* Map t_info metadata structure */
> -    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN,
> -                                        tinfo_size, PROT_READ | PROT_WRITE,
> -                                        tbufs_mfn);
> +    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
> +                                        PROT_READ, tbufs_mfn);
>
>     if ( tbufs.t_info == 0 )
>     {
> @@ -404,7 +403,8 @@ static struct t_struct *map_tbufs(unsign
>     for(i=0; i<num; i++)
>     {
>
> -        uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + 
> tbufs.t_info->mfn_offset[i];
> +        const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
> +                                   + tbufs.t_info->mfn_offset[i];
>         int j;
>         xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
>
> --- 2010-06-15.orig/tools/xentrace/xentrace.c   2010-06-01 13:39:57.000000000 
> +0200
> +++ 2010-06-15/tools/xentrace/xentrace.c        2010-06-28 10:23:26.000000000 
> +0200
> @@ -63,7 +63,7 @@ typedef struct settings_st {
>  } settings_t;
>
>  struct t_struct {
> -    struct t_info *t_info;  /* Structure with information about individual 
> buffers */
> +    const struct t_info *t_info; /* Structure with information about 
> individual buffers */
>     struct t_buf **meta;    /* Pointers to trace buffer metadata */
>     unsigned char **data;   /* Pointers to trace buffer data areas */
>  };
> @@ -475,9 +475,8 @@ static struct t_struct *map_tbufs(unsign
>     int i;
>
>     /* Map t_info metadata structure */
> -    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN,
> -                                        tinfo_size, PROT_READ | PROT_WRITE,
> -                                        tbufs_mfn);
> +    tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
> +                                        PROT_READ, tbufs_mfn);
>
>     if ( tbufs.t_info == 0 )
>     {
> @@ -503,7 +502,8 @@ static struct t_struct *map_tbufs(unsign
>     for(i=0; i<num; i++)
>     {
>
> -        uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + 
> tbufs.t_info->mfn_offset[i];
> +        const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
> +                                   + tbufs.t_info->mfn_offset[i];
>         int j;
>         xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
>
> --- 2010-06-15.orig/xen/common/trace.c  2010-06-28 09:43:58.000000000 +0200
> +++ 2010-06-15/xen/common/trace.c       2010-06-28 11:58:37.000000000 +0200
> @@ -51,15 +51,15 @@ static struct t_info *t_info;
>  #define T_INFO_PAGES 2  /* Size fixed at 2 pages for now. */
>  #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
>  /* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t 
> */
> -#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 
> 1) / sizeof(uint32_t))
> -static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
> +#define T_INFO_FIRST_OFFSET (((2 + NR_CPUS) * sizeof(uint16_t)) / 
> sizeof(uint32_t))
> +static DEFINE_PER_CPU_READ_MOSTLY(volatile struct t_buf *, t_bufs);
>  static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
>  static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
> -static int data_size;
> +static u32 data_size;
>
>  /* High water mark for trace buffers; */
>  /* Send virtual interrupt when buffer level reaches this point */
> -static int t_buf_highwater;
> +static u32 t_buf_highwater;
>
>  /* Number of records lost due to per-CPU trace buffer being full. */
>  static DEFINE_PER_CPU(unsigned long, lost_records);
> @@ -77,11 +77,16 @@ static u32 tb_event_mask = TRC_ALL;
>
>  /**
>  * check_tbuf_size - check to make sure that the proposed size will fit
> - * in the currently sized struct t_info.
> + * in the currently sized struct t_info and allows prod and cons to
> + * reach double the value without overflow.
>  */
> -static inline int check_tbuf_size(int size)
> +static int check_tbuf_size(u32 pages)
>  {
> -    return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / 
> sizeof(uint32_t));
> +    u32 size = pages * PAGE_SIZE;
> +
> +    return (size / PAGE_SIZE != pages) || (size + size < size) ||
> +           (num_online_cpus() * pages + T_INFO_FIRST_OFFSET >
> +            T_INFO_SIZE / sizeof(uint32_t));
>  }
>
>  /**
> @@ -115,7 +120,7 @@ static int alloc_trace_bufs(void)
>     }
>
>     t_info->tbuf_size = opt_tbuf_size;
> -    printk("tbuf_size %d\n", t_info->tbuf_size);
> +    printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size);
>
>     nr_pages = opt_tbuf_size;
>     order = get_order_from_pages(nr_pages);
> @@ -139,7 +144,7 @@ static int alloc_trace_bufs(void)
>
>         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
>
> -        buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
> +        per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
>         buf->cons = buf->prod = 0;
>         per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
>
> @@ -171,7 +176,7 @@ static int alloc_trace_bufs(void)
>             /* Write list first, then write per-cpu offset. */
>             wmb();
>             t_info->mfn_offset[cpu]=offset;
> -            printk("p%d mfn %"PRIx32" offset %d\n",
> +            printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
>                    cpu, mfn, offset);
>             offset+=i;
>         }
> @@ -190,6 +195,7 @@ out_dealloc:
>         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
>         if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
>         {
> +            per_cpu(t_bufs, cpu) = NULL;
>             ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
>             free_xenheap_pages(rawbuf, order);
>         }
> @@ -308,7 +314,7 @@ void __init init_trace_bufs(void)
>
>     for(i=0; i<T_INFO_PAGES; i++)
>         share_xen_page_with_privileged_guests(
> -            virt_to_page(t_info) + i, XENSHARE_writable);
> +            virt_to_page(t_info) + i, XENSHARE_readonly);
>
>     if ( opt_tbuf_size == 0 )
>     {
> @@ -404,19 +410,37 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
>     return rc;
>  }
>
> -static inline int calc_rec_size(int cycles, int extra)
> +static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra)
>  {
> -    int rec_size;
> -    rec_size = 4;
> +    unsigned int rec_size = 4;
> +
>     if ( cycles )
>         rec_size += 8;
>     rec_size += extra;
>     return rec_size;
>  }
>
> -static inline int calc_unconsumed_bytes(struct t_buf *buf)
> +static inline bool_t bogus(u32 prod, u32 cons)
> +{
> +    if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
> +         unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
> +    {
> +        tb_init_done = 0;
> +        printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons 
> (%08x)\n",
> +               smp_processor_id(), prod, cons);
> +        return 1;
> +    }
> +    return 0;
> +}
> +
> +static inline u32 calc_unconsumed_bytes(const volatile struct t_buf *buf)
>  {
> -    int x = buf->prod - buf->cons;
> +    u32 prod = buf->prod, cons = buf->cons;
> +    s32 x = prod - cons;
> +
> +    if ( bogus(prod, cons) )
> +        return data_size;
> +
>     if ( x < 0 )
>         x += 2*data_size;
>
> @@ -426,9 +450,14 @@ static inline int calc_unconsumed_bytes(
>     return x;
>  }
>
> -static inline int calc_bytes_to_wrap(struct t_buf *buf)
> +static inline u32 calc_bytes_to_wrap(const volatile struct t_buf *buf)
>  {
> -    int x = data_size - buf->prod;
> +    u32 prod = buf->prod;
> +    s32 x = data_size - prod;
> +
> +    if ( bogus(prod, buf->cons) )
> +        return 0;
> +
>     if ( x <= 0 )
>         x += data_size;
>
> @@ -438,35 +467,37 @@ static inline int calc_bytes_to_wrap(str
>     return x;
>  }
>
> -static inline int calc_bytes_avail(struct t_buf *buf)
> +static inline u32 calc_bytes_avail(const volatile struct t_buf *buf)
>  {
>     return data_size - calc_unconsumed_bytes(buf);
>  }
>
> -static inline struct t_rec *
> -next_record(struct t_buf *buf)
> +static inline struct t_rec *next_record(const volatile struct t_buf *buf)
>  {
> -    int x = buf->prod;
> +    u32 x = buf->prod;
> +
> +    if ( !tb_init_done || bogus(x, buf->cons) )
> +        return NULL;
> +
>     if ( x >= data_size )
>         x -= data_size;
>
> -    ASSERT(x >= 0);
>     ASSERT(x < data_size);
>
>     return (struct t_rec *)&this_cpu(t_data)[x];
>  }
>
> -static inline int __insert_record(struct t_buf *buf,
> -                                  unsigned long event,
> -                                  int extra,
> -                                  int cycles,
> -                                  int rec_size,
> -                                  unsigned char *extra_data)
> +static inline void __insert_record(volatile struct t_buf *buf,
> +                                   unsigned long event,
> +                                   unsigned int extra,
> +                                   bool_t cycles,
> +                                   unsigned int rec_size,
> +                                   const void *extra_data)
>  {
>     struct t_rec *rec;
>     unsigned char *dst;
> -    unsigned long extra_word = extra/sizeof(u32);
> -    int local_rec_size = calc_rec_size(cycles, extra);
> +    unsigned int extra_word = extra / sizeof(u32);
> +    unsigned int local_rec_size = calc_rec_size(cycles, extra);
>     uint32_t next;
>
>     BUG_ON(local_rec_size != rec_size);
> @@ -475,17 +506,20 @@ static inline int __insert_record(struct
>     /* Double-check once more that we have enough space.
>      * Don't bugcheck here, in case the userland tool is doing
>      * something stupid. */
> -    if ( calc_bytes_avail(buf) < rec_size )
> +    next = calc_bytes_avail(buf);
> +    if ( next < rec_size )
>     {
> -        printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
> -               __func__,
> -               calc_bytes_avail(buf),
> -               data_size, buf->prod, buf->cons, data_size, rec_size);
> -        return 0;
> +        if ( printk_ratelimit() )
> +            printk(XENLOG_WARNING
> +                   "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n",
> +                   __func__, next, data_size, buf->prod, buf->cons, 
> rec_size);
> +        return;
>     }
>     rmb();
>
>     rec = next_record(buf);
> +    if ( !rec )
> +        return;
>     rec->event = event;
>     rec->extra_u32 = extra_word;
>     dst = (unsigned char *)rec->u.nocycles.extra_u32;
> @@ -502,21 +536,22 @@ static inline int __insert_record(struct
>
>     wmb();
>
> -    next = buf->prod + rec_size;
> +    next = buf->prod;
> +    if ( bogus(next, buf->cons) )
> +        return;
> +    next += rec_size;
>     if ( next >= 2*data_size )
>         next -= 2*data_size;
> -    ASSERT(next >= 0);
>     ASSERT(next < 2*data_size);
>     buf->prod = next;
> -
> -    return rec_size;
>  }
>
> -static inline int insert_wrap_record(struct t_buf *buf, int size)
> +static inline void insert_wrap_record(volatile struct t_buf *buf,
> +                                      unsigned int size)
>  {
> -    int space_left = calc_bytes_to_wrap(buf);
> -    unsigned long extra_space = space_left - sizeof(u32);
> -    int cycles = 0;
> +    u32 space_left = calc_bytes_to_wrap(buf);
> +    unsigned int extra_space = space_left - sizeof(u32);
> +    bool_t cycles = 0;
>
>     BUG_ON(space_left > size);
>
> @@ -528,17 +563,13 @@ static inline int insert_wrap_record(str
>         ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
>     }
>
> -    return __insert_record(buf,
> -                    TRC_TRACE_WRAP_BUFFER,
> -                    extra_space,
> -                    cycles,
> -                    space_left,
> -                    NULL);
> +    __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
> +                    space_left, NULL);
>  }
>
>  #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
>
> -static inline int insert_lost_records(struct t_buf *buf)
> +static inline void insert_lost_records(volatile struct t_buf *buf)
>  {
>     struct {
>         u32 lost_records;
> @@ -553,12 +584,8 @@ static inline int insert_lost_records(st
>
>     this_cpu(lost_records) = 0;
>
> -    return __insert_record(buf,
> -                           TRC_LOST_RECORDS,
> -                           sizeof(ed),
> -                           1 /* cycles */,
> -                           LOST_REC_SIZE,
> -                           (unsigned char *)&ed);
> +    __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
> +                    LOST_REC_SIZE, &ed);
>  }
>
>  /*
> @@ -580,13 +607,15 @@ static DECLARE_TASKLET(trace_notify_dom0
>  * failure, otherwise 0.  Failure occurs only if the trace buffers are not yet
>  * initialised.
>  */
> -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
> +void __trace_var(u32 event, bool_t cycles, unsigned int extra,
> +                 const void *extra_data)
>  {
> -    struct t_buf *buf;
> -    unsigned long flags, bytes_to_tail, bytes_to_wrap;
> -    int rec_size, total_size;
> -    int extra_word;
> -    int started_below_highwater = 0;
> +    volatile struct t_buf *buf;
> +    unsigned long flags;
> +    u32 bytes_to_tail, bytes_to_wrap;
> +    unsigned int rec_size, total_size;
> +    unsigned int extra_word;
> +    bool_t started_below_highwater;
>
>     if( !tb_init_done )
>         return;
> @@ -619,13 +648,11 @@ void __trace_var(u32 event, int cycles,
>
>     /* Read tb_init_done /before/ t_bufs. */
>     rmb();
> -
> -    spin_lock_irqsave(&this_cpu(t_lock), flags);
> -
>     buf = this_cpu(t_bufs);
> -
>     if ( unlikely(!buf) )
> -        goto unlock;
> +        return;
> +
> +    spin_lock_irqsave(&this_cpu(t_lock), flags);
>
>     started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
>
> --- 2010-06-15.orig/xen/include/xen/trace.h     2010-06-28 09:43:58.000000000 
> +0200
> +++ 2010-06-15/xen/include/xen/trace.h  2010-06-25 12:08:36.000000000 +0200
> @@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op
>
>  int trace_will_trace_event(u32 event);
>
> -void __trace_var(u32 event, int cycles, int extra, unsigned char 
> *extra_data);
> +void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
>
>  static inline void trace_var(u32 event, int cycles, int extra,
>                                unsigned char *extra_data)
> @@ -57,7 +57,7 @@ static inline void trace_var(u32 event,
>         {                                                       \
>             u32 _d[1];                                          \
>             _d[0] = d1;                                         \
> -            __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>         }                                                       \
>     } while ( 0 )
>
> @@ -68,7 +68,7 @@ static inline void trace_var(u32 event,
>             u32 _d[2];                                          \
>             _d[0] = d1;                                         \
>             _d[1] = d2;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>         }                                                       \
>     } while ( 0 )
>
> @@ -80,7 +80,7 @@ static inline void trace_var(u32 event,
>             _d[0] = d1;                                         \
>             _d[1] = d2;                                         \
>             _d[2] = d3;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>         }                                                       \
>     } while ( 0 )
>
> @@ -93,7 +93,7 @@ static inline void trace_var(u32 event,
>             _d[1] = d2;                                         \
>             _d[2] = d3;                                         \
>             _d[3] = d4;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>         }                                                       \
>     } while ( 0 )
>
> @@ -107,7 +107,7 @@ static inline void trace_var(u32 event,
>             _d[2] = d3;                                         \
>             _d[3] = d4;                                         \
>             _d[4] = d5;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>         }                                                       \
>     } while ( 0 )
>
> @@ -122,7 +122,7 @@ static inline void trace_var(u32 event,
>             _d[3] = d4;                                         \
>             _d[4] = d5;                                         \
>             _d[5] = d6;                                         \
> -            __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
> +            __trace_var(_e, 1, sizeof(_d), _d);                 \
>         }                                                       \
>     } while ( 0 )
>
>
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
>
>

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.