|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v2] MCE: Fix race condition in mctelem_reserve
>>> On 14.02.14 at 17:23, Frediano Ziglio <frediano.ziglio@xxxxxxxxxx> wrote:
> Ping
And this is clearly not the first ping. Guys - you had over 3 weeks time
to respond to this. Please!
Jan
> On Fri, 2014-01-31 at 13:37 +0000, Frediano Ziglio wrote:
>> Ping
>>
>> On Wed, 2014-01-22 at 17:17 +0000, Frediano Ziglio wrote:
>> > From 49b37906afef0981f318064f4cb53a3602bca50a Mon Sep 17 00:00:00 2001
>> > From: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
>> > Date: Wed, 22 Jan 2014 10:48:50 +0000
>> > Subject: [PATCH] MCE: Fix race condition in mctelem_reserve
>> > MIME-Version: 1.0
>> > Content-Type: text/plain; charset=UTF-8
>> > Content-Transfer-Encoding: 8bit
>> >
>> > These lines (in mctelem_reserve)
>> >
>> > newhead = oldhead->mcte_next;
>> > if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
>> >
>> > are racy. After you read the newhead pointer it can happen that another
>> > flow (thread or recursive invocation) change all the list but set head
>> > with same value. So oldhead is the same as *freelp but you are setting
>> > a new head that could point to whatever element (even already used).
>> >
>> > This patch use instead a bit array and atomic bit operations.
>> >
>> > Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx>
>> > ---
>> > xen/arch/x86/cpu/mcheck/mctelem.c | 81
> ++++++++++++++-----------------------
>> > 1 file changed, 30 insertions(+), 51 deletions(-)
>> >
>> > Changes from v1:
>> > - Use bitmap to allow any number of items to be used;
>> > - Use a single bitmap to simplify reserve loop;
>> > - Remove HOME flags as was not used anymore.
>> >
>> > diff --git a/xen/arch/x86/cpu/mcheck/mctelem.c
> b/xen/arch/x86/cpu/mcheck/mctelem.c
>> > index 895ce1a..ed8e8d2 100644
>> > --- a/xen/arch/x86/cpu/mcheck/mctelem.c
>> > +++ b/xen/arch/x86/cpu/mcheck/mctelem.c
>> > @@ -37,24 +37,19 @@ struct mctelem_ent {
>> > void *mcte_data; /* corresponding data payload */
>> > };
>> >
>> > -#define MCTE_F_HOME_URGENT 0x0001U /* free to urgent
>> > freelist */
>> > -#define MCTE_F_HOME_NONURGENT 0x0002U /* free to nonurgent
>> > freelist */
>> > -#define MCTE_F_CLASS_URGENT 0x0004U /* in use - urgent
>> > errors */
>> > -#define MCTE_F_CLASS_NONURGENT 0x0008U /* in use - nonurgent
>> > errors */
>> > +#define MCTE_F_CLASS_URGENT 0x0001U /* in use - urgent
>> > errors */
>> > +#define MCTE_F_CLASS_NONURGENT 0x0002U /* in use - nonurgent
>> > errors */
>> > #define MCTE_F_STATE_FREE 0x0010U /* on a freelist */
>> > #define MCTE_F_STATE_UNCOMMITTED 0x0020U /* reserved; on no list
>> > */
>> > #define MCTE_F_STATE_COMMITTED 0x0040U /* on a committed list
>> > */
>> > #define MCTE_F_STATE_PROCESSING 0x0080U /* on a processing list
>> > */
>> >
>> > -#define MCTE_F_MASK_HOME (MCTE_F_HOME_URGENT |
>> > MCTE_F_HOME_NONURGENT)
>> > #define MCTE_F_MASK_CLASS (MCTE_F_CLASS_URGENT |
>> > MCTE_F_CLASS_NONURGENT)
>> > #define MCTE_F_MASK_STATE (MCTE_F_STATE_FREE | \
>> > MCTE_F_STATE_UNCOMMITTED | \
>> > MCTE_F_STATE_COMMITTED | \
>> > MCTE_F_STATE_PROCESSING)
>> >
>> > -#define MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME)
>> > -
>> > #define MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS)
>> > #define MCTE_SET_CLASS(tep, new) do { \
>> > (tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \
>> > @@ -69,6 +64,8 @@ struct mctelem_ent {
>> > #define MC_URGENT_NENT 10
>> > #define MC_NONURGENT_NENT 20
>> >
>> > +#define MC_NENT (MC_URGENT_NENT + MC_NONURGENT_NENT)
>> > +
>> > #define MC_NCLASSES (MC_NONURGENT + 1)
>> >
>> > #define COOKIE2MCTE(c) ((struct mctelem_ent *)(c))
>> > @@ -77,11 +74,9 @@ struct mctelem_ent {
>> > static struct mc_telem_ctl {
>> > /* Linked lists that thread the array members together.
>> > *
>> > - * The free lists are singly-linked via mcte_next, and we allocate
>> > - * from them by atomically unlinking an element from the head.
>> > - * Consumed entries are returned to the head of the free list.
>> > - * When an entry is reserved off the free list it is not linked
>> > - * on any list until it is committed or dismissed.
>> > + * The free lists is a bit array where bit 1 means free.
>> > + * This as element number is quite small and is easy to
>> > + * atomically allocate that way.
>> > *
>> > * The committed list grows at the head and we do not maintain a
>> > * tail pointer; insertions are performed atomically. The head
>> > @@ -101,7 +96,7 @@ static struct mc_telem_ctl {
>> > * we can lock it for updates. The head of the processing list
>> > * always has the oldest telemetry, and we append (as above)
>> > * at the tail of the processing list. */
>> > - struct mctelem_ent *mctc_free[MC_NCLASSES];
>> > + DECLARE_BITMAP(mctc_free, MC_NENT);
>> > struct mctelem_ent *mctc_committed[MC_NCLASSES];
>> > struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
>> > struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
>> > @@ -207,14 +202,14 @@ int mctelem_has_deferred(unsigned int cpu)
>> > */
>> > static void mctelem_free(struct mctelem_ent *tep)
>> > {
>> > - mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ?
>> > - MC_URGENT : MC_NONURGENT;
>> > -
>> > BUG_ON(tep->mcte_refcnt != 0);
>> > BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
>> >
>> > tep->mcte_prev = NULL;
>> > - mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep);
>> > + tep->mcte_next = NULL;
>> > +
>> > + /* set free in array */
>> > + set_bit(tep - mctctl.mctc_elems, mctctl.mctc_free);
>> > }
>> >
>> > /* Increment the reference count of an entry that is not linked on to
>> > @@ -274,34 +269,25 @@ void mctelem_init(int reqdatasz)
>> > }
>> >
>> > if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent,
>> > - MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL ||
>> > - (datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) *
>> > - datasz)) == NULL) {
>> > + MC_NENT)) == NULL ||
>> > + (datarr = xmalloc_bytes(MC_NENT * datasz)) == NULL) {
>> > if (mctctl.mctc_elems)
>> > xfree(mctctl.mctc_elems);
>> > printk("Allocations for MCA telemetry failed\n");
>> > return;
>> > }
>> >
>> > - for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
>> > - struct mctelem_ent *tep, **tepp;
>> > + for (i = 0; i < MC_NENT; i++) {
>> > + struct mctelem_ent *tep;
>> >
>> > tep = mctctl.mctc_elems + i;
>> > tep->mcte_flags = MCTE_F_STATE_FREE;
>> > tep->mcte_refcnt = 0;
>> > tep->mcte_data = datarr + i * datasz;
>> >
>> > - if (i < MC_URGENT_NENT) {
>> > - tepp = &mctctl.mctc_free[MC_URGENT];
>> > - tep->mcte_flags |= MCTE_F_HOME_URGENT;
>> > - } else {
>> > - tepp = &mctctl.mctc_free[MC_NONURGENT];
>> > - tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
>> > - }
>> > -
>> > - tep->mcte_next = *tepp;
>> > + __set_bit(i, mctctl.mctc_free);
>> > + tep->mcte_next = NULL;
>> > tep->mcte_prev = NULL;
>> > - *tepp = tep;
>> > }
>> > }
>> >
>> > @@ -310,32 +296,25 @@ static int mctelem_drop_count;
>> >
>> > /* Reserve a telemetry entry, or return NULL if none available.
>> > * If we return an entry then the caller must subsequently call exactly
> one of
>> > - * mctelem_unreserve or mctelem_commit for that entry.
>> > + * mctelem_dismiss or mctelem_commit for that entry.
>> > */
>> > mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
>> > {
>> > - struct mctelem_ent **freelp;
>> > - struct mctelem_ent *oldhead, *newhead;
>> > - mctelem_class_t target = (which == MC_URGENT) ?
>> > - MC_URGENT : MC_NONURGENT;
>> > + unsigned bit;
>> > + unsigned start_bit = (which == MC_URGENT) ? 0 : MC_URGENT_NENT;
>> >
>> > - freelp = &mctctl.mctc_free[target];
>> > for (;;) {
>> > - if ((oldhead = *freelp) == NULL) {
>> > - if (which == MC_URGENT && target == MC_URGENT) {
>> > - /* raid the non-urgent freelist */
>> > - target = MC_NONURGENT;
>> > - freelp = &mctctl.mctc_free[target];
>> > - continue;
>> > - } else {
>> > - mctelem_drop_count++;
>> > - return (NULL);
>> > - }
>> > + bit = find_next_bit(mctctl.mctc_free, MC_NENT, start_bit);
>> > +
>> > + if (bit >= MC_NENT) {
>> > + mctelem_drop_count++;
>> > + return (NULL);
>> > }
>> >
>> > - newhead = oldhead->mcte_next;
>> > - if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
>> > - struct mctelem_ent *tep = oldhead;
>> > + /* try to allocate, atomically clear free bit */
>> > + if (test_and_clear_bit(bit, mctctl.mctc_free)) {
>> > + /* return element we got */
>> > + struct mctelem_ent *tep = mctctl.mctc_elems + bit;
>> >
>> > mctelem_hold(tep);
>> > MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |