[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v2] MCE: Fix race condition in mctelem_reserve
>>> On 30.01.14 at 10:20, Frediano Ziglio <frediano.ziglio@xxxxxxxxxx> wrote: > Ping Your ping should be to the MCE maintainers - I'm merely waiting for their comments/ack. Jan > On Wed, 2014-01-22 at 17:17 +0000, Frediano Ziglio wrote: >> These lines (in mctelem_reserve) >> >> newhead = oldhead->mcte_next; >> if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) { >> >> are racy. After you read the newhead pointer it can happen that another >> flow (thread or recursive invocation) changes the whole list but sets the head >> to the same value. So oldhead is the same as *freelp but you are setting >> a new head that could point to whatever element (even one already used). >> >> This patch instead uses a bit array and atomic bit operations. >> >> Signed-off-by: Frediano Ziglio <frediano.ziglio@xxxxxxxxxx> >> --- >> xen/arch/x86/cpu/mcheck/mctelem.c | 81 > ++++++++++++++----------------- >> 1 file changed, 30 insertions(+), 51 deletions(-) >> >> Changes from v1: >> - Use bitmap to allow any number of items to be used; >> - Use a single bitmap to simplify reserve loop; >> - Remove HOME flags as they were not used anymore. 
>> >> diff --git a/xen/arch/x86/cpu/mcheck/mctelem.c > b/xen/arch/x86/cpu/mcheck/mctelem.c >> index 895ce1a..ed8e8d2 100644 >> --- a/xen/arch/x86/cpu/mcheck/mctelem.c >> +++ b/xen/arch/x86/cpu/mcheck/mctelem.c >> @@ -37,24 +37,19 @@ struct mctelem_ent { >> void *mcte_data; /* corresponding data payload */ >> }; >> >> -#define MCTE_F_HOME_URGENT 0x0001U /* free to urgent >> freelist */ >> -#define MCTE_F_HOME_NONURGENT 0x0002U /* free to nonurgent >> freelist */ >> -#define MCTE_F_CLASS_URGENT 0x0004U /* in use - urgent >> errors */ >> -#define MCTE_F_CLASS_NONURGENT 0x0008U /* in use - nonurgent >> errors */ >> +#define MCTE_F_CLASS_URGENT 0x0001U /* in use - urgent >> errors */ >> +#define MCTE_F_CLASS_NONURGENT 0x0002U /* in use - nonurgent >> errors */ >> #define MCTE_F_STATE_FREE 0x0010U /* on a freelist */ >> #define MCTE_F_STATE_UNCOMMITTED 0x0020U /* reserved; on no list >> */ >> #define MCTE_F_STATE_COMMITTED 0x0040U /* on a committed list >> */ >> #define MCTE_F_STATE_PROCESSING 0x0080U /* on a processing list >> */ >> >> -#define MCTE_F_MASK_HOME (MCTE_F_HOME_URGENT | >> MCTE_F_HOME_NONURGENT) >> #define MCTE_F_MASK_CLASS (MCTE_F_CLASS_URGENT | >> MCTE_F_CLASS_NONURGENT) >> #define MCTE_F_MASK_STATE (MCTE_F_STATE_FREE | \ >> MCTE_F_STATE_UNCOMMITTED | \ >> MCTE_F_STATE_COMMITTED | \ >> MCTE_F_STATE_PROCESSING) >> >> -#define MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME) >> - >> #define MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS) >> #define MCTE_SET_CLASS(tep, new) do { \ >> (tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \ >> @@ -69,6 +64,8 @@ struct mctelem_ent { >> #define MC_URGENT_NENT 10 >> #define MC_NONURGENT_NENT 20 >> >> +#define MC_NENT (MC_URGENT_NENT + MC_NONURGENT_NENT) >> + >> #define MC_NCLASSES (MC_NONURGENT + 1) >> >> #define COOKIE2MCTE(c) ((struct mctelem_ent *)(c)) >> @@ -77,11 +74,9 @@ struct mctelem_ent { >> static struct mc_telem_ctl { >> /* Linked lists that thread the array members together. 
>> * >> - * The free lists are singly-linked via mcte_next, and we allocate >> - * from them by atomically unlinking an element from the head. >> - * Consumed entries are returned to the head of the free list. >> - * When an entry is reserved off the free list it is not linked >> - * on any list until it is committed or dismissed. >> + * The free lists is a bit array where bit 1 means free. >> + * This as element number is quite small and is easy to >> + * atomically allocate that way. >> * >> * The committed list grows at the head and we do not maintain a >> * tail pointer; insertions are performed atomically. The head >> @@ -101,7 +96,7 @@ static struct mc_telem_ctl { >> * we can lock it for updates. The head of the processing list >> * always has the oldest telemetry, and we append (as above) >> * at the tail of the processing list. */ >> - struct mctelem_ent *mctc_free[MC_NCLASSES]; >> + DECLARE_BITMAP(mctc_free, MC_NENT); >> struct mctelem_ent *mctc_committed[MC_NCLASSES]; >> struct mctelem_ent *mctc_processing_head[MC_NCLASSES]; >> struct mctelem_ent *mctc_processing_tail[MC_NCLASSES]; >> @@ -207,14 +202,14 @@ int mctelem_has_deferred(unsigned int cpu) >> */ >> static void mctelem_free(struct mctelem_ent *tep) >> { >> - mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ? 
>> - MC_URGENT : MC_NONURGENT; >> - >> BUG_ON(tep->mcte_refcnt != 0); >> BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE); >> >> tep->mcte_prev = NULL; >> - mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep); >> + tep->mcte_next = NULL; >> + >> + /* set free in array */ >> + set_bit(tep - mctctl.mctc_elems, mctctl.mctc_free); >> } >> >> /* Increment the reference count of an entry that is not linked on to >> @@ -274,34 +269,25 @@ void mctelem_init(int reqdatasz) >> } >> >> if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent, >> - MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL || >> - (datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) * >> - datasz)) == NULL) { >> + MC_NENT)) == NULL || >> + (datarr = xmalloc_bytes(MC_NENT * datasz)) == NULL) { >> if (mctctl.mctc_elems) >> xfree(mctctl.mctc_elems); >> printk("Allocations for MCA telemetry failed\n"); >> return; >> } >> >> - for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) { >> - struct mctelem_ent *tep, **tepp; >> + for (i = 0; i < MC_NENT; i++) { >> + struct mctelem_ent *tep; >> >> tep = mctctl.mctc_elems + i; >> tep->mcte_flags = MCTE_F_STATE_FREE; >> tep->mcte_refcnt = 0; >> tep->mcte_data = datarr + i * datasz; >> >> - if (i < MC_URGENT_NENT) { >> - tepp = &mctctl.mctc_free[MC_URGENT]; >> - tep->mcte_flags |= MCTE_F_HOME_URGENT; >> - } else { >> - tepp = &mctctl.mctc_free[MC_NONURGENT]; >> - tep->mcte_flags |= MCTE_F_HOME_NONURGENT; >> - } >> - >> - tep->mcte_next = *tepp; >> + __set_bit(i, mctctl.mctc_free); >> + tep->mcte_next = NULL; >> tep->mcte_prev = NULL; >> - *tepp = tep; >> } >> } >> >> @@ -310,32 +296,25 @@ static int mctelem_drop_count; >> >> /* Reserve a telemetry entry, or return NULL if none available. >> * If we return an entry then the caller must subsequently call exactly one > of >> - * mctelem_unreserve or mctelem_commit for that entry. >> + * mctelem_dismiss or mctelem_commit for that entry. 
>> */ >> mctelem_cookie_t mctelem_reserve(mctelem_class_t which) >> { >> - struct mctelem_ent **freelp; >> - struct mctelem_ent *oldhead, *newhead; >> - mctelem_class_t target = (which == MC_URGENT) ? >> - MC_URGENT : MC_NONURGENT; >> + unsigned bit; >> + unsigned start_bit = (which == MC_URGENT) ? 0 : MC_URGENT_NENT; >> >> - freelp = &mctctl.mctc_free[target]; >> for (;;) { >> - if ((oldhead = *freelp) == NULL) { >> - if (which == MC_URGENT && target == MC_URGENT) { >> - /* raid the non-urgent freelist */ >> - target = MC_NONURGENT; >> - freelp = &mctctl.mctc_free[target]; >> - continue; >> - } else { >> - mctelem_drop_count++; >> - return (NULL); >> - } >> + bit = find_next_bit(mctctl.mctc_free, MC_NENT, start_bit); >> + >> + if (bit >= MC_NENT) { >> + mctelem_drop_count++; >> + return (NULL); >> } >> >> - newhead = oldhead->mcte_next; >> - if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) { >> - struct mctelem_ent *tep = oldhead; >> + /* try to allocate, atomically clear free bit */ >> + if (test_and_clear_bit(bit, mctctl.mctc_free)) { >> + /* return element we got */ >> + struct mctelem_ent *tep = mctctl.mctc_elems + bit; >> >> mctelem_hold(tep); >> MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED); _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |