/*
 * Reconstructed post-patch view of a Mercurial changeset (parent
 * c45cce86b944) against xen/arch/x86/cpu/mcheck/mctelem.c, whose text
 * arrived here collapsed onto a few lines.
 *
 * The patch replaces the pointer-linked telemetry free lists with
 * 64-bit head words: a 16-bit entry index in the low bits and a
 * generation counter in the upper 48 bits, updated with cmpxchg().
 * Bumping the generation on every head update makes a head that was
 * popped and pushed back at the same index compare unequal, closing
 * the classic ABA window on concurrent reserve/free.
 *
 * "[elided]" markers stand for unchanged code that falls between the
 * patch hunks and is therefore not visible in this excerpt.
 */

/* [elided: file prologue up to and including #include "mce.h"] */

/*
 * Per-entry state word: the 16-bit lifecycle state (MCTE_STATE_*) and
 * the 16-bit free-list link share one 32-bit word so that both halves
 * can be read together with a single aligned load of .whole (see the
 * loser path in mctelem_reserve()).
 */
typedef union {
	struct {
		uint16_t state;		/* See MCTE_STATE_* below */
		uint16_t next_free;	/* array index of next free entry */
	};
	uint32_t whole;
} mcte_state_t;

struct mctelem_ent {
	struct mctelem_ent *mcte_next;	/* next in chronological order */
	struct mctelem_ent *mcte_prev;	/* previous in chronological order */
	mcte_state_t mcte_state;	/* lifecycle state + free-list link */
	uint16_t mcte_flags;		/* See MCTE_F_* below */
	uint32_t mcte_refcnt;		/* Reference count */
	void *mcte_data;		/* corresponding data payload */
};

/* [elided: #define MCTE_F_HOME_URGENT 0x0001U — just above the hunk] */
#define	MCTE_F_HOME_NONURGENT	0x0002U	/* free to nonurgent freelist */
#define	MCTE_F_CLASS_URGENT	0x0004U	/* in use - urgent errors */
#define	MCTE_F_CLASS_NONURGENT	0x0008U	/* in use - nonurgent errors */
/*
 * Lifecycle states.  Unlike the old MCTE_F_STATE_* flag bits these are
 * plain enumerated values held in mcte_state.state, so a transition is
 * a single 16-bit store or cmpxchg rather than a read-modify-write of
 * the flags word (the MCTE_F_MASK_STATE machinery is gone).
 */
#define	MCTE_STATE_FREE		0x001U	/* on a freelist */
#define	MCTE_STATE_UNCOMMITTED	0x002U	/* reserved; on no list */
#define	MCTE_STATE_COMMITTED	0x003U	/* on a committed list */
#define	MCTE_STATE_PROCESSING	0x004U	/* on a processing list */

#define	MCTE_F_MASK_HOME	(MCTE_F_HOME_URGENT | MCTE_F_HOME_NONURGENT)
#define	MCTE_F_MASK_CLASS	(MCTE_F_CLASS_URGENT | MCTE_F_CLASS_NONURGENT)

#define	MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME)

/* NOTE(review): the head of MCTE_SET_CLASS() lies above the visible
 * hunk; its first line is reconstructed from the surviving body. */
#define MCTE_SET_CLASS(tep, new) do { \
	(tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \
	(tep)->mcte_flags |= MCTE_F_CLASS_##new; } while (0)

/* Current lifecycle state of an entry. */
#define MCTE_STATE(tep) ((tep)->mcte_state.state)
/*
 * Attempt the old->new state transition atomically; evaluates to true
 * iff this CPU won the race (the entry was still in state 'old').
 */
#define MCTE_XCHG_TRANSITION_STATE(tep, old, new) \
	(cmpxchg(&(tep)->mcte_state.state, MCTE_STATE_##old, MCTE_STATE_##new) == MCTE_STATE_##old)
/* Transition that must succeed: any concurrent change is a bug. */
#define MCTE_TRANSITION_STATE0(tep, old, new) \
	BUG_ON(cmpxchg(&(tep)->mcte_state.state, old, new) != old)
#define MCTE_TRANSITION_STATE(tep, old, new) \
	MCTE_TRANSITION_STATE0((tep), MCTE_STATE_##old, MCTE_STATE_##new)

#define	MC_URGENT_NENT		10
#define	MC_NONURGENT_NENT	20

/* Free-list terminator: one past the highest valid entry index. */
#define MC_FREE_IDX (MC_URGENT_NENT + MC_NONURGENT_NENT)

#define	MC_NCLASSES		(MC_NONURGENT + 1)

#define	COOKIE2MCTE(c)		((struct mctelem_ent *)(c))
/* [elided: MCTE2COOKIE() — referenced below but outside the hunks] */

static struct mc_telem_ctl {
	/* Linked lists that thread the array members together.
	 *
	 * The free lists are singly-linked via mcte_state.next_free, and
	 * we allocate from them by atomically unlinking an element from
	 * the head.
	 * Consumed entries are returned to the head of the free list.
	 * When an entry is reserved off the free list it is not linked
	 * on any list until it is committed or dismissed.
	 * Instead of storing a pointer, each free-list head stores a
	 * 16-bit entry index in its low bits and a generation counter in
	 * the upper bits.  The counter is used to assure that nobody
	 * changed the head back to the same value it had before (ABA).
	 *
	 * The committed list grows at the head and we do not maintain a
	 * tail pointer; insertions are performed atomically.  The head
	 * [elided: unchanged middle of this comment, between hunks]
	 * we can lock it for updates.  The head of the processing list
	 * always has the oldest telemetry, and we append (as above)
	 * at the tail of the processing list. */
	uint64_t mctc_free[MC_NCLASSES];
	struct mctelem_ent *mctc_committed[MC_NCLASSES];
	struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
	struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
	/* [elided: remaining members, incl. mctc_elems (the entry array)
	 * which the visible code indexes — not shown in the hunks] */
} mctctl;

/* [elided: unchanged code, incl. mctelem_has_deferred()] */

/* Free an entry to its native free list; the entry must not be linked
 * on any list.  @prev_state is the state the entry is required to be
 * in on entry (UNCOMMITTED from mctelem_dismiss(), PROCESSING from
 * mctelem_processing_release()); the old callers' explicit
 * MCTE_TRANSITION_STATE(..., FREE) calls moved in here.
 */
static void mctelem_free(struct mctelem_ent *tep, uint16_t prev_state)
{
	uint64_t *headp;
	mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ?
	    MC_URGENT : MC_NONURGENT;

	BUG_ON(tep->mcte_refcnt != 0);
	BUG_ON(MCTE_STATE(tep) != prev_state);

	tep->mcte_prev = NULL;
	tep->mcte_next = NULL;
	/* NOTE(review): plain store rather than a cmpxchg transition —
	 * presumably safe because refcnt == 0 means no other CPU still
	 * references the entry; confirm with the patch author. */
	tep->mcte_state.state = MCTE_STATE_FREE;

	/* Push onto the free list: link to the current head's index,
	 * then install (bumped generation | our index) as the new head. */
	headp = &mctctl.mctc_free[target];
	for (;;) {
		uint64_t old = *headp;
		uint64_t new = ((old + 0x10000ULL) & ~0xffffULL) |
		    (tep - mctctl.mctc_elems);

		tep->mcte_state.next_free = (old & 0xffffU);
		if (cmpxchg(headp, old, new) == old)
			break;
	}
}

/* [elided: mctelem_hold() and surrounding unchanged code] */

/* [elided: head of mctelem_processing_release(); only its tail is in
 * the hunk — note the FREE transition now happens in mctelem_free()] */
	BUG_ON(tep != mctctl.mctc_processing_head[which]);
	if (--tep->mcte_refcnt == 0) {
		mctctl.mctc_processing_head[which] = tep->mcte_next;
		mctelem_free(tep, MCTE_STATE_PROCESSING);
	}
}

/* [elided: head of mctelem_init(int reqdatasz) — sizing/allocation of
 * mctc_elems and the data area 'datarr', with an early-return failure
 * path; 'i', 'datarr' and 'datasz' are declared there] */

	/* Both free lists start empty: index MC_FREE_IDX, generation 0. */
	mctctl.mctc_free[MC_URGENT] = mctctl.mctc_free[MC_NONURGENT] = MC_FREE_IDX;
	for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
		uint64_t next_free;
		struct mctelem_ent *tep;

		tep = mctctl.mctc_elems + i;
		tep->mcte_state.state = MCTE_STATE_FREE;
		tep->mcte_refcnt = 0;
		tep->mcte_data = datarr + i * datasz;

		/* Single-threaded init: push entry i by hand, urgent
		 * entries to the urgent list, the rest to non-urgent.
		 * (Generation stays 0 here, so the uint64_t head fits
		 * the uint16_t next_free without loss.) */
		if (i < MC_URGENT_NENT) {
			next_free = mctctl.mctc_free[MC_URGENT];
			mctctl.mctc_free[MC_URGENT] = i;
			tep->mcte_flags = MCTE_F_HOME_URGENT;
		} else {
			next_free = mctctl.mctc_free[MC_NONURGENT];
			mctctl.mctc_free[MC_NONURGENT] = i;
			tep->mcte_flags = MCTE_F_HOME_NONURGENT;
		}

		tep->mcte_state.next_free = next_free;
		tep->mcte_next = NULL;
		tep->mcte_prev = NULL;
	}
}

/* incremented non-atomically when reserve fails */
static int mctelem_drop_count;

/* Set up a freshly reserved (UNCOMMITTED) entry: take a reference,
 * clear its list links and record the requested class.
 * NOTE(review): the name misspells "uncommitted" — worth fixing in the
 * patch before it lands (definition and sole caller are both here). */
static void mctelem_init_uncommited(struct mctelem_ent *tep, mctelem_class_t which)
{
	mctelem_hold(tep);
	tep->mcte_next = NULL;
	tep->mcte_prev = NULL;
	if (which == MC_URGENT)
		MCTE_SET_CLASS(tep, URGENT);
	else
		MCTE_SET_CLASS(tep, NONURGENT);
}

/* Reserve a telemetry entry, or return NULL if none available.
 * If we return an entry then the caller must subsequently call exactly
 * one of mctelem_dismiss or mctelem_commit for that entry.
 *
 * Lock-free protocol: ownership is decided by the FREE->UNCOMMITTED
 * cmpxchg on the entry itself; updating the list head afterwards is
 * best-effort, and losers help the winner advance the head so nobody
 * has to wait for a thread that may not be running (reentrancy).
 */
mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
{
	mcte_state_t te_state;
	uint64_t *freep;
	uint64_t oldhead, new;
	struct mctelem_ent *tep;
	mctelem_class_t target = (which == MC_URGENT) ?
	    MC_URGENT : MC_NONURGENT;

	freep = &mctctl.mctc_free[target];
	for (;;) {
		oldhead = *freep;

		if ((oldhead & 0xffffU) == MC_FREE_IDX) {
			if (which == MC_URGENT && target == MC_URGENT) {
				/* raid the non-urgent freelist */
				target = MC_NONURGENT;
				freep = &mctctl.mctc_free[target];
				continue;
			} else {
				mctelem_drop_count++;
				/* NOTE(review): this 'return' sits in
				 * unchanged context between the two
				 * hunks; without it this branch would
				 * fall through and index
				 * mctc_elems[MC_FREE_IDX] out of
				 * bounds.  Confirm against the full
				 * patch. */
				return (NULL);
			}
		}

		/*
		 * Try to allocate the head element.  Winning this
		 * cmpxchg is what transfers ownership; we may then
		 * update the list head.
		 */
		tep = mctctl.mctc_elems + (oldhead & 0xffffU);
		if (MCTE_XCHG_TRANSITION_STATE(tep, FREE, UNCOMMITTED)) {
			new = ((oldhead + 0x10000ULL) & ~0xffffULL) | tep->mcte_state.next_free;
			/* If this fails somebody else has already
			 * advanced the head for us. */
			cmpxchg(freep, oldhead, new);

			/* return the element we got */
			mctelem_init_uncommited(tep, which);
			return MCTE2COOKIE(tep);
		}

		/* Lost the race: read state and next-free link together
		 * in one atomic 32-bit load. */
		te_state.whole = tep->mcte_state.whole;
		if (te_state.state != MCTE_STATE_UNCOMMITTED)
			/* entry already left/rejoined the list and the
			 * head should have been updated — retry */
			continue;

		/* The winner reserved the entry but has not yet updated
		 * the head: help it along rather than waiting, since the
		 * winner may not be running (we may have interrupted it).
		 * The result is deliberately ignored:
		 * - if it succeeded we advanced the head;
		 * - if it failed somebody else already changed it.
		 */
		new = ((oldhead + 0x10000ULL) & ~0xffffULL) | te_state.next_free;
		cmpxchg(freep, oldhead, new);
	}
	/* NOTE(review): dead code — the for(;;) above is only ever left
	 * via return, so this added tail is unreachable (and would
	 * double-count drops if the empty-list branch were changed to
	 * 'break').  Flag to the patch author. */
	mctelem_drop_count++;
	return (NULL);
}

/* [elided: mctelem_dataptr() and surrounding unchanged code] */

/* Dismiss a reserved entry without logging: drop the reference taken at
 * reserve time and return the entry to its free list (mctelem_free()
 * now performs the UNCOMMITTED check that the removed
 * MCTE_TRANSITION_STATE call used to do here). */
void mctelem_dismiss(mctelem_cookie_t cookie)
{
	struct mctelem_ent *tep = COOKIE2MCTE(cookie);

	tep->mcte_refcnt--;
	mctelem_free(tep, MCTE_STATE_UNCOMMITTED);
}

/* [elided: mctelem_commit() and the remainder of the file] */