[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [v2 1/3] x86: refactor psr implementation in hypervisor.



Thanks for reviewing the patches! Sorry for late to reply because Oct 1 to 7
is China National Holiday.

On 16-09-30 17:18:33, Konrad Rzeszutek Wilk wrote:
> On Thu, Sep 22, 2016 at 10:15:20AM +0800, Yi Sun wrote:
> > Current psr.c is designed for supporting L3 CAT/CDP. It has many
> > limitations to add new feature. Considering to support more PSR
> > features, we need refactor PSR implementation to make it more
> > flexible and fulfill the principle, open for extension but closed
> > for modification.
> > 
> > The core of the refactoring is to abstract the common actions and
> > encapsulate them into "struct feat_ops". The detailed steps to add
> > a new feature are described at the head of psr.c.
> > 
> > Signed-off-by: Yi Sun <yi.y.sun@xxxxxxxxxxxxxxx>
> > 
> > ---
> > Changed since v1:
> >  * sysctl.c
> >     - Interface change for abstraction requirement.
> >  * psr.c
> >     - Function and variables names are changed to express accurately.
> >     - Fix code style issues.
> >     - Fix imprecise comments.
> >     - Add one callback function, get_cos_num(), to fulfill the
> >       abstraction requirement.
> >     - Divide get_old_set_new() callback functions into two functions:
> >       get_old_val() and set_new_val() make it more primitive.
> >     - Change feat_info type from an array to union.
> >     - Adjust some functions to replace if to switch to make them
> >       clearer.
> >     - Replace custom list management to system.
> >     - Use 'const' to make codes more safe.
> >  * psr.h
> >     - Change 'enum mask_type' to 'enum psr_val_type' to express
> >       more accurate.
> >     - Change parameters of psr_get_info() to fulfill abstraction
> >       requirement.
> > ---
> >  xen/arch/x86/domctl.c     |   36 +-
> >  xen/arch/x86/psr.c        | 1105 
> > +++++++++++++++++++++++++++++++++++----------
> 
> Whoa. 1K changes. This is a dense patch.
> 
Yes, a little big because it refactors psr.c. :-)

> >  xen/arch/x86/sysctl.c     |   14 +-
> >  xen/include/asm-x86/psr.h |   20 +-
> >  4 files changed, 903 insertions(+), 272 deletions(-)
> > 
> > diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> > index 2a2fe04..c53d819 100644
> > --- a/xen/arch/x86/domctl.c
> > +++ b/xen/arch/x86/domctl.c
> > @@ -1386,41 +1386,41 @@ long arch_do_domctl(
> >          switch ( domctl->u.psr_cat_op.cmd )
> >          {
> >          case XEN_DOMCTL_PSR_CAT_OP_SET_L3_CBM:
> > -            ret = psr_set_l3_cbm(d, domctl->u.psr_cat_op.target,
> > -                                 domctl->u.psr_cat_op.data,
> > -                                 PSR_CBM_TYPE_L3);
> > +            ret = psr_set_val(d, domctl->u.psr_cat_op.target,
> > +                              domctl->u.psr_cat_op.data,
> > +                              PSR_MASK_TYPE_L3_CBM);
> >              break;
> >  
> >          case XEN_DOMCTL_PSR_CAT_OP_SET_L3_CODE:
> > -            ret = psr_set_l3_cbm(d, domctl->u.psr_cat_op.target,
> > -                                 domctl->u.psr_cat_op.data,
> > -                                 PSR_CBM_TYPE_L3_CODE);
> > +            ret = psr_set_val(d, domctl->u.psr_cat_op.target,
> > +                              domctl->u.psr_cat_op.data,
> > +                              PSR_MASK_TYPE_L3_CODE);
> >              break;
> >  
> >          case XEN_DOMCTL_PSR_CAT_OP_SET_L3_DATA:
> > -            ret = psr_set_l3_cbm(d, domctl->u.psr_cat_op.target,
> > -                                 domctl->u.psr_cat_op.data,
> > -                                 PSR_CBM_TYPE_L3_DATA);
> > +            ret = psr_set_val(d, domctl->u.psr_cat_op.target,
> > +                              domctl->u.psr_cat_op.data,
> > +                              PSR_MASK_TYPE_L3_DATA);
> >              break;
> >  
> >          case XEN_DOMCTL_PSR_CAT_OP_GET_L3_CBM:
> > -            ret = psr_get_l3_cbm(d, domctl->u.psr_cat_op.target,
> > -                                 &domctl->u.psr_cat_op.data,
> > -                                 PSR_CBM_TYPE_L3);
> > +            ret = psr_get_val(d, domctl->u.psr_cat_op.target,
> > +                              &domctl->u.psr_cat_op.data,
> > +                              PSR_MASK_TYPE_L3_CBM);
> >              copyback = 1;
> >              break;
> >  
> >          case XEN_DOMCTL_PSR_CAT_OP_GET_L3_CODE:
> > -            ret = psr_get_l3_cbm(d, domctl->u.psr_cat_op.target,
> > -                                 &domctl->u.psr_cat_op.data,
> > -                                 PSR_CBM_TYPE_L3_CODE);
> > +            ret = psr_get_val(d, domctl->u.psr_cat_op.target,
> > +                              &domctl->u.psr_cat_op.data,
> > +                              PSR_MASK_TYPE_L3_CODE);
> >              copyback = 1;
> >              break;
> >  
> >          case XEN_DOMCTL_PSR_CAT_OP_GET_L3_DATA:
> > -            ret = psr_get_l3_cbm(d, domctl->u.psr_cat_op.target,
> > -                                 &domctl->u.psr_cat_op.data,
> > -                                 PSR_CBM_TYPE_L3_DATA);
> > +            ret = psr_get_val(d, domctl->u.psr_cat_op.target,
> > +                              &domctl->u.psr_cat_op.data,
> > +                              PSR_MASK_TYPE_L3_DATA);
> >              copyback = 1;
> >              break;
> >  
> > diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
> > index 0b5073c..99e4c78 100644
> > --- a/xen/arch/x86/psr.c
> > +++ b/xen/arch/x86/psr.c
> > @@ -17,28 +17,159 @@
> >  #include <xen/cpu.h>
> >  #include <xen/err.h>
> >  #include <xen/sched.h>
> > +#include <xen/list.h>
> >  #include <asm/psr.h>
> >  
> >  #define PSR_CMT        (1<<0)
> >  #define PSR_CAT        (1<<1)
> >  #define PSR_CDP        (1<<2)
> >  
> > -struct psr_cat_cbm {
> > +/*
> > + * To add a new PSR feature, please follow below steps:
> > + * 1. Implement feature specific callback functions listed in
> > + *    "struct feat_ops";
> > + * 2. Implement a feature specific "struct feat_ops" object and register
> > + *    feature specific callback functions into it;
> > + * 3. Delcare feat_list object for the feature and malloc memory for it
> > + *    in cpu_prepare_work(). Correspondingly, free them in
> > + *    free_feature();
> > + * 4. Add feature initialization codes in cpu_init_work().
> > + */
> > +
> > +struct feat_list;
> > +struct psr_socket_info;
> > +
> > +/* Every feature enabled should implement such ops and callback functions. 
> > */
> 
> should means it can choose to implement 0 of them. Perhaps MUST ? Or
> at least mention which ones MUST be implemented?
> 
Thanks! I will change "should" to "MUST".

> > +struct feat_ops {
> > +    /*
> > +     * init_feature is used in cpu initialization process to do feature
> > +     * specific initialization works.
> > +     */
> > +    void (*init_feature)(unsigned int eax, unsigned int ebx,
> > +                         unsigned int ecx, unsigned int edx,
> > +                         struct feat_list *feat,
> > +                         struct psr_socket_info *info);
> > +    /*
> > +     * get_cos_num is used to get the COS registers number occupied by the
> 
> Perhaps instead of 'number' use 'amount'

Ok, thanks!

> > +     * feature for one setting, like CDP occupies 2 COSs but CAT occupies 
> > 1.
> 
> occupies? Perhaps uses?
> 
Ok, thanks!

> 
> > +     */
> > +    unsigned int (*get_cos_num)(const struct feat_list *feat);
> > +    /*
> > +     * get_old_val and set_new_val are a pair of functions called together.
> > +     * The caller will traverse all features in list to and call both 
> > functions
> 
> s/in list/in the list/

Thanks!

> > +     * for every feature do below two things:
> s/do/to do/

Thanks!

> > +     * 1. get old_cos register value of all supported features and
> > +     * 2. set the new value for appointed feature.
> 
> Appointed? I am not sure what you mean by that?
> 
It means the feature that user wants to set value. For example, when user
executes "xl psr-cat-cbm-set -l2 0 0xf", the L2 CAT valid value of Dom0 will
be updated to 0xf. PSR_MASK_TYPE_L2_CBM will be sent into psr interface as
a parameter to indicate the "appointed feature" is L2 CAT.

> And what do you mean by 'set_new_value' As in what the COS 
> value should be?
> 
get_old_val and set_new_val are a pair of functions to be called together.
First, we execute get_old_val to get all features old values and assemble them
into an array. Then, we call set_new_val to set new value of the
"appointed feature" into the array. After that, we call find_cos() to find if
the value array can be found in existed COS values. If not, we call
alloc_new_cos to allocate a new COS ID to save this value array.

You can refer below example.

> > +     *
> > +     * All the values are set into value array according the traversal 
> > order,
> 
> What value array? The COS array?
> 
The array is created in psr_set_val(). It maintains all features' valid values
on current domain. We use it to find if there is a match one.

For example, old_cos of the domain is 1. Then, User wants to set L3 CAT CBM to
0x1ff and L2 CAT 0x3f. The original COS registers like below.

        -------------------------------
        | COS 0 | COS 1 | COS 2 | ... |
        -------------------------------
L3 CAT  | 0x7ff | 0x3ff | 0x1ff | ... |
        -------------------------------
L2 CAT  | 0xff  | 0x3f  | 0x3f  | ... |
        -------------------------------

Then, the array should be assembled in get_old_set_new() like below:
val_array[0]: 0x1ff
val_array[1]: 0x3f

Then, we can use this value array to find if there is matching COS through
compare_val(). We can find COS 2 is the matching one. 

If there is already a COS registers combination (e.g. L3 COS 2 and L2 COS 2)
same as the value array, we can reuse this combination directly but not to
allocate a new COS ID. By this way, we can save the COS registers. You know,
there is limitation on COS registers number.

Of course, if we cannot find it in existing combinations, we will call
alloc_new_cos() to allocate a new COS to use.

> > +     * meaning the same order of feature list members.
> > +     *
> > +     * The return value of the function means the number of the value array
> 
> s/number/amount/ perhaps?
> 
> > +     * member to skip or error.
> 
> Perhaps:
> 
> The return value is the amount of entries to skip in the value array?
> 
Ok, thanks!

> > +     * 1 - one item in value array.
> > +     * 2 - two items in value array, like CDP occupies two items.
> 
> s/like/e.g./

Thanks!

> > +     * negative - error.
> > +     */
> > +    int (*get_old_val)(uint64_t val[],
> > +                       const struct feat_list *feat,
> > +                       unsigned int old_cos,
> > +                       enum psr_val_type type);
> > +    int (*set_new_val)(uint64_t val[],
> > +                       const struct feat_list *feat,
> > +                       unsigned int old_cos,
> > +                       enum psr_val_type type,
> > +                       uint64_t m);
> > +    /*
> > +     * compare_val is used to in set value process to compare if the
> 
> 'to in set value process to' ? I am not sure what you mean?
> 
Sorry, should be 'compare_val is used in value setting process'.

> > +     * input value array can match all the features' COS registers values
> > +     * according to input cos id.
> 
> > +     * The return value of the function means the number of the value array
> > +     * member to skip or error.
> 
> Perhaps:
> 
> The return value is the amount of entries to skip in the CoS array?
> 
Ok, thanks!

> > +     * 1 - one item in value array.
> > +     * 2 - two items in value array, like CDP occupies two items.
> 
> Ditto

Thanks!

> > +     * negative - error.
> > +     */
> > +    int (*compare_val)(const uint64_t val[], const struct feat_list *feat,
> > +                        unsigned int cos, bool *found);
> > +    /*
> > +     * get_cos_max_from_type is used to get the cos_max value of the 
> > feature
> > +     * according to input psr_val_type.
> > +     */
> > +    unsigned int (*get_cos_max_from_type)(const struct feat_list *feat,
> > +                                          enum psr_val_type type);
> > +    /*
> > +     * exceeds_cos_max is used to check if the input cos id exceeds the
> > +     * feature's cos_max and if the input value is not the default one.
> > +     * Even if the associated cos exceeds the cos_max, HW can work as 
> > default
> 
> s/as default value/with default values/

Thanks!

> > +     * value. That is the reason we need check if input value is default 
> > one.
> 
> And what is the reason? Based on the design document I would assume that the
> default value is to allow full usage. Please mention that.
> 
Like above example I provided, assume the max cos number of L3 is 8 but L2's is
4. Both L3 and L2 have used 4 COS registers(COS[0:3]). That means there is no
additional COS register of L2 to use.

At that time, user tries to set a new value for L3 on Dom0 but keep the old L2
value. That means we cannot find a matching one through find_cos(). So, we need
allocate a new COS through alloc_new_cos(). There are two scenarios:
1. The old L2 value of Dom0 is default value.
2. The old L2 value of Dom0 is NOT default value.

For both cases, we can find COS[4] for L3.

For case 1, we can set 4 into association register and make it work even if 4
exceeds the cos_max of L2. Because HW can handle this. If the COS ID set into
association register exceeds the max value, HW treats it as default value.

But for case 2, L2 value that user wants is NOT default value which HW cannot
handle it. So we have to return error which means this value pair(L3 and L2)
cannot work in current environment.

> > +     * If both criteria are fulfilled, that means the input exceeds the
> > +     * range.
> > +     *
> > +     * The return value of the function means the number of the value array
> > +     * member to skip or error.
> > +     * 1 - one item in value array.
> > +     * 2 - two items in value array, like CDP occupies two items.
> > +     * 0 - error, exceed cos_max and value to set is not default.
> 
> 'to set is not default' I think you mean that "the value is not set, and 
> the default one is used. ' ?
> 
Nope, 0 means error that we fail to allocate a new COS ID.Here,
I want to express that at least one feature's cos_max has been exceeded and
this feature's value is not default one. So, we cannot handle this case and
return error back.

> > +     */
> > +    unsigned int (*exceeds_cos_max)(const uint64_t val[],
> > +                                    const struct feat_list *feat,
> > +                                    unsigned int cos);
> > +    /* write_msr is used to write feature specific MSR registers. */
> 
> s/write/write out/
> 
Thanks!

> > +    int (*write_msr)(unsigned int cos, const uint64_t val[],
> > +                     struct feat_list *feat);
> > +    /* get_val is used to get feature specific COS register value. */
> 
> You mentioned examples previously. Would it make sense to enumerate
> it here too.
> 
Sorry, I do not get your meaning. Could you please explain it? Thanks!

> > +    int (*get_val)(const struct feat_list *feat, unsigned int cos,
> > +                   enum psr_val_type type, uint64_t *val);
> > +    /* get_feat_info is used to get feature specific HW info. */
> 
> That sounds mysterious.
> 
> I presume that 'dat' and 'array_len' are where the specific HW info
> is put.
>
Yes, you are right. dat[] is used to put HW info. The array_len means the
length of dat[] array.
 
> But nothing about the return code?
> 
0 is error and 1 is success. If you think it is necessary, I will explain it
in comment. Thanks!

> > +    int (*get_feat_info)(const struct feat_list *feat, enum psr_val_type 
> > type,
> > +                         uint32_t dat[], uint32_t array_len);
> > +    /* get_max_cos_max is used to get feature's cos_max. */
> > +    unsigned int (*get_max_cos_max)(const struct feat_list *feat);
> > +};
> > +
> > +/* CAT/CDP info data structure. */
> > +struct psr_cat_lvl_info {
> > +    unsigned int cbm_len;
> > +    unsigned int cos_max;
> > +};
> > +
> > +struct feat_info {
> >      union {
> > -        uint64_t cbm;
> > -        struct {
> > -            uint64_t code;
> > -            uint64_t data;
> > -        };
> > +        struct psr_cat_lvl_info l3_info;
> >      };
> > +};
> > +
> > +/*
> > + * Per spec, the maximum COS register number is 128 for all current PSR
> > + * features.
> > + */
> > +#define MAX_COS_REG_NUM  128
> > +
> > +struct feat_list {
> > +    unsigned int feature;
> > +    struct feat_ops ops;
> > +    struct feat_info info;
> > +    uint64_t cos_reg_val[MAX_COS_REG_NUM];
> > +    struct list_head list;
> > +};
> > +
> > +struct psr_ref {
> >      unsigned int ref;
> >  };
> >  
> > -struct psr_cat_socket_info {
> > -    unsigned int cbm_len;
> > -    unsigned int cos_max;
> > -    struct psr_cat_cbm *cos_to_cbm;
> > -    spinlock_t cbm_lock;
> > +#define PSR_SOCKET_L3_CAT 0
> > +#define PSR_SOCKET_L3_CDP 1
> > +
> > +struct psr_socket_info {
> > +    /*
> > +     * bit 1~0: [01]->L3 CAT-only, [10]->L3 CDP
> > +     */
> > +    unsigned int features;
> 
> You can also use:
>       unsigned int l3_cat:1
>                    l3_cdp:1
> 
> and then you don't have to use a #define. But that is up to you.
> 
Thank you! I prefer to keep current codes then I can use bit related functions,
like test_bit()/__set_bit()/etc.

> > +    unsigned int nr_feat;
> > +    struct list_head feat_list;
> > +    struct psr_ref *cos_ref;
> > +    spinlock_t mask_lock;
> 
> Can you give a comment above this spinlock and say what it protects against?
> 
Sure, will add it. Thanks!

> >  };
> >  
> >  struct psr_assoc {
> > @@ -48,9 +179,7 @@ struct psr_assoc {
> >  
> >  struct psr_cmt *__read_mostly psr_cmt;
> >  
> > -static unsigned long *__read_mostly cat_socket_enable;
> > -static struct psr_cat_socket_info *__read_mostly cat_socket_info;
> > -static unsigned long *__read_mostly cdp_socket_enable;
> > +static struct psr_socket_info *__read_mostly socket_info;
> >  
> >  static unsigned int opt_psr;
> >  static unsigned int __initdata opt_rmid_max = 255;
> > @@ -58,8 +187,410 @@ static unsigned int __read_mostly opt_cos_max = 255;
> >  static uint64_t rmid_mask;
> >  static DEFINE_PER_CPU(struct psr_assoc, psr_assoc);
> >  
> > -static struct psr_cat_cbm *temp_cos_to_cbm;
> > +static struct psr_ref *temp_cos_ref;
> > +/* Every feature has its own object. */
> 
> Not sure I understand that. What object? The feat_l3 ?
> 
Yes, correct.

> > +static struct feat_list *feat_l3;
> > +
> > +/* Common functions for supporting feature callback functions. */
> > +static void free_feature(struct psr_socket_info *info)
> > +{
> > +    struct feat_list *feat_tmp;
> > +
> > +    if ( !info )
> > +        return;
> > +
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +    {
> > +        clear_bit(feat_tmp->feature, &info->features);
> > +        list_del(&feat_tmp->list);
> > +        xfree(feat_tmp);
> > +    }
> > +
> > +    /* Free feature which are not added into feat_list. */
> > +    if ( feat_l3 )
> > +    {
> > +        xfree(feat_l3);
> > +        feat_l3 = NULL;
> > +    }
> > +}
> > +
> > +static bool psr_check_cbm(unsigned int cbm_len, uint64_t cbm)
> > +{
> > +    unsigned int first_bit, zero_bit;
> > +
> > +    /* Set bits should only in the range of [0, cbm_len). */
> > +    if ( cbm & (~0ull << cbm_len) )
> > +        return false;
> > +
> > +    /* At least one bit need to be set. */
> > +    if ( cbm == 0 )
> > +        return false;
> > +
> > +    first_bit = find_first_bit(&cbm, cbm_len);
> > +    zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit);
> > +
> > +    /* Set bits should be contiguous. */
> > +    if ( zero_bit < cbm_len &&
> > +         find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len )
> > +        return false;
> > +
> > +    return true;
> > +}
> > +
> > +/*
> > + * Features specific implementations.
> > + */
> > +
> > +/* L3 CAT/CDP callback functions implementation. */
> > +static void l3_cat_init_feature(unsigned int eax, unsigned int ebx,
> > +                                unsigned int ecx, unsigned int edx,
> > +                                struct feat_list *feat,
> > +                                struct psr_socket_info *info)
> > +{
> > +    struct psr_cat_lvl_info l3_cat;
> > +    unsigned int socket;
> > +    uint64_t val;
> > +
> > +    /* No valid value so do not enable feature. */
> > +    if ( 0 == eax || 0 == edx )
> 
> I am curious - how come you put the value before the variable?
> [I have been always putting the value _after_ the variable]
> 
That is a coding habit. To put value before variable can help to find code
error by compilation, like '0 = eax', a '=' is missed.

If this is not the style of Xen, I can change it. Thanks!

> > +        return;
> > +
> > +    l3_cat.cbm_len = (eax & 0x1f) + 1;
> > +    l3_cat.cos_max = min(opt_cos_max, edx & 0xffff);
> > +
> > +    /* cos=0 is reserved as default cbm(all ones). */
> > +    feat->cos_reg_val[0] = (1ull << l3_cat.cbm_len) - 1;
> > +
> > +    feat->feature = PSR_SOCKET_L3_CAT;
> > +
> > +    if ( (ecx & PSR_CAT_CDP_CAPABILITY) && (opt_psr & PSR_CDP) &&
> > +         !test_bit(PSR_SOCKET_L3_CDP, &info->features) )
> > +    {
> > +        /* Data */
> > +        feat->cos_reg_val[0] = (1ull << l3_cat.cbm_len) - 1;
> > +        /* Code */
> > +        feat->cos_reg_val[1] = (1ull << l3_cat.cbm_len) - 1;
> > +
> > +        /* We only write mask1 since mask0 is always all ones by default. 
> > */
> > +        wrmsrl(MSR_IA32_PSR_L3_MASK(1),
> > +               (1ull << l3_cat.cbm_len) - 1);
> > +        rdmsrl(MSR_IA32_PSR_L3_QOS_CFG, val);
> > +        wrmsrl(MSR_IA32_PSR_L3_QOS_CFG, val | (1 << 
> > PSR_L3_QOS_CDP_ENABLE_BIT));
> > +
> > +        /* Cut half of cos_max when CDP is enabled. */
> > +        l3_cat.cos_max >>= 1;
> > +
> > +        feat->feature = PSR_SOCKET_L3_CDP;
> > +        __set_bit(PSR_SOCKET_L3_CDP, &info->features);
> > +    }
> > +    else
> > +        __set_bit(PSR_SOCKET_L3_CAT, &info->features);
> > +
> > +    feat->info.l3_info = l3_cat;
> > +
> > +    info->nr_feat++;
> > +
> > +    /* Add this feature into list. */
> > +    list_add_tail(&feat->list, &info->feat_list);
> > +
> > +    socket = cpu_to_socket(smp_processor_id());
> > +    printk(XENLOG_INFO "L3 CAT: enabled on socket %u, cos_max:%u, 
> > cbm_len:%u, CDP:%s\n",
> > +           socket, feat->info.l3_info.cos_max, feat->info.l3_info.cbm_len,
> > +           test_bit(PSR_SOCKET_L3_CDP, &info->features) ? "on" : "off");
> > +}
> > +
> > +static int l3_cat_compare_val(const uint64_t val[],
> > +                              const struct feat_list *feat,
> > +                              unsigned int cos, bool *found)
> > +{
> > +    uint64_t l3_def_cbm;
> > +
> > +    l3_def_cbm = (1ull << feat->info.l3_info.cbm_len) - 1;
> > +
> > +    /* CDP */
> > +    if ( feat->feature == PSR_SOCKET_L3_CDP )
> > +    {
> > +        if ( cos > feat->info.l3_info.cos_max )
> > +        {
> > +            if ( val[0] != l3_def_cbm ||
> > +                 val[1] != l3_def_cbm )
> > +            {
> > +                *found = false;
> > +                return -ENOENT;
> > +            }
> > +            *found = true;
> > +        }
> > +        else
> > +            *found = (val[0] == feat->cos_reg_val[cos * 2] &&
> > +                      val[1] == feat->cos_reg_val[cos * 2 + 1]);
> > +
> > +        /* CDP occupies two COS, one for data, one for code. */
> > +        return 2;
> > +    }
> > +
> > +    /* CAT */
> > +    if ( cos > feat->info.l3_info.cos_max )
> > +    {
> > +        if ( val[0] != l3_def_cbm )
> > +        {
> > +            *found = false;
> > +            return -ENOENT;
> > +        }
> > +        *found = true;
> > +    }
> > +    else
> > +        *found = (val[0] == feat->cos_reg_val[cos]);
> > +
> > +    /* L3 CAT occupies one COS. */
> > +    return 1;
> > +}
> > +
> > +static unsigned int l3_cat_get_cos_max_from_type(const struct feat_list 
> > *feat,
> > +                                                 enum psr_val_type type)
> > +{
> > +    if ( type != PSR_MASK_TYPE_L3_DATA &&
> > +         type != PSR_MASK_TYPE_L3_CODE &&
> > +         type != PSR_MASK_TYPE_L3_CBM )
> > +        return 0;
> > +
> > +    return feat->info.l3_info.cos_max;
> > +}
> > +
> > +static unsigned int l3_cat_exceeds_cos_max(const uint64_t val[],
> > +                                           const struct feat_list *feat,
> > +                                           unsigned int cos)
> > +{
> > +    uint64_t l3_def_cbm;
> > +
> > +    l3_def_cbm = (1ull << feat->info.l3_info.cbm_len) - 1;
> > +
> > +    /* CDP */
> > +    if ( feat->feature == PSR_SOCKET_L3_CDP )
> > +    {
> > +        if ( cos > feat->info.l3_info.cos_max &&
> > +             (val[0] != l3_def_cbm || val[1] != l3_def_cbm) )
> > +                /*
> > +                 * Exceed cos_max and value to set is not default,
> > +                 * return error.
> > +                 */
> > +                return 0;
> > +
> > +        /* CDP occupies two COS, one for data, one for code. */
> > +        return 2;
> > +    }
> > +
> > +    /* CAT */
> > +    if ( cos > feat->info.l3_info.cos_max &&
> > +         val[0] != l3_def_cbm )
> > +            /*
> > +             * Exceed cos_max and value to set is not default,
> > +             * return error.
> > +             */
> > +            return 0;
> > +
> > +    /* L3 CAT occupies one COS. */
> > +    return 1;
> > +}
> > +
> > +static int l3_cat_write_msr(unsigned int cos, const uint64_t val[],
> > +                            struct feat_list *feat)
> > +{
> > +    /* CDP */
> > +    if ( feat->feature == PSR_SOCKET_L3_CDP )
> > +    {
> > +        /*
> > +         * If input cos is more than the cos_max of the feature, we should
> > +         * not set the value.
> > +         */
> > +        if ( cos > feat->info.l3_info.cos_max )
> > +            /* CDP occupies two COS, one for data, one for code. */
> > +            return 2;
> > +
> > +        /* Data */
> > +        feat->cos_reg_val[cos * 2] = val[0];
> > +        /* Code */
> > +        feat->cos_reg_val[cos * 2 + 1] = val[1];
> > +
> > +        wrmsrl(MSR_IA32_PSR_L3_MASK_DATA(cos), val[0]);
> > +        wrmsrl(MSR_IA32_PSR_L3_MASK_CODE(cos), val[1]);
> > +        /* CDP occupies two COS, one for data, one for code. */
> > +        return 2;
> > +    }
> > +
> > +    /* CAT */
> > +    if ( cos > feat->info.l3_info.cos_max )
> > +        /* L3 CAT occupies one COS. */
> > +        return 1;
> > +
> > +    feat->cos_reg_val[cos] = val[0];
> > +    wrmsrl(MSR_IA32_PSR_L3_MASK(cos), val[0]);
> > +    /* L3 CAT occupies one COS. */
> > +    return 1;
> > +}
> > +
> > +static unsigned int l3_cat_get_cos_num(const struct feat_list *feat)
> > +{
> > +    if ( feat->feature == PSR_SOCKET_L3_CDP )
> > +        return 2;
> > +
> > +    return 1;
> > +}
> > +
> > +static int l3_cat_get_old_val(uint64_t val[],
> > +                              const struct feat_list *feat,
> > +                              unsigned int old_cos,
> > +                              enum psr_val_type type)
> > +{
> > +    if ( old_cos > feat->info.l3_info.cos_max )
> > +        /* Use default value. */
> > +        old_cos = 0;
> > +
> > +    /* CDP */
> > +    if ( feat->feature == PSR_SOCKET_L3_CDP )
> > +    {
> > +        /* Data */
> > +        val[0] = feat->cos_reg_val[old_cos * 2];
> > +        /* Code */
> > +        val[1] = feat->cos_reg_val[old_cos * 2 + 1];
> > +
> > +        /* CDP occupies two COS, one for data, one for code. */
> > +        return 2;
> > +    }
> > +
> > +    /* CAT */
> > +    val[0] =  feat->cos_reg_val[old_cos];
> > +
> > +    /* L3 CAT occupies one COS. */
> > +    return 1;
> > +}
> > +
> > +static int l3_cat_set_new_val(uint64_t val[],
> > +                              const struct feat_list *feat,
> > +                              unsigned int old_cos,
> > +                              enum psr_val_type type,
> > +                              uint64_t m)
> > +{
> > +    /* L3 CAT occupies one COS. */
> > +    unsigned int ret = 1;
> > +
> > +    switch (type) {
> > +    /* CDP occupies two COS, one for data, one for code. */
> > +    case PSR_MASK_TYPE_L3_DATA:
> > +    case PSR_MASK_TYPE_L3_CODE:
> > +        if ( feat->feature != PSR_SOCKET_L3_CDP )
> > +            return -ENXIO;
> > +
> > +        if ( !psr_check_cbm(feat->info.l3_info.cbm_len, m) )
> > +            return -EINVAL;
> > +
> > +        if ( type == PSR_MASK_TYPE_L3_DATA )
> > +            val[0] = m;
> > +        else
> > +            val[1] = m;
> > +
> > +        ret= 2;
> > +        break;
> > +
> > +    /* CAT */
> > +    case PSR_MASK_TYPE_L3_CBM:
> > +        if ( !psr_check_cbm(feat->info.l3_info.cbm_len, m) )
> > +            return -EINVAL;
> > +
> > +        val[0] = m;
> > +        break;
> > +
> > +    default:
> > +        if ( feat->feature == PSR_SOCKET_L3_CDP )
> > +            /* CDP occupies two COS, one for data, one for code. */
> > +            ret = 2;
> > +        break;
> > +    }
> > +
> > +    return ret;
> > +}
> > +
> > +static int l3_cat_get_val(const struct feat_list *feat, unsigned int cos,
> > +                          enum psr_val_type type, uint64_t *val)
> > +{
> > +    if ( cos > feat->info.l3_info.cos_max )
> > +        /* Use default value. */
> > +        cos = 0;
> > +
> > +    switch (type) {
> > +    /* CDP */
> > +    case PSR_MASK_TYPE_L3_DATA:
> > +    case PSR_MASK_TYPE_L3_CODE:
> > +        if ( feat->feature != PSR_SOCKET_L3_CDP )
> > +            return -ENXIO;
> > +
> > +        if ( type == PSR_MASK_TYPE_L3_DATA )
> > +            *val = feat->cos_reg_val[cos * 2];
> > +        else
> > +            *val = feat->cos_reg_val[cos * 2 + 1];
> > +        break;
> > +
> > +    /* CAT */
> > +    case PSR_MASK_TYPE_L3_CBM:
> > +        *val =  feat->cos_reg_val[cos];
> > +        break;
> > +
> > +    default:
> > +        return 0;
> > +    }
> > +
> > +    return 1;
> > +}
> > +
> > +static int l3_cat_get_feat_info(const struct feat_list *feat,
> > +                                enum psr_val_type type,
> > +                                uint32_t dat[], uint32_t array_len)
> > +{
> > +    if ( !dat || 3 > array_len )
> > +        return 0;
> 
> Not return -EINVAL?

I use 0 as internal error number. The error code of get_feat_info will not
be returned outside psr.c.

> > +
> > +    switch (type) {
> > +    case PSR_MASK_TYPE_L3_CBM:
> > +    case PSR_MASK_TYPE_L3_DATA:
> > +    case PSR_MASK_TYPE_L3_CODE:
> > +        if ( feat->feature == PSR_SOCKET_L3_CDP )
> > +            dat[2] |= XEN_SYSCTL_PSR_CAT_L3_CDP;
> > +        else
> > +            dat[2] = 0;
> > +        break;
> >  
> > +    default:
> > +        /* This feature is not requested feature. */
> > +        return 0;
> 
> return -EOPNOTSUPPO?
> 
Same as above comment. Thanks!

> > +    }
> > +
> > +    dat[0] = feat->info.l3_info.cbm_len;
> > +    dat[1] = feat->info.l3_info.cos_max;
> > +
> > +    return 1;
> 
> Not return 2 ?
> 
> If this is just to return 0|1 why not use 'bool' ?
> 
Yes, I can change it to 'bool'. Thanks!

> > +}
> > +
> > +static unsigned int l3_cat_get_max_cos_max(const struct feat_list *feat)
> > +{
> > +    return feat->info.l3_info.cos_max;
> > +}
> > +
> > +struct feat_ops l3_cat_ops = {
> > +    .init_feature = l3_cat_init_feature,
> > +    .get_cos_num = l3_cat_get_cos_num,
> > +    .get_old_val = l3_cat_get_old_val,
> > +    .set_new_val = l3_cat_set_new_val,
> > +    .compare_val = l3_cat_compare_val,
> > +    .get_cos_max_from_type = l3_cat_get_cos_max_from_type,
> > +    .exceeds_cos_max = l3_cat_exceeds_cos_max,
> > +    .write_msr = l3_cat_write_msr,
> > +    .get_val = l3_cat_get_val,
> > +    .get_feat_info = l3_cat_get_feat_info,
> > +    .get_max_cos_max = l3_cat_get_max_cos_max,
> > +};
> > +
> > +/*
> > + * Common functions implementation
> > + */
> >  static unsigned int get_socket_cpu(unsigned int socket)
> >  {
> >      if ( likely(socket < nr_sockets) )
> > @@ -209,17 +740,29 @@ void psr_free_rmid(struct domain *d)
> >      d->arch.psr_rmid = 0;
> >  }
> >  
> > +static inline unsigned int get_max_cos_max(const struct psr_socket_info 
> > *info)
> > +{
> > +    const struct feat_list *feat_tmp;
> > +    unsigned int cos_max = 0;
> > +
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +        cos_max = max(feat_tmp->ops.get_max_cos_max(feat_tmp), cos_max);
> > +
> > +    return cos_max;
> > +}
> > +
> >  static inline void psr_assoc_init(void)
> >  {
> >      struct psr_assoc *psra = &this_cpu(psr_assoc);
> >  
> > -    if ( cat_socket_info )
> > +    if ( socket_info )
> >      {
> >          unsigned int socket = cpu_to_socket(smp_processor_id());
> > +        const struct psr_socket_info *info = socket_info + socket;
> > +        unsigned int cos_max = get_max_cos_max(info);
> >  
> > -        if ( test_bit(socket, cat_socket_enable) )
> > -            psra->cos_mask = ((1ull << get_count_order(
> > -                             cat_socket_info[socket].cos_max)) - 1) << 32;
> > +        if ( info->features )
> > +            psra->cos_mask = ((1ull << get_count_order(cos_max)) - 1) << 
> > 32;
> >      }
> >  
> >      if ( psr_cmt_enabled() || psra->cos_mask )
> > @@ -256,270 +799,362 @@ void psr_ctxt_switch_to(struct domain *d)
> >          psra->val = reg;
> >      }
> >  }
> > -static struct psr_cat_socket_info *get_cat_socket_info(unsigned int socket)
> > +
> > +static struct psr_socket_info *get_socket_info(unsigned int socket)
> >  {
> > -    if ( !cat_socket_info )
> > +    struct psr_socket_info *info;
> > +
> > +    if ( !socket_info )
> >          return ERR_PTR(-ENODEV);
> >  
> >      if ( socket >= nr_sockets )
> >          return ERR_PTR(-ENOTSOCK);
> >  
> > -    if ( !test_bit(socket, cat_socket_enable) )
> > -        return ERR_PTR(-ENOENT);
> > +    info = socket_info + socket;
> >  
> > -    return cat_socket_info + socket;
> > -}
> > +    if ( !info->features )
> > +        return ERR_PTR(-ENOENT);
> >  
> > -static inline bool_t cdp_is_enabled(unsigned int socket)
> > -{
> > -    return cdp_socket_enable && test_bit(socket, cdp_socket_enable);
> > +    return info;
> >  }
> >  
> > -int psr_get_cat_l3_info(unsigned int socket, uint32_t *cbm_len,
> > -                        uint32_t *cos_max, uint32_t *flags)
> > +int psr_get_info(unsigned int socket, enum psr_val_type type,
> > +                 uint32_t dat[], uint32_t array_len)
> >  {
> > -    struct psr_cat_socket_info *info = get_cat_socket_info(socket);
> > +    struct psr_socket_info *info = get_socket_info(socket);
> > +    const struct feat_list *feat_tmp;
> >  
> >      if ( IS_ERR(info) )
> >          return PTR_ERR(info);
> >  
> > -    *cbm_len = info->cbm_len;
> > -    *cos_max = info->cos_max;
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +        if ( feat_tmp->ops.get_feat_info(feat_tmp, type, dat, array_len) )
> > +            return 0;
> 
> How about just using the regulard return values: 0 for succcess and
> other numbers are failures?
> 
Do you mean for all functions? Thanks!

> >  
> > -    *flags = 0;
> > -    if ( cdp_is_enabled(socket) )
> > -        *flags |= XEN_SYSCTL_PSR_CAT_L3_CDP;
> > +    return -ENODEV;
> > +}
> >  
> > -    return 0;
> > +static unsigned int get_cos_num(const struct psr_socket_info *info)
> > +{
> > +    const struct feat_list *feat_tmp;
> > +    unsigned int num = 0;
> > +
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +        num += feat_tmp->ops.get_cos_num(feat_tmp);
> > +
> > +    return num;
> >  }
> >  
> > -int psr_get_l3_cbm(struct domain *d, unsigned int socket,
> > -                   uint64_t *cbm, enum cbm_type type)
> > +static int get_old_set_new(uint64_t *val,
> > +                           uint32_t array_len,
> > +                           const struct psr_socket_info *info,
> > +                           unsigned int old_cos,
> > +                           enum psr_val_type type,
> > +                           uint64_t m)
> >  {
> > -    struct psr_cat_socket_info *info = get_cat_socket_info(socket);
> > -    bool_t cdp_enabled = cdp_is_enabled(socket);
> > +    const struct feat_list *feat_tmp;
> > +    int ret;
> > +    uint64_t *val_tmp = val;
> >  
> > -    if ( IS_ERR(info) )
> > -        return PTR_ERR(info);
> > +    if ( !val )
> > +        return -EINVAL;
> >  
> > -    switch ( type )
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> >      {
> > -    case PSR_CBM_TYPE_L3:
> > -        if ( cdp_enabled )
> > -            return -EXDEV;
> > -        *cbm = info->cos_to_cbm[d->arch.psr_cos_ids[socket]].cbm;
> > -        break;
> > +        /* value getting order is same as feature list */
> > +        ret = feat_tmp->ops.get_old_val(val_tmp, feat_tmp, old_cos, type);
> >  
> > -    case PSR_CBM_TYPE_L3_CODE:
> > -        if ( !cdp_enabled )
> > -            *cbm = info->cos_to_cbm[d->arch.psr_cos_ids[socket]].cbm;
> > -        else
> > -            *cbm = info->cos_to_cbm[d->arch.psr_cos_ids[socket]].code;
> > -        break;
> > -
> > -    case PSR_CBM_TYPE_L3_DATA:
> > -        if ( !cdp_enabled )
> > -            *cbm = info->cos_to_cbm[d->arch.psr_cos_ids[socket]].cbm;
> > -        else
> > -            *cbm = info->cos_to_cbm[d->arch.psr_cos_ids[socket]].data;
> > -        break;
> > +        val_tmp += ret;
> > +        if ( val_tmp - val > array_len)
> > +            return -EINVAL;
> > +    }
> >  
> > -    default:
> > -        ASSERT_UNREACHABLE();
> > +    val_tmp = val;
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +    {
> > +        /* value setting order is same as feature list */
> > +        ret = feat_tmp->ops.set_new_val(val_tmp, feat_tmp, old_cos, type, 
> > m);
> > +        if ( ret < 0 )
> > +            return ret;
> > +
> > +        val_tmp += ret;
> > +        if ( val_tmp - val > array_len)
> > +            return -EINVAL;
> >      }
> >  
> >      return 0;
> >  }
> >  
> > -static bool_t psr_check_cbm(unsigned int cbm_len, uint64_t cbm)
> > +int psr_get_val(struct domain *d, unsigned int socket,
> > +                uint64_t *val, enum psr_val_type type)
> >  {
> > -    unsigned int first_bit, zero_bit;
> > +    const struct psr_socket_info *info = get_socket_info(socket);
> > +    unsigned int cos = d->arch.psr_cos_ids[socket];
> > +    const struct feat_list *feat_tmp;
> > +    int ret;
> >  
> > -    /* Set bits should only in the range of [0, cbm_len). */
> > -    if ( cbm & (~0ull << cbm_len) )
> > -        return 0;
> > -
> > -    /* At least one bit need to be set. */
> > -    if ( cbm == 0 )
> > -        return 0;
> > +    if ( IS_ERR(info) )
> > +        return PTR_ERR(info);
> >  
> > -    first_bit = find_first_bit(&cbm, cbm_len);
> > -    zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit);
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +    {
> > +        ret = feat_tmp->ops.get_val(feat_tmp, cos, type, val);
> > +        if ( ret < 0 )
> > +            return ret;
> >  
> > -    /* Set bits should be contiguous. */
> > -    if ( zero_bit < cbm_len &&
> > -         find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len )
> > -        return 0;
> > +        if ( ret > 0 )
> > +            /* Found */
> > +            break;
> > +    }
> >  
> > -    return 1;
> > +    return 0;
> >  }
> >  
> > -struct cos_cbm_info
> > +struct cos_mask_info
> >  {
> >      unsigned int cos;
> > -    bool_t cdp;
> > -    uint64_t cbm_code;
> > -    uint64_t cbm_data;
> > +    struct list_head *feat_list;
> > +    const uint64_t *val;
> >  };
> >  
> > -static void do_write_l3_cbm(void *data)
> > +static void do_write_psr_msr(void *data)
> >  {
> > -    struct cos_cbm_info *info = data;
> > +    struct cos_mask_info *info = (struct cos_mask_info *)data;
> > +    unsigned int cos           = info->cos;
> > +    struct list_head *feat_list= info->feat_list;
> > +    const uint64_t *val        = info->val;
> > +    struct feat_list *feat_tmp;
> > +    int ret;
> > +
> > +    if ( !feat_list )
> > +        return;
> >  
> > -    if ( info->cdp )
> > +    list_for_each_entry(feat_tmp, feat_list, list)
> >      {
> > -        wrmsrl(MSR_IA32_PSR_L3_MASK_CODE(info->cos), info->cbm_code);
> > -        wrmsrl(MSR_IA32_PSR_L3_MASK_DATA(info->cos), info->cbm_data);
> > +        ret = feat_tmp->ops.write_msr(cos, val, feat_tmp);
> > +        if ( ret <= 0)
> > +            return;
> > +
> > +        val += ret;
> >      }
> > -    else
> > -        wrmsrl(MSR_IA32_PSR_L3_MASK(info->cos), info->cbm_code);
> >  }
> >  
> > -static int write_l3_cbm(unsigned int socket, unsigned int cos,
> > -                        uint64_t cbm_code, uint64_t cbm_data, bool_t cdp)
> > +static int write_psr_msr(unsigned int socket, unsigned int cos,
> > +                         const uint64_t *val)
> >  {
> > -    struct cos_cbm_info info =
> > +    struct psr_socket_info *info = get_socket_info(socket);
> > +
> > +    struct cos_mask_info data =
> >      {
> >          .cos = cos,
> > -        .cbm_code = cbm_code,
> > -        .cbm_data = cbm_data,
> > -        .cdp = cdp,
> > +        .feat_list = &info->feat_list,
> > +        .val = val,
> >      };
> >  
> >      if ( socket == cpu_to_socket(smp_processor_id()) )
> > -        do_write_l3_cbm(&info);
> > +        do_write_psr_msr(&data);
> >      else
> >      {
> >          unsigned int cpu = get_socket_cpu(socket);
> >  
> >          if ( cpu >= nr_cpu_ids )
> >              return -ENOTSOCK;
> > -        on_selected_cpus(cpumask_of(cpu), do_write_l3_cbm, &info, 1);
> > +        on_selected_cpus(cpumask_of(cpu), do_write_psr_msr, &data, 1);
> >      }
> >  
> >      return 0;
> >  }
> >  
> > -static int find_cos(struct psr_cat_cbm *map, unsigned int cos_max,
> > -                    uint64_t cbm_code, uint64_t cbm_data, bool_t 
> > cdp_enabled)
> > +static int find_cos(const uint64_t *val, uint32_t array_len,
> > +                    enum psr_val_type type,
> > +                    const struct psr_socket_info *info)
> >  {
> >      unsigned int cos;
> > +    const struct psr_ref *map = info->cos_ref;
> > +    const struct feat_list *feat_tmp;
> > +    const uint64_t *val_tmp = val;
> > +    int ret;
> > +    bool found = false;
> > +    unsigned int cos_max = 0;
> > +
> > +    /* cos_max is the max COS of the feature to be set. */
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +    {
> > +        cos_max = feat_tmp->ops.get_cos_max_from_type(feat_tmp, type);
> > +        if ( cos_max > 0 )
> > +            break;
> > +    }
> >  
> >      for ( cos = 0; cos <= cos_max; cos++ )
> >      {
> > -        if ( (map[cos].ref || cos == 0) &&
> > -             ((!cdp_enabled && map[cos].cbm == cbm_code) ||
> > -              (cdp_enabled && map[cos].code == cbm_code &&
> > -                              map[cos].data == cbm_data)) )
> > +        if ( 0 != cos && 0 == map[cos].ref )
> 
> how about just:
> 
> if ( cos && !map[cos].ref ) ?
> 
Yes, thanks!

> > +            continue;
> > +
> > +        /* Not found, need find again from beginning. */
> > +        val_tmp = val;
> > +        list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +        {
> > +            /*
> > +             * Compare value according to feature list order.
> > +             * We must follow this order because value array is assembled
> > +             * as this order in get_old_set_new().
> > +             */
> > +            ret = feat_tmp->ops.compare_val(val_tmp, feat_tmp, cos, 
> > &found);
> > +            if ( ret < 0 )
> > +                return ret;
> > +
> > +            /* If fail to match, go to next cos to compare. */
> > +            if ( !found )
> > +                break;
> > +
> > +            val_tmp += ret;
> > +            if ( val_tmp - val > array_len )
> > +                return -EINVAL;
> > +        }
> > +
> > +        /* Every feature can match, this cos is what we find. */
> 
> I am not sure I understand the comment.
> 
That means every feature's value in 'val_tmp' can be found. Perhaps, 'All
features can match' is better?

> > +        if ( found )
> >              return cos;
> >      }
> >  
> >      return -ENOENT;
> >  }
> >  
> > -static int pick_avail_cos(struct psr_cat_cbm *map, unsigned int cos_max,
> > -                          unsigned int old_cos)
> > +static bool exceeds_cos_max(const uint64_t *val,
> > +                            uint32_t array_len,
> > +                            const struct psr_socket_info *info,
> > +                            unsigned int cos)
> > +{
> > +    unsigned int ret;
> > +    const uint64_t *val_tmp = val;
> > +    const struct feat_list *feat_tmp;
> > +
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +    {
> > +        ret = feat_tmp->ops.exceeds_cos_max(val_tmp, feat_tmp, cos);
> > +        if ( 0 == ret )
> 
> How about
> 
>  if ( !ret )
>       return false;
> 
Sure, thanks!

> > +            return false;
> > +
> > +        val_tmp += ret;
> > +        if ( val_tmp - val > array_len )
> > +            return false;
> > +    }
> > +
> > +    return true;
> > +}
> > +
> > +static int alloc_new_cos(const struct psr_socket_info *info,
> > +                         const uint64_t *val, uint32_t array_len,
> > +                         unsigned int old_cos,
> > +                         enum psr_val_type type)
> >  {
> >      unsigned int cos;
> > +    unsigned int cos_max = 0;
> > +    const struct feat_list *feat_tmp;
> > +    const struct psr_ref *map = info->cos_ref;
> > +
> > +    /*
> > +     * cos_max is the max COS of the feature to be set.
> > +     */
> > +    list_for_each_entry(feat_tmp, &info->feat_list, list)
> > +    {
> > +        cos_max = feat_tmp->ops.get_cos_max_from_type(feat_tmp, type);
> > +        if ( cos_max > 0 )
> > +            break;
> > +    }
> > +
> > +    if ( !cos_max )
> > +        return -ENOENT;
> >  
> >      /* If old cos is referred only by the domain, then use it. */
> >      if ( map[old_cos].ref == 1 && old_cos != 0 )
> > -        return old_cos;
> > +        if ( exceeds_cos_max(val, array_len, info, old_cos) )
> > +            return old_cos;
> >  
> >      /* Find an unused one other than cos0. */
> >      for ( cos = 1; cos <= cos_max; cos++ )
> > +        /*
> > +         * ref is 0 means this COS is not occupied by other domain and
> > +         * can be used for current setting.
> > +         */
> >          if ( map[cos].ref == 0 )
> > +        {
> > +            if ( !exceeds_cos_max(val, array_len, info, cos) )
> > +                return -ENOENT;
> > +
> >              return cos;
> > +        }
> >  
> >      return -ENOENT;
> >  }
> >  
> > -int psr_set_l3_cbm(struct domain *d, unsigned int socket,
> > -                   uint64_t cbm, enum cbm_type type)
> > +int psr_set_val(struct domain *d, unsigned int socket,
> > +                uint64_t val, enum psr_val_type type)
> >  {
> > -    unsigned int old_cos, cos_max;
> > +    unsigned int old_cos;
> >      int cos, ret;
> > -    uint64_t cbm_data, cbm_code;
> > -    bool_t cdp_enabled = cdp_is_enabled(socket);
> > -    struct psr_cat_cbm *map;
> > -    struct psr_cat_socket_info *info = get_cat_socket_info(socket);
> > +    struct psr_ref *map;
> > +    uint64_t *val_array;
> > +    struct psr_socket_info *info = get_socket_info(socket);
> > +    uint32_t array_len;
> >  
> >      if ( IS_ERR(info) )
> >          return PTR_ERR(info);
> >  
> > -    if ( !psr_check_cbm(info->cbm_len, cbm) )
> > -        return -EINVAL;
> > -
> > -    if ( !cdp_enabled && (type == PSR_CBM_TYPE_L3_CODE ||
> > -                          type == PSR_CBM_TYPE_L3_DATA) )
> > -        return -ENXIO;
> > -
> > -    cos_max = info->cos_max;
> > +    map = info->cos_ref;
> >      old_cos = d->arch.psr_cos_ids[socket];
> > -    map = info->cos_to_cbm;
> >  
> > -    switch ( type )
> > -    {
> > -    case PSR_CBM_TYPE_L3:
> > -        cbm_code = cbm;
> > -        cbm_data = cbm;
> > -        break;
> > -
> > -    case PSR_CBM_TYPE_L3_CODE:
> > -        cbm_code = cbm;
> > -        cbm_data = map[old_cos].data;
> > -        break;
> > -
> > -    case PSR_CBM_TYPE_L3_DATA:
> > -        cbm_code = map[old_cos].code;
> > -        cbm_data = cbm;
> > -        break;
> > +    array_len = get_cos_num((const struct psr_socket_info *)info);
> > +    val_array = xzalloc_array(uint64_t, array_len);
> > +    if ( !val_array )
> > +        return -ENOMEM;
> >  
> > -    default:
> > -        ASSERT_UNREACHABLE();
> > -        return -EINVAL;
> > -    }
> > +    if ( (ret = get_old_set_new(val_array, array_len,
> > +                                (const struct psr_socket_info *)info,
> > +                                old_cos, type, val)) != 0 )
> > +        return ret;
> 
> You are leaking 'val_array'? That can't be good.
> 
Oh, sorry, I missed it. Thanks!

> >  
> > -    spin_lock(&info->cbm_lock);
> > -    cos = find_cos(map, cos_max, cbm_code, cbm_data, cdp_enabled);
> > +    spin_lock(&info->mask_lock);
> > +    cos = find_cos((const uint64_t *)val_array, array_len, type,
> > +                   (const struct psr_socket_info *)info);
> >      if ( cos >= 0 )
> >      {
> >          if ( cos == old_cos )
> >          {
> > -            spin_unlock(&info->cbm_lock);
> > +            spin_unlock(&info->mask_lock);
> > +            xfree(val_array);
> >              return 0;
> >          }
> >      }
> >      else
> >      {
> > -        cos = pick_avail_cos(map, cos_max, old_cos);
> > +        cos = alloc_new_cos((const struct psr_socket_info *)info,
> > +                            (const uint64_t *)val_array, array_len,
> > +                            old_cos, type);
> >          if ( cos < 0 )
> >          {
> > -            spin_unlock(&info->cbm_lock);
> > +            spin_unlock(&info->mask_lock);
> > +            xfree(val_array);
> >              return cos;
> >          }
> >  
> > -        /* We try to avoid writing MSR. */
> > -        if ( (cdp_enabled &&
> > -             (map[cos].code != cbm_code || map[cos].data != cbm_data)) ||
> > -             (!cdp_enabled && map[cos].cbm != cbm_code) )
> > +        /* Write all features MSRs corresponding to the COS */
> > +        ret = write_psr_msr(socket, cos, (const uint64_t *)val_array);
> > +        if ( ret )
> >          {
> > -            ret = write_l3_cbm(socket, cos, cbm_code, cbm_data, 
> > cdp_enabled);
> > -            if ( ret )
> > -            {
> > -                spin_unlock(&info->cbm_lock);
> > -                return ret;
> > -            }
> > -            map[cos].code = cbm_code;
> > -            map[cos].data = cbm_data;
> > +            spin_unlock(&info->mask_lock);
> > +            xfree(val_array);
> > +            return ret;
> >          }
> >      }
> >  
> >      map[cos].ref++;
> >      map[old_cos].ref--;
> > -    spin_unlock(&info->cbm_lock);
> > +
> > +    spin_unlock(&info->mask_lock);
> >  
> >      d->arch.psr_cos_ids[socket] = cos;
> > +    xfree(val_array);
> >  
> >      return 0;
> >  }
> > @@ -529,20 +1164,23 @@ static void psr_free_cos(struct domain *d)
> >  {
> >      unsigned int socket;
> >      unsigned int cos;
> > -    struct psr_cat_socket_info *info;
> > +    struct psr_socket_info *info;
> >  
> >      if( !d->arch.psr_cos_ids )
> >          return;
> >  
> > -    for_each_set_bit(socket, cat_socket_enable, nr_sockets)
> > +    for( socket = 0; socket < nr_sockets; socket++)
> 
> Wrong Style. You have the ( incorrectly and also the ++ needs a space
> after it.

Sorry, I missed space. Thanks!

> >      {
> >          if ( (cos = d->arch.psr_cos_ids[socket]) == 0 )
> >              continue;
> >  
> > -        info = cat_socket_info + socket;
> > -        spin_lock(&info->cbm_lock);
> > -        info->cos_to_cbm[cos].ref--;
> > -        spin_unlock(&info->cbm_lock);
> > +        info = get_socket_info(socket);
> > +        if ( IS_ERR(info) )
> > +            continue;
> > +
> > +        spin_lock(&info->mask_lock);
> > +        info->cos_ref[cos].ref--;
> > +        spin_unlock(&info->mask_lock);
> >      }
> >  
> >      xfree(d->arch.psr_cos_ids);
> > @@ -551,7 +1189,7 @@ static void psr_free_cos(struct domain *d)
> >  
> >  int psr_domain_init(struct domain *d)
> >  {
> > -    if ( cat_socket_info )
> > +    if ( socket_info )
> >      {
> >          d->arch.psr_cos_ids = xzalloc_array(unsigned int, nr_sockets);
> >          if ( !d->arch.psr_cos_ids )
> > @@ -567,137 +1205,126 @@ void psr_domain_free(struct domain *d)
> >      psr_free_cos(d);
> >  }
> >  
> > -static int cat_cpu_prepare(unsigned int cpu)
> > +static int cpu_prepare_work(unsigned int cpu)
> >  {
> > -    if ( !cat_socket_info )
> > +    if ( !socket_info )
> >          return 0;
> >  
> > -    if ( temp_cos_to_cbm == NULL &&
> > -         (temp_cos_to_cbm = xzalloc_array(struct psr_cat_cbm,
> > +    if ( temp_cos_ref == NULL &&
> > +         (temp_cos_ref = xzalloc_array(struct psr_ref,
> >                                            opt_cos_max + 1UL)) == NULL )
> >          return -ENOMEM;
> >  
> > +    /* Malloc memory for the global feature head here. */
> > +    if ( feat_l3 == NULL &&
> > +         (feat_l3 = xzalloc(struct feat_list)) == NULL )
> > +        return -ENOMEM;
> 
> .. and leak temp_cos_ref?
> 
Sorry, will free temp_cos_ref here. Thanks!

> > +
> >      return 0;
> >  }
> >  
> > -static void cat_cpu_init(void)
> > +static void cpu_init_work(void)
> >  {
> >      unsigned int eax, ebx, ecx, edx;
> > -    struct psr_cat_socket_info *info;
> > +    struct psr_socket_info *info;
> >      unsigned int socket;
> >      unsigned int cpu = smp_processor_id();
> > -    uint64_t val;
> >      const struct cpuinfo_x86 *c = cpu_data + cpu;
> > +    struct feat_list *feat_tmp;
> >  
> >      if ( !cpu_has(c, X86_FEATURE_PQE) || c->cpuid_level < 
> > PSR_CPUID_LEVEL_CAT )
> >          return;
> >  
> >      socket = cpu_to_socket(cpu);
> > -    if ( test_bit(socket, cat_socket_enable) )
> > +    info = socket_info + socket;
> > +    if ( info->features )
> >          return;
> >  
> > +    info->cos_ref = temp_cos_ref;
> > +    temp_cos_ref = NULL;
> > +    spin_lock_init(&info->mask_lock);
> > +
> >      cpuid_count(PSR_CPUID_LEVEL_CAT, 0, &eax, &ebx, &ecx, &edx);
> >      if ( ebx & PSR_RESOURCE_TYPE_L3 )
> >      {
> > -        cpuid_count(PSR_CPUID_LEVEL_CAT, 1, &eax, &ebx, &ecx, &edx);
> > -        info = cat_socket_info + socket;
> > -        info->cbm_len = (eax & 0x1f) + 1;
> > -        info->cos_max = min(opt_cos_max, edx & 0xffff);
> > -
> > -        info->cos_to_cbm = temp_cos_to_cbm;
> > -        temp_cos_to_cbm = NULL;
> > -        /* cos=0 is reserved as default cbm(all ones). */
> > -        info->cos_to_cbm[0].cbm = (1ull << info->cbm_len) - 1;
> > +        feat_tmp = feat_l3;
> > +        feat_l3 = NULL;
> >  
> > -        spin_lock_init(&info->cbm_lock);
> > -
> > -        set_bit(socket, cat_socket_enable);
> > -
> > -        if ( (ecx & PSR_CAT_CDP_CAPABILITY) && (opt_psr & PSR_CDP) &&
> > -             cdp_socket_enable && !test_bit(socket, cdp_socket_enable) )
> > -        {
> > -            info->cos_to_cbm[0].code = (1ull << info->cbm_len) - 1;
> > -            info->cos_to_cbm[0].data = (1ull << info->cbm_len) - 1;
> > -
> > -            /* We only write mask1 since mask0 is always all ones by 
> > default. */
> > -            wrmsrl(MSR_IA32_PSR_L3_MASK(1), (1ull << info->cbm_len) - 1);
> > -
> > -            rdmsrl(MSR_IA32_PSR_L3_QOS_CFG, val);
> > -            wrmsrl(MSR_IA32_PSR_L3_QOS_CFG, val | (1 << 
> > PSR_L3_QOS_CDP_ENABLE_BIT));
> > -
> > -            /* Cut half of cos_max when CDP is enabled. */
> > -            info->cos_max >>= 1;
> > -
> > -            set_bit(socket, cdp_socket_enable);
> > -        }
> > -        printk(XENLOG_INFO "CAT: enabled on socket %u, cos_max:%u, 
> > cbm_len:%u, CDP:%s\n",
> > -               socket, info->cos_max, info->cbm_len,
> > -               cdp_is_enabled(socket) ? "on" : "off");
> > +        /* Initialize this feature according to CPUID. */
> > +        cpuid_count(PSR_CPUID_LEVEL_CAT, 1, &eax, &ebx, &ecx, &edx);
> > +        feat_tmp->ops = l3_cat_ops;
> > +        feat_tmp->ops.init_feature(eax, ebx, ecx, edx, feat_tmp, info);
> >      }
> >  }
> >  
> > -static void cat_cpu_fini(unsigned int cpu)
> > +static void cpu_fini_work(unsigned int cpu)
> >  {
> >      unsigned int socket = cpu_to_socket(cpu);
> >  
> >      if ( !socket_cpumask[socket] || cpumask_empty(socket_cpumask[socket]) )
> >      {
> > -        struct psr_cat_socket_info *info = cat_socket_info + socket;
> > +        struct psr_socket_info *info = get_socket_info(socket);
> > +
> > +        free_feature(info);
> >  
> > -        if ( info->cos_to_cbm )
> > +        if ( info->cos_ref )
> 
> Um, just called 'free_feature' so the 'info' has garbage..
> 
> Pehraps you should call free_feature after you done with 'info'

Sorry, could you please explain more?

Per my design, 'cpu_fini_work' is used to free CPU resources in info but
not info itself. 'free_feature' only is used to free feat_list and feature
objects(e.g. feat_l3). I think 'free_feature' can be called before or after
'if ( info->cos_ref )'.

> >          {
> > -            xfree(info->cos_to_cbm);
> > -            info->cos_to_cbm = NULL;
> > +            xfree(info->cos_ref);
> > +            info->cos_ref = NULL;
> >          }
> > -
> > -        if ( cdp_is_enabled(socket) )
> > -            clear_bit(socket, cdp_socket_enable);
> > -
> > -        clear_bit(socket, cat_socket_enable);
> >      }
> >  }
> >  
> > -static void __init psr_cat_free(void)
> > +static void __init psr_free(void)
> >  {
> > -    xfree(cat_socket_enable);
> > -    cat_socket_enable = NULL;
> > -    xfree(cat_socket_info);
> > -    cat_socket_info = NULL;
> > +    unsigned int i;
> > +
> > +    for ( i = 0; i < nr_sockets; i++ )
> > +        free_feature(&socket_info[i]);
> > +
> > +    xfree(socket_info);
> > +    socket_info = NULL;
> >  }
> >  
> > -static void __init init_psr_cat(void)
> > +static void __init init_psr(void)
> >  {
> > +    unsigned int i;
> > +
> >      if ( opt_cos_max < 1 )
> >      {
> >          printk(XENLOG_INFO "CAT: disabled, cos_max is too small\n");
> >          return;
> >      }
> >  
> > -    cat_socket_enable = xzalloc_array(unsigned long, 
> > BITS_TO_LONGS(nr_sockets));
> > -    cat_socket_info = xzalloc_array(struct psr_cat_socket_info, 
> > nr_sockets);
> > -    cdp_socket_enable = xzalloc_array(unsigned long, 
> > BITS_TO_LONGS(nr_sockets));
> > +    socket_info = xzalloc_array(struct psr_socket_info, nr_sockets);
> > +
> > +    if ( !socket_info )
> > +    {
> > +        printk(XENLOG_INFO "Fail to alloc socket_info!\n");
> > +        return;
> > +    }
> >  
> > -    if ( !cat_socket_enable || !cat_socket_info )
> > -        psr_cat_free();
> > +    for ( i = 0; i < nr_sockets; i++ )
> > +        INIT_LIST_HEAD(&socket_info[i].feat_list);
> >  }
> >  
> >  static int psr_cpu_prepare(unsigned int cpu)
> >  {
> > -    return cat_cpu_prepare(cpu);
> > +    return cpu_prepare_work(cpu);
> >  }
> >  
> >  static void psr_cpu_init(void)
> >  {
> > -    if ( cat_socket_info )
> > -        cat_cpu_init();
> > +    if ( socket_info )
> > +        cpu_init_work();
> >  
> >      psr_assoc_init();
> >  }
> >  
> >  static void psr_cpu_fini(unsigned int cpu)
> >  {
> > -    if ( cat_socket_info )
> > -        cat_cpu_fini(cpu);
> > +    if ( socket_info )
> > +        cpu_fini_work(cpu);
> >  }
> >  
> >  static int cpu_callback(
> > @@ -739,13 +1366,13 @@ static int __init psr_presmp_init(void)
> >          init_psr_cmt(opt_rmid_max);
> >  
> >      if ( opt_psr & PSR_CAT )
> > -        init_psr_cat();
> > +        init_psr();
> >  
> >      if ( psr_cpu_prepare(0) )
> > -        psr_cat_free();
> > +        psr_free();
> >  
> >      psr_cpu_init();
> > -    if ( psr_cmt_enabled() || cat_socket_info )
> > +    if ( psr_cmt_enabled() || socket_info )
> >          register_cpu_notifier(&cpu_nfb);
> >  
> >      return 0;
> > diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
> > index 14e7dc7..8e17a9a 100644
> > --- a/xen/arch/x86/sysctl.c
> > +++ b/xen/arch/x86/sysctl.c
> > @@ -176,15 +176,19 @@ long arch_do_sysctl(
> >          switch ( sysctl->u.psr_cat_op.cmd )
> >          {
> >          case XEN_SYSCTL_PSR_CAT_get_l3_info:
> > -            ret = psr_get_cat_l3_info(sysctl->u.psr_cat_op.target,
> > -                                      
> > &sysctl->u.psr_cat_op.u.l3_info.cbm_len,
> > -                                      
> > &sysctl->u.psr_cat_op.u.l3_info.cos_max,
> > -                                      
> > &sysctl->u.psr_cat_op.u.l3_info.flags);
> > +        {
> > +            uint32_t dat[3];
> 
> Perhaps a #define for it?

DYM define a macro for 'uint32_t dat[3]'?

> > +            ret = psr_get_info(sysctl->u.psr_cat_op.target,
> > +                               PSR_MASK_TYPE_L3_CBM,
> > +                               dat, 3);
> 
> That way you can use that there.

Sorry, I do not understand your meaning. Could you please explain more? Thanks!

> > +            sysctl->u.psr_cat_op.u.l3_info.cbm_len = dat[0];
> > +            sysctl->u.psr_cat_op.u.l3_info.cos_max = dat[1];
> > +            sysctl->u.psr_cat_op.u.l3_info.flags   = dat[2];
> >  
> >              if ( !ret && __copy_field_to_guest(u_sysctl, sysctl, 
> > u.psr_cat_op) )
> >                  ret = -EFAULT;
> >              break;
> > -
> > +        }
> >          default:
> >              ret = -EOPNOTSUPP;
> >              break;
> > diff --git a/xen/include/asm-x86/psr.h b/xen/include/asm-x86/psr.h
> > index 57f47e9..ef46d12 100644
> > --- a/xen/include/asm-x86/psr.h
> > +++ b/xen/include/asm-x86/psr.h
> > @@ -46,10 +46,10 @@ struct psr_cmt {
> >      struct psr_cmt_l3 l3;
> >  };
> >  
> > -enum cbm_type {
> > -    PSR_CBM_TYPE_L3,
> > -    PSR_CBM_TYPE_L3_CODE,
> > -    PSR_CBM_TYPE_L3_DATA,
> > +enum psr_val_type {
> > +    PSR_MASK_TYPE_L3_CBM,
> > +    PSR_MASK_TYPE_L3_CODE,
> > +    PSR_MASK_TYPE_L3_DATA,
> >  };
> >  
> >  extern struct psr_cmt *psr_cmt;
> > @@ -63,12 +63,12 @@ int psr_alloc_rmid(struct domain *d);
> >  void psr_free_rmid(struct domain *d);
> >  void psr_ctxt_switch_to(struct domain *d);
> >  
> > -int psr_get_cat_l3_info(unsigned int socket, uint32_t *cbm_len,
> > -                        uint32_t *cos_max, uint32_t *flags);
> > -int psr_get_l3_cbm(struct domain *d, unsigned int socket,
> > -                   uint64_t *cbm, enum cbm_type type);
> > -int psr_set_l3_cbm(struct domain *d, unsigned int socket,
> > -                   uint64_t cbm, enum cbm_type type);
> > +int psr_get_info(unsigned int socket, enum psr_val_type type,
> > +                 uint32_t dat[], uint32_t array_len);
> > +int psr_get_val(struct domain *d, unsigned int socket,
> > +                uint64_t *val, enum psr_val_type type);
> > +int psr_set_val(struct domain *d, unsigned int socket,
> > +                uint64_t val, enum psr_val_type type);
> >  
> >  int psr_domain_init(struct domain *d);
> >  void psr_domain_free(struct domain *d);
> > -- 
> > 2.7.4
> > 
> > 
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@xxxxxxxxxxxxx
> > https://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.