|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [RFC v2 7/7] xen/iommu: smmu-v3: Add Xen specific code to enable the ported driver
On 11/20/2017 7:25 AM, Julien Grall wrote:
> Hi Sameer,
>
> On 19/11/17 07:45, Goel, Sameer wrote:
>> On 10/12/2017 10:36 AM, Julien Grall wrote:
>>>> +
>>>> +typedef paddr_t phys_addr_t;
>>>> +typedef paddr_t dma_addr_t;
>>>> +
>>>> +/* Alias to Xen device tree helpers */
>>>> +#define device_node dt_device_node
>>>> +#define of_phandle_args dt_phandle_args
>>>> +#define of_device_id dt_device_match
>>>> +#define of_match_node dt_match_node
>>>> +#define of_property_read_u32(np, pname, out) (!dt_property_read_u32(np,
>>>> pname, out))
>>>> +#define of_property_read_bool dt_property_read_bool
>>>> +#define of_parse_phandle_with_args dt_parse_phandle_with_args
>>>> +#define mutex spinlock_t
>>>> +#define mutex_init spin_lock_init
>>>> +#define mutex_lock spin_lock
>>>> +#define mutex_unlock spin_unlock
>>>
>>> mutex and spinlock are not the same. The former is sleeping whilst the
>>> later is not.
>>>
>>> Can you please explain why this is fine and possibly add that in a comment?
>>>
>> Mutex is used to protect the access to smmu device internal data structure
>> when setting up the s2 config and installing stes for a given device in
>> Linux. The ste programming operation can be competitively long but in the
>> current testing, I did not see this blocking for too long. I will put in a
>> comment.
>
> Well, I don't think that this is a justification. You tested on one platform
> and does not explain how you perform them.
>
> If I understand correctly, that mutex is only used when assigning device. So
> it might be ok to switch to spinlock. But that's not because the operation is
> not too long, it just because it would be only perform by the toolstack
> (domctl) and will not be issued by guest.
Ok. I agree ans will update the comment.
>
>>
>>>> +
>>>> +/* Xen: Helpers to get device MMIO and IRQs */
>>>> +struct resource {
>>>> + u64 addr;
>>>> + u64 size;
>>>> + unsigned int type;
>>>> +};
>>>
>>> Likely we want a compat header for defining Linux helpers. This would avoid
>>> replicating it everywhere.
>> Agreed.
>>
> That should be
>>>
>>>> +
>>>> +#define resource_size(res) ((res)->size)
>>>> +
>>>> +#define platform_device device
>>>> +
>>>> +#define IORESOURCE_MEM 0
>>>> +#define IORESOURCE_IRQ 1
>>>> +
>>>> +static struct resource *platform_get_resource(struct platform_device
>>>> *pdev,
>>>> + unsigned int type,
>>>> + unsigned int num)
>>>> +{
>>>> + /*
>>>> + * The resource is only used between 2 calls of platform_get_resource.
>>>> + * It's quite ugly but it's avoid to add too much code in the part
>>>> + * imported from Linux
>>>> + */
>>>> + static struct resource res;
>>>> + struct acpi_iort_node *iort_node;
>>>> + struct acpi_iort_smmu_v3 *node_smmu_data;
>>>> + int ret = 0;
>>>> +
>>>> + res.type = type;
>>>> +
>>>> + switch (type) {
>>>> + case IORESOURCE_MEM:
>>>> + if (pdev->type == DEV_ACPI) {
>>>> + ret = 1;
>>>> + iort_node = pdev->acpi_node;
>>>> + node_smmu_data =
>>>> + (struct acpi_iort_smmu_v3 *)iort_node->node_data;
>>>> +
>>>> + if (node_smmu_data != NULL) {
>>>> + res.addr = node_smmu_data->base_address;
>>>> + res.size = SZ_128K;
>>>> + ret = 0;
>>>> + }
>>>> + } else {
>>>> + ret = dt_device_get_address(dev_to_dt(pdev), num,
>>>> + &res.addr, &res.size);
>>>> + }
>>>> +
>>>> + return ((ret) ? NULL : &res);
>>>> +
>>>> + case IORESOURCE_IRQ:
>>>> + ret = platform_get_irq(dev_to_dt(pdev), num);
>>>
>>> No IRQ for ACPI?
>> For IRQs the code calls platform_get_irq_byname. So, the IORESOURCE_IRQ
>> implementation is not needed at all. (DT or ACPI)
>
> Please document it then.
Ok.
>
> [...]
>
>>>
>>>> + udelay(sleep_us); \
>>>> + } \
>>>> + (cond) ? 0 : -ETIMEDOUT; \
>>>> +})
>>>> +
>>>> +#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us)
>>>> \
>>>> + readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us,
>>>> timeout_us)
>>>> +
>>>> +/* Xen: Helpers for IRQ functions */
>>>> +#define request_irq(irq, func, flags, name, dev) request_irq(irq, flags,
>>>> func, name, dev)
>>>> +#define free_irq release_irq
>>>> +
>>>> +enum irqreturn {
>>>> + IRQ_NONE = (0 << 0),
>>>> + IRQ_HANDLED = (1 << 0),
>>>> +};
>>>> +
>>>> +typedef enum irqreturn irqreturn_t;
>>>> +
>>>> +/* Device logger functions */
>>>> +#define dev_print(dev, lvl, fmt, ...) \
>>>> + printk(lvl "smmu: " fmt, ## __VA_ARGS__)
>>>> +
>>>> +#define dev_dbg(dev, fmt, ...) dev_print(dev, XENLOG_DEBUG, fmt, ##
>>>> __VA_ARGS__)
>>>> +#define dev_notice(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ##
>>>> __VA_ARGS__)
>>>> +#define dev_warn(dev, fmt, ...) dev_print(dev, XENLOG_WARNING, fmt, ##
>>>> __VA_ARGS__)
>>>> +#define dev_err(dev, fmt, ...) dev_print(dev, XENLOG_ERR, fmt, ##
>>>> __VA_ARGS__)
>>>> +#define dev_info(dev, fmt, ...) dev_print(dev, XENLOG_INFO, fmt, ##
>>>> __VA_ARGS__)
>>>> +
>>>> +#define dev_err_ratelimited(dev, fmt, ...) \
>>>> + dev_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
>>>> +
>>>> +#define dev_name(dev) dt_node_full_name(dev_to_dt(dev))
>>>> +
>>>> +/* Alias to Xen allocation helpers */
>>>> +#define kfree xfree
>>>> +#define kmalloc(size, flags) _xmalloc(size, sizeof(void *))
>>>> +#define kzalloc(size, flags) _xzalloc(size, sizeof(void *))
>>>> +#define devm_kzalloc(dev, size, flags) _xzalloc(size, sizeof(void *))
>>>> +#define kmalloc_array(size, n, flags) _xmalloc_array(size, sizeof(void
>>>> *), n)
>>>> +
>>>> +/* Compatibility defines */
>>>> +#undef WARN_ON
>>>> +#define WARN_ON(cond) (!!cond)
>>>
>>> Why do you redefine WARN_ON?Need it to be a scalar value. The Xen
>>> implementation is a do..while. Did not want a large impact hence the local
>>> define. I can try proposing a new common define.
>
> Well, I don't think it is acceptable to redefine WARN_ON. At least without
> trying to modify the common version.
I will put this in the common folder.
>
>>>
>>>> +#define WARN_ON_ONCE(cond) WARN_ON(cond)
>>>> Hmmm, can't we implement a common WARN_ON_ONCE?
>> Will not really be executed. Defining it to maintain compatibility. I think
>> this file is the right place for this define. We can send it to a generic
>> file if so needed.
>
> I can't see why we wouldn't want to have a WARN_ON_ONCE in the common code.
> If you disagree, please explain.
>
> [...]
>
>>>
>>>> + else {
>>>> cpu_relax();
>>>
>>> Hmmm I now see why you added cpu_relax() at the top. Well, on Xen cpu_relax
>>> is just a barrier. On Linux it is used to yield.
>>>
>>> And that bit is worrying me. The Linux code will allow context switching to
>>> another tasks if the code is taking too much time.
>>>
>>> Xen is not preemptible, so is it fine?
>> This is used when consuming the command queue and could be a potential
>> performance issue if the queue is large. (This is never the case).
>> I am wondering if we should define a yeild in long run?
>
> As I said before, Xen is not preemptible. In this particular case, there are
> spinlock taken by the callers (e.g any function assigning device). So yield
> would just make it worst.
>
> [...]
I can keep the memory barrier as is. This should not impact much. This should
not be a concern.
>
>>>
>>>> strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) *
>>>> STRTAB_L1_DESC_DWORDS];
>>>> desc->span = STRTAB_SPLIT + 1;
>>>> - desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
>>>> - GFP_KERNEL | __GFP_ZERO);
>>>> +
>>>> + alignment = 1 << ((5 + (desc->span - 1)));
>>>
>>> Do you mind explaining the 5? Also, does the shift will always be < 32?
>> Its from the "Level 1 Stream Table Descriptor" from SMMU spec. I can add the
>> ref to the spec. Yes the span is hard coded in the driver and with any spec
>> valid span values this cannot exceed 32.
>> I have added a comment tot he code
>>>
>>>> + desc->l2ptr = _xzalloc(size, alignment);
>>>> + desc->l2ptr_dma = virt_to_maddr(desc->l2ptr);
>>>
>>> _xzalloc can fail and virt_to_maddr will result to a panic.
>>>
>> I will move the check.
>>>> +
>>>> if (!desc->l2ptr) {
>>>> dev_err(smmu->dev,
>>>> "failed to allocate l2 stream table for SID %u\n",
>>>> @@ -1158,7 +1418,7 @@ static int arm_smmu_init_l2_strtab(struct
>>>> arm_smmu_device *smmu, u32 sid)
>>>> }
>>>> /* IRQ and event handlers */
>>>> -static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
>>>> +static void arm_smmu_evtq_thread(int irq, void *dev, struct cpu_user_regs
>>>> *regs)
>>>
>>> Could you please introduce a wrapper instead as it was done in smmu.c?
>>>
>> Done.
>>
>>>> {
>>>> int i;
>>>> struct arm_smmu_device *smmu = dev;
>>>> @@ -1186,7 +1446,6 @@ static irqreturn_t arm_smmu_evtq_thread(int irq,
>>>> void *dev)
>>>> /* Sync our overflow flag, as we believe we're up to speed */
>>>> q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
>>>> - return IRQ_HANDLED;
>>>> }
>>>> static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64
>>>> *evt)
>>>> @@ -1203,7 +1462,7 @@ static void arm_smmu_handle_ppr(struct
>>>> arm_smmu_device *smmu, u64 *evt)
>>>> dev_info(smmu->dev, "unexpected PRI request received:\n");
>>>> dev_info(smmu->dev,
>>>> - "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova
>>>> 0x%016llx\n",
>>>> + "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova
>>>> 0x%016lx\n",
>>>
>>> Hmmm why?
>> The variable is defined as ungined long long in Linux. ( uses generic
>> int-ll64.h) In Xen the definition changes to unsigned long for ARM_64 which
>> actually makes sense.
>
> u64 is a 64-bit variable. And therefore the format should be PRIx64 to
> accomodate 64-bit and 32-bit.
Sure. I can change the format specifier.
>
> [...]
>
>>>
>>>> +
>>>> static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
>>>> {
>>>> @@ -1410,6 +1674,7 @@ static struct iommu_domain
>>>> *arm_smmu_domain_alloc(unsigned type)
>>>> return &smmu_domain->domain;
>>>> }
>>>> +#if 0
>>>
>>> Please explain the #if 0
>>>
>> Done.
>>
>>>> static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
>>>> {
>>>> int idx, size = 1 << span;
>>>> @@ -1427,36 +1692,20 @@ static void arm_smmu_bitmap_free(unsigned long
>>>> *map, int idx)
>>>> {
>>>> clear_bit(idx, map);
>>>> }
>>>> +#endif
>>>> static void arm_smmu_domain_free(struct iommu_domain *domain)
>>>> {
>>>> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>> - struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>> -
>>>> - iommu_put_dma_cookie(domain);
>>>> - free_io_pgtable_ops(smmu_domain->pgtbl_ops);
>>>> -
>>>> - /* Free the CD and ASID, if we allocated them */
>>>> - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
>>>> - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
>>>> -
>>>> - if (cfg->cdptr) {
>>>> - dmam_free_coherent(smmu_domain->smmu->dev,
>>>> - CTXDESC_CD_DWORDS << 3,
>>>> - cfg->cdptr,
>>>> - cfg->cdptr_dma);
>>>> -
>>>> - arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid);
>>>> - }
>>>> - } else {
>>>> - struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>>>> - if (cfg->vmid)
>>>> - arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
>>>> - }
>>>> + /*
>>>> + * Xen: Remove the free functions that are not used and code related
>>>> + * to S1 translation. We just need to free the domain here.
>>>> + */
>>>
>>> Please use #if 0 rather than removing the code + comment on top. But I am
>>> not sure why you drop the S2 free code. Shouldn't we allocate VMID from the
>>> SMMU?
>> I am picking up the vmid from the domain I am sharing the page tables with.
>> domain->arch.p2m.vmid. This seemed valid. Please let me know if we want to
>> generate a different vmid?
>
> The processor and the SMMU may support either 8 or 16bits VMID but I don't
> think any thing in the spec says that both processor and SMMU must support
> the same value.
>
> So it would be possible to have a processor supporting 16-bits VMID and the
> SMMU only 8-bits VMID.
>
> Adding a check at the SMMU initialization might be a solution. But as the
> code for allocating the VMID is already present, then I would prefer to we
> stick with different the VMID.
Sure. For this iteration I will generate an SMMU specific VMID. Do you think it
would be an optimization to reuse the CPU VMID if SMMU supports 16bit VMID?
Thanks,
Sameer
>
>>
>>
>>>
>>>> kfree(smmu_domain);
>>>> }
>>>> +#if 0
>>>> static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain
>>>> *smmu_domain,
>>>> struct io_pgtable_cfg *pgtbl_cfg)
>>>> {
>>>> @@ -1488,33 +1737,30 @@ out_free_asid:
>>>> arm_smmu_bitmap_free(smmu->asid_map, asid);
>>>> return ret;
>>>> }
>>>> +#endif
>>>> -static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain
>>>> *smmu_domain,
>>>> - struct io_pgtable_cfg *pgtbl_cfg)
>>>> +static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain
>>>> *smmu_domain)
>>>> {
>>>> - int vmid;
>>>> - struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>> struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
>>>> - vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
>>>> - if (vmid < 0)
>>>> - return vmid;
>>>> + /* Xen: Set the values as needed */
>>>> - cfg->vmid = (u16)vmid;
>>>> - cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
>>>> - cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
>>>> + cfg->vmid = cfg->domain->arch.p2m.vmid;
>>>
>>> See my comment above.
>> I am wondering why we are considering this value invalid? Since the page
>> tables are shared we can use the vmid from the domain. Is the concern
>> regarding the size?
>
> See above.
>
> Cheers,
>
--
Qualcomm Datacenter Technologies as an affiliate of Qualcomm Technologies,
Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux
Foundation Collaborative Project.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |