
[PATCH v6 08/20] xen/riscv: introduce cmpxchg.h



The header was taken from Linux kernel 6.4.0-rc1.

Additionally, the following changes were made:
* add emulation of {cmp}xchg for 1/2 byte types using 32-bit atomic
  access.
* replace tabs with spaces
* replace __* variables with *__
* introduce generic versions of xchg_* and cmpxchg_*.
* drop {cmp}xchg{release,relaxed,acquire} as Xen doesn't use them
* drop barriers and use instruction suffixes instead (.aq, .rl, .aqrl)

The implementations of the 4- and 8-byte cases were updated according to the spec:
```
              ....
Linux Construct         RVWMO AMO Mapping
atomic <op> relaxed     amo<op>.{w|d}
atomic <op> acquire     amo<op>.{w|d}.aq
atomic <op> release     amo<op>.{w|d}.rl
atomic <op>             amo<op>.{w|d}.aqrl
Linux Construct         RVWMO LR/SC Mapping
atomic <op> relaxed     loop: lr.{w|d}; <op>; sc.{w|d}; bnez loop
atomic <op> acquire     loop: lr.{w|d}.aq; <op>; sc.{w|d}; bnez loop
atomic <op> release     loop: lr.{w|d}; <op>; sc.{w|d}.aqrl∗ ; bnez loop OR
                        fence.tso; loop: lr.{w|d}; <op>; sc.{w|d}∗ ; bnez loop
atomic <op>             loop: lr.{w|d}.aq; <op>; sc.{w|d}.aqrl; bnez loop

Table A.5: Mappings from Linux memory primitives to RISC-V primitives

```
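
For illustration, under this mapping a fully-ordered 4-byte xchg() reduces to
a single AMO instruction. A minimal sketch (ptr and new stand in for the
macro arguments; this mirrors what __xchg() below emits for the 4-byte case):
```
unsigned int old;

/* fully-ordered atomic swap: amoswap.w.aqrl, per Table A.5 */
asm volatile ( "amoswap.w.aqrl %0, %2, %1"
               : "=r" (old), "+A" (*ptr)
               : "r" (new)
               : "memory" );
```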

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx>
---
Changes in V6:
 - update the commit message? (As before, I don't understand this point. Can
   you give an example of what sort of opcode / instruction is missing?)
 - Code style fixes
 - change sizeof(*ptr) -> sizeof(*(ptr))
 - update operand names and some local variables for the macros
   emulate_xchg_1_2() and emulate_cmpxchg_1_2()
 - drop {cmp}xchg_{relaxed,acquire,release} versions as they aren't needed for
   Xen
 - update __amoswap_generic() prototype and definition: drop pre- and
   post-barriers.
 - update emulate_xchg_1_2() prototype and definition: add lr_sfx, drop pre-
   and post-barriers.
 - rename __xchg_generic() to __xchg() and make __xchg() a static inline
   function to be able to handle "#ifndef CONFIG_32BIT case 8: ..."
---
Changes in V5:
 - update the commit message.
 - drop ALIGN_DOWN().
 - update the definition of emulate_xchg_1_2(): 
   - lr.d -> lr.w, sc.d -> sc.w.
   - drop ret argument.
   - code style fixes around asm volatile.
   - update prototype.
   - use asm named operands.
   - rename local variables.
   - add comment above the macros
 - update the definition of __xchg_generic:
   - rename to __xchg()
   - transform it to static inline
   - code style fixes around switch()
   - update prototype.
 - redefine cmpxchg()
 - update emulate_cmpxchg_1_2():
   - update prototype
   - update local variables names and usage of them
   - use named asm operands.
   - add comment above the macros
 - drop pre- and post-barriers and use the .aq, .rl, .aqrl suffixes.
 - drop {cmp}xchg_{relaxed,acquire,release} as they are not used by Xen.
 - drop unnecessary details in the comment above emulate_cmpxchg_1_2()
---
Changes in V4:
 - Code style fixes.
 - enforce that new and *ptr have the same type in __xchg_*(); also "\n"
   was removed at the end of the asm instructions.
 - dependency from 
https://lore.kernel.org/xen-devel/cover.1706259490.git.federico.serafini@xxxxxxxxxxx/
 - switch from ASSERT_UNREACHABLE to STATIC_ASSERT_UNREACHABLE().
 - drop xchg32(ptr, x) and xchg64(ptr, x) as they aren't used.
 - drop cmpxchg{32,64}_{local} as they aren't used.
 - introduce generic versions of xchg_* and cmpxchg_*.
 - update the commit message.
---
Changes in V3:
 - update the commit message
 - add emulation of {cmp}xchg_... for 1 and 2 bytes types
---
Changes in V2:
 - update the comment at the top of the header.
 - change xen/lib.h to xen/bug.h.
 - sort inclusion of headers properly.
---
 xen/arch/riscv/include/asm/cmpxchg.h | 209 +++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 xen/arch/riscv/include/asm/cmpxchg.h

diff --git a/xen/arch/riscv/include/asm/cmpxchg.h b/xen/arch/riscv/include/asm/cmpxchg.h
new file mode 100644
index 0000000000..aba2858933
--- /dev/null
+++ b/xen/arch/riscv/include/asm/cmpxchg.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2014 Regents of the University of California */
+
+#ifndef _ASM_RISCV_CMPXCHG_H
+#define _ASM_RISCV_CMPXCHG_H
+
+#include <xen/compiler.h>
+#include <xen/lib.h>
+
+#include <asm/fence.h>
+#include <asm/io.h>
+#include <asm/system.h>
+
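+/*
+ * Atomically swap new into *ptr, returning the previous value in ret.
+ * sfx selects the operand size and ordering, e.g. ".w.aqrl" or ".d.aqrl".
+ */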
+#define __amoswap_generic(ptr, new, ret, sfx) \
+({ \
+    asm volatile ( \
+        " amoswap" sfx " %0, %2, %1" \
+        : "=r" (ret), "+A" (*ptr) \
+        : "r" (new) \
+        : "memory" ); \
+})
+
+/*
+ * For LR and SC, the A extension requires that the address held in rs1 be
+ * naturally aligned to the size of the operand (i.e., eight-byte aligned
+ * for 64-bit words and four-byte aligned for 32-bit words).
+ * If the address is not naturally aligned, an address-misaligned exception
+ * or an access-fault exception will be generated.
+ *
+ * Therefore:
+ * - for a 1-byte xchg, access the containing word by clearing the low two
+ *   bits
+ * - for a 2-byte xchg, access the containing word by clearing bit 1.
+ *
+ * If the resulting 4-byte access is still misaligned, it will fault just as
+ * a non-emulated 4-byte access would.
+ */
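+/*
+ * Worked example (RISC-V is little-endian): a 1-byte xchg at address
+ * 0x...7 gives aligned_ptr = 0x...4, new_val_pos = 3 * 8 = 24 and
+ * mask = 0xff000000, so the LR/SC loop below rewrites only the top
+ * byte of the containing word.
+ */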
+#define emulate_xchg_1_2(ptr, new, lr_sfx, sc_sfx) \
+({ \
+    uint32_t *aligned_ptr = \
+        (uint32_t *)((unsigned long)(ptr) & ~(0x4 - sizeof(*(ptr)))); \
+    unsigned int new_val_pos = \
+        ((unsigned long)(ptr) & (0x4 - sizeof(*(ptr)))) * BITS_PER_BYTE; \
+    unsigned long mask = \
+        GENMASK(((sizeof(*(ptr))) * BITS_PER_BYTE) - 1, 0) << new_val_pos; \
+    unsigned int new_ = (new) << new_val_pos; \
+    unsigned int old; \
+    unsigned int scratch; \
+    \
+    asm volatile ( \
+        "0: lr.w" lr_sfx " %[old], %[aligned_ptr]\n" \
+        "   and  %[scratch], %[old], %z[nmask]\n" \
+        "   or   %[scratch], %[scratch], %z[new_]\n" \
+        "   sc.w" sc_sfx " %[scratch], %[scratch], %[aligned_ptr]\n" \
+        "   bnez %[scratch], 0b\n" \
+        : [old] "=&r" (old), [scratch] "=&r" (scratch), \
+          [aligned_ptr] "+A" (*aligned_ptr) \
+        : [new_] "rJ" (new_), [nmask] "rJ" (~mask) \
+        : "memory" ); \
+    \
+    (__typeof__(*(ptr)))((old & mask) >> new_val_pos); \
+})
+
+static always_inline unsigned long __xchg(volatile void *ptr,
+                                          unsigned long new, int size)
+{
+    unsigned long ret;
+
+    switch ( size )
+    {
+    case 1:
+        ret = emulate_xchg_1_2((volatile uint8_t *)ptr, new, ".aq", ".aqrl");
+        break;
+    case 2:
+        ret = emulate_xchg_1_2((volatile uint16_t *)ptr, new, ".aq", ".aqrl");
+        break;
+    case 4:
+        __amoswap_generic((volatile uint32_t *)ptr, new, ret, ".w.aqrl");
+        break;
+#ifndef CONFIG_32BIT
+    case 8:
+        __amoswap_generic((volatile uint64_t *)ptr, new, ret, ".d.aqrl");
+        break;
+#endif
+    default:
+        STATIC_ASSERT_UNREACHABLE();
+    }
+
+    return ret;
+}
+
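+/*
+ * xchg(): atomically store x into *ptr and return the previous value.
+ * 1- and 2-byte accesses are emulated via a 4-byte LR/SC loop; 4- and
+ * 8-byte accesses use a single amoswap with full (.aqrl) ordering.
+ */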
+#define xchg(ptr, x) \
+({ \
+    __typeof__(*(ptr)) n_ = (x); \
+    (__typeof__(*(ptr))) \
+        __xchg((ptr), (unsigned long)(n_), sizeof(*(ptr))); \
+})
+
+#define __generic_cmpxchg(ptr, old, new, ret, lr_sfx, sc_sfx) \
+({ \
+    register unsigned int rc; \
+    __typeof__(*(ptr)) old__ = (__typeof__(*(ptr)))(old); \
+    __typeof__(*(ptr)) new__ = (__typeof__(*(ptr)))(new); \
+    asm volatile ( \
+        "0: lr" lr_sfx " %0, %2\n" \
+        "   bne  %0, %z3, 1f\n" \
+        "   sc" sc_sfx " %1, %z4, %2\n" \
+        "   bnez %1, 0b\n" \
+        "1:\n" \
+        : "=&r" (ret), "=&r" (rc), "+A" (*ptr) \
+        : "rJ" (old__), "rJ" (new__) \
+        : "memory" ); \
+})
+
+/*
+ * For LR and SC, the A extension requires that the address held in rs1 be
+ * naturally aligned to the size of the operand (i.e., eight-byte aligned
+ * for 64-bit words and four-byte aligned for 32-bit words).
+ * If the address is not naturally aligned, an address-misaligned exception
+ * or an access-fault exception will be generated.
+ *
+ * Therefore:
+ * - for a 1-byte cmpxchg, access the containing word by clearing the low two
+ *   bits
+ * - for a 2-byte cmpxchg, access the containing word by clearing bit 1.
+ *
+ * If the resulting 4-byte access is still misaligned, it will fault just as
+ * a non-emulated 4-byte access would.
+ *
+ * old_val is cast to unsigned long for cmpxchgptr()
+ */
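+/*
+ * Note on the merge step below: old_val holds the loaded word masked to
+ * the target bits, so "xor %[scratch], %[old_val], %[scratch]" clears
+ * just the target byte(s) while keeping the neighbouring bytes, and the
+ * following "or" inserts the shifted new value.
+ */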
+#define emulate_cmpxchg_1_2(ptr, old, new, lr_sfx, sc_sfx) \
+({ \
+    uint32_t *aligned_ptr = \
+        (uint32_t *)((unsigned long)(ptr) & ~(0x4 - sizeof(*(ptr)))); \
+    uint8_t new_val_pos = \
+        ((unsigned long)(ptr) & (0x4 - sizeof(*(ptr)))) * BITS_PER_BYTE; \
+    unsigned long mask = \
+        GENMASK(((sizeof(*(ptr))) * BITS_PER_BYTE) - 1, 0) << new_val_pos; \
+    unsigned int old_ = (old) << new_val_pos; \
+    unsigned int new_ = (new) << new_val_pos; \
+    unsigned int old_val; \
+    unsigned int scratch; \
+    \
+    asm volatile ( \
+        "0: lr.w" lr_sfx " %[scratch], %[aligned_ptr]\n" \
+        "   and  %[old_val], %[scratch], %z[mask]\n" \
+        "   bne  %[old_val], %z[old_], 1f\n" \
+        "   xor  %[scratch], %[old_val], %[scratch]\n" \
+        "   or   %[scratch], %[scratch], %z[new_]\n" \
+        "   sc.w" sc_sfx " %[scratch], %[scratch], %[aligned_ptr]\n" \
+        "   bnez %[scratch], 0b\n" \
+        "1:\n" \
+        : [old_val] "=&r" (old_val), [scratch] "=&r" (scratch), \
+          [aligned_ptr] "+A" (*aligned_ptr) \
+        : [old_] "rJ" (old_), [new_] "rJ" (new_), \
+          [mask] "rJ" (mask) \
+        : "memory" ); \
+    \
+    (__typeof__(*(ptr)))((unsigned long)old_val >> new_val_pos); \
+})
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+static always_inline unsigned long __cmpxchg(volatile void *ptr,
+                                             unsigned long old,
+                                             unsigned long new,
+                                             int size)
+{
+    unsigned long ret;
+
+    switch ( size )
+    {
+    case 1:
+        ret = emulate_cmpxchg_1_2((volatile uint8_t *)ptr, old, new,
+                                  ".aq", ".aqrl");
+        break;
+    case 2:
+        ret = emulate_cmpxchg_1_2((volatile uint16_t *)ptr, old, new,
+                                  ".aq", ".aqrl");
+        break;
+    case 4:
+        __generic_cmpxchg((volatile uint32_t *)ptr, old, new, ret,
+                          ".w.aq", ".w.aqrl");
+        break;
+#ifndef CONFIG_32BIT
+    case 8:
+        __generic_cmpxchg((volatile uint64_t *)ptr, old, new, ret,
+                          ".d.aq", ".d.aqrl");
+        break;
+#endif
+    default:
+        STATIC_ASSERT_UNREACHABLE();
+    }
+
+    return ret;
+}
+
+#define cmpxchg(ptr, o, n) \
+({ \
+    __typeof__(*(ptr)) o_ = (o); \
+    __typeof__(*(ptr)) n_ = (n); \
+    (__typeof__(*(ptr))) \
+    __cmpxchg((ptr), (unsigned long)(o_), (unsigned long)(n_), \
+              sizeof(*(ptr))); \
+})
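+/*
+ * Illustrative (hypothetical) use: increment a 16-bit counter with a
+ * compare-and-swap retry loop:
+ *
+ *     uint16_t old, cur = ACCESS_ONCE(*cnt);
+ *
+ *     do {
+ *         old = cur;
+ *         cur = cmpxchg(cnt, old, old + 1);
+ *     } while ( cur != old );
+ */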
+
+#endif /* _ASM_RISCV_CMPXCHG_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
-- 
2.43.0