zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

atomic.h (27887B) - Raw


      1 /* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */
      2 
      3 /*-
      4  * SPDX-License-Identifier: BSD-4-Clause
      5  *
      6  * Copyright (C) 2003-2004 Olivier Houchard
      7  * Copyright (C) 1994-1997 Mark Brinicombe
      8  * Copyright (C) 1994 Brini
      9  * All rights reserved.
     10  *
     11  * This code is derived from software written for Brini by Mark Brinicombe
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  * 3. All advertising materials mentioning features or use of this software
     22  *    must display the following acknowledgement:
     23  *	This product includes software developed by Brini.
     24  * 4. The name of Brini may not be used to endorse or promote products
     25  *    derived from this software without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
     28  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     29  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     30  * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     31  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     32  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
     33  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     34  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     35  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
     36  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #ifndef	_MACHINE_ATOMIC_H_
     40 #define	_MACHINE_ATOMIC_H_
     41 
     42 #include <sys/atomic_common.h>
     43 
     44 #if __ARM_ARCH >= 7
     45 #define isb()  __asm __volatile("isb" : : : "memory")
     46 #define dsb()  __asm __volatile("dsb" : : : "memory")
     47 #define dmb()  __asm __volatile("dmb" : : : "memory")
     48 #else
     49 #define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
     50 #define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
     51 #define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
     52 #endif
     53 
     54 #define mb()   dmb()
     55 #define wmb()  dmb()
     56 #define rmb()  dmb()
     57 
     58 #define	ARM_HAVE_ATOMIC64
     59 
     60 #define ATOMIC_ACQ_REL_LONG(NAME)					\
     61 static __inline void							\
     62 atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
     63 {									\
     64 	atomic_##NAME##_long(p, v);					\
     65 	dmb();								\
     66 }									\
     67 									\
     68 static __inline  void							\
     69 atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
     70 {									\
     71 	dmb();								\
     72 	atomic_##NAME##_long(p, v);					\
     73 }
     74 
     75 #define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
     76 static __inline  void							\
     77 atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
     78 {									\
     79 	atomic_##NAME##_##WIDTH(p, v);					\
     80 	dmb();								\
     81 }									\
     82 									\
     83 static __inline  void							\
     84 atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
     85 {									\
     86 	dmb();								\
     87 	atomic_##NAME##_##WIDTH(p, v);					\
     88 }
     89 
     90 static __inline void
     91 atomic_add_32(volatile uint32_t *p, uint32_t val)
     92 {
     93 	uint32_t tmp = 0, tmp2 = 0;
     94 
     95 	__asm __volatile(
     96 	    "1: ldrex	%0, [%2]	\n"
     97 	    "   add	%0, %0, %3	\n"
     98 	    "   strex	%1, %0, [%2]	\n"
     99 	    "   cmp	%1, #0		\n"
    100 	    "   it	ne		\n"
    101 	    "   bne	1b		\n"
    102 	    : "=&r" (tmp), "+r" (tmp2)
    103 	    ,"+r" (p), "+r" (val) : : "cc", "memory");
    104 }
    105 
    106 static __inline void
    107 atomic_add_64(volatile uint64_t *p, uint64_t val)
    108 {
    109 	uint64_t tmp;
    110 	uint32_t exflag;
    111 
    112 	__asm __volatile(
    113 	    "1:							\n"
    114 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
    115 	    "   adds	%Q[tmp], %Q[val]			\n"
    116 	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
    117 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
    118 	    "   teq	%[exf], #0				\n"
    119 	    "   it	ne					\n"
    120 	    "   bne	1b					\n"
    121 	    : [exf] "=&r" (exflag),
    122 	      [tmp] "=&r" (tmp)
    123 	    : [ptr] "r"   (p),
    124 	      [val] "r"   (val)
    125 	    : "cc", "memory");
    126 }
    127 
    128 static __inline void
    129 atomic_add_long(volatile u_long *p, u_long val)
    130 {
    131 
    132 	atomic_add_32((volatile uint32_t *)p, val);
    133 }
    134 
    135 ATOMIC_ACQ_REL(add, 32)
    136 ATOMIC_ACQ_REL(add, 64)
    137 ATOMIC_ACQ_REL_LONG(add)
    138 
    139 static __inline void
    140 atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
    141 {
    142 	uint32_t tmp = 0, tmp2 = 0;
    143 
    144 	__asm __volatile(
    145 	    "1: ldrex	%0, [%2]	\n"
    146 	    "   bic	%0, %0, %3	\n"
    147 	    "   strex	%1, %0, [%2]	\n"
    148 	    "   cmp	%1, #0		\n"
    149 	    "   it	ne		\n"
    150 	    "   bne	1b		\n"
    151 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
    152 	    : : "cc", "memory");
    153 }
    154 
    155 static __inline void
    156 atomic_clear_64(volatile uint64_t *p, uint64_t val)
    157 {
    158 	uint64_t tmp;
    159 	uint32_t exflag;
    160 
    161 	__asm __volatile(
    162 	    "1:							\n"
    163 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
    164 	    "   bic	%Q[tmp], %Q[val]			\n"
    165 	    "   bic	%R[tmp], %R[val]			\n"
    166 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
    167 	    "   teq	%[exf], #0				\n"
    168 	    "   it	ne					\n"
    169 	    "   bne	1b					\n"
    170 	    : [exf] "=&r" (exflag),
    171 	      [tmp] "=&r" (tmp)
    172 	    : [ptr] "r"   (p),
    173 	      [val] "r"   (val)
    174 	    : "cc", "memory");
    175 }
    176 
    177 static __inline void
    178 atomic_clear_long(volatile u_long *address, u_long setmask)
    179 {
    180 
    181 	atomic_clear_32((volatile uint32_t *)address, setmask);
    182 }
    183 
    184 ATOMIC_ACQ_REL(clear, 32)
    185 ATOMIC_ACQ_REL(clear, 64)
    186 ATOMIC_ACQ_REL_LONG(clear)
    187 
    188 #define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
    189     {                                                         \
    190 	TYPE tmp;                                             \
    191                                                               \
    192 	__asm __volatile(                                     \
    193 	    "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
    194 	    "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
    195 	    "   teq            %[tmp], %[ret]            \n"  \
    196 	    "   ittee          ne                        \n"  \
    197 	    "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
    198 	    "   movne          %[ret], #0                \n"  \
    199 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
    200 	    "   eorseq         %[ret], #1                \n"  \
    201 	    "   beq            1b                        \n"  \
    202 	    : [ret] "=&r" (RET),                              \
    203 	      [tmp] "=&r" (tmp)                               \
    204 	    : [ptr] "r"   (_ptr),                             \
    205 	      [oldv] "r"  (_old),                             \
    206 	      [newv] "r"  (_new)                              \
    207 	    : "cc", "memory");                                \
    208     }
    209 
    210 #define ATOMIC_FCMPSET_CODE64(RET)                                 \
    211     {                                                              \
    212 	uint64_t cmp, tmp;                                         \
    213                                                                    \
    214 	__asm __volatile(                                          \
    215 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
    216 	    "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
    217 	    "   teq      %Q[tmp], %Q[cmp]                     \n"  \
    218 	    "   it       eq                                   \n"  \
    219 	    "   teqeq    %R[tmp], %R[cmp]                     \n"  \
    220 	    "   ittee    ne                                   \n"  \
    221 	    "   movne    %[ret], #0                           \n"  \
    222 	    "   strdne   %[cmp], [%[oldv]]                    \n"  \
    223 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
    224 	    "   eorseq   %[ret], #1                           \n"  \
    225 	    "   beq      1b                                   \n"  \
    226 	    : [ret] "=&r" (RET),                                   \
    227 	      [cmp] "=&r" (cmp),                                   \
    228 	      [tmp] "=&r" (tmp)                                    \
    229 	    : [ptr] "r"   (_ptr),                                  \
    230 	      [oldv] "r"  (_old),                                  \
    231 	      [newv] "r"  (_new)                                   \
    232 	    : "cc", "memory");                                     \
    233     }
    234 
    235 static __inline int
    236 atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
    237 {
    238 	int ret;
    239 
    240 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
    241 	return (ret);
    242 }
    243 #define	atomic_fcmpset_8	atomic_fcmpset_8
    244 
    245 static __inline int
    246 atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
    247 {
    248 	int ret;
    249 
    250 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
    251 	dmb();
    252 	return (ret);
    253 }
    254 
    255 static __inline int
    256 atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
    257 {
    258 	int ret;
    259 
    260 	dmb();
    261 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
    262 	return (ret);
    263 }
    264 
    265 static __inline int
    266 atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
    267 {
    268 	int ret;
    269 
    270 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
    271 	return (ret);
    272 }
    273 #define	atomic_fcmpset_16	atomic_fcmpset_16
    274 
    275 static __inline int
    276 atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
    277 {
    278 	int ret;
    279 
    280 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
    281 	dmb();
    282 	return (ret);
    283 }
    284 
    285 static __inline int
    286 atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
    287 {
    288 	int ret;
    289 
    290 	dmb();
    291 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
    292 	return (ret);
    293 }
    294 
    295 static __inline int
    296 atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
    297 {
    298 	int ret;
    299 
    300 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
    301 	return (ret);
    302 }
    303 
    304 static __inline int
    305 atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
    306 {
    307 	int ret;
    308 
    309 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
    310 	dmb();
    311 	return (ret);
    312 }
    313 
    314 static __inline int
    315 atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
    316 {
    317 	int ret;
    318 
    319 	dmb();
    320 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
    321 	return (ret);
    322 }
    323 
    324 static __inline int
    325 atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
    326 {
    327 	int ret;
    328 
    329 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
    330 	return (ret);
    331 }
    332 
    333 static __inline int
    334 atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
    335 {
    336 	int ret;
    337 
    338 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
    339 	dmb();
    340 	return (ret);
    341 }
    342 
    343 static __inline int
    344 atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
    345 {
    346 	int ret;
    347 
    348 	dmb();
    349 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
    350 	return (ret);
    351 }
    352 
    353 static __inline int
    354 atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
    355 {
    356 	int ret;
    357 
    358 	ATOMIC_FCMPSET_CODE64(ret);
    359 	return (ret);
    360 }
    361 
    362 static __inline int
    363 atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
    364 {
    365 	int ret;
    366 
    367 	ATOMIC_FCMPSET_CODE64(ret);
    368 	dmb();
    369 	return (ret);
    370 }
    371 
    372 static __inline int
    373 atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
    374 {
    375 	int ret;
    376 
    377 	dmb();
    378 	ATOMIC_FCMPSET_CODE64(ret);
    379 	return (ret);
    380 }
    381 
    382 #define ATOMIC_CMPSET_CODE(RET, SUF)                         \
    383     {                                                        \
    384 	__asm __volatile(                                    \
    385 	    "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
    386 	    "   teq            %[ret], %[oldv]           \n" \
    387 	    "   itee           ne                        \n" \
    388 	    "   movne          %[ret], #0                \n" \
    389 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
    390 	    "   eorseq         %[ret], #1                \n" \
    391 	    "   beq            1b                        \n" \
    392 	    : [ret] "=&r" (RET)                              \
    393 	    : [ptr] "r"   (_ptr),                            \
    394 	      [oldv] "r"  (_old),                            \
    395 	      [newv] "r"  (_new)                             \
    396 	    : "cc", "memory");                               \
    397     }
    398 
    399 #define ATOMIC_CMPSET_CODE64(RET)                                 \
    400     {                                                             \
    401 	uint64_t tmp;                                             \
    402 	                                                          \
    403 	__asm __volatile(                                         \
    404 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
    405 	    "   teq      %Q[tmp], %Q[oldv]                    \n" \
    406 	    "   it       eq                                   \n" \
    407 	    "   teqeq    %R[tmp], %R[oldv]                    \n" \
    408 	    "   itee     ne                                   \n" \
    409 	    "   movne    %[ret], #0                           \n" \
    410 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
    411 	    "   eorseq   %[ret], #1                           \n" \
    412 	    "   beq      1b                                   \n" \
    413 	    : [ret] "=&r" (RET),                                  \
    414 	      [tmp] "=&r" (tmp)                                   \
    415 	    : [ptr] "r"   (_ptr),                                 \
    416 	      [oldv] "r"  (_old),                                 \
    417 	      [newv] "r"  (_new)                                  \
    418 	    : "cc", "memory");                                    \
    419     }
    420 
    421 static __inline int
    422 atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
    423 {
    424 	int ret;
    425 
    426 	ATOMIC_CMPSET_CODE(ret, "b");
    427 	return (ret);
    428 }
    429 #define	atomic_cmpset_8		atomic_cmpset_8
    430 
    431 static __inline int
    432 atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
    433 {
    434 	int ret;
    435 
    436 	ATOMIC_CMPSET_CODE(ret, "b");
    437 	dmb();
    438 	return (ret);
    439 }
    440 
    441 static __inline int
    442 atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
    443 {
    444 	int ret;
    445 
    446 	dmb();
    447 	ATOMIC_CMPSET_CODE(ret, "b");
    448 	return (ret);
    449 }
    450 
    451 static __inline int
    452 atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
    453 {
    454 	int ret;
    455 
    456 	ATOMIC_CMPSET_CODE(ret, "h");
    457 	return (ret);
    458 }
    459 #define	atomic_cmpset_16	atomic_cmpset_16
    460 
    461 static __inline int
    462 atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
    463 {
    464 	int ret;
    465 
    466 	ATOMIC_CMPSET_CODE(ret, "h");
    467 	dmb();
    468 	return (ret);
    469 }
    470 
    471 static __inline int
    472 atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
    473 {
    474 	int ret;
    475 
    476 	dmb();
    477 	ATOMIC_CMPSET_CODE(ret, "h");
    478 	return (ret);
    479 }
    480 
    481 static __inline int
    482 atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
    483 {
    484 	int ret;
    485 
    486 	ATOMIC_CMPSET_CODE(ret, "");
    487 	return (ret);
    488 }
    489 
    490 static __inline int
    491 atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
    492 {
    493 	int ret;
    494 
    495 	ATOMIC_CMPSET_CODE(ret, "");
    496 	dmb();
    497 	return (ret);
    498 }
    499 
    500 static __inline int
    501 atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
    502 {
    503 	int ret;
    504 
    505 	dmb();
    506 	ATOMIC_CMPSET_CODE(ret, "");
    507 	return (ret);
    508 }
    509 
    510 static __inline int
    511 atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
    512 {
    513 	int ret;
    514 
    515 	ATOMIC_CMPSET_CODE(ret, "");
    516 	return (ret);
    517 }
    518 
    519 static __inline int
    520 atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
    521 {
    522 	int ret;
    523 
    524 	ATOMIC_CMPSET_CODE(ret, "");
    525 	dmb();
    526 	return (ret);
    527 }
    528 
    529 static __inline int
    530 atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
    531 {
    532 	int ret;
    533 
    534 	dmb();
    535 	ATOMIC_CMPSET_CODE(ret, "");
    536 	return (ret);
    537 }
    538 
    539 static __inline int
    540 atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
    541 {
    542 	int ret;
    543 
    544 	ATOMIC_CMPSET_CODE64(ret);
    545 	return (ret);
    546 }
    547 
    548 static __inline int
    549 atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
    550 {
    551 	int ret;
    552 
    553 	ATOMIC_CMPSET_CODE64(ret);
    554 	dmb();
    555 	return (ret);
    556 }
    557 
    558 static __inline int
    559 atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
    560 {
    561 	int ret;
    562 
    563 	dmb();
    564 	ATOMIC_CMPSET_CODE64(ret);
    565 	return (ret);
    566 }
    567 
    568 static __inline uint32_t
    569 atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
    570 {
    571 	uint32_t tmp = 0, tmp2 = 0, ret = 0;
    572 
    573 	__asm __volatile(
    574 	    "1: ldrex	%0, [%3]	\n"
    575 	    "   add	%1, %0, %4	\n"
    576 	    "   strex	%2, %1, [%3]	\n"
    577 	    "   cmp	%2, #0		\n"
    578 	    "   it	ne		\n"
    579 	    "   bne	1b		\n"
    580 	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
    581 	    : : "cc", "memory");
    582 	return (ret);
    583 }
    584 
    585 static __inline uint64_t
    586 atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
    587 {
    588 	uint64_t ret, tmp;
    589 	uint32_t exflag;
    590 
    591 	__asm __volatile(
    592 	    "1:							\n"
    593 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
    594 	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
    595 	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
    596 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
    597 	    "   teq	%[exf], #0				\n"
    598 	    "   it	ne					\n"
    599 	    "   bne	1b					\n"
    600 	    : [ret] "=&r" (ret),
    601 	      [exf] "=&r" (exflag),
    602 	      [tmp] "=&r" (tmp)
    603 	    : [ptr] "r"   (p),
    604 	      [val] "r"   (val)
    605 	    : "cc", "memory");
    606 	return (ret);
    607 }
    608 
    609 static __inline u_long
    610 atomic_fetchadd_long(volatile u_long *p, u_long val)
    611 {
    612 
    613 	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
    614 }
    615 
    616 static __inline uint32_t
    617 atomic_load_acq_32(volatile uint32_t *p)
    618 {
    619 	uint32_t v;
    620 
    621 	v = *p;
    622 	dmb();
    623 	return (v);
    624 }
    625 
    626 static __inline uint64_t
    627 atomic_load_64(volatile uint64_t *p)
    628 {
    629 	uint64_t ret;
    630 
    631 	/*
    632 	 * The only way to atomically load 64 bits is with LDREXD which puts the
    633 	 * exclusive monitor into the exclusive state, so reset it to open state
    634 	 * with CLREX because we don't actually need to store anything.
    635 	 */
    636 	__asm __volatile(
    637 	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
    638 	    "clrex					\n"
    639 	    : [ret] "=&r" (ret)
    640 	    : [ptr] "r"   (p)
    641 	    : "cc", "memory");
    642 	return (ret);
    643 }
    644 
    645 static __inline uint64_t
    646 atomic_load_acq_64(volatile uint64_t *p)
    647 {
    648 	uint64_t ret;
    649 
    650 	ret = atomic_load_64(p);
    651 	dmb();
    652 	return (ret);
    653 }
    654 
    655 static __inline u_long
    656 atomic_load_acq_long(volatile u_long *p)
    657 {
    658 	u_long v;
    659 
    660 	v = *p;
    661 	dmb();
    662 	return (v);
    663 }
    664 
    665 static __inline uint32_t
    666 atomic_readandclear_32(volatile uint32_t *p)
    667 {
    668 	uint32_t ret, tmp = 0, tmp2 = 0;
    669 
    670 	__asm __volatile(
    671 	    "1: ldrex	%0, [%3]	\n"
    672 	    "   mov	%1, #0		\n"
    673 	    "   strex	%2, %1, [%3]	\n"
    674 	    "   cmp	%2, #0		\n"
    675 	    "   it	ne		\n"
    676 	    "   bne	1b		\n"
    677 	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
    678 	    : : "cc", "memory");
    679 	return (ret);
    680 }
    681 
    682 static __inline uint64_t
    683 atomic_readandclear_64(volatile uint64_t *p)
    684 {
    685 	uint64_t ret, tmp;
    686 	uint32_t exflag;
    687 
    688 	__asm __volatile(
    689 	    "1:							\n"
    690 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
    691 	    "   mov	%Q[tmp], #0				\n"
    692 	    "   mov	%R[tmp], #0				\n"
    693 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
    694 	    "   teq	%[exf], #0				\n"
    695 	    "   it	ne					\n"
    696 	    "   bne	1b					\n"
    697 	    : [ret] "=&r" (ret),
    698 	      [exf] "=&r" (exflag),
    699 	      [tmp] "=&r" (tmp)
    700 	    : [ptr] "r"   (p)
    701 	    : "cc", "memory");
    702 	return (ret);
    703 }
    704 
    705 static __inline u_long
    706 atomic_readandclear_long(volatile u_long *p)
    707 {
    708 
    709 	return (atomic_readandclear_32((volatile uint32_t *)p));
    710 }
    711 
    712 static __inline void
    713 atomic_set_32(volatile uint32_t *address, uint32_t setmask)
    714 {
    715 	uint32_t tmp = 0, tmp2 = 0;
    716 
    717 	__asm __volatile(
    718 	    "1: ldrex	%0, [%2]	\n"
    719 	    "   orr	%0, %0, %3	\n"
    720 	    "   strex	%1, %0, [%2]	\n"
    721 	    "   cmp	%1, #0		\n"
    722 	    "   it	ne		\n"
    723 	    "   bne	1b		\n"
    724 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
    725 	    : : "cc", "memory");
    726 }
    727 
    728 static __inline void
    729 atomic_set_64(volatile uint64_t *p, uint64_t val)
    730 {
    731 	uint64_t tmp;
    732 	uint32_t exflag;
    733 
    734 	__asm __volatile(
    735 	    "1:							\n"
    736 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
    737 	    "   orr	%Q[tmp], %Q[val]			\n"
    738 	    "   orr	%R[tmp], %R[val]			\n"
    739 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
    740 	    "   teq	%[exf], #0				\n"
    741 	    "   it	ne					\n"
    742 	    "   bne	1b					\n"
    743 	    : [exf] "=&r" (exflag),
    744 	      [tmp] "=&r" (tmp)
    745 	    : [ptr] "r"   (p),
    746 	      [val] "r"   (val)
    747 	    : "cc", "memory");
    748 }
    749 
    750 static __inline void
    751 atomic_set_long(volatile u_long *address, u_long setmask)
    752 {
    753 
    754 	atomic_set_32((volatile uint32_t *)address, setmask);
    755 }
    756 
    757 ATOMIC_ACQ_REL(set, 32)
    758 ATOMIC_ACQ_REL(set, 64)
    759 ATOMIC_ACQ_REL_LONG(set)
    760 
    761 static __inline void
    762 atomic_subtract_32(volatile uint32_t *p, uint32_t val)
    763 {
    764 	uint32_t tmp = 0, tmp2 = 0;
    765 
    766 	__asm __volatile(
    767 	    "1: ldrex	%0, [%2]	\n"
    768 	    "   sub	%0, %0, %3	\n"
    769 	    "   strex	%1, %0, [%2]	\n"
    770 	    "   cmp	%1, #0		\n"
    771 	    "   it	ne		\n"
    772 	    "   bne	1b		\n"
    773 	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
    774 	    : : "cc", "memory");
    775 }
    776 
    777 static __inline void
    778 atomic_subtract_64(volatile uint64_t *p, uint64_t val)
    779 {
    780 	uint64_t tmp;
    781 	uint32_t exflag;
    782 
    783 	__asm __volatile(
    784 	    "1:							\n"
    785 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
    786 	    "   subs	%Q[tmp], %Q[val]			\n"
    787 	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
    788 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
    789 	    "   teq	%[exf], #0				\n"
    790 	    "   it	ne					\n"
    791 	    "   bne	1b					\n"
    792 	    : [exf] "=&r" (exflag),
    793 	      [tmp] "=&r" (tmp)
    794 	    : [ptr] "r"   (p),
    795 	      [val] "r"   (val)
    796 	    : "cc", "memory");
    797 }
    798 
    799 static __inline void
    800 atomic_subtract_long(volatile u_long *p, u_long val)
    801 {
    802 
    803 	atomic_subtract_32((volatile uint32_t *)p, val);
    804 }
    805 
    806 ATOMIC_ACQ_REL(subtract, 32)
    807 ATOMIC_ACQ_REL(subtract, 64)
    808 ATOMIC_ACQ_REL_LONG(subtract)
    809 
    810 static __inline void
    811 atomic_store_64(volatile uint64_t *p, uint64_t val)
    812 {
    813 	uint64_t tmp;
    814 	uint32_t exflag;
    815 
    816 	/*
    817 	 * The only way to atomically store 64 bits is with STREXD, which will
    818 	 * succeed only if paired up with a preceeding LDREXD using the same
    819 	 * address, so we read and discard the existing value before storing.
    820 	 */
    821 	__asm __volatile(
    822 	    "1:							\n"
    823 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
    824 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
    825 	    "   teq	%[exf], #0				\n"
    826 	    "   it	ne					\n"
    827 	    "   bne	1b					\n"
    828 	    : [tmp] "=&r" (tmp),
    829 	      [exf] "=&r" (exflag)
    830 	    : [ptr] "r"   (p),
    831 	      [val] "r"   (val)
    832 	    : "cc", "memory");
    833 }
    834 
    835 static __inline void
    836 atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
    837 {
    838 
    839 	dmb();
    840 	*p = v;
    841 }
    842 
    843 static __inline void
    844 atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
    845 {
    846 
    847 	dmb();
    848 	atomic_store_64(p, val);
    849 }
    850 
    851 static __inline void
    852 atomic_store_rel_long(volatile u_long *p, u_long v)
    853 {
    854 
    855 	dmb();
    856 	*p = v;
    857 }
    858 
    859 static __inline int
    860 atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
    861 {
    862 	int newv, oldv, result;
    863 
    864 	__asm __volatile(
    865 	    "   mov     ip, #1					\n"
    866 	    "   lsl     ip, ip, %[bit]				\n"
    867 	    /*  Done with %[bit] as input, reuse below as output. */
    868 	    "1:							\n"
    869 	    "   ldrex	%[oldv], [%[ptr]]			\n"
    870 	    "   bic     %[newv], %[oldv], ip			\n"
    871 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
    872 	    "   teq	%[bit], #0				\n"
    873 	    "   it	ne					\n"
    874 	    "   bne	1b					\n"
    875 	    "   ands	%[bit], %[oldv], ip			\n"
    876 	    "   it	ne					\n"
    877 	    "   movne   %[bit], #1                              \n"
    878 	    : [bit]  "=&r"   (result),
    879 	      [oldv] "=&r"   (oldv),
    880 	      [newv] "=&r"   (newv)
    881 	    : [ptr]  "r"     (ptr),
    882 	             "[bit]" (bit & 0x1f)
    883 	    : "cc", "ip", "memory");
    884 
    885 	return (result);
    886 }
    887 
    888 static __inline int
    889 atomic_testandclear_int(volatile u_int *p, u_int v)
    890 {
    891 
    892 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
    893 }
    894 
    895 static __inline int
    896 atomic_testandclear_long(volatile u_long *p, u_int v)
    897 {
    898 
    899 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
    900 }
    901 #define	atomic_testandclear_long	atomic_testandclear_long
    902 
    903 
    904 static __inline int
    905 atomic_testandclear_64(volatile uint64_t *p, u_int v)
    906 {
    907 	volatile uint32_t *p32;
    908 
    909 	p32 = (volatile uint32_t *)p;
    910 	/*
    911 	 * Assume little-endian,
    912 	 * atomic_testandclear_32() uses only last 5 bits of v
    913 	 */
    914 	if ((v & 0x20) != 0)
    915 		p32++;
    916 	return (atomic_testandclear_32(p32, v));
    917 }
    918 
    919 static __inline int
    920 atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
    921 {
    922 	int newv, oldv, result;
    923 
    924 	__asm __volatile(
    925 	    "   mov     ip, #1					\n"
    926 	    "   lsl     ip, ip, %[bit]				\n"
    927 	    /*  Done with %[bit] as input, reuse below as output. */
    928 	    "1:							\n"
    929 	    "   ldrex	%[oldv], [%[ptr]]			\n"
    930 	    "   orr     %[newv], %[oldv], ip			\n"
    931 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
    932 	    "   teq	%[bit], #0				\n"
    933 	    "   it	ne					\n"
    934 	    "   bne	1b					\n"
    935 	    "   ands	%[bit], %[oldv], ip			\n"
    936 	    "   it	ne					\n"
    937 	    "   movne   %[bit], #1                              \n"
    938 	    : [bit]  "=&r"   (result),
    939 	      [oldv] "=&r"   (oldv),
    940 	      [newv] "=&r"   (newv)
    941 	    : [ptr]  "r"     (ptr),
    942 	             "[bit]" (bit & 0x1f)
    943 	    : "cc", "ip", "memory");
    944 
    945 	return (result);
    946 }
    947 
    948 static __inline int
    949 atomic_testandset_int(volatile u_int *p, u_int v)
    950 {
    951 
    952 	return (atomic_testandset_32((volatile uint32_t *)p, v));
    953 }
    954 
    955 static __inline int
    956 atomic_testandset_long(volatile u_long *p, u_int v)
    957 {
    958 
    959 	return (atomic_testandset_32((volatile uint32_t *)p, v));
    960 }
    961 #define	atomic_testandset_long	atomic_testandset_long
    962 
    963 static __inline int
    964 atomic_testandset_64(volatile uint64_t *p, u_int v)
    965 {
    966 	volatile uint32_t *p32;
    967 
    968 	p32 = (volatile uint32_t *)p;
    969 	/*
    970 	 * Assume little-endian,
    971 	 * atomic_testandset_32() uses only last 5 bits of v
    972 	 */
    973 	if ((v & 0x20) != 0)
    974 		p32++;
    975 	return (atomic_testandset_32(p32, v));
    976 }
    977 
    978 static __inline uint32_t
    979 atomic_swap_32(volatile uint32_t *p, uint32_t v)
    980 {
    981 	uint32_t ret, exflag;
    982 
    983 	__asm __volatile(
    984 	    "1: ldrex	%[ret], [%[ptr]]		\n"
    985 	    "   strex	%[exf], %[val], [%[ptr]]	\n"
    986 	    "   teq	%[exf], #0			\n"
    987 	    "   it	ne				\n"
    988 	    "   bne	1b				\n"
    989 	    : [ret] "=&r"  (ret),
    990 	      [exf] "=&r" (exflag)
    991 	    : [val] "r"  (v),
    992 	      [ptr] "r"  (p)
    993 	    : "cc", "memory");
    994 	return (ret);
    995 }
    996 
    997 static __inline u_long
    998 atomic_swap_long(volatile u_long *p, u_long v)
    999 {
   1000 
   1001 	return (atomic_swap_32((volatile uint32_t *)p, v));
   1002 }
   1003 
   1004 static __inline uint64_t
   1005 atomic_swap_64(volatile uint64_t *p, uint64_t v)
   1006 {
   1007 	uint64_t ret;
   1008 	uint32_t exflag;
   1009 
   1010 	__asm __volatile(
   1011 	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
   1012 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
   1013 	    "   teq	%[exf], #0				\n"
   1014 	    "   it	ne					\n"
   1015 	    "   bne	1b					\n"
   1016 	    : [ret] "=&r" (ret),
   1017 	      [exf] "=&r" (exflag)
   1018 	    : [val] "r"   (v),
   1019 	      [ptr] "r"   (p)
   1020 	    : "cc", "memory");
   1021 	return (ret);
   1022 }
   1023 
   1024 #undef ATOMIC_ACQ_REL
   1025 #undef ATOMIC_ACQ_REL_LONG
   1026 
   1027 static __inline void
   1028 atomic_thread_fence_acq(void)
   1029 {
   1030 
   1031 	dmb();
   1032 }
   1033 
   1034 static __inline void
   1035 atomic_thread_fence_rel(void)
   1036 {
   1037 
   1038 	dmb();
   1039 }
   1040 
   1041 static __inline void
   1042 atomic_thread_fence_acq_rel(void)
   1043 {
   1044 
   1045 	dmb();
   1046 }
   1047 
   1048 static __inline void
   1049 atomic_thread_fence_seq_cst(void)
   1050 {
   1051 
   1052 	dmb();
   1053 }
   1054 
   1055 #define atomic_clear_ptr		atomic_clear_32
   1056 #define atomic_clear_acq_ptr		atomic_clear_acq_32
   1057 #define atomic_clear_rel_ptr		atomic_clear_rel_32
   1058 #define atomic_set_ptr			atomic_set_32
   1059 #define atomic_set_acq_ptr		atomic_set_acq_32
   1060 #define atomic_set_rel_ptr		atomic_set_rel_32
   1061 #define atomic_fcmpset_ptr		atomic_fcmpset_32
   1062 #define atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_32
   1063 #define atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_32
   1064 #define atomic_cmpset_ptr		atomic_cmpset_32
   1065 #define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
   1066 #define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
   1067 #define atomic_load_acq_ptr		atomic_load_acq_32
   1068 #define atomic_store_rel_ptr		atomic_store_rel_32
   1069 #define atomic_swap_ptr			atomic_swap_32
   1070 #define atomic_readandclear_ptr		atomic_readandclear_32
   1071 
   1072 #define atomic_add_int			atomic_add_32
   1073 #define atomic_add_acq_int		atomic_add_acq_32
   1074 #define atomic_add_rel_int		atomic_add_rel_32
   1075 #define atomic_subtract_int		atomic_subtract_32
   1076 #define atomic_subtract_acq_int		atomic_subtract_acq_32
   1077 #define atomic_subtract_rel_int		atomic_subtract_rel_32
   1078 #define atomic_clear_int		atomic_clear_32
   1079 #define atomic_clear_acq_int		atomic_clear_acq_32
   1080 #define atomic_clear_rel_int		atomic_clear_rel_32
   1081 #define atomic_set_int			atomic_set_32
   1082 #define atomic_set_acq_int		atomic_set_acq_32
   1083 #define atomic_set_rel_int		atomic_set_rel_32
   1084 #define atomic_fcmpset_int		atomic_fcmpset_32
   1085 #define atomic_fcmpset_acq_int		atomic_fcmpset_acq_32
   1086 #define atomic_fcmpset_rel_int		atomic_fcmpset_rel_32
   1087 #define atomic_cmpset_int		atomic_cmpset_32
   1088 #define atomic_cmpset_acq_int		atomic_cmpset_acq_32
   1089 #define atomic_cmpset_rel_int		atomic_cmpset_rel_32
   1090 #define atomic_fetchadd_int		atomic_fetchadd_32
   1091 #define atomic_readandclear_int		atomic_readandclear_32
   1092 #define atomic_load_acq_int		atomic_load_acq_32
   1093 #define atomic_store_rel_int		atomic_store_rel_32
   1094 #define atomic_swap_int			atomic_swap_32
   1095 
   1096 /*
   1097  * For:
   1098  *  - atomic_load_acq_8
   1099  *  - atomic_load_acq_16
   1100  *  - atomic_testandset_acq_long
   1101  */
   1102 #include <sys/_atomic_subword.h>
   1103 
   1104 #endif /* _MACHINE_ATOMIC_H_ */