The most egregious problem was the lack of a "memory" clobber and of
volatile asm; as a result, the atomics acted as hardware memory barriers
but not as compiler barriers. The use of "+r" rather than "=r" for a
clobbered temp was also wrong, since its initial value is indeterminate.
static inline int a_cas(volatile int *p, int t, int s)
{
int old, tmp;
static inline int a_cas(volatile int *p, int t, int s)
{
int old, tmp;
- __asm__("\n1: lr.w.aqrl %0, %2\n"
+ __asm__ __volatile__ (
+ "\n1: lr.w.aqrl %0, %2\n"
" bne %0, %3, 1f\n"
" sc.w.aqrl %1, %4, %2\n"
" bnez %1, 1b\n"
"1:"
" bne %0, %3, 1f\n"
" sc.w.aqrl %1, %4, %2\n"
" bnez %1, 1b\n"
"1:"
- : "=&r"(old), "+r"(tmp), "+A"(*p)
- : "r"(t), "r"(s));
+ : "=&r"(old), "=r"(tmp), "+A"(*p)
+ : "r"(t), "r"(s)
+ : "memory");
- __asm__("\n1: lr.d.aqrl %0, %2\n"
+ __asm__ __volatile__ (
+ "\n1: lr.d.aqrl %0, %2\n"
" bne %0, %3, 1f\n"
" sc.d.aqrl %1, %4, %2\n"
" bnez %1, 1b\n"
"1:"
" bne %0, %3, 1f\n"
" sc.d.aqrl %1, %4, %2\n"
" bnez %1, 1b\n"
"1:"
- : "=&r"(old), "+r"(tmp), "+A"(*(long *)p)
- : "r"(t), "r"(s));
+ : "=&r"(old), "=r"(tmp), "+A"(*(long *)p)
+ : "r"(t), "r"(s)
+ : "memory");