"      blmi    " #fail                         \
        :                                       \
        : "r" (ptr), "I" (1)                    \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
+       smp_mb();                               \
        })
 
 #define __down_op_ret(ptr,fail)                        \
 "      mov     %0, ip"                         \
        : "=&r" (ret)                           \
        : "r" (ptr), "I" (1)                    \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
+       smp_mb();                               \
        ret;                                    \
        })
 
 #define __up_op(ptr,wake)                      \
        ({                                      \
+       smp_mb();                               \
        __asm__ __volatile__(                   \
        "@ up_op\n"                             \
 "1:    ldrex   lr, [%0]\n"                     \
 "      blle    " #wake                         \
        :                                       \
        : "r" (ptr), "I" (1)                    \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
        })
 
 /*
 "      blne    " #fail                         \
        :                                       \
        : "r" (ptr), "I" (RW_LOCK_BIAS)         \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
+       smp_mb();                               \
        })
 
 #define __up_op_write(ptr,wake)                        \
        ({                                      \
+       smp_mb();                               \
        __asm__ __volatile__(                   \
        "@ up_op_read\n"                        \
 "1:    ldrex   lr, [%0]\n"                     \
 "      blcs    " #wake                         \
        :                                       \
        : "r" (ptr), "I" (RW_LOCK_BIAS)         \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
        })
 
 #define __down_op_read(ptr,fail)               \
 
 #define __up_op_read(ptr,wake)                 \
        ({                                      \
+       smp_mb();                               \
        __asm__ __volatile__(                   \
        "@ up_op_read\n"                        \
 "1:    ldrex   lr, [%0]\n"                     \
 "      bleq    " #wake                         \
        :                                       \
        : "r" (ptr), "I" (1)                    \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
        })
 
 #else
 "      blmi    " #fail                         \
        :                                       \
        : "r" (ptr), "I" (1)                    \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
+       smp_mb();                               \
        })
 
 #define __down_op_ret(ptr,fail)                        \
 "      mov     %0, ip"                         \
        : "=&r" (ret)                           \
        : "r" (ptr), "I" (1)                    \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
+       smp_mb();                               \
        ret;                                    \
        })
 
 #define __up_op(ptr,wake)                      \
        ({                                      \
+       smp_mb();                               \
        __asm__ __volatile__(                   \
        "@ up_op\n"                             \
 "      mrs     ip, cpsr\n"                     \
 "      blle    " #wake                         \
        :                                       \
        : "r" (ptr), "I" (1)                    \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
        })
 
 /*
 "      blne    " #fail                         \
        :                                       \
        : "r" (ptr), "I" (RW_LOCK_BIAS)         \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
+       smp_mb();                               \
        })
 
 #define __up_op_write(ptr,wake)                        \
 "      blcs    " #wake                         \
        :                                       \
        : "r" (ptr), "I" (RW_LOCK_BIAS)         \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
+       smp_mb(); /* NOTE(review): every other __up_op* macro in this patch calls smp_mb() before its asm, not after - confirm this placement is intended */ \
        })
 
 #define __down_op_read(ptr,fail)               \
 
 #define __up_op_read(ptr,wake)                 \
        ({                                      \
+       smp_mb();                               \
        __asm__ __volatile__(                   \
        "@ up_op_read\n"                        \
 "      mrs     ip, cpsr\n"                     \
 "      bleq    " #wake                         \
        :                                       \
        : "r" (ptr), "I" (1)                    \
-       : "ip", "lr", "cc", "memory");          \
+       : "ip", "lr", "cc");                    \
        })
 
 #endif
 
 /*
  * ARMv6 Spin-locking.
  *
- * We (exclusively) read the old value, and decrement it.  If it
- * hits zero, we may have won the lock, so we try (exclusively)
- * storing it.
+ * We exclusively read the old value.  If it is zero, we may have
+ * won the lock, so we try exclusively storing it.  A memory barrier
+ * is required after we get a lock, and before we release it, because
+ * V6 CPUs are assumed to have weakly ordered memory.
  *
  * Unlocked value: 0
  * Locked value: 1
 "      bne     1b"
        : "=&r" (tmp)
        : "r" (&lock->lock), "r" (1)
-       : "cc", "memory");
+       : "cc");
+
+       smp_mb();
 }
 
 static inline int _raw_spin_trylock(spinlock_t *lock)
 "      strexeq %0, %2, [%1]"
        : "=&r" (tmp)
        : "r" (&lock->lock), "r" (1)
-       : "cc", "memory");
-
-       return tmp == 0;
+       : "cc");
+
+       if (tmp == 0) {
+               smp_mb();
+               return 1;
+       } else {
+               return 0;
+       }
 }
 
 static inline void _raw_spin_unlock(spinlock_t *lock)
 {
+       smp_mb();       /* barrier before release: must precede the store of 0 (the unlocked value) to lock->lock below */
+
        __asm__ __volatile__(
 "      str     %1, [%0]"
        :
        : "r" (&lock->lock), "r" (0)
-       : "cc", "memory");
+       : "cc");        /* NOTE(review): "memory" clobber dropped; presumably smp_mb() also acts as the compiler barrier - confirm */
 }
 
 /*
 "      bne     1b"
        : "=&r" (tmp)
        : "r" (&rw->lock), "r" (0x80000000)
-       : "cc", "memory");
+       : "cc");
+
+       smp_mb();
 }
 
 static inline int _raw_write_trylock(rwlock_t *rw)
 "      strexeq %0, %2, [%1]"
        : "=&r" (tmp)
        : "r" (&rw->lock), "r" (0x80000000)
-       : "cc", "memory");
-
-       return tmp == 0;
+       : "cc");
+
+       if (tmp == 0) {
+               smp_mb();
+               return 1;
+       } else {
+               return 0;
+       }
 }
 
 static inline void _raw_write_unlock(rwlock_t *rw)
 {
+       smp_mb();       /* barrier before release: must precede the store of 0 to rw->lock below */
+
        __asm__ __volatile__(
        "str    %1, [%0]"
        :
        : "r" (&rw->lock), "r" (0)
-       : "cc", "memory");
+       : "cc");        /* NOTE(review): "memory" clobber dropped; presumably smp_mb() also acts as the compiler barrier - confirm */
 }
 
 /*
 "      bmi     1b"
        : "=&r" (tmp), "=&r" (tmp2)
        : "r" (&rw->lock)
-       : "cc", "memory");
+       : "cc");
+
+       smp_mb();
 }
 
 static inline void _raw_read_unlock(rwlock_t *rw)
 {
        unsigned long tmp, tmp2;
 
+       smp_mb();       /* barrier issued before the ldrex/sub sequence on rw->lock below */
+
        __asm__ __volatile__(
 "1:    ldrex   %0, [%2]\n"
 "      sub     %0, %0, #1\n"
 "      bne     1b"
        : "=&r" (tmp), "=&r" (tmp2)
        : "r" (&rw->lock)
-       : "cc", "memory");
+       : "cc");        /* NOTE(review): "memory" clobber dropped; presumably smp_mb() also acts as the compiler barrier - confirm */
 }
 
 #define _raw_read_trylock(lock) generic_raw_read_trylock(lock)