__und_usr:
        usr_entry
 
-       tst     r3, #PSR_T_BIT                  @ Thumb mode?
-       bne     __und_usr_unknown               @ ignore FP
-       sub     r4, r2, #4
-
        @
        @ fall through to the emulation code, which returns using r9 if
        @ it has emulated the instruction, or the more conventional lr
        @
        adr     r9, ret_from_exception
        adr     lr, __und_usr_unknown
-1:     ldrt    r0, [r4]
+       tst     r3, #PSR_T_BIT                  @ Thumb mode?
+       subeq   r4, r2, #4                      @ ARM instr at LR - 4
+       subne   r4, r2, #2                      @ Thumb instr at LR - 2
+1:     ldreqt  r0, [r4]
+       beq     call_fpe
+       @ Thumb instruction
+#if __LINUX_ARM_ARCH__ >= 7
+2:     ldrht   r5, [r4], #2
+       and     r0, r5, #0xf800                 @ mask bits 111x x... .... ....
+       cmp     r0, #0xe800                     @ 32bit instruction if xx != 0
+       blo     __und_usr_unknown
+3:     ldrht   r0, [r4]
+       add     r2, r2, #2                      @ r2 is PC + 2, make it PC + 4
+       orr     r0, r0, r5, lsl #16
+#else
+       b       __und_usr_unknown
+#endif
+
        @
        @ fallthrough to call_fpe
        @
  * The out of line fixup for the ldrt above.
  */
        .section .fixup, "ax"
-2:     mov     pc, r9
+4:     mov     pc, r9
        .previous
        .section __ex_table,"a"
-       .long   1b, 2b
+       .long   1b, 4b
+#if __LINUX_ARM_ARCH__ >= 7
+       .long   2b, 4b
+       .long   3b, 4b
+#endif
        .previous
 
 /*
  *  r10 = this threads thread_info structure.
  *  lr  = unrecognised instruction return address
  */
+       @
+       @ Fall-through from Thumb-2 __und_usr
+       @
+#ifdef CONFIG_NEON
+       adr     r6, .LCneon_thumb_opcodes
+       b       2f
+#endif
 call_fpe:
 #ifdef CONFIG_NEON
-       adr     r6, .LCneon_opcodes
+       adr     r6, .LCneon_arm_opcodes
 2:
        ldr     r7, [r6], #4                    @ mask value
        cmp     r7, #0                          @ end mask?
 1:
 #endif
        tst     r0, #0x08000000                 @ only CDP/CPRT/LDC/STC have bit 27
+       tstne   r0, #0x04000000                 @ bit 26 set on both ARM and Thumb-2
 #if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710)
        and     r8, r0, #0x0f000000             @ mask out op-code bits
        teqne   r8, #0x0f000000                 @ SWI (ARM6/7 bug)?
 #ifdef CONFIG_NEON
        .align  6
 
-.LCneon_opcodes:
+.LCneon_arm_opcodes:
        .word   0xfe000000                      @ mask
        .word   0xf2000000                      @ opcode
 
        .word   0xff100000                      @ mask
        .word   0xf4000000                      @ opcode
 
+       .word   0x00000000                      @ mask
+       .word   0x00000000                      @ opcode
+
+.LCneon_thumb_opcodes:
+       .word   0xef000000                      @ mask
+       .word   0xef000000                      @ opcode
+
+       .word   0xff100000                      @ mask
+       .word   0xf9000000                      @ opcode
+
        .word   0x00000000                      @ mask
        .word   0x00000000                      @ opcode
 #endif