*/
 
 #include <linux/linkage.h>
+#include <asm/blackfin.h>
 
 .align 2
 
        P2 = R2;        /* P2 = count */
        SSYNC;
        LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
-.Llong_loop_s: R0 = [P0];
-.Llong_loop_e: [P1++] = R0;
+.Llong_loop_s:  R0 = [P0];
+               [P1++] = R0;
+               NOP;
+.Llong_loop_e:         NOP;
        sti R3;
        RTS;
 
+
 ENTRY(_insw)
        P0 = R0;        /* P0 = port */
        cli R3;
        P2 = R2;        /* P2 = count */
        SSYNC;
        LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
-.Lword_loop_s: R0 = W[P0];
-.Lword_loop_e: W[P1++] = R0;
+.Lword_loop_s:  R0 = W[P0];	/* loop start: read 16 bits from the port address in P0 (P0 is not advanced) */
+               W[P1++] = R0;	/* store the 16-bit value at P1, then post-increment P1; P1 setup is not visible in this excerpt - TODO confirm */
+               NOP;	/* NOTE(review): padding inside the loop body; reason not stated here - presumably a hardware workaround, confirm */
+.Lword_loop_e:         NOP;	/* loop end: the LSETUP body spans .Lword_loop_s through this NOP, repeated LC0 (= P2, count) times */
        sti R3;
        RTS;
 
        P2 = R2;        /* P2 = count */
        SSYNC;
        LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
-.Lbyte_loop_s: R0 = B[P0];
-.Lbyte_loop_e: B[P1++] = R0;
+.Lbyte_loop_s:  R0 = B[P0];
+               B[P1++] = R0;
+               NOP;
+.Lbyte_loop_e:  NOP;
        sti R3;
        RTS;