IB/mthca: Update QP state if query QP succeeds
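The two blobs compared here differ by more than the QP-state fix named in the subject: the hunks below also pick up pre-linking of Tavor receive WQEs at QP creation time, factored-out helpers for building WQE segments, uninitialized_var() annotations for the doorbell bookkeeping variables, send checksum offload on Arbel (IB_SEND_IP_CSUM), and a switch to passing the two doorbell halves to mthca_write64() as plain u32 values.

As context for the mthca_query_qp() hunks, here is a minimal consumer-side sketch (illustrative only, not part of this diff) of how QP state is read through the verbs API; with this change the driver also refreshes its cached qp->state on a successful query, so later modify-QP transitions start from the state the hardware actually reports:

    #include <linux/errno.h>
    #include <rdma/ib_verbs.h>

    /* Illustrative only: returns 0 if the QP is in RTS, based on what
     * ib_query_qp() -- and thus mthca_query_qp() -- reports.
     */
    static int qp_is_rts(struct ib_qp *qp)
    {
            struct ib_qp_attr attr;
            struct ib_qp_init_attr init_attr;
            int err;

            err = ib_query_qp(qp, &attr, IB_QP_STATE, &init_attr);
            if (err)
                    return err;

            return attr.qp_state == IB_QPS_RTS ? 0 : -EINVAL;
    }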
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index eef415b12b2ebda3d60043bf4b7f9cf8fee72563..09dc3614cf2c1ad774b847ff8f9959f13bf0095a 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -437,29 +437,34 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
        int mthca_state;
        u8 status;
 
+       mutex_lock(&qp->mutex);
+
        if (qp->state == IB_QPS_RESET) {
                qp_attr->qp_state = IB_QPS_RESET;
                goto done;
        }
 
        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
-       if (IS_ERR(mailbox))
-               return PTR_ERR(mailbox);
+       if (IS_ERR(mailbox)) {
+               err = PTR_ERR(mailbox);
+               goto out;
+       }
 
        err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status);
        if (err)
-               goto out;
+               goto out_mailbox;
        if (status) {
                mthca_warn(dev, "QUERY_QP returned status %02x\n", status);
                err = -EINVAL;
-               goto out;
+               goto out_mailbox;
        }
 
        qp_param    = mailbox->buf;
        context     = &qp_param->context;
        mthca_state = be32_to_cpu(context->flags) >> 28;
 
-       qp_attr->qp_state            = to_ib_qp_state(mthca_state);
+       qp->state                    = to_ib_qp_state(mthca_state);
+       qp_attr->qp_state            = qp->state;
        qp_attr->path_mtu            = context->mtu_msgmax >> 5;
        qp_attr->path_mig_state      =
                to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -506,8 +511,11 @@ done:
 
        qp_init_attr->cap            = qp_attr->cap;
 
-out:
+out_mailbox:
        mthca_free_mailbox(dev, mailbox);
+
+out:
+       mutex_unlock(&qp->mutex);
        return err;
 }
 
@@ -1175,6 +1183,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
 {
        int ret;
        int i;
+       struct mthca_next_seg *next;
 
        qp->refcount = 1;
        init_waitqueue_head(&qp->wait);
@@ -1217,7 +1226,6 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
        }
 
        if (mthca_is_memfree(dev)) {
-               struct mthca_next_seg *next;
                struct mthca_data_seg *scatter;
                int size = (sizeof (struct mthca_next_seg) +
                            qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
@@ -1240,6 +1248,13 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
                                                    qp->sq.wqe_shift) +
                                                   qp->send_wqe_offset);
                }
+       } else {
+               for (i = 0; i < qp->rq.max; ++i) {
+                       next = get_recv_wqe(qp, i);
+                       next->nda_op = htonl((((i + 1) % qp->rq.max) <<
+                                             qp->rq.wqe_shift) | 1);
+               }
+
        }
 
        qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
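The new else branch above links the Tavor (non-memfree) receive queue into a ring once, at QP creation: each WQE's nda_op holds the byte offset of the next WQE, with the same low bit set that the old per-post code used, so mthca_tavor_post_receive() no longer has to store nda_op for every posted WQE (see the hunks further down that drop those stores). A quick worked example with hypothetical sizes qp->rq.max = 256 and qp->rq.wqe_shift = 6 (64-byte WQEs): entry 5 gets nda_op = htonl((((5 + 1) % 256) << 6) | 1) = htonl(0x181), and the last entry, 255, wraps around to htonl((0 << 6) | 1) = htonl(0x1), pointing back at offset 0.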
@@ -1525,7 +1540,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
-               sqp->ud_header.immediate_data = wr->imm_data;
+               sqp->ud_header.immediate_data = wr->ex.imm_data;
                break;
        default:
                return -EINVAL;
@@ -1578,6 +1593,45 @@ static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
        return cur + nreq >= wq->max;
 }
 
+static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
+                                         u64 remote_addr, u32 rkey)
+{
+       rseg->raddr    = cpu_to_be64(remote_addr);
+       rseg->rkey     = cpu_to_be32(rkey);
+       rseg->reserved = 0;
+}
+
+static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
+                                          struct ib_send_wr *wr)
+{
+       if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+               aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
+               aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
+       } else {
+               aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+               aseg->compare  = 0;
+       }
+
+}
+
+static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
+                            struct ib_send_wr *wr)
+{
+       useg->lkey    = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
+       useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
+       useg->dqpn    = cpu_to_be32(wr->wr.ud.remote_qpn);
+       useg->qkey    = cpu_to_be32(wr->wr.ud.remote_qkey);
+
+}
+
+static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
+                            struct ib_send_wr *wr)
+{
+       memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
+       useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+       useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+}
+
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                          struct ib_send_wr **bad_wr)
 {
@@ -1590,8 +1644,15 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        int nreq;
        int i;
        int size;
-       int size0 = 0;
-       u32 f0;
+       /*
+        * f0 and size0 are only used if nreq != 0, and they will
+        * always be initialized the first time through the main loop
+        * before nreq is incremented.  So nreq cannot become non-zero
+        * without initializing f0 and size0, and they are in fact
+        * never used uninitialized.
+        */
+       int uninitialized_var(size0);
+       u32 uninitialized_var(f0);
        int ind;
        u8 op0 = 0;
 
@@ -1626,7 +1687,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        cpu_to_be32(1);
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+                       ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
 
                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;
@@ -1636,25 +1697,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cpu_to_be64(wr->wr.atomic.remote_addr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       cpu_to_be32(wr->wr.atomic.rkey);
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-
+                               set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+                                             wr->wr.atomic.rkey);
                                wqe += sizeof (struct mthca_raddr_seg);
 
-                               if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-                                       ((struct mthca_atomic_seg *) wqe)->swap_add =
-                                               cpu_to_be64(wr->wr.atomic.swap);
-                                       ((struct mthca_atomic_seg *) wqe)->compare =
-                                               cpu_to_be64(wr->wr.atomic.compare_add);
-                               } else {
-                                       ((struct mthca_atomic_seg *) wqe)->swap_add =
-                                               cpu_to_be64(wr->wr.atomic.compare_add);
-                                       ((struct mthca_atomic_seg *) wqe)->compare = 0;
-                               }
-
+                               set_atomic_seg(wqe, wr);
                                wqe += sizeof (struct mthca_atomic_seg);
                                size += (sizeof (struct mthca_raddr_seg) +
                                         sizeof (struct mthca_atomic_seg)) / 16;
@@ -1663,12 +1710,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
                        case IB_WR_RDMA_READ:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cpu_to_be64(wr->wr.rdma.remote_addr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       cpu_to_be32(wr->wr.rdma.rkey);
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-                               wqe += sizeof (struct mthca_raddr_seg);
+                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+                                             wr->wr.rdma.rkey);
+                               wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;
 
@@ -1683,12 +1727,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cpu_to_be64(wr->wr.rdma.remote_addr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       cpu_to_be32(wr->wr.rdma.rkey);
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-                               wqe += sizeof (struct mthca_raddr_seg);
+                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+                                             wr->wr.rdma.rkey);
+                               wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;
 
@@ -1700,16 +1741,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case UD:
-                       ((struct mthca_tavor_ud_seg *) wqe)->lkey =
-                               cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
-                       ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
-                               cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
-                       ((struct mthca_tavor_ud_seg *) wqe)->dqpn =
-                               cpu_to_be32(wr->wr.ud.remote_qpn);
-                       ((struct mthca_tavor_ud_seg *) wqe)->qkey =
-                               cpu_to_be32(wr->wr.ud.remote_qkey);
-
-                       wqe += sizeof (struct mthca_tavor_ud_seg);
+                       set_tavor_ud_seg(wqe, wr);
+                       wqe  += sizeof (struct mthca_tavor_ud_seg);
                        size += sizeof (struct mthca_tavor_ud_seg) / 16;
                        break;
 
@@ -1734,13 +1767,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                }
 
                for (i = 0; i < wr->num_sge; ++i) {
-                       ((struct mthca_data_seg *) wqe)->byte_count =
-                               cpu_to_be32(wr->sg_list[i].length);
-                       ((struct mthca_data_seg *) wqe)->lkey =
-                               cpu_to_be32(wr->sg_list[i].lkey);
-                       ((struct mthca_data_seg *) wqe)->addr =
-                               cpu_to_be64(wr->sg_list[i].addr);
-                       wqe += sizeof (struct mthca_data_seg);
+                       mthca_set_data_seg(wqe, wr->sg_list + i);
+                       wqe  += sizeof (struct mthca_data_seg);
                        size += sizeof (struct mthca_data_seg) / 16;
                }
 
@@ -1768,11 +1796,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                    mthca_opcode[wr->opcode]);
                wmb();
                ((struct mthca_next_seg *) prev_wqe)->ee_nds =
-                       cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size |
+                       cpu_to_be32((nreq ? 0 : MTHCA_NEXT_DBD) | size |
                                    ((wr->send_flags & IB_SEND_FENCE) ?
                                    MTHCA_NEXT_FENCE : 0));
 
-               if (!size0) {
+               if (!nreq) {
                        size0 = size;
                        op0   = mthca_opcode[wr->opcode];
                        f0    = wr->send_flags & IB_SEND_FENCE ?
@@ -1786,15 +1814,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 out:
        if (likely(nreq)) {
-               __be32 doorbell[2];
-
-               doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
-                                          qp->send_wqe_offset) | f0 | op0);
-               doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
-
                wmb();
 
-               mthca_write64(doorbell,
+               mthca_write64(((qp->sq.next_ind << qp->sq.wqe_shift) +
+                              qp->send_wqe_offset) | f0 | op0,
+                             (qp->qpn << 8) | size0,
                              dev->kar + MTHCA_SEND_DOORBELL,
                              MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
                /*
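These doorbell hunks track a change to mthca_write64() itself (in mthca_doorbell.h, not shown here): instead of a pre-built __be32 doorbell[2] array, it now takes the two 32-bit doorbell halves directly. On 64-bit kernels the helper presumably folds them into a single big-endian MMIO write, roughly:

    /* Assumed shape of the updated helper (64-bit case); the real
     * definition lives in mthca_doorbell.h and is not part of this diff.
     */
    static inline void mthca_write64(u32 hi, u32 lo, void __iomem *dest,
                                     spinlock_t *doorbell_lock)
    {
            __raw_writeq((__force u64) cpu_to_be64((u64) hi << 32 | lo), dest);
    }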
@@ -1816,13 +1840,19 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 {
        struct mthca_dev *dev = to_mdev(ibqp->device);
        struct mthca_qp *qp = to_mqp(ibqp);
-       __be32 doorbell[2];
        unsigned long flags;
        int err = 0;
        int nreq;
        int i;
        int size;
-       int size0 = 0;
+       /*
+        * size0 is only used if nreq != 0, and it will always be
+        * initialized the first time through the main loop before
+        * nreq is incremented.  So nreq cannot become non-zero
+        * without initializing size0, and it is in fact never used
+        * uninitialized.
+        */
+       int uninitialized_var(size0);
        int ind;
        void *wqe;
        void *prev_wqe;
@@ -1848,7 +1878,6 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                prev_wqe = qp->rq.last;
                qp->rq.last = wqe;
 
-               ((struct mthca_next_seg *) wqe)->nda_op = 0;
                ((struct mthca_next_seg *) wqe)->ee_nds =
                        cpu_to_be32(MTHCA_NEXT_DBD);
                ((struct mthca_next_seg *) wqe)->flags = 0;
@@ -1863,25 +1892,17 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                }
 
                for (i = 0; i < wr->num_sge; ++i) {
-                       ((struct mthca_data_seg *) wqe)->byte_count =
-                               cpu_to_be32(wr->sg_list[i].length);
-                       ((struct mthca_data_seg *) wqe)->lkey =
-                               cpu_to_be32(wr->sg_list[i].lkey);
-                       ((struct mthca_data_seg *) wqe)->addr =
-                               cpu_to_be64(wr->sg_list[i].addr);
-                       wqe += sizeof (struct mthca_data_seg);
+                       mthca_set_data_seg(wqe, wr->sg_list + i);
+                       wqe  += sizeof (struct mthca_data_seg);
                        size += sizeof (struct mthca_data_seg) / 16;
                }
 
                qp->wrid[ind] = wr->wr_id;
 
-               ((struct mthca_next_seg *) prev_wqe)->nda_op =
-                       cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
-               wmb();
                ((struct mthca_next_seg *) prev_wqe)->ee_nds =
                        cpu_to_be32(MTHCA_NEXT_DBD | size);
 
-               if (!size0)
+               if (!nreq)
                        size0 = size;
 
                ++ind;
@@ -1892,30 +1913,23 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                if (unlikely(nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB)) {
                        nreq = 0;
 
-                       doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
-                       doorbell[1] = cpu_to_be32(qp->qpn << 8);
-
                        wmb();
 
-                       mthca_write64(doorbell,
-                                     dev->kar + MTHCA_RECEIVE_DOORBELL,
+                       mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+                                     qp->qpn << 8, dev->kar + MTHCA_RECEIVE_DOORBELL,
                                      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 
                        qp->rq.next_ind = ind;
                        qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
-                       size0 = 0;
                }
        }
 
 out:
        if (likely(nreq)) {
-               doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
-               doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
-
                wmb();
 
-               mthca_write64(doorbell,
-                             dev->kar + MTHCA_RECEIVE_DOORBELL,
+               mthca_write64((qp->rq.next_ind << qp->rq.wqe_shift) | size0,
+                             qp->qpn << 8 | nreq, dev->kar + MTHCA_RECEIVE_DOORBELL,
                              MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
        }
 
@@ -1937,7 +1951,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 {
        struct mthca_dev *dev = to_mdev(ibqp->device);
        struct mthca_qp *qp = to_mqp(ibqp);
-       __be32 doorbell[2];
+       u32 dbhi;
        void *wqe;
        void *prev_wqe;
        unsigned long flags;
@@ -1945,8 +1959,15 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        int nreq;
        int i;
        int size;
-       int size0 = 0;
-       u32 f0;
+       /*
+        * f0 and size0 are only used if nreq != 0, and they will
+        * always be initialized the first time through the main loop
+        * before nreq is incremented.  So nreq cannot become non-zero
+        * without initializing f0 and size0, and they are in fact
+        * never used uninitialized.
+        */
+       int uninitialized_var(size0);
+       u32 uninitialized_var(f0);
        int ind;
        u8 op0 = 0;
 
@@ -1960,13 +1981,10 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                if (unlikely(nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB)) {
                        nreq = 0;
 
-                       doorbell[0] = cpu_to_be32((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
-                                                 ((qp->sq.head & 0xffff) << 8) |
-                                                 f0 | op0);
-                       doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+                       dbhi = (MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
+                               ((qp->sq.head & 0xffff) << 8) | f0 | op0;
 
                        qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
-                       size0 = 0;
 
                        /*
                         * Make sure that descriptors are written before
@@ -1980,7 +1998,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         * write MMIO send doorbell.
                         */
                        wmb();
-                       mthca_write64(doorbell,
+
+                       mthca_write64(dbhi, (qp->qpn << 8) | size0,
                                      dev->kar + MTHCA_SEND_DOORBELL,
                                      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
                }
@@ -2004,10 +2023,12 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         cpu_to_be32(MTHCA_NEXT_CQ_UPDATE) : 0) |
                        ((wr->send_flags & IB_SEND_SOLICITED) ?
                         cpu_to_be32(MTHCA_NEXT_SOLICIT) : 0)   |
+                       ((wr->send_flags & IB_SEND_IP_CSUM) ?
+                        cpu_to_be32(MTHCA_NEXT_IP_CSUM | MTHCA_NEXT_TCP_UDP_CSUM) : 0) |
                        cpu_to_be32(1);
                if (wr->opcode == IB_WR_SEND_WITH_IMM ||
                    wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
-                       ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
+                       ((struct mthca_next_seg *) wqe)->imm = wr->ex.imm_data;
 
                wqe += sizeof (struct mthca_next_seg);
                size = sizeof (struct mthca_next_seg) / 16;
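The added IB_SEND_IP_CSUM test wires up transmit checksum offload on Arbel-mode hardware: when a consumer sets that flag on a send work request, the WQE control segment gets MTHCA_NEXT_IP_CSUM | MTHCA_NEXT_TCP_UDP_CSUM, asking the HCA to generate the IP and TCP/UDP checksums. On the consumer side the flag is simply OR'ed into send_flags; a hypothetical helper, shown only to illustrate the flag:

    #include <rdma/ib_verbs.h>

    /* Hypothetical helper: request checksum offload for one send WR. */
    static void wr_request_csum_offload(struct ib_send_wr *wr)
    {
            wr->send_flags |= IB_SEND_IP_CSUM;
    }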
@@ -2017,26 +2038,12 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cpu_to_be64(wr->wr.atomic.remote_addr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       cpu_to_be32(wr->wr.atomic.rkey);
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-
+                               set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+                                             wr->wr.atomic.rkey);
                                wqe += sizeof (struct mthca_raddr_seg);
 
-                               if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-                                       ((struct mthca_atomic_seg *) wqe)->swap_add =
-                                               cpu_to_be64(wr->wr.atomic.swap);
-                                       ((struct mthca_atomic_seg *) wqe)->compare =
-                                               cpu_to_be64(wr->wr.atomic.compare_add);
-                               } else {
-                                       ((struct mthca_atomic_seg *) wqe)->swap_add =
-                                               cpu_to_be64(wr->wr.atomic.compare_add);
-                                       ((struct mthca_atomic_seg *) wqe)->compare = 0;
-                               }
-
-                               wqe += sizeof (struct mthca_atomic_seg);
+                               set_atomic_seg(wqe, wr);
+                               wqe  += sizeof (struct mthca_atomic_seg);
                                size += (sizeof (struct mthca_raddr_seg) +
                                         sizeof (struct mthca_atomic_seg)) / 16;
                                break;
@@ -2044,12 +2051,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cpu_to_be64(wr->wr.rdma.remote_addr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       cpu_to_be32(wr->wr.rdma.rkey);
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-                               wqe += sizeof (struct mthca_raddr_seg);
+                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+                                             wr->wr.rdma.rkey);
+                               wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;
 
@@ -2064,12 +2068,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               ((struct mthca_raddr_seg *) wqe)->raddr =
-                                       cpu_to_be64(wr->wr.rdma.remote_addr);
-                               ((struct mthca_raddr_seg *) wqe)->rkey =
-                                       cpu_to_be32(wr->wr.rdma.rkey);
-                               ((struct mthca_raddr_seg *) wqe)->reserved = 0;
-                               wqe += sizeof (struct mthca_raddr_seg);
+                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
+                                             wr->wr.rdma.rkey);
+                               wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;
 
@@ -2081,14 +2082,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case UD:
-                       memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
-                              to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
-                       ((struct mthca_arbel_ud_seg *) wqe)->dqpn =
-                               cpu_to_be32(wr->wr.ud.remote_qpn);
-                       ((struct mthca_arbel_ud_seg *) wqe)->qkey =
-                               cpu_to_be32(wr->wr.ud.remote_qkey);
-
-                       wqe += sizeof (struct mthca_arbel_ud_seg);
+                       set_arbel_ud_seg(wqe, wr);
+                       wqe  += sizeof (struct mthca_arbel_ud_seg);
                        size += sizeof (struct mthca_arbel_ud_seg) / 16;
                        break;
 
@@ -2113,13 +2108,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                }
 
                for (i = 0; i < wr->num_sge; ++i) {
-                       ((struct mthca_data_seg *) wqe)->byte_count =
-                               cpu_to_be32(wr->sg_list[i].length);
-                       ((struct mthca_data_seg *) wqe)->lkey =
-                               cpu_to_be32(wr->sg_list[i].lkey);
-                       ((struct mthca_data_seg *) wqe)->addr =
-                               cpu_to_be64(wr->sg_list[i].addr);
-                       wqe += sizeof (struct mthca_data_seg);
+                       mthca_set_data_seg(wqe, wr->sg_list + i);
+                       wqe  += sizeof (struct mthca_data_seg);
                        size += sizeof (struct mthca_data_seg) / 16;
                }
 
@@ -2151,7 +2141,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                    ((wr->send_flags & IB_SEND_FENCE) ?
                                     MTHCA_NEXT_FENCE : 0));
 
-               if (!size0) {
+               if (!nreq) {
                        size0 = size;
                        op0   = mthca_opcode[wr->opcode];
                        f0    = wr->send_flags & IB_SEND_FENCE ?
@@ -2165,10 +2155,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 out:
        if (likely(nreq)) {
-               doorbell[0] = cpu_to_be32((nreq << 24)                  |
-                                         ((qp->sq.head & 0xffff) << 8) |
-                                         f0 | op0);
-               doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
+               dbhi = (nreq << 24) | ((qp->sq.head & 0xffff) << 8) | f0 | op0;
 
                qp->sq.head += nreq;
 
@@ -2184,8 +2171,8 @@ out:
                 * write MMIO send doorbell.
                 */
                wmb();
-               mthca_write64(doorbell,
-                             dev->kar + MTHCA_SEND_DOORBELL,
+
+               mthca_write64(dbhi, (qp->qpn << 8) | size0, dev->kar + MTHCA_SEND_DOORBELL,
                              MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
        }
 
@@ -2241,20 +2228,12 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                }
 
                for (i = 0; i < wr->num_sge; ++i) {
-                       ((struct mthca_data_seg *) wqe)->byte_count =
-                               cpu_to_be32(wr->sg_list[i].length);
-                       ((struct mthca_data_seg *) wqe)->lkey =
-                               cpu_to_be32(wr->sg_list[i].lkey);
-                       ((struct mthca_data_seg *) wqe)->addr =
-                               cpu_to_be64(wr->sg_list[i].addr);
+                       mthca_set_data_seg(wqe, wr->sg_list + i);
                        wqe += sizeof (struct mthca_data_seg);
                }
 
-               if (i < qp->rq.max_gs) {
-                       ((struct mthca_data_seg *) wqe)->byte_count = 0;
-                       ((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
-                       ((struct mthca_data_seg *) wqe)->addr = 0;
-               }
+               if (i < qp->rq.max_gs)
+                       mthca_set_data_seg_inval(wqe);
 
                qp->wrid[ind] = wr->wr_id;