diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index b4b88d0b53f54257bd5119f8ecf550ec9da3f252..46744ea2babdb336510b3772ed7dfd9fb6d8608c 100644
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -98,13 +98,21 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
        case OP(RDMA_READ_RESPONSE_LAST):
        case OP(RDMA_READ_RESPONSE_ONLY):
        case OP(ATOMIC_ACKNOWLEDGE):
-               qp->s_ack_state = OP(ACKNOWLEDGE);
+               /*
+                * We can increment the tail pointer now that the last
+                * response has been sent instead of only being
+                * constructed.
+                */
+               if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
+                       qp->s_tail_ack_queue = 0;
                /* FALLTHROUGH */
+       case OP(SEND_ONLY):
        case OP(ACKNOWLEDGE):
                /* Check for no next entry in the queue. */
                if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
                        if (qp->s_flags & IPATH_S_ACK_PENDING)
                                goto normal;
+                       qp->s_ack_state = OP(ACKNOWLEDGE);
                        goto bail;
                }
 
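The responder's ack queue referenced here is a small ring of IPATH_MAX_RDMA_ATOMIC + 1 entries: r_head_ack_queue is the producer index, s_tail_ack_queue the consumer, and head == tail means empty. The hunk above defers the tail advance until the last response packet has actually been sent rather than merely constructed, so a duplicate request arriving in between can still find its queue entry. A minimal sketch of the wrap-around advance (next_ack_slot() is a hypothetical helper; the driver open-codes the test):

/*
 * Sketch only: step an ack-queue index around a ring of
 * IPATH_MAX_RDMA_ATOMIC + 1 slots, wrapping after the last one.
 */
static unsigned next_ack_slot(unsigned n)
{
        return (n + 1 > IPATH_MAX_RDMA_ATOMIC) ? 0 : n + 1;
}

With that helper the increment above reads s_tail_ack_queue = next_ack_slot(s_tail_ack_queue), and the "no next entry" test a few lines later is simply head == tail.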
@@ -119,9 +127,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
                        } else {
                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
-                               if (++qp->s_tail_ack_queue >
-                                   IPATH_MAX_RDMA_ATOMIC)
-                                       qp->s_tail_ack_queue = 0;
+                               e->sent = 1;
                        }
                        ohdr->u.aeth = ipath_compute_aeth(qp);
                        hwords++;
@@ -139,8 +145,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                                cpu_to_be32(e->atomic_data);
                        hwords += sizeof(ohdr->u.at) / sizeof(u32);
                        bth2 = e->psn;
-                       if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
-                               qp->s_tail_ack_queue = 0;
+                       e->sent = 1;
                }
                bth0 = qp->s_ack_state << 24;
                break;
@@ -156,8 +161,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                        ohdr->u.aeth = ipath_compute_aeth(qp);
                        hwords++;
                        qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
-                       if (++qp->s_tail_ack_queue > IPATH_MAX_RDMA_ATOMIC)
-                               qp->s_tail_ack_queue = 0;
+                       qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
                }
                bth0 = qp->s_ack_state << 24;
                bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
@@ -171,7 +175,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
                 * the ACK before setting s_ack_state to ACKNOWLEDGE
                 * (see above).
                 */
-               qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+               qp->s_ack_state = OP(SEND_ONLY);
                qp->s_flags &= ~IPATH_S_ACK_PENDING;
                qp->s_cur_sge = NULL;
                if (qp->s_nak_state)
@@ -188,7 +192,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
        }
        qp->s_hdrwords = hwords;
        qp->s_cur_size = len;
-       *bth0p = bth0;
+       *bth0p = bth0 | (1 << 22); /* Set M bit */
        *bth2p = bth2;
        return 1;
 
@@ -223,29 +227,24 @@ int ipath_make_rc_req(struct ipath_qp *qp,
        /* Sending responses takes priority over sending requests. */
        if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
             (qp->s_flags & IPATH_S_ACK_PENDING) ||
-            qp->s_ack_state != IB_OPCODE_RC_ACKNOWLEDGE) &&
+            qp->s_ack_state != OP(ACKNOWLEDGE)) &&
            ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
                goto done;
 
        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
-           qp->s_rnr_timeout)
+           qp->s_rnr_timeout || qp->s_wait_credit)
                goto bail;
 
        /* Limit the number of packets sent without an ACK. */
        if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
                qp->s_wait_credit = 1;
                dev->n_rc_stalls++;
-               spin_lock(&dev->pending_lock);
-               if (list_empty(&qp->timerwait))
-                       list_add_tail(&qp->timerwait,
-                                     &dev->pending[dev->pending_index]);
-               spin_unlock(&dev->pending_lock);
                goto bail;
        }
 
        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
        hwords = 5;
-       bth0 = 0;
+       bth0 = 1 << 22; /* Set M bit */
 
        /* Send a request. */
        wqe = get_swqe_ptr(qp, qp->s_cur);
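The credit check above leans on ipath_cmp24() for circular 24-bit PSN arithmetic; a sketch along the lines of the driver's helper:

/*
 * Sketch: compare two PSNs on the 2^24 ring.  Shifting the 32-bit
 * difference left by 8 moves bit 23 into the sign bit, so the result is
 * negative, zero, or positive according to the circular distance a - b.
 */
static inline int cmp24(u32 a, u32 b)
{
        return (int) ((a - b) << 8);
}

With that, the test stalls the sender (s_wait_credit = 1) as soon as s_psn runs more than IPATH_PSN_CREDIT packets past the last acknowledged PSN, which is also why s_wait_credit joins the bail conditions earlier in this hunk.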
@@ -587,9 +586,12 @@ static void send_rc_ack(struct ipath_qp *qp)
        u32 hwords;
        struct ipath_ib_header hdr;
        struct ipath_other_headers *ohdr;
+       unsigned long flags;
 
        /* Don't send ACK or NAK if an RDMA read or atomic is pending. */
-       if (qp->r_head_ack_queue != qp->s_tail_ack_queue)
+       if (qp->r_head_ack_queue != qp->s_tail_ack_queue ||
+           (qp->s_flags & IPATH_S_ACK_PENDING) ||
+           qp->s_ack_state != OP(ACKNOWLEDGE))
                goto queue_ack;
 
        /* Construct the header. */
@@ -606,7 +608,7 @@ static void send_rc_ack(struct ipath_qp *qp)
        }
        /* read pkey_index w/o lock (it's atomic) */
        bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
-               OP(ACKNOWLEDGE) << 24;
+               (OP(ACKNOWLEDGE) << 24) | (1 << 22);
        if (qp->r_nak_state)
                ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
                                            (qp->r_nak_state <<
@@ -640,11 +642,11 @@ static void send_rc_ack(struct ipath_qp *qp)
        dev->n_rc_qacks++;
 
 queue_ack:
-       spin_lock_irq(&qp->s_lock);
+       spin_lock_irqsave(&qp->s_lock, flags);
        qp->s_flags |= IPATH_S_ACK_PENDING;
        qp->s_nak_state = qp->r_nak_state;
        qp->s_ack_psn = qp->r_ack_psn;
-       spin_unlock_irq(&qp->s_lock);
+       spin_unlock_irqrestore(&qp->s_lock, flags);
 
        /* Call ipath_do_rc_send() in another thread. */
        tasklet_hi_schedule(&qp->s_task);
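The spin_lock_irq() to spin_lock_irqsave() conversions in this hunk (and the later ones in ipath_rc_rcv_error() and ipath_rc_error()) matter because the _irq variants re-enable interrupts unconditionally on unlock; if the function is ever entered with interrupts already disabled, that turns them back on behind the caller's back. A minimal sketch of the safe pattern, assuming nothing about the caller's IRQ state (queue_ack_locked() is hypothetical):

static void queue_ack_locked(struct ipath_qp *qp)
{
        unsigned long flags;

        /* remember the caller's interrupt state while taking the lock ... */
        spin_lock_irqsave(&qp->s_lock, flags);
        qp->s_flags |= IPATH_S_ACK_PENDING;
        /* ... and restore exactly that state, rather than forcing IRQs on */
        spin_unlock_irqrestore(&qp->s_lock, flags);
}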
@@ -808,13 +810,15 @@ static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
  * Called at interrupt level with the QP s_lock held and interrupts disabled.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
+static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
+                    u64 val)
 {
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        struct ib_wc wc;
        struct ipath_swqe *wqe;
        int ret = 0;
        u32 ack_psn;
+       int diff;
 
        /*
         * Remove the QP from the timeout queue (or RNR timeout queue).
@@ -842,7 +846,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
         * The MSN might be for a later WQE than the PSN indicates, so
         * only complete WQEs that the PSN finishes.
         */
-       while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) {
+       while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
+               /*
+                * RDMA_READ_RESPONSE_ONLY is a special case since
+                * we want to generate completion events for everything
+                * before the RDMA read, copy the data, then generate
+                * the completion for the read.
+                */
+               if (wqe->wr.opcode == IB_WR_RDMA_READ &&
+                   opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
+                   diff == 0) {
+                       ret = 1;
+                       goto bail;
+               }
                /*
                 * If this request is an RDMA read or atomic, and the ACK is
                 * for a later operation, this ACK NAKs the RDMA read or
@@ -853,12 +869,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
                 * is sent but before the response is received.
                 */
                if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
-                    (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
-                     ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
+                    (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
                    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
                      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
-                    (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
-                     ipath_cmp24(wqe->psn, psn) != 0))) {
+                    (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
                        /*
                         * The last valid PSN seen is the previous
                         * request's.
@@ -872,6 +886,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
                         */
                        goto bail;
                }
+               if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+                   wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+                       *(u64 *) wqe->sg_list[0].vaddr = val;
                if (qp->s_num_rd_atomic &&
                    (wqe->wr.opcode == IB_WR_RDMA_READ ||
                     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -1081,6 +1098,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
        int diff;
        u32 pad;
        u32 aeth;
+       u64 val;
 
        spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -1120,8 +1138,6 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                        data += sizeof(__be32);
                }
                if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-                       u64 val;
-
                        if (!header_in_data) {
                                __be32 *p = ohdr->u.at.atomic_ack_eth;
 
@@ -1129,12 +1145,13 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                                        be32_to_cpu(p[1]);
                        } else
                                val = be64_to_cpu(((__be64 *) data)[0]);
-                       *(u64 *) wqe->sg_list[0].vaddr = val;
-               }
-               if (!do_rc_ack(qp, aeth, psn, opcode) ||
+               } else
+                       val = 0;
+               if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
                    opcode != OP(RDMA_READ_RESPONSE_FIRST))
                        goto ack_done;
                hdrsize += 4;
+               wqe = get_swqe_ptr(qp, qp->s_last);
                if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
                        goto ack_op_err;
                /*
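Hoisting val out of the ATOMIC_ACKNOWLEDGE branch lets do_rc_ack() store the atomic result into wqe->sg_list[0].vaddr itself (see the do_rc_ack() hunk earlier), so the duplicate-ACK path sees it too. The 64-bit result arrives either as two big-endian words in the header or as one big-endian quadword at the front of the payload; a sketch of the reassembly done above (helper name hypothetical):

static u64 get_atomic_ack_val(const __be32 *hdr_words, const void *data,
                              int header_in_data)
{
        if (!header_in_data)
                /* two 32-bit words from ohdr->u.at.atomic_ack_eth */
                return ((u64) be32_to_cpu(hdr_words[0]) << 32) |
                        be32_to_cpu(hdr_words[1]);
        /* otherwise one 64-bit big-endian value leading the data */
        return be64_to_cpu(((const __be64 *) data)[0]);
}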
@@ -1178,13 +1195,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                goto bail;
 
        case OP(RDMA_READ_RESPONSE_ONLY):
-               if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-                       dev->n_rdma_seq++;
-                       ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+               if (!header_in_data)
+                       aeth = be32_to_cpu(ohdr->u.aeth);
+               else
+                       aeth = be32_to_cpu(((__be32 *) data)[0]);
+               if (!do_rc_ack(qp, aeth, psn, opcode, 0))
                        goto ack_done;
-               }
-               if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-                       goto ack_op_err;
                /* Get the number of bytes the message was padded by. */
                pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
                /*
@@ -1199,6 +1215,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                 * have to be careful to copy the data to the right
                 * location.
                 */
+               wqe = get_swqe_ptr(qp, qp->s_last);
                qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
                                                  wqe, psn, pmtu);
                goto read_last;
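Note the wqe re-fetch added before restart_sge(): do_rc_ack() may have completed earlier WQEs and advanced qp->s_last, so any pointer taken before the call is stale. The same re-fetch appears in the RDMA_READ_RESPONSE_FIRST path above; a sketch of the rule (current_swqe() is hypothetical):

static struct ipath_swqe *current_swqe(struct ipath_qp *qp)
{
        /*
         * do_rc_ack() can retire WQEs and move qp->s_last forward, so
         * the active WQE must be re-read after calling it rather than
         * reusing a pointer fetched beforehand.
         */
        return get_swqe_ptr(qp, qp->s_last);
}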
@@ -1232,7 +1249,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
                        data += sizeof(__be32);
                }
                ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
-               (void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
+               (void) do_rc_ack(qp, aeth, psn,
+                                OP(RDMA_READ_RESPONSE_LAST), 0);
                goto ack_done;
        }
 
@@ -1261,6 +1279,7 @@ ack_err:
        wc.dlid_path_bits = 0;
        wc.port_num = 0;
        ipath_sqerror_qp(qp, &wc);
+       spin_unlock_irqrestore(&qp->s_lock, flags);
 bail:
        return;
 }
@@ -1294,6 +1313,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
        struct ipath_ack_entry *e;
        u8 i, prev;
        int old_req;
+       unsigned long flags;
 
        if (diff > 0) {
                /*
@@ -1327,7 +1347,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
        psn &= IPATH_PSN_MASK;
        e = NULL;
        old_req = 1;
-       spin_lock_irq(&qp->s_lock);
+       spin_lock_irqsave(&qp->s_lock, flags);
        for (i = qp->r_head_ack_queue; ; i = prev) {
                if (i == qp->s_tail_ack_queue)
                        old_req = 0;
@@ -1344,8 +1364,11 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
                        e = NULL;
                        break;
                }
-               if (ipath_cmp24(psn, e->psn) >= 0)
+               if (ipath_cmp24(psn, e->psn) >= 0) {
+                       if (prev == qp->s_tail_ack_queue)
+                               old_req = 0;
                        break;
+               }
        }
        switch (opcode) {
        case OP(RDMA_READ_REQUEST): {
@@ -1425,7 +1448,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
                 * after all the previous RDMA reads and atomics.
                 */
                if (i == qp->r_head_ack_queue) {
-                       spin_unlock_irq(&qp->s_lock);
+                       spin_unlock_irqrestore(&qp->s_lock, flags);
                        qp->r_nak_state = 0;
                        qp->r_ack_psn = qp->r_psn - 1;
                        goto send_ack;
@@ -1439,11 +1462,10 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
                break;
        }
        qp->r_nak_state = 0;
-       spin_unlock_irq(&qp->s_lock);
        tasklet_hi_schedule(&qp->s_task);
 
 unlock_done:
-       spin_unlock_irq(&qp->s_lock);
+       spin_unlock_irqrestore(&qp->s_lock, flags);
 done:
        return 1;
 
@@ -1453,10 +1475,28 @@ send_ack:
 
 static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
 {
-       spin_lock_irq(&qp->s_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&qp->s_lock, flags);
        qp->state = IB_QPS_ERR;
        ipath_error_qp(qp, err);
-       spin_unlock_irq(&qp->s_lock);
+       spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
+{
+       unsigned long flags;
+       unsigned next;
+
+       next = n + 1;
+       if (next > IPATH_MAX_RDMA_ATOMIC)
+               next = 0;
+       spin_lock_irqsave(&qp->s_lock, flags);
+       if (n == qp->s_tail_ack_queue) {
+               qp->s_tail_ack_queue = next;
+               qp->s_ack_state = OP(ACKNOWLEDGE);
+       }
+       spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
 /**
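ipath_update_ack_queue() is the reclaim half of the new sent flag: when a fresh RDMA read or atomic arrives and the ring looks full (next == s_tail_ack_queue), the receive paths below NAK only if the oldest entry has not yet been sent; otherwise they retire it and reuse the slot. A sketch of the occupancy conventions this depends on, assuming the IPATH_MAX_RDMA_ATOMIC + 1 slot ring described earlier (helper names hypothetical):

static int ack_queue_empty(const struct ipath_qp *qp)
{
        return qp->r_head_ack_queue == qp->s_tail_ack_queue;
}

static int ack_queue_full(const struct ipath_qp *qp)
{
        unsigned next = qp->r_head_ack_queue + 1;

        if (next > IPATH_MAX_RDMA_ATOMIC)
                next = 0;
        /* one slot stays unused so that full and empty are distinguishable */
        return next == qp->s_tail_ack_queue;
}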
@@ -1671,6 +1711,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
        case OP(RDMA_WRITE_FIRST):
        case OP(RDMA_WRITE_ONLY):
        case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+               if (unlikely(!(qp->qp_access_flags &
+                              IB_ACCESS_REMOTE_WRITE)))
+                       goto nack_inv;
                /* consume RWQE */
                /* RETH comes after BTH */
                if (!header_in_data)
@@ -1700,9 +1743,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                        qp->r_sge.sge.length = 0;
                        qp->r_sge.sge.sge_length = 0;
                }
-               if (unlikely(!(qp->qp_access_flags &
-                              IB_ACCESS_REMOTE_WRITE)))
-                       goto nack_acc;
                if (opcode == OP(RDMA_WRITE_FIRST))
                        goto send_middle;
                else if (opcode == OP(RDMA_WRITE_ONLY))
@@ -1716,13 +1756,17 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                u32 len;
                u8 next;
 
-               if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-                       goto nack_acc;
+               if (unlikely(!(qp->qp_access_flags &
+                              IB_ACCESS_REMOTE_READ)))
+                       goto nack_inv;
                next = qp->r_head_ack_queue + 1;
                if (next > IPATH_MAX_RDMA_ATOMIC)
                        next = 0;
-               if (unlikely(next == qp->s_tail_ack_queue))
-                       goto nack_inv;
+               if (unlikely(next == qp->s_tail_ack_queue)) {
+                       if (!qp->s_ack_queue[next].sent)
+                               goto nack_inv;
+                       ipath_update_ack_queue(qp, next);
+               }
                e = &qp->s_ack_queue[qp->r_head_ack_queue];
                /* RETH comes after BTH */
                if (!header_in_data)
@@ -1757,6 +1801,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                        e->rdma_sge.sge.sge_length = 0;
                }
                e->opcode = opcode;
+               e->sent = 0;
                e->psn = psn;
                /*
                 * We need to increment the MSN here instead of when we
@@ -1788,12 +1833,15 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 
                if (unlikely(!(qp->qp_access_flags &
                               IB_ACCESS_REMOTE_ATOMIC)))
-                       goto nack_acc;
+                       goto nack_inv;
                next = qp->r_head_ack_queue + 1;
                if (next > IPATH_MAX_RDMA_ATOMIC)
                        next = 0;
-               if (unlikely(next == qp->s_tail_ack_queue))
-                       goto nack_inv;
+               if (unlikely(next == qp->s_tail_ack_queue)) {
+                       if (!qp->s_ack_queue[next].sent)
+                               goto nack_inv;
+                       ipath_update_ack_queue(qp, next);
+               }
                if (!header_in_data)
                        ateth = &ohdr->u.atomic_eth;
                else
@@ -1818,6 +1866,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                                      be64_to_cpu(ateth->compare_data),
                                      sdata);
                e->opcode = opcode;
+               e->sent = 0;
                e->psn = psn & IPATH_PSN_MASK;
                qp->r_msn++;
                qp->r_psn++;
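For context on the compare_data/sdata arguments visible above: the responder executes the atomic in place on the 64-bit target and queues the prior value as the reply. A sketch of that step, assuming an aligned maddr and the kernel's atomic64 primitives (do_responder_atomic() is hypothetical; only the cmpxchg tail is visible in this hunk):

static u64 do_responder_atomic(u64 *maddr, int is_fetch_add,
                               u64 sdata, u64 compare)
{
        /* FETCH_ADD replies with the pre-add value; CMP_SWAP with the old value */
        return is_fetch_add ?
                (u64) atomic64_add_return(sdata, (atomic64_t *) maddr) - sdata :
                (u64) atomic64_cmpxchg((atomic64_t *) maddr, compare, sdata);
}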