IPOIB_MCAST_FLAG_SENDONLY = 1,
        IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
        IPOIB_MCAST_FLAG_ATTACHED = 3,
+
+       MAX_SEND_CQE              = 16,
 };
 
 #define        IPOIB_OP_RECV   (1ul << 31)
        u16               pkey_index;
        struct ib_pd     *pd;
        struct ib_mr     *mr;
-       struct ib_cq     *cq;
+       struct ib_cq     *recv_cq;
+       struct ib_cq     *send_cq;
        struct ib_qp     *qp;
        u32               qkey;
 
        struct ib_sge        tx_sge[MAX_SKB_FRAGS + 1];
        struct ib_send_wr    tx_wr;
        unsigned             tx_outstanding;
+       struct ib_wc         send_wc[MAX_SEND_CQE];
 
        struct ib_recv_wr    rx_wr;
        struct ib_sge        rx_sge[IPOIB_UD_RX_SG];
 static inline void ipoib_unregister_debugfs(void) { }
 #endif
 
-
 #define ipoib_printk(level, priv, format, arg...)      \
        printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
 #define ipoib_warn(priv, format, arg...)               \
 
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_init_attr attr = {
                .event_handler = ipoib_cm_rx_event_handler,
-               .send_cq = priv->cq, /* For drain WR */
-               .recv_cq = priv->cq,
+               .send_cq = priv->recv_cq, /* For drain WR */
+               .recv_cq = priv->recv_cq,
                .srq = priv->cm.srq,
                .cap.max_send_wr = 1, /* For drain WR */
                .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_init_attr attr = {
-               .send_cq                = priv->cq,
-               .recv_cq                = priv->cq,
+               .send_cq                = priv->recv_cq,
+               .recv_cq                = priv->recv_cq,
                .srq                    = priv->cm.srq,
                .cap.max_send_wr        = ipoib_sendq_size,
                .cap.max_send_sge       = 1,
 
            coal->rx_max_coalesced_frames > 0xffff)
                return -EINVAL;
 
-       ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames,
+       ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
                           coal->rx_coalesce_usecs);
        if (ret && ret != -ENOSYS) {
                ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
 
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        unsigned int wr_id = wc->wr_id;
        struct ipoib_tx_buf *tx_req;
-       unsigned long flags;
 
        ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
                       wr_id, wc->status);
 
        dev_kfree_skb_any(tx_req->skb);
 
-       spin_lock_irqsave(&priv->tx_lock, flags);
        ++priv->tx_tail;
        if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
            netif_queue_stopped(dev) &&
            test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
                netif_wake_queue(dev);
-       spin_unlock_irqrestore(&priv->tx_lock, flags);
 
        if (wc->status != IB_WC_SUCCESS &&
            wc->status != IB_WC_WR_FLUSH_ERR)
                           wc->status, wr_id, wc->vendor_err);
 }
 
+static int poll_tx(struct ipoib_dev_priv *priv)
+{
+       int n, i;
+       /*
+        * Poll the send CQ for at most MAX_SEND_CQE work completions,
+        * storing them in priv->send_wc, and pass each one returned to
+        * ipoib_ib_handle_tx_wc().  If n is zero or negative the loop
+        * body never runs.
+        */
+       n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
+       for (i = 0; i < n; ++i)
+               ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
+       /*
+        * Nonzero only when the batch was completely filled, which is the
+        * one case where further entries could still be waiting on the CQ;
+        * a caller that wants to drain it can loop while this is nonzero.
+        */
+       return n == MAX_SEND_CQE;
+}
+
 int ipoib_poll(struct napi_struct *napi, int budget)
 {
        struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
                int max = (budget - done);
 
                t = min(IPOIB_NUM_WC, max);
-               n = ib_poll_cq(priv->cq, t, priv->ibwc);
+               n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
 
                for (i = 0; i < n; i++) {
                        struct ib_wc *wc = priv->ibwc + i;
                                        ipoib_cm_handle_rx_wc(dev, wc);
                                else
                                        ipoib_ib_handle_rx_wc(dev, wc);
-                       } else {
-                               if (wc->wr_id & IPOIB_OP_CM)
-                                       ipoib_cm_handle_tx_wc(dev, wc);
-                               else
-                                       ipoib_ib_handle_tx_wc(dev, wc);
-                       }
+                       } else
+                               ipoib_cm_handle_tx_wc(priv->dev, wc);
                }
 
                if (n != t)
 
        if (done < budget) {
                netif_rx_complete(dev, napi);
-               if (unlikely(ib_req_notify_cq(priv->cq,
+               if (unlikely(ib_req_notify_cq(priv->recv_cq,
                                              IB_CQ_NEXT_COMP |
                                              IB_CQ_REPORT_MISSED_EVENTS)) &&
                    netif_rx_reschedule(dev, napi))
 
                address->last_send = priv->tx_head;
                ++priv->tx_head;
+               skb_orphan(skb);
 
                if (++priv->tx_outstanding == ipoib_sendq_size) {
                        ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
                        netif_stop_queue(dev);
                }
        }
+
+       if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+               poll_tx(priv);
 }
 
 static void __ipoib_reap_ah(struct net_device *dev)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int i, n;
        do {
-               n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
+               n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
                for (i = 0; i < n; ++i) {
                        /*
                         * Convert any successful completions to flush
                                        ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
                                else
                                        ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
-                       } else {
-                               if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
-                                       ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
-                               else
-                                       ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
-                       }
+                       } else
+                               ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
                }
        } while (n == IPOIB_NUM_WC);
+
+       while (poll_tx(priv))
+               ; /* nothing */
 }
 
 int ipoib_ib_dev_stop(struct net_device *dev, int flush)
                msleep(1);
        }
 
-       ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
+       ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
 
        return 0;
 }
 
 
        ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
        ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
-       ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+       ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
+                                                    IPOIB_MIN_QUEUE_SIZE));
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
        ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
 #endif
 
                goto out_free_pd;
        }
 
-       size = ipoib_sendq_size + ipoib_recvq_size + 1;
+       size = ipoib_recvq_size + 1;
        ret = ipoib_cm_dev_init(dev);
        if (!ret) {
+               size += ipoib_sendq_size;
                if (ipoib_cm_has_srq(dev))
                        size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
                else
                        size += ipoib_recvq_size * ipoib_max_conn_qp;
        }
 
-       priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
-       if (IS_ERR(priv->cq)) {
-               printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
+       priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
+       if (IS_ERR(priv->recv_cq)) {
+               printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
                goto out_free_mr;
        }
 
-       if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
-               goto out_free_cq;
+       priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0);
+       if (IS_ERR(priv->send_cq)) {
+               printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
+               goto out_free_recv_cq;
+       }
+
+       if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP))
+               goto out_free_send_cq;
 
-       init_attr.send_cq = priv->cq;
-       init_attr.recv_cq = priv->cq;
+       init_attr.send_cq = priv->send_cq;
+       init_attr.recv_cq = priv->recv_cq;
 
        if (priv->hca_caps & IB_DEVICE_UD_TSO)
                init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
        priv->qp = ib_create_qp(priv->pd, &init_attr);
        if (IS_ERR(priv->qp)) {
                printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
-               goto out_free_cq;
+               goto out_free_send_cq;
        }
 
        priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
 
        return 0;
 
-out_free_cq:
-       ib_destroy_cq(priv->cq);
+out_free_send_cq:
+       ib_destroy_cq(priv->send_cq);
+
+out_free_recv_cq:
+       ib_destroy_cq(priv->recv_cq);
 
 out_free_mr:
        ib_dereg_mr(priv->mr);
                clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
        }
 
-       if (ib_destroy_cq(priv->cq))
-               ipoib_warn(priv, "ib_cq_destroy failed\n");
+       if (ib_destroy_cq(priv->send_cq))
+               ipoib_warn(priv, "ib_cq_destroy (send) failed\n");
+
+       if (ib_destroy_cq(priv->recv_cq))
+               ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
 
        ipoib_cm_dev_cleanup(dev);