#include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
 
+/*
+ * Translate an enum ib_rate into a multiple of the 2.5 Gbit/sec base
+ * rate.  Any value without a table entry (IB_RATE_PORT_CURRENT
+ * included) yields -1.
+ */
+int ib_rate_to_mult(enum ib_rate rate)
+{
+       /* Indexed by enum ib_rate; a zero slot means "no multiple". */
+       static const int rate_mult[] = {
+               [IB_RATE_2_5_GBPS] =  1,
+               [IB_RATE_5_GBPS]   =  2,
+               [IB_RATE_10_GBPS]  =  4,
+               [IB_RATE_20_GBPS]  =  8,
+               [IB_RATE_30_GBPS]  = 12,
+               [IB_RATE_40_GBPS]  = 16,
+               [IB_RATE_60_GBPS]  = 24,
+               [IB_RATE_80_GBPS]  = 32,
+               [IB_RATE_120_GBPS] = 48
+       };
+       /* Unsigned index so that negative inputs fall out of range. */
+       unsigned int idx = rate;
+
+       if (idx >= sizeof rate_mult / sizeof rate_mult[0] || !rate_mult[idx])
+               return -1;
+
+       return rate_mult[idx];
+}
+EXPORT_SYMBOL(ib_rate_to_mult);
+
+/*
+ * Translate a multiple of the 2.5 Gbit/sec base rate into the matching
+ * enum ib_rate.  A multiple with no matching entry is reported as
+ * IB_RATE_PORT_CURRENT.
+ */
+enum ib_rate mult_to_ib_rate(int mult)
+{
+       static const struct {
+               int          mult;
+               enum ib_rate rate;
+       } rate_map[] = {
+               {  1, IB_RATE_2_5_GBPS },
+               {  2, IB_RATE_5_GBPS   },
+               {  4, IB_RATE_10_GBPS  },
+               {  8, IB_RATE_20_GBPS  },
+               { 12, IB_RATE_30_GBPS  },
+               { 16, IB_RATE_40_GBPS  },
+               { 24, IB_RATE_60_GBPS  },
+               { 32, IB_RATE_80_GBPS  },
+               { 48, IB_RATE_120_GBPS }
+       };
+       unsigned int i;
+
+       for (i = 0; i < sizeof rate_map / sizeof rate_map[0]; ++i)
+               if (rate_map[i].mult == mult)
+                       return rate_map[i].rate;
+
+       return IB_RATE_PORT_CURRENT;
+}
+EXPORT_SYMBOL(mult_to_ib_rate);
+
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
 
 
 #include "mthca_dev.h"
 
+enum { /* static rate codes used on the non-memfree path (see ib_rate_to_tavor()) */
+      MTHCA_RATE_TAVOR_FULL   = 0, /* chosen when no fixed rate below matches */
+      MTHCA_RATE_TAVOR_1X     = 1, /* paired with IB_RATE_2_5_GBPS */
+      MTHCA_RATE_TAVOR_4X     = 2, /* paired with IB_RATE_10_GBPS */
+      MTHCA_RATE_TAVOR_1X_DDR = 3  /* paired with IB_RATE_5_GBPS */
+};
+
+enum { /* static rate codes used on the memfree path, relative to the port rate */
+      MTHCA_RATE_MEMFREE_FULL    = 0, /* port rate unchanged */
+      MTHCA_RATE_MEMFREE_QUARTER = 1, /* port rate >> 2 in memfree_rate_to_ib() */
+      MTHCA_RATE_MEMFREE_EIGHTH  = 2, /* port rate >> 3 in memfree_rate_to_ib() */
+      MTHCA_RATE_MEMFREE_HALF    = 3  /* port rate >> 1 in memfree_rate_to_ib() */
+};
+
 struct mthca_av {
        __be32 port_pd;
        u8     reserved1;
        __be32 dgid[4];
 };
 
+/*
+ * Translate a memfree static rate code into an enum ib_rate.
+ *
+ * @mthca_rate: MTHCA_RATE_MEMFREE_* code.
+ * @port_rate:  port rate as a multiple of 2.5 Gbit/sec.
+ *
+ * The code selects a power-of-two fraction of the port rate; the
+ * resulting multiple is then mapped through mult_to_ib_rate().
+ */
+static enum ib_rate memfree_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+       unsigned int shift;
+
+       if (mthca_rate == MTHCA_RATE_MEMFREE_EIGHTH)
+               shift = 3;
+       else if (mthca_rate == MTHCA_RATE_MEMFREE_QUARTER)
+               shift = 2;
+       else if (mthca_rate == MTHCA_RATE_MEMFREE_HALF)
+               shift = 1;
+       else
+               shift = 0;      /* MTHCA_RATE_MEMFREE_FULL or any other code */
+
+       return mult_to_ib_rate(port_rate >> shift);
+}
+
+/*
+ * Translate a Tavor static rate code into an enum ib_rate.
+ *
+ * @mthca_rate: MTHCA_RATE_TAVOR_* code.
+ * @port_rate:  port rate as a multiple of 2.5 Gbit/sec (the value
+ *              mthca_update_rate() stores in dev->rate[]).
+ *
+ * Any code other than the three fixed rates means the port rate itself
+ * applies.  port_rate is a multiple, not an enum ib_rate, so it must go
+ * through mult_to_ib_rate() exactly as memfree_rate_to_ib() does.
+ * Returning it raw would, for example, report a port multiple of 4
+ * (10 Gbit/sec) as IB_RATE_30_GBPS, whose enum value happens to be 4.
+ */
+static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
+{
+       switch (mthca_rate) {
+       case MTHCA_RATE_TAVOR_1X:     return IB_RATE_2_5_GBPS;
+       case MTHCA_RATE_TAVOR_1X_DDR: return IB_RATE_5_GBPS;
+       case MTHCA_RATE_TAVOR_4X:     return IB_RATE_10_GBPS;
+       default:                      return mult_to_ib_rate(port_rate);
+       }
+}
+
+/*
+ * Translate a hardware static rate code for the given port into an
+ * enum ib_rate, dispatching on whether the device is memfree.
+ *
+ * @dev:        device the rate code belongs to.
+ * @mthca_rate: hardware static rate code.
+ * @port:       1-based port number; indexes dev->rate[port - 1].
+ */
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port)
+{
+       if (!mthca_is_memfree(dev))
+               return tavor_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+
+       /* Handle old Arbel FW */
+       if (dev->limits.stat_rate_support == 0x3 && mthca_rate)
+               return IB_RATE_2_5_GBPS;
+
+       return memfree_rate_to_ib(mthca_rate, dev->rate[port - 1]);
+}
+
+/*
+ * Pick the memfree static rate code that slows a port running at
+ * cur_rate down to no more than req_rate.  Both rates are multiples of
+ * 2.5 Gbit/sec.  Returns 0 when the port is already no faster than
+ * the requested rate.
+ */
+static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate)
+{
+       int ipd;
+
+       if (cur_rate <= req_rate)
+               return 0;
+
+       /*
+        * Inter-packet delay (IPD) to get from rate X down to a rate
+        * no more than Y is (X - 1) / Y.
+        */
+       ipd = (cur_rate - 1) / req_rate;
+
+       if (ipd == 0)
+               return MTHCA_RATE_MEMFREE_FULL;
+       if (ipd == 1)
+               return MTHCA_RATE_MEMFREE_HALF;
+       if (ipd <= 3)
+               return MTHCA_RATE_MEMFREE_QUARTER;
+
+       return MTHCA_RATE_MEMFREE_EIGHTH;
+}
+
+/*
+ * Map an enum ib_rate value onto a Tavor static rate code.  Only the
+ * 2.5, 5 and 10 Gbit/sec rates have dedicated codes; everything else
+ * maps to MTHCA_RATE_TAVOR_FULL.
+ */
+static u8 ib_rate_to_tavor(u8 static_rate)
+{
+       if (static_rate == IB_RATE_2_5_GBPS)
+               return MTHCA_RATE_TAVOR_1X;
+       if (static_rate == IB_RATE_5_GBPS)
+               return MTHCA_RATE_TAVOR_1X_DDR;
+       if (static_rate == IB_RATE_10_GBPS)
+               return MTHCA_RATE_TAVOR_4X;
+
+       return MTHCA_RATE_TAVOR_FULL;
+}
+
+/*
+ * Compute the hardware static rate code for a requested IB rate.
+ *
+ * @dev:         device the address vector or path belongs to.
+ * @static_rate: requested rate as an enum ib_rate value; 0 means no
+ *               rate was requested.
+ * @port:        1-based port number; indexes dev->rate[port - 1].
+ *
+ * Returns 0 when no rate was requested or when the requested rate is
+ * not below the current port rate.  Otherwise returns the memfree or
+ * Tavor code for the request, replaced by code 1 if the device's
+ * stat_rate_support mask does not include the chosen code.
+ */
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
+{
+       int req_mult;
+       u8  hw_rate;
+
+       if (!static_rate)
+               return 0;
+
+       req_mult = ib_rate_to_mult(static_rate);
+       if (req_mult >= dev->rate[port - 1])
+               return 0;
+
+       if (mthca_is_memfree(dev))
+               hw_rate = ib_rate_to_memfree(req_mult, dev->rate[port - 1]);
+       else
+               hw_rate = ib_rate_to_tavor(static_rate);
+
+       /* Use the computed code only if its bit is set in the support mask. */
+       if (dev->limits.stat_rate_support & (1 << hw_rate))
+               return hw_rate;
+
+       return 1;
+}
+
 int mthca_create_ah(struct mthca_dev *dev,
                    struct mthca_pd *pd,
                    struct ib_ah_attr *ah_attr,
        av->g_slid  = ah_attr->src_path_bits;
        av->dlid    = cpu_to_be16(ah_attr->dlid);
        av->msg_sr  = (3 << 4) | /* 2K message */
-               ah_attr->static_rate;
+               mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
        av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
        if (ah_attr->ah_flags & IB_AH_GRH) {
                av->g_slid |= 0x80;
 
        u32 *outbox;
        u8 field;
        u16 size;
+       u16 stat_rate;
        int err;
 
 #define QUERY_DEV_LIM_OUT_SIZE             0x100
 #define QUERY_DEV_LIM_MTU_WIDTH_OFFSET      0x36
 #define QUERY_DEV_LIM_VL_PORT_OFFSET        0x37
 #define QUERY_DEV_LIM_MAX_GID_OFFSET        0x3b
+#define QUERY_DEV_LIM_RATE_SUPPORT_OFFSET   0x3c
 #define QUERY_DEV_LIM_MAX_PKEY_OFFSET       0x3f
 #define QUERY_DEV_LIM_FLAGS_OFFSET          0x44
 #define QUERY_DEV_LIM_RSVD_UAR_OFFSET       0x48
        dev_lim->num_ports = field & 0xf;
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_GID_OFFSET);
        dev_lim->max_gids = 1 << (field & 0xf);
+       MTHCA_GET(stat_rate, outbox, QUERY_DEV_LIM_RATE_SUPPORT_OFFSET);
+       dev_lim->stat_rate_support = stat_rate;
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_PKEY_OFFSET);
        dev_lim->max_pkeys = 1 << (field & 0xf);
        MTHCA_GET(dev_lim->flags, outbox, QUERY_DEV_LIM_FLAGS_OFFSET);
 
        int max_vl;
        int num_ports;
        int max_gids;
+       u16 stat_rate_support;
        int max_pkeys;
        u32 flags;
        int reserved_uars;
 
        int      reserved_pds;
        u32      page_size_cap;
        u32      flags;
+       u16      stat_rate_support;
        u8       port_width_cap;
 };
 
        struct ib_mad_agent  *send_agent[MTHCA_MAX_PORTS][2];
        struct ib_ah         *sm_ah[MTHCA_MAX_PORTS];
        spinlock_t            sm_lock;
+       u8                    rate[MTHCA_MAX_PORTS];
 };
 
 #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
                  struct ib_ud_header *header);
 int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
 int mthca_ah_grh_present(struct mthca_ah *ah);
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
 
 int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
 
        MTHCA_VENDOR_CLASS2 = 0xa
 };
 
+/*
+ * Refresh the cached rate of one port.
+ *
+ * Queries the port attributes and stores active_speed multiplied by
+ * the integer link width in dev->rate[port_num - 1].
+ *
+ * Returns 0 on success, -ENOMEM if the attribute buffer cannot be
+ * allocated, or the error returned by ib_query_port() (in which case
+ * a warning is logged and the cached rate is left untouched).
+ */
+int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
+{
+       struct ib_port_attr *attr;
+       int                  err;
+
+       attr = kmalloc(sizeof *attr, GFP_KERNEL);
+       if (!attr)
+               return -ENOMEM;
+
+       err = ib_query_port(&dev->ib_dev, port_num, attr);
+       if (!err)
+               dev->rate[port_num - 1] = attr->active_speed *
+                                         ib_width_enum_to_int(attr->active_width);
+       else
+               printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
+                      err, dev->ib_dev.name, port_num);
+
+       kfree(attr);
+       return err;
+}
+
 static void update_sm_ah(struct mthca_dev *dev,
                         u8 port_num, u16 lid, u8 sl)
 {
             mad->mad_hdr.mgmt_class  == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
            mad->mad_hdr.method     == IB_MGMT_METHOD_SET) {
                if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
+                       mthca_update_rate(to_mdev(ibdev), port_num);
                        update_sm_ah(to_mdev(ibdev), port_num,
                                     be16_to_cpup((__be16 *) (mad->data + 58)),
                                     (*(u8 *) (mad->data + 76)) & 0xf);
 {
        struct ib_mad_agent *agent;
        int p, q;
+       int ret;
 
        spin_lock_init(&dev->sm_lock);
 
                                                      q ? IB_QPT_GSI : IB_QPT_SMI,
                                                      NULL, 0, send_handler,
                                                      NULL, NULL);
-                       if (IS_ERR(agent))
+                       if (IS_ERR(agent)) {
+                               ret = PTR_ERR(agent);
                                goto err;
+                       }
                        dev->send_agent[p][q] = agent;
                }
 
+
+       for (p = 1; p <= dev->limits.num_ports; ++p) {
+               ret = mthca_update_rate(dev, p);
+               if (ret) {
+                       mthca_err(dev, "Failed to obtain port %d rate."
+                                 " aborting.\n", p);
+                       goto err;
+               }
+       }
+
        return 0;
 
 err:
                        if (dev->send_agent[p][q])
                                ib_unregister_mad_agent(dev->send_agent[p][q]);
 
-       return PTR_ERR(agent);
+       return ret;
 }
 
 void __devexit mthca_free_agents(struct mthca_dev *dev)
 
        mdev->limits.port_width_cap     = dev_lim->max_port_width;
        mdev->limits.page_size_cap      = ~(u32) (dev_lim->min_page_sz - 1);
        mdev->limits.flags              = dev_lim->flags;
+       /*
+        * For old FW that doesn't return static rate support, use a
+        * value of 0x3 (only static rate values of 0 or 1 are handled),
+        * except on Sinai, where even old FW can handle static rate
+        * values of 2 and 3.
+        */
+       if (dev_lim->stat_rate_support)
+               mdev->limits.stat_rate_support = dev_lim->stat_rate_support;
+       else if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
+               mdev->limits.stat_rate_support = 0xf;
+       else
+               mdev->limits.stat_rate_support = 0x3;
 
        /* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
           May be doable since hardware supports it for SRQ.
 
        atomic_t               refcount;
        u32                    qpn;
        int                    is_direct;
+       u8                     port; /* for SQP and memfree use only */
+       u8                     alt_port; /* for memfree use only */
        u8                     transport;
        u8                     state;
        u8                     atomic_rd_en;
 
 struct mthca_sqp {
        struct mthca_qp qp;
-       int             port;
        int             pkey_index;
        u32             qkey;
        u32             send_psn;
 
                return;
        }
 
+       if (event_type == IB_EVENT_PATH_MIG)
+               qp->port = qp->alt_port;
+
        event.device      = &dev->ib_dev;
        event.event       = event_type;
        event.element.qp  = &qp->ibqp;
 {
        memset(ib_ah_attr, 0, sizeof *path);
        ib_ah_attr->port_num      = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+
+       if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
+               return;
+
        ib_ah_attr->dlid          = be16_to_cpu(path->rlid);
        ib_ah_attr->sl            = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
        ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
-       ib_ah_attr->static_rate   = path->static_rate & 0x7;
+       ib_ah_attr->static_rate   = mthca_rate_to_ib(dev,
+                                                    path->static_rate & 0x7,
+                                                    ib_ah_attr->port_num);
        ib_ah_attr->ah_flags      = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
        if (ib_ah_attr->ah_flags) {
                ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
        qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
        qp_attr->cap.max_inline_data = qp->max_inline_data;
 
-       to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
-       to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+       if (qp->transport == RC || qp->transport == UC) {
+               to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+               to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+       }
 
        qp_attr->pkey_index     = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
        qp_attr->alt_pkey_index = be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
 }
 
 static int mthca_path_set(struct mthca_dev *dev, struct ib_ah_attr *ah,
-                         struct mthca_qp_path *path)
+                         struct mthca_qp_path *path, u8 port)
 {
        path->g_mylmc     = ah->src_path_bits & 0x7f;
        path->rlid        = cpu_to_be16(ah->dlid);
-       path->static_rate = !!ah->static_rate;
+       path->static_rate = mthca_get_rate(dev, ah->static_rate, port);
 
        if (ah->ah_flags & IB_AH_GRH) {
                if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
 
        if (qp->transport == MLX)
                qp_context->pri_path.port_pkey |=
-                       cpu_to_be32(to_msqp(qp)->port << 24);
+                       cpu_to_be32(qp->port << 24);
        else {
                if (attr_mask & IB_QP_PORT) {
                        qp_context->pri_path.port_pkey |=
        }
 
        if (attr_mask & IB_QP_AV) {
-               if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path))
+               if (mthca_path_set(dev, &attr->ah_attr, &qp_context->pri_path,
+                                  attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
                        return -EINVAL;
 
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
                        return -EINVAL;
                }
 
-               if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path))
+               if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
+                                  attr->alt_ah_attr.port_num))
                        return -EINVAL;
 
                qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
                qp->atomic_rd_en = attr->qp_access_flags;
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                qp->resp_depth = attr->max_dest_rd_atomic;
+       if (attr_mask & IB_QP_PORT)
+               qp->port = attr->port_num;
+       if (attr_mask & IB_QP_ALT_PATH)
+               qp->alt_port = attr->alt_port_num;
 
        if (is_sqp(dev, qp))
                store_attrs(to_msqp(qp), attr, attr_mask);
        if (is_qp0(dev, qp)) {
                if (cur_state != IB_QPS_RTR &&
                    new_state == IB_QPS_RTR)
-                       init_port(dev, to_msqp(qp)->port);
+                       init_port(dev, qp->port);
 
                if (cur_state != IB_QPS_RESET &&
                    cur_state != IB_QPS_ERR &&
                    (new_state == IB_QPS_RESET ||
                     new_state == IB_QPS_ERR))
-                       mthca_CLOSE_IB(dev, to_msqp(qp)->port, &status);
+                       mthca_CLOSE_IB(dev, qp->port, &status);
        }
 
        /*
        if (qp->qpn == -1)
                return -ENOMEM;
 
+       /* initialize port to zero for error-catching. */
+       qp->port = 0;
+
        err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
                                    send_policy, qp);
        if (err) {
        if (err)
                goto err_out;
 
-       sqp->port = port;
+       sqp->qp.port      = port;
        sqp->qp.qpn       = mqpn;
        sqp->qp.transport = MLX;
 
                sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
        sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
        if (!sqp->qp.ibqp.qp_num)
-               ib_get_cached_pkey(&dev->ib_dev, sqp->port,
+               ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
                                   sqp->pkey_index, &pkey);
        else
-               ib_get_cached_pkey(&dev->ib_dev, sqp->port,
+               ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
                                   wr->wr.ud.pkey_index, &pkey);
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
        sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
 
                   gid_buf, path.pathrec.dlid ? "yes" : "no");
 
        if (path.pathrec.dlid) {
-               rate = ib_sa_rate_enum_to_int(path.pathrec.rate) * 25;
+               rate = ib_rate_to_mult(path.pathrec.rate) * 25;
 
                seq_printf(file,
                           "  DLID:     0x%04x\n"
 
                struct ib_ah_attr av = {
                        .dlid          = be16_to_cpu(pathrec->dlid),
                        .sl            = pathrec->sl,
-                       .port_num      = priv->port
+                       .port_num      = priv->port,
+                       .static_rate   = pathrec->rate
                };
-               int path_rate = ib_sa_rate_enum_to_int(pathrec->rate);
-
-               if (path_rate > 0 && priv->local_rate > path_rate)
-                       av.static_rate = (priv->local_rate - 1) / path_rate;
-
-               ipoib_dbg(priv, "static_rate %d for local port %dX, path %dX\n",
-                         av.static_rate, priv->local_rate,
-                         ib_sa_rate_enum_to_int(pathrec->rate));
 
                ah = ipoib_create_ah(dev, priv->pd, &av);
        }
 
                        .port_num      = priv->port,
                        .sl            = mcast->mcmember.sl,
                        .ah_flags      = IB_AH_GRH,
+                       .static_rate   = mcast->mcmember.rate,
                        .grh           = {
                                .flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
                                .hop_limit     = mcast->mcmember.hop_limit,
                                .traffic_class = mcast->mcmember.traffic_class
                        }
                };
-               int path_rate = ib_sa_rate_enum_to_int(mcast->mcmember.rate);
-
                av.grh.dgid = mcast->mcmember.mgid;
 
-               if (path_rate > 0 && priv->local_rate > path_rate)
-                       av.static_rate = (priv->local_rate - 1) / path_rate;
-
-               ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n",
-                               av.static_rate, priv->local_rate,
-                               ib_sa_rate_enum_to_int(mcast->mcmember.rate));
-
                ah = ipoib_create_ah(dev, priv->pd, &av);
                if (!ah) {
                        ipoib_warn(priv, "ib_address_create failed\n");
 
        IB_SA_BEST = 3
 };
 
-enum ib_sa_rate {
-       IB_SA_RATE_2_5_GBPS = 2,
-       IB_SA_RATE_5_GBPS   = 5,
-       IB_SA_RATE_10_GBPS  = 3,
-       IB_SA_RATE_20_GBPS  = 6,
-       IB_SA_RATE_30_GBPS  = 4,
-       IB_SA_RATE_40_GBPS  = 7,
-       IB_SA_RATE_60_GBPS  = 8,
-       IB_SA_RATE_80_GBPS  = 9,
-       IB_SA_RATE_120_GBPS = 10
-};
-
-static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate)
-{
-       switch (rate) {
-       case IB_SA_RATE_2_5_GBPS: return  1;
-       case IB_SA_RATE_5_GBPS:   return  2;
-       case IB_SA_RATE_10_GBPS:  return  4;
-       case IB_SA_RATE_20_GBPS:  return  8;
-       case IB_SA_RATE_30_GBPS:  return 12;
-       case IB_SA_RATE_40_GBPS:  return 16;
-       case IB_SA_RATE_60_GBPS:  return 24;
-       case IB_SA_RATE_80_GBPS:  return 32;
-       case IB_SA_RATE_120_GBPS: return 48;
-       default:                  return -1;
-       }
-}
-
 /*
  * Structures for SA records are named "struct ib_sa_xxx_rec."  No
  * attempt is made to pack structures to match the physical layout of
 
        IB_AH_GRH       = 1
 };
 
+enum ib_rate { /* multiples noted below are the values ib_rate_to_mult() returns */
+       IB_RATE_PORT_CURRENT = 0, /* no multiple; ib_rate_to_mult() returns -1 */
+       IB_RATE_2_5_GBPS = 2,     /*  1 x 2.5 Gbit/sec */
+       IB_RATE_5_GBPS   = 5,     /*  2 x 2.5 Gbit/sec */
+       IB_RATE_10_GBPS  = 3,     /*  4 x 2.5 Gbit/sec */
+       IB_RATE_20_GBPS  = 6,     /*  8 x 2.5 Gbit/sec */
+       IB_RATE_30_GBPS  = 4,     /* 12 x 2.5 Gbit/sec */
+       IB_RATE_40_GBPS  = 7,     /* 16 x 2.5 Gbit/sec */
+       IB_RATE_60_GBPS  = 8,     /* 24 x 2.5 Gbit/sec */
+       IB_RATE_80_GBPS  = 9,     /* 32 x 2.5 Gbit/sec */
+       IB_RATE_120_GBPS = 10     /* 48 x 2.5 Gbit/sec */
+};
+
+/**
+ * ib_rate_to_mult - Convert the IB rate enum to a multiple of the
+ * base rate of 2.5 Gbit/sec.  For example, IB_RATE_5_GBPS will be
+ * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.  Rates with
+ * no defined multiple (such as IB_RATE_PORT_CURRENT) are converted
+ * to -1.
+ * @rate: rate to convert.
+ */
+int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
+
+/**
+ * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
+ * enum.  A multiple with no matching enum value is converted to
+ * IB_RATE_PORT_CURRENT.
+ * @mult: multiple to convert.
+ */
+enum ib_rate mult_to_ib_rate(int mult) __attribute_const__;
+
 struct ib_ah_attr {
        struct ib_global_route  grh;
        u16                     dlid;