Taking rtnl_lock in ipoib_mcast_join_complete() causes a deadlock with
ipoib_stop().  We avoid it by scheduling the piece of code that takes
the lock on ipoib_workqueue instead of executing it directly.  This
works because we only flush the ipoib_workqueue with the RTNL not held.
The deadlock happens because ipoib_stop() calls ipoib_ib_dev_down()
which calls ipoib_mcast_dev_flush(), which calls ipoib_mcast_free(),
which calls ipoib_mcast_leave(). The latter calls
ib_sa_free_multicast(), and this waits until the multicast completion
handler finishes.  This handler is ipoib_mcast_join_complete(), which
waits for the rtnl_lock(), which was already taken by ipoib_stop().
This bug was introduced in commit 
a77a57a1 ("IPoIB: Fix deadlock on
RTNL in ipoib_stop()").
Signed-off-by: Yossi Etigin <yosefe@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
 
        struct delayed_work pkey_poll_task;
        struct delayed_work mcast_task;
+       struct work_struct carrier_on_task;
        struct work_struct flush_light;
        struct work_struct flush_normal;
        struct work_struct flush_heavy;
 void ipoib_dev_cleanup(struct net_device *dev);
 
 void ipoib_mcast_join_task(struct work_struct *work);
+void ipoib_mcast_carrier_on_task(struct work_struct *work);
 void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 
 
        INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
        INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
+       INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
        INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
        INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
        INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
 
        return ret;
 }
 
+void ipoib_mcast_carrier_on_task(struct work_struct *work)
+{
+       struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+                                                  carrier_on_task);
+
+       /*
+        * Take rtnl_lock to avoid racing with ipoib_stop() and
+        * turning the carrier back on while a device is being
+        * removed.
+        */
+       rtnl_lock();
+       netif_carrier_on(priv->dev);
+       rtnl_unlock();
+}
+
 static int ipoib_mcast_join_complete(int status,
                                     struct ib_sa_multicast *multicast)
 {
                                           &priv->mcast_task, 0);
                mutex_unlock(&mcast_mutex);
 
-               if (mcast == priv->broadcast) {
-                       /*
-                        * Take RTNL lock here to avoid racing with
-                        * ipoib_stop() and turning the carrier back
-                        * on while a device is being removed.
-                        */
-                       rtnl_lock();
-                       netif_carrier_on(dev);
-                       rtnl_unlock();
-               }
+               /*
+                * Defer carrier on work to ipoib_workqueue to avoid a
+                * deadlock on rtnl_lock here.
+                */
+               if (mcast == priv->broadcast)
+                       queue_work(ipoib_workqueue, &priv->carrier_on_task);
 
                return 0;
        }