#define DLM_LOCK_RES_IN_PROGRESS          0x00000010
 #define DLM_LOCK_RES_MIGRATING            0x00000020
 
+/* max milliseconds to wait to sync up a network failure with a node death */
+#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
+
 #define DLM_PURGE_INTERVAL_MS   (8 * 1000)
 
 struct dlm_lock_resource
 void dlm_complete_recovery_thread(struct dlm_ctxt *dlm);
 void dlm_wait_for_recovery(struct dlm_ctxt *dlm);
 int dlm_is_node_dead(struct dlm_ctxt *dlm, u8 node);
+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout);
 
 void dlm_put(struct dlm_ctxt *dlm);
 struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm);
 
        } else {
                mlog_errno(tmpret);
                if (dlm_is_host_down(tmpret)) {
+                       /* instead of logging the same network error over
+                        * and over, sleep here and wait for the heartbeat
+                        * to notice the node is dead.  times out after 5s. */
+                       dlm_wait_for_node_death(dlm, res->owner, 
+                                               DLM_NODE_DEATH_WAIT_MAX);
                        ret = DLM_RECOVERING;
                        mlog(0, "node %u died so returning DLM_RECOVERING "
                             "from convert message!\n", res->owner);
 
                        mlog(0, "retrying lock with migration/"
                             "recovery/in progress\n");
                        msleep(100);
-                       dlm_wait_for_recovery(dlm);
+                       /* no waiting for dlm_reco_thread */
+                       if (recovery) {
+                               if (status == DLM_RECOVERING) {
+                                       mlog(0, "%s: got RECOVERING "
+                                            "for $REOCVERY lock, master "
+                                            "was %u\n", dlm->name, 
+                                            res->owner);
+                                       dlm_wait_for_node_death(dlm, res->owner, 
+                                                       DLM_NODE_DEATH_WAIT_MAX);
+                               }
+                       } else {
+                               dlm_wait_for_recovery(dlm);
+                       }
                        goto retry_lock;
                }
 
 
        return dead;
 }
 
+int dlm_wait_for_node_death(struct dlm_ctxt *dlm, u8 node, int timeout)
+{
+       if (timeout) {
+               mlog(ML_NOTICE, "%s: waiting %dms for notification of "
+                    "death of node %u\n", dlm->name, timeout, node);
+               wait_event_timeout(dlm->dlm_reco_thread_wq,
+                          dlm_is_node_dead(dlm, node),
+                          msecs_to_jiffies(timeout));
+       } else {
+               mlog(ML_NOTICE, "%s: waiting indefinitely for notification "
+                    "of death of node %u\n", dlm->name, node);
+               wait_event(dlm->dlm_reco_thread_wq,
+                          dlm_is_node_dead(dlm, node));
+       }
+       /* for now, return 0 */
+       return 0;
+}
+
 /* callers of the top-level api calls (dlmlock/dlmunlock) should
  * block on the dlm->reco.event when recovery is in progress.
  * the dlm recovery thread will set this state when it begins