memset(tx_ring->buffer_info, 0, size);
 
        /* round up to nearest 4K */
-       tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc)
-                       + sizeof(u32);
+       tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
        tx_ring->size = ALIGN(tx_ring->size, 4096);
 
        tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
  **/
 static void igb_configure_tx(struct igb_adapter *adapter)
 {
-       u64 tdba, tdwba;
+       u64 tdba;
        struct e1000_hw *hw = &adapter->hw;
        u32 tctl;
        u32 txdctl, txctrl;
                                tdba & 0x00000000ffffffffULL);
                wr32(E1000_TDBAH(i), tdba >> 32);
 
-               tdwba = ring->dma + ring->count * sizeof(struct e1000_tx_desc);
-               tdwba |= 1; /* enable head wb */
-               wr32(E1000_TDWBAL(i),
-                               tdwba & 0x00000000ffffffffULL);
-               wr32(E1000_TDWBAH(i), tdwba >> 32);
-
                ring->head = E1000_TDH(i);
                ring->tail = E1000_TDT(i);
                writel(0, hw->hw_addr + ring->tail);
        context_desc->seqnum_seed = 0;
 
        buffer_info->time_stamp = jiffies;
+       buffer_info->next_to_watch = i;
        buffer_info->dma = 0;
        i++;
        if (i == tx_ring->count)
                                cpu_to_le32(tx_ring->queue_index << 4);
 
                buffer_info->time_stamp = jiffies;
+               buffer_info->next_to_watch = i;
                buffer_info->dma = 0;
 
                i++;
 #define IGB_MAX_DATA_PER_TXD   (1<<IGB_MAX_TXD_PWR)
 
 static inline int igb_tx_map_adv(struct igb_adapter *adapter,
-                                struct igb_ring *tx_ring,
-                                struct sk_buff *skb)
+                                struct igb_ring *tx_ring, struct sk_buff *skb,
+                                unsigned int first)
 {
        struct igb_buffer *buffer_info;
        unsigned int len = skb_headlen(skb);
        buffer_info->length = len;
        /* set time_stamp *before* dma to help avoid a possible race */
        buffer_info->time_stamp = jiffies;
+       buffer_info->next_to_watch = i;
        buffer_info->dma = pci_map_single(adapter->pdev, skb->data, len,
                                          PCI_DMA_TODEVICE);
        count++;
                BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
                buffer_info->length = len;
                buffer_info->time_stamp = jiffies;
+               buffer_info->next_to_watch = i;
                buffer_info->dma = pci_map_page(adapter->pdev,
                                                frag->page,
                                                frag->page_offset,
                        i = 0;
        }
 
-       i = (i == 0) ? tx_ring->count - 1 : i - 1;
+       i = ((i == 0) ? tx_ring->count - 1 : i - 1);
        tx_ring->buffer_info[i].skb = skb;
+       tx_ring->buffer_info[first].next_to_watch = i;
 
        return count;
 }
                                   struct igb_ring *tx_ring)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
+       unsigned int first;
        unsigned int tx_flags = 0;
        unsigned int len;
        u8 hdr_len = 0;
        if (skb->protocol == htons(ETH_P_IP))
                tx_flags |= IGB_TX_FLAGS_IPV4;
 
+       first = tx_ring->next_to_use;
+
        tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags,
                                              &hdr_len) : 0;
 
                                tx_flags |= IGB_TX_FLAGS_CSUM;
 
        igb_tx_queue_adv(adapter, tx_ring, tx_flags,
-                        igb_tx_map_adv(adapter, tx_ring, skb),
+                        igb_tx_map_adv(adapter, tx_ring, skb, first),
                         skb->len, hdr_len);
 
        netdev->trans_start = jiffies;
        return 1;
 }
 
-static inline u32 get_head(struct igb_ring *tx_ring)
-{
-       void *end = (struct e1000_tx_desc *)tx_ring->desc + tx_ring->count;
-       return le32_to_cpu(*(volatile __le32 *)end);
-}
-
 /**
  * igb_clean_tx_irq - Reclaim resources after transmit completes
  * @adapter: board private structure
 static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
 {
        struct igb_adapter *adapter = tx_ring->adapter;
-       struct e1000_hw *hw = &adapter->hw;
        struct net_device *netdev = adapter->netdev;
-       struct e1000_tx_desc *tx_desc;
+       struct e1000_hw *hw = &adapter->hw;
        struct igb_buffer *buffer_info;
        struct sk_buff *skb;
-       unsigned int i;
-       u32 head, oldhead;
-       unsigned int count = 0;
+       union e1000_adv_tx_desc *tx_desc, *eop_desc;
        unsigned int total_bytes = 0, total_packets = 0;
-       bool retval = true;
+       unsigned int i, eop, count = 0;
+       bool cleaned = false;
 
-       rmb();
-       head = get_head(tx_ring);
        i = tx_ring->next_to_clean;
-       while (1) {
-               while (i != head) {
-                       tx_desc = E1000_TX_DESC(*tx_ring, i);
+       eop = tx_ring->buffer_info[i].next_to_watch;
+       eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
+
+       while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
+              (count < tx_ring->count)) {
+               for (cleaned = false; !cleaned; count++) {
+                       tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
                        buffer_info = &tx_ring->buffer_info[i];
+                       cleaned = (i == eop);
                        skb = buffer_info->skb;
 
                        if (skb) {
                        }
 
                        igb_unmap_and_free_tx_resource(adapter, buffer_info);
+                       tx_desc->wb.status = 0;
 
                        i++;
                        if (i == tx_ring->count)
                                i = 0;
-
-                       count++;
-                       if (count == IGB_MAX_TX_CLEAN) {
-                               retval = false;
-                               goto done_cleaning;
-                       }
                }
-               oldhead = head;
-               rmb();
-               head = get_head(tx_ring);
-               if (head == oldhead)
-                       goto done_cleaning;
-       }  /* while (1) */
-
-done_cleaning:
+
+               eop = tx_ring->buffer_info[i].next_to_watch;
+               eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
+       }
+
        tx_ring->next_to_clean = i;
 
        if (unlikely(count &&
                    && !(rd32(E1000_STATUS) &
                         E1000_STATUS_TXOFF)) {
 
-                       tx_desc = E1000_TX_DESC(*tx_ring, i);
                        /* detected Tx unit hang */
                        dev_err(&adapter->pdev->dev,
                                "Detected Tx Unit Hang\n"
                                "  TDT                  <%x>\n"
                                "  next_to_use          <%x>\n"
                                "  next_to_clean        <%x>\n"
-                               "  head (WB)            <%x>\n"
                                "buffer_info[next_to_clean]\n"
                                "  time_stamp           <%lx>\n"
+                               "  next_to_watch        <%x>\n"
                                "  jiffies              <%lx>\n"
                                "  desc.status          <%x>\n",
                                tx_ring->queue_index,
                                readl(adapter->hw.hw_addr + tx_ring->tail),
                                tx_ring->next_to_use,
                                tx_ring->next_to_clean,
-                               head,
                                tx_ring->buffer_info[i].time_stamp,
+                               eop,
                                jiffies,
-                               tx_desc->upper.fields.status);
+                               eop_desc->wb.status);
                        netif_stop_subqueue(netdev, tx_ring->queue_index);
                }
        }
        tx_ring->tx_stats.packets += total_packets;
        adapter->net_stats.tx_bytes += total_bytes;
        adapter->net_stats.tx_packets += total_packets;
-       return retval;
+       return (count < tx_ring->count);
 }
 
 #ifdef CONFIG_IGB_LRO