pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - drivers/net/pasemi_mac.c
mv643xx_eth: Clean up mv643xx_eth.h
[linux-2.6-omap-h63xx.git] / drivers / net / pasemi_mac.c
index 4d87cd65626f8c2954450f0f8b46c97fbd94b4c8..9f9a421c99b33fa4a107d4aa7f3f19160a3b51b8 100644 (file)
 #include <net/checksum.h>
 
 #include <asm/irq.h>
+#include <asm/firmware.h>
 
 #include "pasemi_mac.h"
 
+/* We use our own alignment instead of NET_IP_ALIGN, since ppc64 in general
+ * sets it to 0 because of design flaws in some of the server bridge chips.
+ * However, for PWRficient doing the unaligned copies is more expensive than
+ * doing unaligned DMA, so make sure the data is aligned instead.
+ */
+#define LOCAL_SKB_ALIGN        2
 
 /* TODO list
  *
- * - Get rid of pci_{read,write}_config(), map registers with ioremap
- *   for performance
- * - PHY support
  * - Multicast support
  * - Large MTU support
- * - Other performance improvements
+ * - SW LRO
+ * - Multiqueue RX/TX
  */
 
 
 /* Must be a power of two */
-#define RX_RING_SIZE 512
-#define TX_RING_SIZE 512
+#define RX_RING_SIZE 4096
+#define TX_RING_SIZE 4096
 
 #define DEFAULT_MSG_ENABLE       \
        (NETIF_MSG_DRV          | \
         NETIF_MSG_RX_ERR       | \
         NETIF_MSG_TX_ERR)
 
-#define TX_DESC(mac, num)      ((mac)->tx->desc[(num) & (TX_RING_SIZE-1)])
-#define TX_DESC_INFO(mac, num) ((mac)->tx->desc_info[(num) & (TX_RING_SIZE-1)])
-#define RX_DESC(mac, num)      ((mac)->rx->desc[(num) & (RX_RING_SIZE-1)])
-#define RX_DESC_INFO(mac, num) ((mac)->rx->desc_info[(num) & (RX_RING_SIZE-1)])
+#define TX_RING(mac, num)      ((mac)->tx->ring[(num) & (TX_RING_SIZE-1)])
+#define TX_RING_INFO(mac, num) ((mac)->tx->ring_info[(num) & (TX_RING_SIZE-1)])
+#define RX_RING(mac, num)      ((mac)->rx->ring[(num) & (RX_RING_SIZE-1)])
+#define RX_RING_INFO(mac, num) ((mac)->rx->ring_info[(num) & (RX_RING_SIZE-1)])
 #define RX_BUFF(mac, num)      ((mac)->rx->buffers[(num) & (RX_RING_SIZE-1)])
 
 #define RING_USED(ring)                (((ring)->next_to_fill - (ring)->next_to_clean) \
@@ -85,9 +90,13 @@ MODULE_PARM_DESC(debug, "PA Semi MAC bitmapped debugging message enable value");
 
 static struct pasdma_status *dma_status;
 
-static unsigned int read_iob_reg(struct pasemi_mac *mac, unsigned int reg)
+static int translation_enabled(void)
 {
-       return in_le32(mac->iob_regs+reg);
+#if defined(CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE)
+       return 1;
+#else
+       return firmware_has_feature(FW_FEATURE_LPAR);
+#endif
 }
 
 static void write_iob_reg(struct pasemi_mac *mac, unsigned int reg,
@@ -165,11 +174,36 @@ static int pasemi_get_mac_addr(struct pasemi_mac *mac)
        return 0;
 }
 
+static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac,
+                                   struct sk_buff *skb,
+                                   dma_addr_t *dmas)
+{
+       int f;
+       int nfrags = skb_shinfo(skb)->nr_frags;
+
+       pci_unmap_single(mac->dma_pdev, dmas[0], skb_headlen(skb),
+                        PCI_DMA_TODEVICE);
+
+       for (f = 0; f < nfrags; f++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+               pci_unmap_page(mac->dma_pdev, dmas[f+1], frag->size,
+                              PCI_DMA_TODEVICE);
+       }
+       dev_kfree_skb_irq(skb);
+
+       /* Freed descriptor slot + main SKB ptr + nfrags additional ptrs,
+        * rounded up to an even number of ring entries
+        */
+       return (nfrags + 3) & ~1;
+}
+
 static int pasemi_mac_setup_rx_resources(struct net_device *dev)
 {
        struct pasemi_mac_rxring *ring;
        struct pasemi_mac *mac = netdev_priv(dev);
        int chan_id = mac->dma_rxch;
+       unsigned int cfg;
 
        ring = kzalloc(sizeof(*ring), GFP_KERNEL);
 
@@ -179,22 +213,21 @@ static int pasemi_mac_setup_rx_resources(struct net_device *dev)
        spin_lock_init(&ring->lock);
 
        ring->size = RX_RING_SIZE;
-       ring->desc_info = kzalloc(sizeof(struct pasemi_mac_buffer) *
+       ring->ring_info = kzalloc(sizeof(struct pasemi_mac_buffer) *
                                  RX_RING_SIZE, GFP_KERNEL);
 
-       if (!ring->desc_info)
-               goto out_desc_info;
+       if (!ring->ring_info)
+               goto out_ring_info;
 
        /* Allocate descriptors */
-       ring->desc = dma_alloc_coherent(&mac->dma_pdev->dev,
-                                       RX_RING_SIZE *
-                                       sizeof(struct pas_dma_xct_descr),
+       ring->ring = dma_alloc_coherent(&mac->dma_pdev->dev,
+                                       RX_RING_SIZE * sizeof(u64),
                                        &ring->dma, GFP_KERNEL);
 
-       if (!ring->desc)
-               goto out_desc;
+       if (!ring->ring)
+               goto out_ring_desc;
 
-       memset(ring->desc, 0, RX_RING_SIZE * sizeof(struct pas_dma_xct_descr));
+       memset(ring->ring, 0, RX_RING_SIZE * sizeof(u64));
 
        ring->buffers = dma_alloc_coherent(&mac->dma_pdev->dev,
                                           RX_RING_SIZE * sizeof(u64),
@@ -208,20 +241,30 @@ static int pasemi_mac_setup_rx_resources(struct net_device *dev)
 
        write_dma_reg(mac, PAS_DMA_RXCHAN_BASEU(chan_id),
                           PAS_DMA_RXCHAN_BASEU_BRBH(ring->dma >> 32) |
-                          PAS_DMA_RXCHAN_BASEU_SIZ(RX_RING_SIZE >> 2));
+                          PAS_DMA_RXCHAN_BASEU_SIZ(RX_RING_SIZE >> 3));
+
+       cfg = PAS_DMA_RXCHAN_CFG_HBU(2);
 
-       write_dma_reg(mac, PAS_DMA_RXCHAN_CFG(chan_id),
-                          PAS_DMA_RXCHAN_CFG_HBU(2));
+       if (translation_enabled())
+               cfg |= PAS_DMA_RXCHAN_CFG_CTR;
+
+       write_dma_reg(mac, PAS_DMA_RXCHAN_CFG(chan_id), cfg);
 
        write_dma_reg(mac, PAS_DMA_RXINT_BASEL(mac->dma_if),
-                          PAS_DMA_RXINT_BASEL_BRBL(__pa(ring->buffers)));
+                          PAS_DMA_RXINT_BASEL_BRBL(ring->buf_dma));
 
        write_dma_reg(mac, PAS_DMA_RXINT_BASEU(mac->dma_if),
-                          PAS_DMA_RXINT_BASEU_BRBH(__pa(ring->buffers) >> 32) |
+                          PAS_DMA_RXINT_BASEU_BRBH(ring->buf_dma >> 32) |
                           PAS_DMA_RXINT_BASEU_SIZ(RX_RING_SIZE >> 3));
 
-       write_dma_reg(mac, PAS_DMA_RXINT_CFG(mac->dma_if),
-                          PAS_DMA_RXINT_CFG_DHL(2));
+       cfg = PAS_DMA_RXINT_CFG_DHL(3) | PAS_DMA_RXINT_CFG_L2 |
+             PAS_DMA_RXINT_CFG_LW | PAS_DMA_RXINT_CFG_RBP |
+             PAS_DMA_RXINT_CFG_HEN;
+
+       if (translation_enabled())
+               cfg |= PAS_DMA_RXINT_CFG_ITRR | PAS_DMA_RXINT_CFG_ITR;
+
+       write_dma_reg(mac, PAS_DMA_RXINT_CFG(mac->dma_if), cfg);
 
        ring->next_to_fill = 0;
        ring->next_to_clean = 0;
@@ -234,11 +277,11 @@ static int pasemi_mac_setup_rx_resources(struct net_device *dev)
 
 out_buffers:
        dma_free_coherent(&mac->dma_pdev->dev,
-                         RX_RING_SIZE * sizeof(struct pas_dma_xct_descr),
-                         mac->rx->desc, mac->rx->dma);
-out_desc:
-       kfree(ring->desc_info);
-out_desc_info:
+                         RX_RING_SIZE * sizeof(u64),
+                         mac->rx->ring, mac->rx->dma);
+out_ring_desc:
+       kfree(ring->ring_info);
+out_ring_info:
        kfree(ring);
 out_ring:
        return -ENOMEM;
@@ -251,6 +294,7 @@ static int pasemi_mac_setup_tx_resources(struct net_device *dev)
        u32 val;
        int chan_id = mac->dma_txch;
        struct pasemi_mac_txring *ring;
+       unsigned int cfg;
 
        ring = kzalloc(sizeof(*ring), GFP_KERNEL);
        if (!ring)
@@ -259,33 +303,36 @@ static int pasemi_mac_setup_tx_resources(struct net_device *dev)
        spin_lock_init(&ring->lock);
 
        ring->size = TX_RING_SIZE;
-       ring->desc_info = kzalloc(sizeof(struct pasemi_mac_buffer) *
+       ring->ring_info = kzalloc(sizeof(struct pasemi_mac_buffer) *
                                  TX_RING_SIZE, GFP_KERNEL);
-       if (!ring->desc_info)
-               goto out_desc_info;
+       if (!ring->ring_info)
+               goto out_ring_info;
 
        /* Allocate descriptors */
-       ring->desc = dma_alloc_coherent(&mac->dma_pdev->dev,
-                                       TX_RING_SIZE *
-                                       sizeof(struct pas_dma_xct_descr),
+       ring->ring = dma_alloc_coherent(&mac->dma_pdev->dev,
+                                       TX_RING_SIZE * sizeof(u64),
                                        &ring->dma, GFP_KERNEL);
-       if (!ring->desc)
-               goto out_desc;
+       if (!ring->ring)
+               goto out_ring_desc;
 
-       memset(ring->desc, 0, TX_RING_SIZE * sizeof(struct pas_dma_xct_descr));
+       memset(ring->ring, 0, TX_RING_SIZE * sizeof(u64));
 
        write_dma_reg(mac, PAS_DMA_TXCHAN_BASEL(chan_id),
                           PAS_DMA_TXCHAN_BASEL_BRBL(ring->dma));
        val = PAS_DMA_TXCHAN_BASEU_BRBH(ring->dma >> 32);
-       val |= PAS_DMA_TXCHAN_BASEU_SIZ(TX_RING_SIZE >> 2);
+       val |= PAS_DMA_TXCHAN_BASEU_SIZ(TX_RING_SIZE >> 3);
 
        write_dma_reg(mac, PAS_DMA_TXCHAN_BASEU(chan_id), val);
 
-       write_dma_reg(mac, PAS_DMA_TXCHAN_CFG(chan_id),
-                          PAS_DMA_TXCHAN_CFG_TY_IFACE |
-                          PAS_DMA_TXCHAN_CFG_TATTR(mac->dma_if) |
-                          PAS_DMA_TXCHAN_CFG_UP |
-                          PAS_DMA_TXCHAN_CFG_WT(2));
+       cfg = PAS_DMA_TXCHAN_CFG_TY_IFACE |
+             PAS_DMA_TXCHAN_CFG_TATTR(mac->dma_if) |
+             PAS_DMA_TXCHAN_CFG_UP |
+             PAS_DMA_TXCHAN_CFG_WT(2);
+
+       if (translation_enabled())
+               cfg |= PAS_DMA_TXCHAN_CFG_TRD | PAS_DMA_TXCHAN_CFG_TRR;
+
+       write_dma_reg(mac, PAS_DMA_TXCHAN_CFG(chan_id), cfg);
 
        ring->next_to_fill = 0;
        ring->next_to_clean = 0;
@@ -296,9 +343,9 @@ static int pasemi_mac_setup_tx_resources(struct net_device *dev)
 
        return 0;
 
-out_desc:
-       kfree(ring->desc_info);
-out_desc_info:
+out_ring_desc:
+       kfree(ring->ring_info);
+out_ring_info:
        kfree(ring);
 out_ring:
        return -ENOMEM;
@@ -307,33 +354,37 @@ out_ring:
 static void pasemi_mac_free_tx_resources(struct net_device *dev)
 {
        struct pasemi_mac *mac = netdev_priv(dev);
-       unsigned int i;
+       unsigned int i, j;
        struct pasemi_mac_buffer *info;
-       struct pas_dma_xct_descr *dp;
-
-       for (i = 0; i < TX_RING_SIZE; i++) {
-               info = &TX_DESC_INFO(mac, i);
-               dp = &TX_DESC(mac, i);
-               if (info->dma) {
-                       if (info->skb) {
-                               pci_unmap_single(mac->dma_pdev,
-                                                info->dma,
-                                                info->skb->len,
-                                                PCI_DMA_TODEVICE);
-                               dev_kfree_skb_any(info->skb);
-                       }
-                       info->dma = 0;
-                       info->skb = NULL;
-                       dp->mactx = 0;
-                       dp->ptr = 0;
-               }
+       dma_addr_t dmas[MAX_SKB_FRAGS+1];
+       int freed;
+       int start, limit;
+
+       start = mac->tx->next_to_clean;
+       limit = mac->tx->next_to_fill;
+
+       /* Compensate for when fill has wrapped and clean has not */
+       if (start > limit)
+               limit += TX_RING_SIZE;
+
+       for (i = start; i < limit; i += freed) {
+               info = &TX_RING_INFO(mac, i+1);
+               if (info->dma && info->skb) {
+                       for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++)
+                               dmas[j] = TX_RING_INFO(mac, i+1+j).dma;
+                       freed = pasemi_mac_unmap_tx_skb(mac, info->skb, dmas);
+               } else
+                       freed = 2;
        }
 
+       for (i = 0; i < TX_RING_SIZE; i++)
+               TX_RING(mac, i) = 0;
+
        dma_free_coherent(&mac->dma_pdev->dev,
-                         TX_RING_SIZE * sizeof(struct pas_dma_xct_descr),
-                         mac->tx->desc, mac->tx->dma);
+                         TX_RING_SIZE * sizeof(u64),
+                         mac->tx->ring, mac->tx->dma);
 
-       kfree(mac->tx->desc_info);
+       kfree(mac->tx->ring_info);
        kfree(mac->tx);
        mac->tx = NULL;
 }
@@ -343,70 +394,66 @@ static void pasemi_mac_free_rx_resources(struct net_device *dev)
        struct pasemi_mac *mac = netdev_priv(dev);
        unsigned int i;
        struct pasemi_mac_buffer *info;
-       struct pas_dma_xct_descr *dp;
 
        for (i = 0; i < RX_RING_SIZE; i++) {
-               info = &RX_DESC_INFO(mac, i);
-               dp = &RX_DESC(mac, i);
-               if (info->skb) {
-                       if (info->dma) {
-                               pci_unmap_single(mac->dma_pdev,
-                                                info->dma,
-                                                info->skb->len,
-                                                PCI_DMA_FROMDEVICE);
-                               dev_kfree_skb_any(info->skb);
-                       }
-                       info->dma = 0;
-                       info->skb = NULL;
-                       dp->macrx = 0;
-                       dp->ptr = 0;
+               info = &RX_RING_INFO(mac, i);
+               if (info->skb && info->dma) {
+                       pci_unmap_single(mac->dma_pdev,
+                                        info->dma,
+                                        info->skb->len,
+                                        PCI_DMA_FROMDEVICE);
+                       dev_kfree_skb_any(info->skb);
                }
+               info->dma = 0;
+               info->skb = NULL;
        }
 
+       for (i = 0; i < RX_RING_SIZE; i++)
+               RX_RING(mac, i) = 0;
+
        dma_free_coherent(&mac->dma_pdev->dev,
-                         RX_RING_SIZE * sizeof(struct pas_dma_xct_descr),
-                         mac->rx->desc, mac->rx->dma);
+                         RX_RING_SIZE * sizeof(u64),
+                         mac->rx->ring, mac->rx->dma);
 
        dma_free_coherent(&mac->dma_pdev->dev, RX_RING_SIZE * sizeof(u64),
                          mac->rx->buffers, mac->rx->buf_dma);
 
-       kfree(mac->rx->desc_info);
+       kfree(mac->rx->ring_info);
        kfree(mac->rx);
        mac->rx = NULL;
 }
 
-static void pasemi_mac_replenish_rx_ring(struct net_device *dev)
+static void pasemi_mac_replenish_rx_ring(struct net_device *dev, int limit)
 {
        struct pasemi_mac *mac = netdev_priv(dev);
-       unsigned int i;
-       int start = mac->rx->next_to_fill;
-       unsigned int limit, count;
-
-       limit = RING_AVAIL(mac->rx);
-       /* Check to see if we're doing first-time setup */
-       if (unlikely(mac->rx->next_to_clean == 0 && mac->rx->next_to_fill == 0))
-               limit = RX_RING_SIZE;
+       int fill, count;
 
        if (limit <= 0)
                return;
 
-       i = start;
-       for (count = limit; count; count--) {
-               struct pasemi_mac_buffer *info = &RX_DESC_INFO(mac, i);
-               u64 *buff = &RX_BUFF(mac, i);
+       fill = mac->rx->next_to_fill;
+       for (count = 0; count < limit; count++) {
+               struct pasemi_mac_buffer *info = &RX_RING_INFO(mac, fill);
+               u64 *buff = &RX_BUFF(mac, fill);
                struct sk_buff *skb;
                dma_addr_t dma;
 
+               /* Entry in use? */
+               WARN_ON(*buff);
+
                /* skb might still be in there for recycle on short receives */
                if (info->skb)
                        skb = info->skb;
-               else
+               else {
                        skb = dev_alloc_skb(BUF_SIZE);
+                       skb_reserve(skb, LOCAL_SKB_ALIGN);
+               }
 
                if (unlikely(!skb))
                        break;
 
-               dma = pci_map_single(mac->dma_pdev, skb->data, skb->len,
+               dma = pci_map_single(mac->dma_pdev, skb->data,
+                                    BUF_SIZE - LOCAL_SKB_ALIGN,
                                     PCI_DMA_FROMDEVICE);
 
                if (unlikely(dma_mapping_error(dma))) {
@@ -417,15 +464,15 @@ static void pasemi_mac_replenish_rx_ring(struct net_device *dev)
                info->skb = skb;
                info->dma = dma;
                *buff = XCT_RXB_LEN(BUF_SIZE) | XCT_RXB_ADDR(dma);
-               i++;
+               fill++;
        }
 
        wmb();
 
-       write_dma_reg(mac, PAS_DMA_RXCHAN_INCR(mac->dma_rxch), limit - count);
-       write_dma_reg(mac, PAS_DMA_RXINT_INCR(mac->dma_if), limit - count);
+       write_dma_reg(mac, PAS_DMA_RXINT_INCR(mac->dma_if), count);
 
-       mac->rx->next_to_fill += limit - count;
+       mac->rx->next_to_fill = (mac->rx->next_to_fill + count) &
+                               (RX_RING_SIZE - 1);
 }
 
 static void pasemi_mac_restart_rx_intr(struct pasemi_mac *mac)
@@ -455,64 +502,91 @@ static void pasemi_mac_restart_tx_intr(struct pasemi_mac *mac)
 }
 
 
+static inline void pasemi_mac_rx_error(struct pasemi_mac *mac, u64 macrx)
+{
+       unsigned int rcmdsta, ccmdsta;
+
+       if (!netif_msg_rx_err(mac))
+               return;
+
+       rcmdsta = read_dma_reg(mac, PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
+       ccmdsta = read_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch));
+
+       printk(KERN_ERR "pasemi_mac: rx error. macrx %016lx, rx status %lx\n",
+               macrx, *mac->rx_status);
+
+       printk(KERN_ERR "pasemi_mac: rcmdsta %08x ccmdsta %08x\n",
+               rcmdsta, ccmdsta);
+}
+
+static inline void pasemi_mac_tx_error(struct pasemi_mac *mac, u64 mactx)
+{
+       unsigned int cmdsta;
+
+       if (!netif_msg_tx_err(mac))
+               return;
+
+       cmdsta = read_dma_reg(mac, PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch));
+
+       printk(KERN_ERR "pasemi_mac: tx error. mactx 0x%016lx, "\
+               "tx status 0x%016lx\n", mactx, *mac->tx_status);
+
+       printk(KERN_ERR "pasemi_mac: tcmdsta 0x%08x\n", cmdsta);
+}
+
 static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
 {
        unsigned int n;
        int count;
-       struct pas_dma_xct_descr *dp;
        struct pasemi_mac_buffer *info;
        struct sk_buff *skb;
-       unsigned int i, len;
+       unsigned int len;
        u64 macrx;
        dma_addr_t dma;
+       int buf_index;
+       u64 eval;
 
        spin_lock(&mac->rx->lock);
 
        n = mac->rx->next_to_clean;
 
-       for (count = limit; count; count--) {
+       prefetch(RX_RING(mac, n));
 
-               rmb();
+       for (count = 0; count < limit; count++) {
+               macrx = RX_RING(mac, n);
 
-               dp = &RX_DESC(mac, n);
-               prefetchw(dp);
-               macrx = dp->macrx;
+               if ((macrx & XCT_MACRX_E) ||
+                   (*mac->rx_status & PAS_STATUS_ERROR))
+                       pasemi_mac_rx_error(mac, macrx);
 
                if (!(macrx & XCT_MACRX_O))
                        break;
 
-
                info = NULL;
 
-               /* We have to scan for our skb since there's no way
-                * to back-map them from the descriptor, and if we
-                * have several receive channels then they might not
-                * show up in the same order as they were put on the
-                * interface ring.
-                */
+               BUG_ON(!(macrx & XCT_MACRX_RR_8BRES));
 
-               dma = (dp->ptr & XCT_PTR_ADDR_M);
-               for (i = n; i < (n + RX_RING_SIZE); i++) {
-                       info = &RX_DESC_INFO(mac, i);
-                       if (info->dma == dma)
-                               break;
-               }
-               prefetchw(info);
+               eval = (RX_RING(mac, n+1) & XCT_RXRES_8B_EVAL_M) >>
+                       XCT_RXRES_8B_EVAL_S;
+               buf_index = eval-1;
+
+               dma = (RX_RING(mac, n+2) & XCT_PTR_ADDR_M);
+               info = &RX_RING_INFO(mac, buf_index);
 
                skb = info->skb;
-               prefetchw(skb);
-               info->dma = 0;
 
-               pci_unmap_single(mac->dma_pdev, dma, skb->len,
-                                PCI_DMA_FROMDEVICE);
+               prefetch(skb);
+               prefetch(&skb->data_len);
 
                len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S;
 
                if (len < 256) {
-                       struct sk_buff *new_skb =
-                           netdev_alloc_skb(mac->netdev, len + NET_IP_ALIGN);
+                       struct sk_buff *new_skb;
+
+                       new_skb = netdev_alloc_skb(mac->netdev,
+                                                  len + LOCAL_SKB_ALIGN);
                        if (new_skb) {
-                               skb_reserve(new_skb, NET_IP_ALIGN);
+                               skb_reserve(new_skb, LOCAL_SKB_ALIGN);
                                memcpy(new_skb->data, skb->data, len);
                                /* save the skb in buffer_info as good */
                                skb = new_skb;
@@ -521,6 +595,10 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
                } else
                        info->skb = NULL;
 
+               pci_unmap_single(mac->dma_pdev, dma, len, PCI_DMA_FROMDEVICE);
+
+               info->dma = 0;
+
                skb_put(skb, len);
 
                if (likely((macrx & XCT_MACRX_HTY_M) == XCT_MACRX_HTY_IPV4_OK)) {
@@ -536,72 +614,109 @@ static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit)
                skb->protocol = eth_type_trans(skb, mac->netdev);
                netif_receive_skb(skb);
 
-               dp->ptr = 0;
-               dp->macrx = 0;
+               RX_RING(mac, n) = 0;
+               RX_RING(mac, n+1) = 0;
+
+               /* The hardware doesn't clear the buffer entry, so zero it
+                * here; the replenish loop uses it to tell when it's done.
+                */
+               RX_BUFF(mac, buf_index) = 0;
+
+               n += 4;
+       }
 
-               n++;
+       if (n > RX_RING_SIZE) {
+               /* Errata 5971 workaround: L2 target of headers */
+               write_iob_reg(mac, PAS_IOB_COM_PKTHDRCNT, 0);
+               n &= (RX_RING_SIZE-1);
        }
 
-       mac->rx->next_to_clean += limit - count;
-       pasemi_mac_replenish_rx_ring(mac->netdev);
+       mac->rx->next_to_clean = n;
+
+       /* The increment is counted in 16-byte entries; since each descriptor
+        * with an 8BRES takes up 3x8 bytes (padded to 4x8), increment by
+        * count*2.
+        */
+       write_dma_reg(mac, PAS_DMA_RXCHAN_INCR(mac->dma_rxch), count << 1);
+
+       pasemi_mac_replenish_rx_ring(mac->netdev, count);
 
        spin_unlock(&mac->rx->lock);
 
        return count;
 }
 
+/* Can't make this too large or we blow the kernel stack limits */
+#define TX_CLEAN_BATCHSIZE (128/MAX_SKB_FRAGS)
+
 static int pasemi_mac_clean_tx(struct pasemi_mac *mac)
 {
-       int i;
-       struct pasemi_mac_buffer *info;
-       struct pas_dma_xct_descr *dp;
-       unsigned int start, count, limit;
+       int i, j;
+       unsigned int start, descr_count, buf_count, batch_limit;
+       unsigned int ring_limit;
        unsigned int total_count;
-       int flags;
-       struct sk_buff *skbs[32];
-       dma_addr_t dmas[32];
+       unsigned long flags;
+       struct sk_buff *skbs[TX_CLEAN_BATCHSIZE];
+       dma_addr_t dmas[TX_CLEAN_BATCHSIZE][MAX_SKB_FRAGS+1];
 
        total_count = 0;
+       batch_limit = TX_CLEAN_BATCHSIZE;
 restart:
        spin_lock_irqsave(&mac->tx->lock, flags);
 
        start = mac->tx->next_to_clean;
-       limit = min(mac->tx->next_to_fill, start+32);
+       ring_limit = mac->tx->next_to_fill;
 
-       count = 0;
+       /* Compensate for when fill has wrapped but clean has not */
+       if (start > ring_limit)
+               ring_limit += TX_RING_SIZE;
 
-       for (i = start; i < limit; i++) {
-               dp = &TX_DESC(mac, i);
+       buf_count = 0;
+       descr_count = 0;
 
-               if (unlikely(dp->mactx & XCT_MACTX_O))
+       for (i = start;
+            descr_count < batch_limit && i < ring_limit;
+            i += buf_count) {
+               u64 mactx = TX_RING(mac, i);
+               struct sk_buff *skb;
+
+               if ((mactx  & XCT_MACTX_E) ||
+                   (*mac->tx_status & PAS_STATUS_ERROR))
+                       pasemi_mac_tx_error(mac, mactx);
+
+               if (unlikely(mactx & XCT_MACTX_O))
                        /* Not yet transmitted */
                        break;
 
-               info = &TX_DESC_INFO(mac, i);
-               skbs[count] = info->skb;
-               dmas[count] = info->dma;
+               skb = TX_RING_INFO(mac, i+1).skb;
+               skbs[descr_count] = skb;
 
-               info->skb = NULL;
-               info->dma = 0;
-               dp->mactx = 0;
-               dp->ptr = 0;
+               buf_count = 2 + skb_shinfo(skb)->nr_frags;
+               for (j = 0; j <= skb_shinfo(skb)->nr_frags; j++)
+                       dmas[descr_count][j] = TX_RING_INFO(mac, i+1+j).dma;
 
-               count++;
+               TX_RING(mac, i) = 0;
+               TX_RING(mac, i+1) = 0;
+
+               /* Since we always fill with an even number of entries, make
+                * sure we skip any unused one at the end as well.
+                */
+               if (buf_count & 1)
+                       buf_count++;
+               descr_count++;
        }
-       mac->tx->next_to_clean += count;
+       mac->tx->next_to_clean = i & (TX_RING_SIZE-1);
+
        spin_unlock_irqrestore(&mac->tx->lock, flags);
        netif_wake_queue(mac->netdev);
 
-       for (i = 0; i < count; i++) {
-               pci_unmap_single(mac->dma_pdev, dmas[i],
-                                skbs[i]->len, PCI_DMA_TODEVICE);
-               dev_kfree_skb_irq(skbs[i]);
-       }
+       for (i = 0; i < descr_count; i++)
+               pasemi_mac_unmap_tx_skb(mac, skbs[i], dmas[i]);
 
-       total_count += count;
+       total_count += descr_count;
 
        /* If the batch was full, try to clean more */
-       if (count == 32)
+       if (descr_count == batch_limit)
                goto restart;
 
        return total_count;
@@ -617,9 +732,6 @@ static irqreturn_t pasemi_mac_rx_intr(int irq, void *data)
        if (!(*mac->rx_status & PAS_STATUS_CAUSE_M))
                return IRQ_NONE;
 
-       if (*mac->rx_status & PAS_STATUS_ERROR)
-               printk("rx_status reported error\n");
-
        /* Don't reset packet count so it won't fire again but clear
         * all others.
         */
@@ -791,11 +903,6 @@ static int pasemi_mac_open(struct net_device *dev)
 
        write_mac_reg(mac, PAS_MAC_CFG_TXP, flags);
 
-       flags = PAS_MAC_CFG_PCFG_S1 | PAS_MAC_CFG_PCFG_PE |
-               PAS_MAC_CFG_PCFG_PR | PAS_MAC_CFG_PCFG_CE;
-
-       flags |= PAS_MAC_CFG_PCFG_TSR_1G | PAS_MAC_CFG_PCFG_SPD_1G;
-
        write_iob_reg(mac, PAS_IOB_DMA_RXCH_CFG(mac->dma_rxch),
                           PAS_IOB_DMA_RXCH_CFG_CNTTH(0));
 
@@ -810,8 +917,6 @@ static int pasemi_mac_open(struct net_device *dev)
        write_iob_reg(mac, PAS_IOB_DMA_COM_TIMEOUTCFG,
                           PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(0xffffff));
 
-       write_mac_reg(mac, PAS_MAC_CFG_PCFG, flags);
-
        ret = pasemi_mac_setup_rx_resources(dev);
        if (ret)
                goto out_rx_resources;
@@ -826,18 +931,42 @@ static int pasemi_mac_open(struct net_device *dev)
 
        /* enable rx if */
        write_dma_reg(mac, PAS_DMA_RXINT_RCMDSTA(mac->dma_if),
-                          PAS_DMA_RXINT_RCMDSTA_EN);
+                          PAS_DMA_RXINT_RCMDSTA_EN |
+                          PAS_DMA_RXINT_RCMDSTA_DROPS_M |
+                          PAS_DMA_RXINT_RCMDSTA_BP |
+                          PAS_DMA_RXINT_RCMDSTA_OO |
+                          PAS_DMA_RXINT_RCMDSTA_BT);
 
        /* enable rx channel */
        write_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch),
                           PAS_DMA_RXCHAN_CCMDSTA_EN |
-                          PAS_DMA_RXCHAN_CCMDSTA_DU);
+                          PAS_DMA_RXCHAN_CCMDSTA_DU |
+                          PAS_DMA_RXCHAN_CCMDSTA_OD |
+                          PAS_DMA_RXCHAN_CCMDSTA_FD |
+                          PAS_DMA_RXCHAN_CCMDSTA_DT);
 
        /* enable tx channel */
        write_dma_reg(mac, PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch),
-                          PAS_DMA_TXCHAN_TCMDSTA_EN);
+                          PAS_DMA_TXCHAN_TCMDSTA_EN |
+                          PAS_DMA_TXCHAN_TCMDSTA_SZ |
+                          PAS_DMA_TXCHAN_TCMDSTA_DB |
+                          PAS_DMA_TXCHAN_TCMDSTA_DE |
+                          PAS_DMA_TXCHAN_TCMDSTA_DA);
+
+       pasemi_mac_replenish_rx_ring(dev, RX_RING_SIZE);
 
-       pasemi_mac_replenish_rx_ring(dev);
+       write_dma_reg(mac, PAS_DMA_RXCHAN_INCR(mac->dma_rxch), RX_RING_SIZE>>1);
+
+       flags = PAS_MAC_CFG_PCFG_S1 | PAS_MAC_CFG_PCFG_PE |
+               PAS_MAC_CFG_PCFG_PR | PAS_MAC_CFG_PCFG_CE;
+
+       if (mac->type == MAC_TYPE_GMAC)
+               flags |= PAS_MAC_CFG_PCFG_TSR_1G | PAS_MAC_CFG_PCFG_SPD_1G;
+       else
+               flags |= PAS_MAC_CFG_PCFG_TSR_10G | PAS_MAC_CFG_PCFG_SPD_10G;
+
+       /* Enable interface in MAC */
+       write_mac_reg(mac, PAS_MAC_CFG_PCFG, flags);
 
        ret = pasemi_mac_phy_init(dev);
        /* Some configs don't have PHYs (XAUI etc), so don't complain about
@@ -899,7 +1028,7 @@ out_rx_resources:
 static int pasemi_mac_close(struct net_device *dev)
 {
        struct pasemi_mac *mac = netdev_priv(dev);
-       unsigned int stat;
+       unsigned int sta;
        int retries;
 
        if (mac->phydev) {
@@ -910,6 +1039,26 @@ static int pasemi_mac_close(struct net_device *dev)
        netif_stop_queue(dev);
        napi_disable(&mac->napi);
 
+       sta = read_dma_reg(mac, PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
+       if (sta & (PAS_DMA_RXINT_RCMDSTA_BP |
+                     PAS_DMA_RXINT_RCMDSTA_OO |
+                     PAS_DMA_RXINT_RCMDSTA_BT))
+               printk(KERN_DEBUG "pasemi_mac: rcmdsta error: 0x%08x\n", sta);
+
+       sta = read_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch));
+       if (sta & (PAS_DMA_RXCHAN_CCMDSTA_DU |
+                    PAS_DMA_RXCHAN_CCMDSTA_OD |
+                    PAS_DMA_RXCHAN_CCMDSTA_FD |
+                    PAS_DMA_RXCHAN_CCMDSTA_DT))
+               printk(KERN_DEBUG "pasemi_mac: ccmdsta error: 0x%08x\n", sta);
+
+       sta = read_dma_reg(mac, PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch));
+       if (sta & (PAS_DMA_TXCHAN_TCMDSTA_SZ |
+                     PAS_DMA_TXCHAN_TCMDSTA_DB |
+                     PAS_DMA_TXCHAN_TCMDSTA_DE |
+                     PAS_DMA_TXCHAN_TCMDSTA_DA))
+               printk(KERN_DEBUG "pasemi_mac: tcmdsta error: 0x%08x\n", sta);
+
        /* Clean out any pending buffers */
        pasemi_mac_clean_tx(mac);
        pasemi_mac_clean_rx(mac, RX_RING_SIZE);
@@ -920,33 +1069,33 @@ static int pasemi_mac_close(struct net_device *dev)
        write_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch), PAS_DMA_RXCHAN_CCMDSTA_ST);
 
        for (retries = 0; retries < MAX_RETRIES; retries++) {
-               stat = read_dma_reg(mac, PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch));
-               if (!(stat & PAS_DMA_TXCHAN_TCMDSTA_ACT))
+               sta = read_dma_reg(mac, PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch));
+               if (!(sta & PAS_DMA_TXCHAN_TCMDSTA_ACT))
                        break;
                cond_resched();
        }
 
-       if (stat & PAS_DMA_TXCHAN_TCMDSTA_ACT)
+       if (sta & PAS_DMA_TXCHAN_TCMDSTA_ACT)
                dev_err(&mac->dma_pdev->dev, "Failed to stop tx channel\n");
 
        for (retries = 0; retries < MAX_RETRIES; retries++) {
-               stat = read_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch));
-               if (!(stat & PAS_DMA_RXCHAN_CCMDSTA_ACT))
+               sta = read_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch));
+               if (!(sta & PAS_DMA_RXCHAN_CCMDSTA_ACT))
                        break;
                cond_resched();
        }
 
-       if (stat & PAS_DMA_RXCHAN_CCMDSTA_ACT)
+       if (sta & PAS_DMA_RXCHAN_CCMDSTA_ACT)
                dev_err(&mac->dma_pdev->dev, "Failed to stop rx channel\n");
 
        for (retries = 0; retries < MAX_RETRIES; retries++) {
-               stat = read_dma_reg(mac, PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
-               if (!(stat & PAS_DMA_RXINT_RCMDSTA_ACT))
+               sta = read_dma_reg(mac, PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
+               if (!(sta & PAS_DMA_RXINT_RCMDSTA_ACT))
                        break;
                cond_resched();
        }
 
-       if (stat & PAS_DMA_RXINT_RCMDSTA_ACT)
+       if (sta & PAS_DMA_RXINT_RCMDSTA_ACT)
                dev_err(&mac->dma_pdev->dev, "Failed to stop rx interface\n");
 
        /* Then, disable the channel. This must be done separately from
@@ -971,11 +1120,11 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 {
        struct pasemi_mac *mac = netdev_priv(dev);
        struct pasemi_mac_txring *txring;
-       struct pasemi_mac_buffer *info;
-       struct pas_dma_xct_descr *dp;
-       u64 dflags, mactx, ptr;
-       dma_addr_t map;
-       int flags;
+       u64 dflags, mactx;
+       dma_addr_t map[MAX_SKB_FRAGS+1];
+       unsigned int map_size[MAX_SKB_FRAGS+1];
+       unsigned long flags;
+       int i, nfrags;
 
        dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_SS | XCT_MACTX_CRC_PAD;
 
@@ -996,54 +1145,78 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
                }
        }
 
-       map = pci_map_single(mac->dma_pdev, skb->data, skb->len, PCI_DMA_TODEVICE);
+       nfrags = skb_shinfo(skb)->nr_frags;
 
-       if (dma_mapping_error(map))
-               return NETDEV_TX_BUSY;
+       map[0] = pci_map_single(mac->dma_pdev, skb->data, skb_headlen(skb),
+                               PCI_DMA_TODEVICE);
+       map_size[0] = skb_headlen(skb);
+       if (dma_mapping_error(map[0]))
+               return NETDEV_TX_BUSY;  /* nothing mapped, nothing to undo */
+
+       for (i = 0; i < nfrags; i++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               map[i+1] = pci_map_page(mac->dma_pdev, frag->page,
+                                       frag->page_offset, frag->size,
+                                       PCI_DMA_TODEVICE);
+               map_size[i+1] = frag->size;
+               if (dma_mapping_error(map[i+1])) {
+                       nfrags = i;     /* only i fragments got mapped */
+                       goto out_err_nolock;
+               }
+       }
 
        mactx = dflags | XCT_MACTX_LLEN(skb->len);
-       ptr   = XCT_PTR_LEN(skb->len) | XCT_PTR_ADDR(map);
 
        txring = mac->tx;
 
        spin_lock_irqsave(&txring->lock, flags);
 
-       if (RING_AVAIL(txring) <= 1) {
-               spin_unlock_irqrestore(&txring->lock, flags);
-               pasemi_mac_clean_tx(mac);
-               pasemi_mac_restart_tx_intr(mac);
-               spin_lock_irqsave(&txring->lock, flags);
-
-               if (RING_AVAIL(txring) <= 1) {
-                       /* Still no room -- stop the queue and wait for tx
-                        * intr when there's room.
-                        */
-                       netif_stop_queue(dev);
-                       goto out_err;
-               }
+       /* Avoid stepping on the same cache line that the DMA controller
+        * is currently about to send, so leave at least 8 words available.
+        * Total free space needed is mactx + head ptr + nfrags ptrs + 8
+        */
+       if (RING_AVAIL(txring) < nfrags + 10) {
+               /* no room -- stop the queue and wait for tx intr */
+               netif_stop_queue(dev);
+               goto out_err;
        }
 
-       dp = &TX_DESC(mac, txring->next_to_fill);
-       info = &TX_DESC_INFO(mac, txring->next_to_fill);
+       TX_RING(mac, txring->next_to_fill) = mactx;
+       txring->next_to_fill++;
+       TX_RING_INFO(mac, txring->next_to_fill).skb = skb;
+       for (i = 0; i <= nfrags; i++) {
+               TX_RING(mac, txring->next_to_fill+i) =
+               XCT_PTR_LEN(map_size[i]) | XCT_PTR_ADDR(map[i]);
+               TX_RING_INFO(mac, txring->next_to_fill+i).dma = map[i];
+       }
 
-       dp->mactx = mactx;
-       dp->ptr   = ptr;
-       info->dma = map;
-       info->skb = skb;
+       /* We have to add an even number of 8-byte entries to the ring
+        * even if the last one is unused. That means always an odd number
+        * of pointers + one mactx descriptor.
+        */
+       if (nfrags & 1)
+               nfrags++;
+
+       txring->next_to_fill = (txring->next_to_fill + nfrags + 1) &
+                               (TX_RING_SIZE-1);
 
-       txring->next_to_fill++;
        dev->stats.tx_packets++;
        dev->stats.tx_bytes += skb->len;
 
        spin_unlock_irqrestore(&txring->lock, flags);
 
-       write_dma_reg(mac, PAS_DMA_TXCHAN_INCR(mac->dma_txch), 1);
+       write_dma_reg(mac, PAS_DMA_TXCHAN_INCR(mac->dma_txch), (nfrags+2) >> 1);
 
        return NETDEV_TX_OK;
 
 out_err:
        spin_unlock_irqrestore(&txring->lock, flags);
-       pci_unmap_single(mac->dma_pdev, map, skb->len, PCI_DMA_TODEVICE);
+out_err_nolock:
+       /* map[0] (head) through map[nfrags] hold live mappings here */
+       for (i = 0; i <= nfrags; i++)
+               pci_unmap_single(mac->dma_pdev, map[i], map_size[i],
+                                PCI_DMA_TODEVICE);
        return NETDEV_TX_BUSY;
 }
 
@@ -1179,7 +1352,7 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        netif_napi_add(dev, &mac->napi, pasemi_mac_poll, 64);
 
-       dev->features = NETIF_F_HW_CSUM | NETIF_F_LLTX;
+       dev->features = NETIF_F_HW_CSUM | NETIF_F_LLTX | NETIF_F_SG;
 
        /* These should come out of the device tree eventually */
        mac->dma_txch = index;
@@ -1236,7 +1409,7 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                dev_err(&mac->pdev->dev, "register_netdev failed with error %d\n",
                        err);
                goto out;
-       } else
+       } else if netif_msg_probe(mac)
                printk(KERN_INFO "%s: PA Semi %s: intf %d, txch %d, rxch %d, "
                       "hw addr %s\n",
                       dev->name, mac->type == MAC_TYPE_GMAC ? "GMAC" : "XAUI",