+ prev_tx->flaglen |= cpu_to_le32(tx_flags_extra);
+
+ /* save skb in this slot's context area */
+ prev_tx_ctx->skb = skb;
+
+ if (skb_is_gso(skb))
+ tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
+ else
+ tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
+ NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0;
+
+ spin_lock_irq(&np->lock);
+
+ /* set tx flags */
+ start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+ np->put_tx.orig = put_tx;
+
+ spin_unlock_irq(&np->lock);
+
+ dprintk(KERN_DEBUG "%s: nv_start_xmit: entries %d queued for transmission. tx_flags_extra: %x\n",
+ dev->name, entries, tx_flags_extra);
+ {
+ int j;
+ for (j=0; j<64; j++) {
+ if ((j%16) == 0)
+ dprintk("\n%03x:", j);
+ dprintk(" %02x", ((unsigned char*)skb->data)[j]);
+ }
+ dprintk("\n");
+ }
+
+ dev->trans_start = jiffies;
+ writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
+ return NETDEV_TX_OK;
+}
+
+static int nv_start_xmit_optimized(struct sk_buff *skb, struct net_device *dev)
+{
+ struct fe_priv *np = netdev_priv(dev);
+ u32 tx_flags = 0;
+ u32 tx_flags_extra;
+ unsigned int fragments = skb_shinfo(skb)->nr_frags;
+ unsigned int i;
+ u32 offset = 0;
+ u32 bcnt;
+ u32 size = skb->len-skb->data_len;
+ u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
+ u32 empty_slots;
+ struct ring_desc_ex* put_tx;
+ struct ring_desc_ex* start_tx;
+ struct ring_desc_ex* prev_tx;
+ struct nv_skb_map* prev_tx_ctx;
+
+ /* add fragments to entries count */
+ for (i = 0; i < fragments; i++) {
+ entries += (skb_shinfo(skb)->frags[i].size >> NV_TX2_TSO_MAX_SHIFT) +
+ ((skb_shinfo(skb)->frags[i].size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
+ }
+
+ empty_slots = nv_get_empty_tx_slots(np);
+ if (unlikely(empty_slots <= entries)) {
+ spin_lock_irq(&np->lock);
+ netif_stop_queue(dev);
+ np->tx_stop = 1;
+ spin_unlock_irq(&np->lock);
+ return NETDEV_TX_BUSY;
+ }
+
+ start_tx = put_tx = np->put_tx.ex;
+
+ /* setup the header buffer */
+ do {
+ prev_tx = put_tx;
+ prev_tx_ctx = np->put_tx_ctx;
+ bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
+ np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
+ PCI_DMA_TODEVICE);
+ np->put_tx_ctx->dma_len = bcnt;
+ put_tx->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
+ put_tx->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
+ put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
+
+ tx_flags = NV_TX2_VALID;
+ offset += bcnt;
+ size -= bcnt;
+ if (unlikely(put_tx++ == np->last_tx.ex))
+ put_tx = np->first_tx.ex;
+ if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
+ np->put_tx_ctx = np->first_tx_ctx;
+ } while (size);
+
+ /* setup the fragments */
+ for (i = 0; i < fragments; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ u32 size = frag->size;
+ offset = 0;
+
+ do {
+ prev_tx = put_tx;
+ prev_tx_ctx = np->put_tx_ctx;
+ bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
+ np->put_tx_ctx->dma = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt,
+ PCI_DMA_TODEVICE);
+ np->put_tx_ctx->dma_len = bcnt;
+ put_tx->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
+ put_tx->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
+ put_tx->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
+
+ offset += bcnt;
+ size -= bcnt;
+ if (unlikely(put_tx++ == np->last_tx.ex))
+ put_tx = np->first_tx.ex;
+ if (unlikely(np->put_tx_ctx++ == np->last_tx_ctx))
+ np->put_tx_ctx = np->first_tx_ctx;
+ } while (size);