drivers/atm/ambassador.c

   1 /*
   2   Madge Ambassador ATM Adapter driver.
   3   Copyright (C) 1995-1999  Madge Networks Ltd.
   4
   5   This program is free software; you can redistribute it and/or modify
   6   it under the terms of the GNU General Public License as published by
   7   the Free Software Foundation; either version 2 of the License, or
   8   (at your option) any later version.
   9
  10   This program is distributed in the hope that it will be useful,
  11   but WITHOUT ANY WARRANTY; without even the implied warranty of
  12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13   GNU General Public License for more details.
  14
  15   You should have received a copy of the GNU General Public License
  16   along with this program; if not, write to the Free Software
  17   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18
  19   The GNU GPL is contained in /usr/doc/copyright/GPL on a Debian
  20   system and in the file COPYING in the Linux kernel source.
  21 */
  22
  23 /* * dedicated to the memory of Graham Gordon 1971-1998 * */
  24
  25 #include <linux/module.h>
  26 #include <linux/types.h>
  27 #include <linux/pci.h>
  28 #include <linux/kernel.h>
  29 #include <linux/init.h>
  30 #include <linux/ioport.h>
  31 #include <linux/atmdev.h>
  32 #include <linux/delay.h>
  33 #include <linux/interrupt.h>
  34 #include <linux/poison.h>
  35 #include <linux/bitrev.h>
  36
  37 #include <asm/atomic.h>
  38 #include <asm/io.h>
  39 #include <asm/byteorder.h>
  40
  41 #include "ambassador.h"
  42
  43 #define maintainer_string "Giuliano Procida at Madge Networks <gprocida@madge.com>"
  44 #define description_string "Madge ATM Ambassador driver"
  45 #define version_string "1.2.4"
  46
  47 static inline void __init show_version (void) {
  48   printk ("%s version %s\n", description_string, version_string);
  49 }
  50
  51 /*
  52
  53   Theory of Operation
  54
  55   I Hardware, detection, initialisation and shutdown.
  56
  57   1. Supported Hardware
  58
  59   This driver is for the PCI ATMizer-based Ambassador card (except
  60   very early versions). It is not suitable for the similar EISA "TR7"
  61   card. Commercially, both cards are known as Collage Server ATM
  62   adapters.
  63
  64   The loader supports image transfer to the card, image start and a few
  65   other miscellaneous commands.
  66
  67   Only AAL5 is supported with vpi = 0 and vci in the range 0 to 1023.
  68
  69   The cards are big-endian.
  70
  71   2. Detection
  72
  73   Standard PCI stuff, the early cards are detected and rejected.
  74
  75   3. Initialisation
  76
  77   The cards are reset and the self-test results are checked. The
  78   microcode image is then transferred and started. This waits for a
  79   pointer to a descriptor containing details of the host-based queues
  80   and buffers and various parameters etc. Once they are processed
  81   normal operations may begin. The BIA is read using a microcode
  82   command.
  83
  84   4. Shutdown
  85
  86   This may be accomplished either by a card reset or via the microcode
  87   shutdown command. Further investigation required.
  88
  89   5. Persistent state
  90
  91   The card reset does not affect PCI configuration (good) or the
  92   contents of several other "shared run-time registers" (bad) which
  93   include doorbell and interrupt control as well as EEPROM and PCI
  94   control. The driver must be careful when modifying these registers
  95   not to touch bits it does not use and to undo any changes at exit.
  96
  97   II Driver software
  98
  99   0. Generalities
 100
 101   The adapter is quite intelligent (fast) and has a simple interface
 102   (few features). VPI is always zero, 1024 VCIs are supported. There
 103   is limited cell rate support. UBR channels can be capped and ABR
 104   (explicit rate, but not EFCI) is supported. There is no CBR or VBR
 105   support.
 106
 107   1. Driver <-> Adapter Communication
 108
 109   Apart from the basic loader commands, the driver communicates
 110   through three entities: the command queue (CQ), the transmit queue
 111   pair (TXQ) and the receive queue pairs (RXQ). These three entities
 112   are set up by the host and passed to the microcode just after it has
 113   been started.
 114
 115   All queues are host-based circular queues. They are contiguous and
 116   (due to hardware limitations) have some restrictions as to their
 117   locations in (bus) memory. They are of the "full means the same as
 118   empty so don't do that" variety since the adapter uses pointers
 119   internally.
 120
 121   The queue pairs work as follows: one queue is for supply to the
 122   adapter, items in it are pending and are owned by the adapter; the
 123   other is the queue for return from the adapter, items in it have
 124   been dealt with by the adapter. The host adds items to the supply
 125   (TX descriptors and free RX buffer descriptors) and removes items
 126   from the return (TX and RX completions). The adapter deals with out
 127   of order completions.
 128
 129   Interrupts (card to host) and the doorbell (host to card) are used
 130   for signalling.
 131
 132   1. CQ
 133
 134   This is to communicate "open VC", "close VC", "get stats" etc. to
 135   the adapter. At most one command is retired every millisecond by the
 136   card. There is no out of order completion or notification. The
 137   driver needs to check the return code of the command, waiting as
 138   appropriate.
 139
 140   2. TXQ
 141
 142   TX supply items are of variable length (scatter gather support) and
 143   so the queue items are (more or less) pointers to the real thing.
 144   Each TX supply item contains a unique, host-supplied handle (the skb
 145   bus address seems most sensible as this works for Alphas as well,
 146   there is no need to do any endian conversions on the handles).
 147
 148   TX return items consist of just the handles above.
 149
 150   3. RXQ (up to 4 of these with different lengths and buffer sizes)
 151
 152   RX supply items consist of a unique, host-supplied handle (the skb
 153   bus address again) and a pointer to the buffer data area.
 154
 155   RX return items consist of the handle above, the VC, length and a
 156   status word. This just screams "oh so easy" doesn't it?
 157
 158   Note on RX pool sizes:
 159
 160   Each pool should have enough buffers to handle a back-to-back stream
 161   of minimum sized frames on a single VC. For example:
 162
 163     frame spacing = 3us (about right)
 164
 165     delay = IRQ lat + RX handling + RX buffer replenish = 20 (us)  (a guess)
 166
 167     min number of buffers for one VC = 1 + delay/spacing (buffers)
 168
 169     delay/spacing = latency = (20+2)/3 = 7 (buffers)  (rounding up)
 170
 171   The 20us delay assumes that there is no need to sleep; if we need to
 172   sleep to get buffers we are going to drop frames anyway.
 173
 174   In fact, each pool should have enough buffers to support the
 175   simultaneous reassembly of a separate frame on each VC and cope with
 176   the case in which frames complete in round robin cell fashion on
 177   each VC.
 178
 179   Only one frame can complete at each cell arrival, so if "n" VCs are
 180   open, the worst case is to have them all complete frames together
 181   followed by all starting new frames together.
 182
 183     desired number of buffers = n + delay/spacing
 184
 185   These are the extreme requirements, however, they are "n+k" for some
 186   "k" so we have only the constant to choose. This is the argument
 187   rx_lats which currently defaults to 7.
 188
 189   Actually, "n ? n+k : 0" is better and this is what is implemented,
 190   subject to the limit given by the pool size.
 191
 192   4. Driver locking
 193
 194   Simple spinlocks are used around the TX and RX queue mechanisms.
 195   Anyone with a faster, working method is welcome to implement it.
 196
 197   The adapter command queue is protected with a spinlock. We always
 198   wait for commands to complete.
 199
 200   A more complex form of locking is used around parts of the VC open
 201   and close functions. There are three reasons for a lock: 1. we need
 202   to do atomic rate reservation and release (not used yet), 2. Opening
 203   sometimes involves two adapter commands which must not be separated
 204   by another command on the same VC, 3. the changes to RX pool size
 205   must be atomic. The lock needs to work over context switches, so we
 206   use a semaphore.
 207
 208   III Hardware Features and Microcode Bugs
 209
 210   1. Byte Ordering
 211
 212   *%^"$&%^$*&^"$(%^$#&^%$(&#%$*(&^#%!"!"!*!
 213
 214   2. Memory access
 215
 216   All structures that are not accessed using DMA must be 4-byte
 217   aligned (not a problem) and must not cross 4MB boundaries.
 218
 219   There is a DMA memory hole at E0000000-E00000FF (groan).
 220
 221   TX fragments (DMA read) must not cross 4MB boundaries (would be 16MB
 222   but for a hardware bug).
 223
 224   RX buffers (DMA write) must not cross 16MB boundaries and must
 225   include spare trailing bytes up to the next 4-byte boundary; they
 226   will be written with rubbish.
 227
 228   The PLX likes to prefetch; if reading up to 4 u32 past the end of
 229   each TX fragment is not a problem, then TX can be made to go a
 230   little faster by passing a flag at init that disables a prefetch
 231   workaround. We do not pass this flag. (new microcode only)
 232
 233   Now we:
 234   . Note that alloc_skb rounds up size to a 16byte boundary.
 235   . Ensure all areas do not traverse 4MB boundaries.
 236   . Ensure all areas do not start at a E00000xx bus address.
 237   (I cannot be certain, but this may always hold with Linux)
 238   . Make all failures cause a loud message.
 239   . Discard non-conforming SKBs (causes TX failure or RX fill delay).
 240   . Discard non-conforming TX fragment descriptors (the TX fails).
 241   In the future we could:
 242   . Allow RX areas that traverse 4MB (but not 16MB) boundaries.
 243   . Segment TX areas into some/more fragments, when necessary.
 244   . Relax checks for non-DMA items (ignore hole).
 245   . Give scatter-gather (iovec) requirements using ???. (?)
 246
 247   3. VC close is broken (only for new microcode)
 248
 249   The VC close adapter microcode command fails to do anything if any
 250   frames have been received on the VC but none have been transmitted.
 251   Frames continue to be reassembled and passed (with IRQ) to the
 252   driver.
 253
 254   IV To Do List
 255
 256   . Fix bugs!
 257
 258   . Timer code may be broken.
 259
 260   . Deal with buggy VC close (somehow) in microcode 12.
 261
 262   . Handle interrupted and/or non-blocking writes - is this a job for
 263     the protocol layer?
 264
 265   . Add code to break up TX fragments when they span 4MB boundaries.
 266
 267   . Add SUNI phy layer (need to know where SUNI lives on card).
 268
 269   . Implement a tx_alloc fn to (a) satisfy TX alignment etc. and (b)
 270     leave extra headroom space for Ambassador TX descriptors.
 271
 272   . Understand these elements of struct atm_vcc: recvq (proto?),
 273     sleep, callback, listenq, backlog_quota, reply and user_back.
 274
 275   . Adjust TX/RX skb allocation to favour IP with LANE/CLIP (configurable).
 276
 277   . Impose a TX-pending limit (2?) on each VC, help avoid TX q overflow.
 278
 279   . Decide whether RX buffer recycling is or can be made completely safe;
 280     turn it back on. It looks like Werner is going to axe this.
 281
 282   . Implement QoS changes on open VCs (involves extracting parts of VC open
 283     and close into separate functions and using them to make changes).
 284
 285   . Hack on command queue so that someone can issue multiple commands and wait
 286     on the last one (OR only "no-op" or "wait" commands are waited for).
 287
 288   . Eliminate need for while-schedule around do_command.
 289
 290 */
 291
 292 /********** microcode **********/
 293
 /*
   Microcode image selection.

   UCODE(x) expands to the quoted file name "atmsar12.x" when
   AMB_NEW_MICROCODE is defined and to "atmsar11.x" otherwise, so that
   it can be used as the operand of #include below.
 */
 #ifdef AMB_NEW_MICROCODE
 #define UCODE(x) UCODE2(atmsar12.x)
 #else
 #define UCODE(x) UCODE2(atmsar11.x)
 #endif
 /* turn the (already substituted) file name into a string literal */
 #define UCODE2(x) #x

 /* single u32 taken from the ".start" file
    (presumably the microcode start address - confirm against the loader) */
 static u32 __devinitdata ucode_start =
 #include UCODE(start)
 ;

 /* entries taken from the ".regions" file, followed by an all-zero entry
    (presumably the end-of-list marker - confirm against the loader) */
 static region __devinitdata ucode_regions[] = {
 #include UCODE(regions)
   { 0, 0 }
 };

 /* words taken from the ".data" file, followed by one 0xdeadbeef word */
 static u32 __devinitdata ucode_data[] = {
 #include UCODE(data)
   0xdeadbeef
 };
 314
 315 static void do_housekeeping (unsigned long arg);
 316 /********** globals **********/
 317
 318 static unsigned short debug = 0;
 319 static unsigned int cmds = 8;
 320 static unsigned int txs = 32;
 321 static unsigned int rxs[NUM_RX_POOLS] = { 64, 64, 64, 64 };
 322 static unsigned int rxs_bs[NUM_RX_POOLS] = { 4080, 12240, 36720, 65535 };
 323 static unsigned int rx_lats = 7;
 324 static unsigned char pci_lat = 0;
 325
 326 static const unsigned long onegigmask = -1 << 30;
 327
 328 /********** access to adapter **********/
 329
 330 static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) {
 331   PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data);
 332 #ifdef AMB_MMIO
 333   dev->membase[addr / sizeof(u32)] = data;
 334 #else
 335   outl (data, dev->iobase + addr);
 336 #endif
 337 }
 338
 339 static inline u32 rd_plain (const amb_dev * dev, size_t addr) {
 340 #ifdef AMB_MMIO
 341   u32 data = dev->membase[addr / sizeof(u32)];
 342 #else
 343   u32 data = inl (dev->iobase + addr);
 344 #endif
 345   PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data);
 346   return data;
 347 }
 348
 349 static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) {
 350   __be32 be = cpu_to_be32 (data);
 351   PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be);
 352 #ifdef AMB_MMIO
 353   dev->membase[addr / sizeof(u32)] = be;
 354 #else
 355   outl (be, dev->iobase + addr);
 356 #endif
 357 }
 358
 359 static inline u32 rd_mem (const amb_dev * dev, size_t addr) {
 360 #ifdef AMB_MMIO
 361   __be32 be = dev->membase[addr / sizeof(u32)];
 362 #else
 363   __be32 be = inl (dev->iobase + addr);
 364 #endif
 365   u32 data = be32_to_cpu (be);
 366   PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be);
 367   return data;
 368 }
 369
 370 /********** dump routines **********/
 371
 372 static inline void dump_registers (const amb_dev * dev) {
 373 #ifdef DEBUG_AMBASSADOR
 374   if (debug & DBG_REGS) {
 375     size_t i;
 376     PRINTD (DBG_REGS, "reading PLX control: ");
 377     for (i = 0x00; i < 0x30; i += sizeof(u32))
 378       rd_mem (dev, i);
 379     PRINTD (DBG_REGS, "reading mailboxes: ");
 380     for (i = 0x40; i < 0x60; i += sizeof(u32))
 381       rd_mem (dev, i);
 382     PRINTD (DBG_REGS, "reading doorb irqev irqen reset:");
 383     for (i = 0x60; i < 0x70; i += sizeof(u32))
 384       rd_mem (dev, i);
 385   }
 386 #else
 387   (void) dev;
 388 #endif
 389   return;
 390 }
 391
 392 static inline void dump_loader_block (volatile loader_block * lb) {
 393 #ifdef DEBUG_AMBASSADOR
 394   unsigned int i;
 395   PRINTDB (DBG_LOAD, "lb @ %p; res: %d, cmd: %d, pay:",
 396            lb, be32_to_cpu (lb->result), be32_to_cpu (lb->command));
 397   for (i = 0; i < MAX_COMMAND_DATA; ++i)
 398     PRINTDM (DBG_LOAD, " %08x", be32_to_cpu (lb->payload.data[i]));
 399   PRINTDE (DBG_LOAD, ", vld: %08x", be32_to_cpu (lb->valid));
 400 #else
 401   (void) lb;
 402 #endif
 403   return;
 404 }
 405
 406 static inline void dump_command (command * cmd) {
 407 #ifdef DEBUG_AMBASSADOR
 408   unsigned int i;
 409   PRINTDB (DBG_CMD, "cmd @ %p, req: %08x, pars:",
 410            cmd, /*be32_to_cpu*/ (cmd->request));
 411   for (i = 0; i < 3; ++i)
 412     PRINTDM (DBG_CMD, " %08x", /*be32_to_cpu*/ (cmd->args.par[i]));
 413   PRINTDE (DBG_CMD, "");
 414 #else
 415   (void) cmd;
 416 #endif
 417   return;
 418 }
 419
 /*
   Debug aid: when built with DEBUG_AMBASSADOR, print the prefix and VC
   number followed by a hex dump of at most the first 256 bytes of the
   skb's data. Does nothing in non-debug builds.
 */
 static inline void dump_skb (char * prefix, unsigned int vc, struct sk_buff * skb)
 {
 #ifndef DEBUG_AMBASSADOR
   (void) prefix;
   (void) vc;
   (void) skb;
 #else
   const unsigned char * bytes = skb->data;
   // never dump more than 256 bytes of a frame
   const unsigned int shown = skb->len < 256 ? skb->len : 256;
   unsigned int offset;

   PRINTDB (DBG_DATA, "%s(%u) ", prefix, vc);
   for (offset = 0; offset < shown; ++offset)
     PRINTDM (DBG_DATA, "%02x ", bytes[offset]);
   PRINTDE (DBG_DATA,"");
 #endif
 }
 435
 436 /********** check memory areas for use by Ambassador **********/
 437
 438 /* see limitations under Hardware Features */
 439
 440 static inline int check_area (void * start, size_t length) {
 441   // assumes length > 0
 442   const u32 fourmegmask = -1 << 22;
 443   const u32 twofivesixmask = -1 << 8;
 444   const u32 starthole = 0xE0000000;
 445   u32 startaddress = virt_to_bus (start);
 446   u32 lastaddress = startaddress+length-1;
 447   if ((startaddress ^ lastaddress) & fourmegmask ||
 448       (startaddress & twofivesixmask) == starthole) {
 449     PRINTK (KERN_ERR, "check_area failure: [%x,%x] - mail maintainer!",
 450             startaddress, lastaddress);
 451     return -1;
 452   } else {
 453     return 0;
 454   }
 455 }
 456
 457 /********** free an skb (as per ATM device driver documentation) **********/
 458
 459 static inline void amb_kfree_skb (struct sk_buff * skb) {
 460   if (ATM_SKB(skb)->vcc->pop) {
 461     ATM_SKB(skb)->vcc->pop (ATM_SKB(skb)->vcc, skb);
 462   } else {
 463     dev_kfree_skb_any (skb);
 464   }
 465 }
 466
 467 /********** TX completion **********/
 468
 469 static inline void tx_complete (amb_dev * dev, tx_out * tx) {
 470   tx_simple * tx_descr = bus_to_virt (tx->handle);
 471   struct sk_buff * skb = tx_descr->skb;
 472
 473   PRINTD (DBG_FLOW|DBG_TX, "tx_complete %p %p", dev, tx);
 474
 475   // VC layer stats
 476   atomic_inc(&ATM_SKB(skb)->vcc->stats->tx);
 477
 478   // free the descriptor
 479   kfree (tx_descr);
 480
 481   // free the skb
 482   amb_kfree_skb (skb);
 483
 484   dev->stats.tx_ok++;
 485   return;
 486 }
 487
 488 /********** RX completion **********/
 489
 490 static void rx_complete (amb_dev * dev, rx_out * rx) {
 491   struct sk_buff * skb = bus_to_virt (rx->handle);
 492   u16 vc = be16_to_cpu (rx->vc);
 493   // unused: u16 lec_id = be16_to_cpu (rx->lec_id);
 494   u16 status = be16_to_cpu (rx->status);
 495   u16 rx_len = be16_to_cpu (rx->length);
 496
 497   PRINTD (DBG_FLOW|DBG_RX, "rx_complete %p %p (len=%hu)", dev, rx, rx_len);
 498
 499   // XXX move this in and add to VC stats ???
 500   if (!status) {
 501     struct atm_vcc * atm_vcc = dev->rxer[vc];
 502     dev->stats.rx.ok++;
 503
 504     if (atm_vcc) {
 505
 506       if (rx_len <= atm_vcc->qos.rxtp.max_sdu) {
 507
 508         if (atm_charge (atm_vcc, skb->truesize)) {
 509
 510           // prepare socket buffer
 511           ATM_SKB(skb)->vcc = atm_vcc;
 512           skb_put (skb, rx_len);
 513
 514           dump_skb ("<<<", vc, skb);
 515
 516           // VC layer stats
 517           atomic_inc(&atm_vcc->stats->rx);
 518           __net_timestamp(skb);
 519           // end of our responsability
 520           atm_vcc->push (atm_vcc, skb);
 521           return;
 522
 523         } else {
 524           // someone fix this (message), please!
 525           PRINTD (DBG_INFO|DBG_RX, "dropped thanks to atm_charge (vc %hu, truesize %u)", vc, skb->truesize);
 526           // drop stats incremented in atm_charge
 527         }
 528
 529       } else {
 530         PRINTK (KERN_INFO, "dropped over-size frame");
 531         // should we count this?
 532         atomic_inc(&atm_vcc->stats->rx_drop);
 533       }
 534
 535     } else {
 536       PRINTD (DBG_WARN|DBG_RX, "got frame but RX closed for channel %hu", vc);
 537       // this is an adapter bug, only in new version of microcode
 538     }
 539
 540   } else {
 541     dev->stats.rx.error++;
 542     if (status & CRC_ERR)
 543       dev->stats.rx.badcrc++;
 544     if (status & LEN_ERR)
 545       dev->stats.rx.toolong++;
 546     if (status & ABORT_ERR)
 547       dev->stats.rx.aborted++;
 548     if (status & UNUSED_ERR)
 549       dev->stats.rx.unused++;
 550   }
 551
 552   dev_kfree_skb_any (skb);
 553   return;
 554 }
 555
 556 /*
 557
 558   Note on queue handling.
 559
 560   Here "give" and "take" refer to queue entries and a queue (pair)
 561   rather than frames to or from the host or adapter. Empty frame
 562   buffers are given to the RX queue pair and returned unused or
 563   containing RX frames. TX frames (well, pointers to TX fragment
 564   lists) are given to the TX queue pair, completions are returned.
 565
 566 */
 567
 568 /********** command queue **********/
 569
 570 // I really don't like this, but it's the best I can do at the moment
 571
 572 // also, the callers are responsible for byte order as the microcode
 573 // sometimes does 16-bit accesses (yuk yuk yuk)
 574
 575 static int command_do (amb_dev * dev, command * cmd) {
 576   amb_cq * cq = &dev->cq;
 577   volatile amb_cq_ptrs * ptrs = &cq->ptrs;
 578   command * my_slot;
 579
 580   PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
 581
 582   if (test_bit (dead, &dev->flags))
 583     return 0;
 584
 585   spin_lock (&cq->lock);
 586
 587   // if not full...
 588   if (cq->pending < cq->maximum) {
 589     // remember my slot for later
 590     my_slot = ptrs->in;
 591     PRINTD (DBG_CMD, "command in slot %p", my_slot);
 592
 593     dump_command (cmd);
 594
 595     // copy command in
 596     *ptrs->in = *cmd;
 597     cq->pending++;
 598     ptrs->in = NEXTQ (ptrs->in, ptrs->start, ptrs->limit);
 599
 600     // mail the command
 601     wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
 602
 603     if (cq->pending > cq->high)
 604       cq->high = cq->pending;
 605     spin_unlock (&cq->lock);
 606
 607     // these comments were in a while-loop before, msleep removes the loop
 608     // go to sleep
 609     // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
 610     msleep(cq->pending);
 611
 612     // wait for my slot to be reached (all waiters are here or above, until...)
 613     while (ptrs->out != my_slot) {
 614       PRINTD (DBG_CMD, "wait: command slot (now at %p)", ptrs->out);
 615       set_current_state(TASK_UNINTERRUPTIBLE);
 616       schedule();
 617     }
 618
 619     // wait on my slot (... one gets to its slot, and... )
 620     while (ptrs->out->request != cpu_to_be32 (SRB_COMPLETE)) {
 621       PRINTD (DBG_CMD, "wait: command slot completion");
 622       set_current_state(TASK_UNINTERRUPTIBLE);
 623       schedule();
 624     }
 625
 626     PRINTD (DBG_CMD, "command complete");
 627     // update queue (... moves the queue along to the next slot)
 628     spin_lock (&cq->lock);
 629     cq->pending--;
 630     // copy command out
 631     *cmd = *ptrs->out;
 632     ptrs->out = NEXTQ (ptrs->out, ptrs->start, ptrs->limit);
 633     spin_unlock (&cq->lock);
 634
 635     return 0;
 636   } else {
 637     cq->filled++;
 638     spin_unlock (&cq->lock);
 639     return -EAGAIN;
 640   }
 641
 642 }
 643
 644 /********** TX queue pair **********/
 645
 646 static inline int tx_give (amb_dev * dev, tx_in * tx) {
 647   amb_txq * txq = &dev->txq;
 648   unsigned long flags;
 649
 650   PRINTD (DBG_FLOW|DBG_TX, "tx_give %p", dev);
 651
 652   if (test_bit (dead, &dev->flags))
 653     return 0;
 654
 655   spin_lock_irqsave (&txq->lock, flags);
 656
 657   if (txq->pending < txq->maximum) {
 658     PRINTD (DBG_TX, "TX in slot %p", txq->in.ptr);
 659
 660     *txq->in.ptr = *tx;
 661     txq->pending++;
 662     txq->in.ptr = NEXTQ (txq->in.ptr, txq->in.start, txq->in.limit);
 663     // hand over the TX and ring the bell
 664     wr_mem (dev, offsetof(amb_mem, mb.adapter.tx_address), virt_to_bus (txq->in.ptr));
 665     wr_mem (dev, offsetof(amb_mem, doorbell), TX_FRAME);
 666
 667     if (txq->pending > txq->high)
 668       txq->high = txq->pending;
 669     spin_unlock_irqrestore (&txq->lock, flags);
 670     return 0;
 671   } else {
 672     txq->filled++;
 673     spin_unlock_irqrestore (&txq->lock, flags);
 674     return -EAGAIN;
 675   }
 676 }
 677
 678 static inline int tx_take (amb_dev * dev) {
 679   amb_txq * txq = &dev->txq;
 680   unsigned long flags;
 681
 682   PRINTD (DBG_FLOW|DBG_TX, "tx_take %p", dev);
 683
 684   spin_lock_irqsave (&txq->lock, flags);
 685
 686   if (txq->pending && txq->out.ptr->handle) {
 687     // deal with TX completion
 688     tx_complete (dev, txq->out.ptr);
 689     // mark unused again
 690     txq->out.ptr->handle = 0;
 691     // remove item
 692     txq->pending--;
 693     txq->out.ptr = NEXTQ (txq->out.ptr, txq->out.start, txq->out.limit);
 694
 695     spin_unlock_irqrestore (&txq->lock, flags);
 696     return 0;
 697   } else {
 698
 699     spin_unlock_irqrestore (&txq->lock, flags);
 700     return -1;
 701   }
 702 }
 703
 704 /********** RX queue pairs **********/
 705
 706 static inline int rx_give (amb_dev * dev, rx_in * rx, unsigned char pool) {
 707   amb_rxq * rxq = &dev->rxq[pool];
 708   unsigned long flags;
 709
 710   PRINTD (DBG_FLOW|DBG_RX, "rx_give %p[%hu]", dev, pool);
 711
 712   spin_lock_irqsave (&rxq->lock, flags);
 713
 714   if (rxq->pending < rxq->maximum) {
 715     PRINTD (DBG_RX, "RX in slot %p", rxq->in.ptr);
 716
 717     *rxq->in.ptr = *rx;
 718     rxq->pending++;
 719     rxq->in.ptr = NEXTQ (rxq->in.ptr, rxq->in.start, rxq->in.limit);
 720     // hand over the RX buffer
 721     wr_mem (dev, offsetof(amb_mem, mb.adapter.rx_address[pool]), virt_to_bus (rxq->in.ptr));
 722
 723     spin_unlock_irqrestore (&rxq->lock, flags);
 724     return 0;
 725   } else {
 726     spin_unlock_irqrestore (&rxq->lock, flags);
 727     return -1;
 728   }
 729 }
 730
 731 static inline int rx_take (amb_dev * dev, unsigned char pool) {
 732   amb_rxq * rxq = &dev->rxq[pool];
 733   unsigned long flags;
 734
 735   PRINTD (DBG_FLOW|DBG_RX, "rx_take %p[%hu]", dev, pool);
 736
 737   spin_lock_irqsave (&rxq->lock, flags);
 738
 739   if (rxq->pending && (rxq->out.ptr->status || rxq->out.ptr->length)) {
 740     // deal with RX completion
 741     rx_complete (dev, rxq->out.ptr);
 742     // mark unused again
 743     rxq->out.ptr->status = 0;
 744     rxq->out.ptr->length = 0;
 745     // remove item
 746     rxq->pending--;
 747     rxq->out.ptr = NEXTQ (rxq->out.ptr, rxq->out.start, rxq->out.limit);
 748
 749     if (rxq->pending < rxq->low)
 750       rxq->low = rxq->pending;
 751     spin_unlock_irqrestore (&rxq->lock, flags);
 752     return 0;
 753   } else {
 754     if (!rxq->pending && rxq->buffers_wanted)
 755       rxq->emptied++;
 756     spin_unlock_irqrestore (&rxq->lock, flags);
 757     return -1;
 758   }
 759 }
 760
 761 /********** RX Pool handling **********/
 762
 763 /* pre: buffers_wanted = 0, post: pending = 0 */
 764 static inline void drain_rx_pool (amb_dev * dev, unsigned char pool) {
 765   amb_rxq * rxq = &dev->rxq[pool];
 766
 767   PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pool %p %hu", dev, pool);
 768
 769   if (test_bit (dead, &dev->flags))
 770     return;
 771
 772   /* we are not quite like the fill pool routines as we cannot just
 773      remove one buffer, we have to remove all of them, but we might as
 774      well pretend... */
 775   if (rxq->pending > rxq->buffers_wanted) {
 776     command cmd;
 777     cmd.request = cpu_to_be32 (SRB_FLUSH_BUFFER_Q);
 778     cmd.args.flush.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
 779     while (command_do (dev, &cmd))
 780       schedule();
 781     /* the pool may also be emptied via the interrupt handler */
 782     while (rxq->pending > rxq->buffers_wanted)
 783       if (rx_take (dev, pool))
 784         schedule();
 785   }
 786
 787   return;
 788 }
 789
 790 static void drain_rx_pools (amb_dev * dev) {
 791   unsigned char pool;
 792
 793   PRINTD (DBG_FLOW|DBG_POOL, "drain_rx_pools %p", dev);
 794
 795   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 796     drain_rx_pool (dev, pool);
 797 }
 798
 799 static inline void fill_rx_pool (amb_dev * dev, unsigned char pool,
 800                                  gfp_t priority)
 801 {
 802   rx_in rx;
 803   amb_rxq * rxq;
 804
 805   PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pool %p %hu %x", dev, pool, priority);
 806
 807   if (test_bit (dead, &dev->flags))
 808     return;
 809
 810   rxq = &dev->rxq[pool];
 811   while (rxq->pending < rxq->maximum && rxq->pending < rxq->buffers_wanted) {
 812
 813     struct sk_buff * skb = alloc_skb (rxq->buffer_size, priority);
 814     if (!skb) {
 815       PRINTD (DBG_SKB|DBG_POOL, "failed to allocate skb for RX pool %hu", pool);
 816       return;
 817     }
 818     if (check_area (skb->data, skb->truesize)) {
 819       dev_kfree_skb_any (skb);
 820       return;
 821     }
 822     // cast needed as there is no %? for pointer differences
 823     PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
 824             skb, skb->head, (long) (skb->end - skb->head));
 825     rx.handle = virt_to_bus (skb);
 826     rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
 827     if (rx_give (dev, &rx, pool))
 828       dev_kfree_skb_any (skb);
 829
 830   }
 831
 832   return;
 833 }
 834
 835 // top up all RX pools (can also be called as a bottom half)
 836 static void fill_rx_pools (amb_dev * dev) {
 837   unsigned char pool;
 838
 839   PRINTD (DBG_FLOW|DBG_POOL, "fill_rx_pools %p", dev);
 840
 841   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 842     fill_rx_pool (dev, pool, GFP_ATOMIC);
 843
 844   return;
 845 }
 846
 847 /********** enable host interrupts **********/
 848
 849 static inline void interrupts_on (amb_dev * dev) {
 850   wr_plain (dev, offsetof(amb_mem, interrupt_control),
 851             rd_plain (dev, offsetof(amb_mem, interrupt_control))
 852             | AMB_INTERRUPT_BITS);
 853 }
 854
 855 /********** disable host interrupts **********/
 856
 857 static inline void interrupts_off (amb_dev * dev) {
 858   wr_plain (dev, offsetof(amb_mem, interrupt_control),
 859             rd_plain (dev, offsetof(amb_mem, interrupt_control))
 860             &~ AMB_INTERRUPT_BITS);
 861 }
 862
 863 /********** interrupt handling **********/
 864
 865 static irqreturn_t interrupt_handler(int irq, void *dev_id) {
 866   amb_dev * dev = dev_id;
 867
 868   PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler: %p", dev_id);
 869
 870   {
 871     u32 interrupt = rd_plain (dev, offsetof(amb_mem, interrupt));
 872
 873     // for us or someone else sharing the same interrupt
 874     if (!interrupt) {
 875       PRINTD (DBG_IRQ, "irq not for me: %d", irq);
 876       return IRQ_NONE;
 877     }
 878
 879     // definitely for us
 880     PRINTD (DBG_IRQ, "FYI: interrupt was %08x", interrupt);
 881     wr_plain (dev, offsetof(amb_mem, interrupt), -1);
 882   }
 883
 884   {
 885     unsigned int irq_work = 0;
 886     unsigned char pool;
 887     for (pool = 0; pool < NUM_RX_POOLS; ++pool)
 888       while (!rx_take (dev, pool))
 889         ++irq_work;
 890     while (!tx_take (dev))
 891       ++irq_work;
 892
 893     if (irq_work) {
 894 #ifdef FILL_RX_POOLS_IN_BH
 895       schedule_work (&dev->bh);
 896 #else
 897       fill_rx_pools (dev);
 898 #endif
 899
 900       PRINTD (DBG_IRQ, "work done: %u", irq_work);
 901     } else {
 902       PRINTD (DBG_IRQ|DBG_WARN, "no work done");
 903     }
 904   }
 905
 906   PRINTD (DBG_IRQ|DBG_FLOW, "interrupt_handler done: %p", dev_id);
 907   return IRQ_HANDLED;
 908 }
 909
 910 /********** make rate (not quite as much fun as Horizon) **********/
 911
 912 static int make_rate (unsigned int rate, rounding r,
 913                       u16 * bits, unsigned int * actual) {
 914   unsigned char exp = -1; // hush gcc
 915   unsigned int man = -1;  // hush gcc
 916
 917   PRINTD (DBG_FLOW|DBG_QOS, "make_rate %u", rate);
 918
 919   // rates in cells per second, ITU format (nasty 16-bit floating-point)
 920   // given 5-bit e and 9-bit m:
 921   // rate = EITHER (1+m/2^9)*2^e    OR 0
 922   // bits = EITHER 1<<14 | e<<9 | m OR 0
 923   // (bit 15 is "reserved", bit 14 "non-zero")
 924   // smallest rate is 0 (special representation)
 925   // largest rate is (1+511/512)*2^31 = 4290772992 (< 2^32-1)
 926   // smallest non-zero rate is (1+0/512)*2^0 = 1 (> 0)
 927   // simple algorithm:
 928   // find position of top bit, this gives e
 929   // remove top bit and shift (rounding if feeling clever) by 9-e
 930
 931   // ucode bug: please don't set bit 14! so 0 rate not representable
 932
 933   if (rate > 0xffc00000U) {
 934     // larger than largest representable rate
 935
 936     if (r == round_up) {
 937         return -EINVAL;
 938     } else {
 939       exp = 31;
 940       man = 511;
 941     }
 942
 943   } else if (rate) {
 944     // representable rate
 945
 946     exp = 31;
 947     man = rate;
 948
 949     // invariant: rate = man*2^(exp-31)
 950     while (!(man & (1<<31))) {
 951       exp = exp - 1;
 952       man = man<<1;
 953     }
 954
 955     // man has top bit set
 956     // rate = (2^31+(man-2^31))*2^(exp-31)
 957     // rate = (1+(man-2^31)/2^31)*2^exp
 958     man = man<<1;
 959     man &= 0xffffffffU; // a nop on 32-bit systems
 960     // rate = (1+man/2^32)*2^exp
 961
 962     // exp is in the range 0 to 31, man is in the range 0 to 2^32-1
 963     // time to lose significance... we want m in the range 0 to 2^9-1
 964     // rounding presents a minor problem... we first decide which way
 965     // we are rounding (based on given rounding direction and possibly
 966     // the bits of the mantissa that are to be discarded).
 967
 968     switch (r) {
 969       case round_down: {
 970         // just truncate
 971         man = man>>(32-9);
 972         break;
 973       }
 974       case round_up: {
 975         // check all bits that we are discarding
 976         if (man & (~0U>>9)) {
 977           man = (man>>(32-9)) + 1;
 978           if (man == (1<<9)) {
 979             // no need to check for round up outside of range
 980             man = 0;
 981             exp += 1;
 982           }
 983         } else {
 984           man = (man>>(32-9));
 985         }
 986         break;
 987       }
 988       case round_nearest: {
 989         // check msb that we are discarding
 990         if (man & (1<<(32-9-1))) {
 991           man = (man>>(32-9)) + 1;
 992           if (man == (1<<9)) {
 993             // no need to check for round up outside of range
 994             man = 0;
 995             exp += 1;
 996           }
 997         } else {
 998           man = (man>>(32-9));
 999         }
1000         break;
1001       }
1002     }
1003
1004   } else {
1005     // zero rate - not representable
1006
1007     if (r == round_down) {
1008       return -EINVAL;
1009     } else {
1010       exp = 0;
1011       man = 0;
1012     }
1013
1014   }
1015
1016   PRINTD (DBG_QOS, "rate: man=%u, exp=%hu", man, exp);
1017
1018   if (bits)
1019     *bits = /* (1<<14) | */ (exp<<9) | man;
1020
1021   if (actual)
1022     *actual = (exp >= 9)
1023       ? (1 << exp) + (man << (exp-9))
1024       : (1 << exp) + ((man + (1<<(9-exp-1))) >> (9-exp));
1025
1026   return 0;
1027 }
1028
1029 /********** Linux ATM Operations **********/
1030
1031 // some are not yet implemented while others do not make sense for
1032 // this device
1033
1034 /********** Open a VC **********/
1035
1036 static int amb_open (struct atm_vcc * atm_vcc)
1037 {
1038   int error;
1039
1040   struct atm_qos * qos;
1041   struct atm_trafprm * txtp;
1042   struct atm_trafprm * rxtp;
1043   u16 tx_rate_bits;
1044   u16 tx_vc_bits = -1; // hush gcc
1045   u16 tx_frame_bits = -1; // hush gcc
1046
1047   amb_dev * dev = AMB_DEV(atm_vcc->dev);
1048   amb_vcc * vcc;
1049   unsigned char pool = -1; // hush gcc
1050   short vpi = atm_vcc->vpi;
1051   int vci = atm_vcc->vci;
1052
1053   PRINTD (DBG_FLOW|DBG_VCC, "amb_open %x %x", vpi, vci);
1054
1055 #ifdef ATM_VPI_UNSPEC
1056   // UNSPEC is deprecated, remove this code eventually
1057   if (vpi == ATM_VPI_UNSPEC || vci == ATM_VCI_UNSPEC) {
1058     PRINTK (KERN_WARNING, "rejecting open with unspecified VPI/VCI (deprecated)");
1059     return -EINVAL;
1060   }
1061 #endif
1062
1063   if (!(0 <= vpi && vpi < (1<<NUM_VPI_BITS) &&
1064         0 <= vci && vci < (1<<NUM_VCI_BITS))) {
1065     PRINTD (DBG_WARN|DBG_VCC, "VPI/VCI out of range: %hd/%d", vpi, vci);
1066     return -EINVAL;
1067   }
1068
1069   qos = &atm_vcc->qos;
1070
1071   if (qos->aal != ATM_AAL5) {
1072     PRINTD (DBG_QOS, "AAL not supported");
1073     return -EINVAL;
1074   }
1075
1076   // traffic parameters
1077
1078   PRINTD (DBG_QOS, "TX:");
1079   txtp = &qos->txtp;
1080   if (txtp->traffic_class != ATM_NONE) {
1081     switch (txtp->traffic_class) {
1082       case ATM_UBR: {
1083         // we take "the PCR" as a rate-cap
1084         int pcr = atm_pcr_goal (txtp);
1085         if (!pcr) {
1086           // no rate cap
1087           tx_rate_bits = 0;
1088           tx_vc_bits = TX_UBR;
1089           tx_frame_bits = TX_FRAME_NOTCAP;
1090         } else {
1091           rounding r;
1092           if (pcr < 0) {
1093             r = round_down;
1094             pcr = -pcr;
1095           } else {
1096             r = round_up;
1097           }
1098           error = make_rate (pcr, r, &tx_rate_bits, NULL);
1099           tx_vc_bits = TX_UBR_CAPPED;
1100           tx_frame_bits = TX_FRAME_CAPPED;
1101         }
1102         break;
1103       }
1104 #if 0
1105       case ATM_ABR: {
1106         pcr = atm_pcr_goal (txtp);
1107         PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1108         break;
1109       }
1110 #endif
1111       default: {
1112         // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1113         PRINTD (DBG_QOS, "request for non-UBR denied");
1114         return -EINVAL;
1115       }
1116     }
1117     PRINTD (DBG_QOS, "tx_rate_bits=%hx, tx_vc_bits=%hx",
1118             tx_rate_bits, tx_vc_bits);
1119   }
1120
1121   PRINTD (DBG_QOS, "RX:");
1122   rxtp = &qos->rxtp;
1123   if (rxtp->traffic_class == ATM_NONE) {
1124     // do nothing
1125   } else {
1126     // choose an RX pool (arranged in increasing size)
1127     for (pool = 0; pool < NUM_RX_POOLS; ++pool)
1128       if ((unsigned int) rxtp->max_sdu <= dev->rxq[pool].buffer_size) {
1129         PRINTD (DBG_VCC|DBG_QOS|DBG_POOL, "chose pool %hu (max_sdu %u <= %u)",
1130                 pool, rxtp->max_sdu, dev->rxq[pool].buffer_size);
1131         break;
1132       }
1133     if (pool == NUM_RX_POOLS) {
1134       PRINTD (DBG_WARN|DBG_VCC|DBG_QOS|DBG_POOL,
1135               "no pool suitable for VC (RX max_sdu %d is too large)",
1136               rxtp->max_sdu);
1137       return -EINVAL;
1138     }
1139
1140     switch (rxtp->traffic_class) {
1141       case ATM_UBR: {
1142         break;
1143       }
1144 #if 0
1145       case ATM_ABR: {
1146         pcr = atm_pcr_goal (rxtp);
1147         PRINTD (DBG_QOS, "pcr goal = %d", pcr);
1148         break;
1149       }
1150 #endif
1151       default: {
1152         // PRINTD (DBG_QOS, "request for non-UBR/ABR denied");
1153         PRINTD (DBG_QOS, "request for non-UBR denied");
1154         return -EINVAL;
1155       }
1156     }
1157   }
1158
1159   // get space for our vcc stuff
1160   vcc = kmalloc (sizeof(amb_vcc), GFP_KERNEL);
1161   if (!vcc) {
1162     PRINTK (KERN_ERR, "out of memory!");
1163     return -ENOMEM;
1164   }
1165   atm_vcc->dev_data = (void *) vcc;
1166
1167   // no failures beyond this point
1168
1169   // we are not really "immediately before allocating the connection
1170   // identifier in hardware", but it will just have to do!
1171   set_bit(ATM_VF_ADDR,&atm_vcc->flags);
1172
1173   if (txtp->traffic_class != ATM_NONE) {
1174     command cmd;
1175
1176     vcc->tx_frame_bits = tx_frame_bits;
1177
1178     down (&dev->vcc_sf);
1179     if (dev->rxer[vci]) {
1180       // RXer on the channel already, just modify rate...
1181       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1182       cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1183       cmd.args.modify_rate.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1184       while (command_do (dev, &cmd))
1185         schedule();
1186       // ... and TX flags, preserving the RX pool
1187       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1188       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1189       cmd.args.modify_flags.flags = cpu_to_be32
1190         ( (AMB_VCC(dev->rxer[vci])->rx_info.pool << SRB_POOL_SHIFT)
1191           | (tx_vc_bits << SRB_FLAGS_SHIFT) );
1192       while (command_do (dev, &cmd))
1193         schedule();
1194     } else {
1195       // no RXer on the channel, just open (with pool zero)
1196       cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1197       cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1198       cmd.args.open.flags = cpu_to_be32 (tx_vc_bits << SRB_FLAGS_SHIFT);
1199       cmd.args.open.rate = cpu_to_be32 (tx_rate_bits << SRB_RATE_SHIFT);
1200       while (command_do (dev, &cmd))
1201         schedule();
1202     }
1203     dev->txer[vci].tx_present = 1;
1204     up (&dev->vcc_sf);
1205   }
1206
1207   if (rxtp->traffic_class != ATM_NONE) {
1208     command cmd;
1209
1210     vcc->rx_info.pool = pool;
1211
1212     down (&dev->vcc_sf);
1213     /* grow RX buffer pool */
1214     if (!dev->rxq[pool].buffers_wanted)
1215       dev->rxq[pool].buffers_wanted = rx_lats;
1216     dev->rxq[pool].buffers_wanted += 1;
1217     fill_rx_pool (dev, pool, GFP_KERNEL);
1218
1219     if (dev->txer[vci].tx_present) {
1220       // TXer on the channel already
1221       // switch (from pool zero) to this pool, preserving the TX bits
1222       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1223       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1224       cmd.args.modify_flags.flags = cpu_to_be32
1225         ( (pool << SRB_POOL_SHIFT)
1226           | (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT) );
1227     } else {
1228       // no TXer on the channel, open the VC (with no rate info)
1229       cmd.request = cpu_to_be32 (SRB_OPEN_VC);
1230       cmd.args.open.vc = cpu_to_be32 (vci);  // vpi 0
1231       cmd.args.open.flags = cpu_to_be32 (pool << SRB_POOL_SHIFT);
1232       cmd.args.open.rate = cpu_to_be32 (0);
1233     }
1234     while (command_do (dev, &cmd))
1235       schedule();
1236     // this link allows RX frames through
1237     dev->rxer[vci] = atm_vcc;
1238     up (&dev->vcc_sf);
1239   }
1240
1241   // indicate readiness
1242   set_bit(ATM_VF_READY,&atm_vcc->flags);
1243
1244   return 0;
1245 }
1246
1247 /********** Close a VC **********/
1248
1249 static void amb_close (struct atm_vcc * atm_vcc) {
1250   amb_dev * dev = AMB_DEV (atm_vcc->dev);
1251   amb_vcc * vcc = AMB_VCC (atm_vcc);
1252   u16 vci = atm_vcc->vci;
1253
1254   PRINTD (DBG_VCC|DBG_FLOW, "amb_close");
1255
1256   // indicate unreadiness
1257   clear_bit(ATM_VF_READY,&atm_vcc->flags);
1258
1259   // disable TXing
1260   if (atm_vcc->qos.txtp.traffic_class != ATM_NONE) {
1261     command cmd;
1262
1263     down (&dev->vcc_sf);
1264     if (dev->rxer[vci]) {
1265       // RXer still on the channel, just modify rate... XXX not really needed
1266       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_RATE);
1267       cmd.args.modify_rate.vc = cpu_to_be32 (vci);  // vpi 0
1268       cmd.args.modify_rate.rate = cpu_to_be32 (0);
1269       // ... and clear TX rate flags (XXX to stop RM cell output?), preserving RX pool
1270     } else {
1271       // no RXer on the channel, close channel
1272       cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1273       cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1274     }
1275     dev->txer[vci].tx_present = 0;
1276     while (command_do (dev, &cmd))
1277       schedule();
1278     up (&dev->vcc_sf);
1279   }
1280
1281   // disable RXing
1282   if (atm_vcc->qos.rxtp.traffic_class != ATM_NONE) {
1283     command cmd;
1284
1285     // this is (the?) one reason why we need the amb_vcc struct
1286     unsigned char pool = vcc->rx_info.pool;
1287
1288     down (&dev->vcc_sf);
1289     if (dev->txer[vci].tx_present) {
1290       // TXer still on the channel, just go to pool zero XXX not really needed
1291       cmd.request = cpu_to_be32 (SRB_MODIFY_VC_FLAGS);
1292       cmd.args.modify_flags.vc = cpu_to_be32 (vci);  // vpi 0
1293       cmd.args.modify_flags.flags = cpu_to_be32
1294         (dev->txer[vci].tx_vc_bits << SRB_FLAGS_SHIFT);
1295     } else {
1296       // no TXer on the channel, close the VC
1297       cmd.request = cpu_to_be32 (SRB_CLOSE_VC);
1298       cmd.args.close.vc = cpu_to_be32 (vci); // vpi 0
1299     }
1300     // forget the rxer - no more skbs will be pushed
1301     if (atm_vcc != dev->rxer[vci])
1302       PRINTK (KERN_ERR, "%s vcc=%p rxer[vci]=%p",
1303               "arghhh! we're going to die!",
1304               vcc, dev->rxer[vci]);
1305     dev->rxer[vci] = NULL;
1306     while (command_do (dev, &cmd))
1307       schedule();
1308
1309     /* shrink RX buffer pool */
1310     dev->rxq[pool].buffers_wanted -= 1;
1311     if (dev->rxq[pool].buffers_wanted == rx_lats) {
1312       dev->rxq[pool].buffers_wanted = 0;
1313       drain_rx_pool (dev, pool);
1314     }
1315     up (&dev->vcc_sf);
1316   }
1317
1318   // free our structure
1319   kfree (vcc);
1320
1321   // say the VPI/VCI is free again
1322   clear_bit(ATM_VF_ADDR,&atm_vcc->flags);
1323
1324   return;
1325 }
1326
/********** Get socket options for a VC **********/
1328
1329 // int amb_getsockopt (struct atm_vcc * atm_vcc, int level, int optname, void * optval, int optlen);
1330
1331 /********** Set socket options for a VC **********/
1332
1333 // int amb_setsockopt (struct atm_vcc * atm_vcc, int level, int optname, void * optval, int optlen);
1334
1335 /********** Send **********/
1336
1337 static int amb_send (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
1338   amb_dev * dev = AMB_DEV(atm_vcc->dev);
1339   amb_vcc * vcc = AMB_VCC(atm_vcc);
1340   u16 vc = atm_vcc->vci;
1341   unsigned int tx_len = skb->len;
1342   unsigned char * tx_data = skb->data;
1343   tx_simple * tx_descr;
1344   tx_in tx;
1345
1346   if (test_bit (dead, &dev->flags))
1347     return -EIO;
1348
1349   PRINTD (DBG_FLOW|DBG_TX, "amb_send vc %x data %p len %u",
1350           vc, tx_data, tx_len);
1351
1352   dump_skb (">>>", vc, skb);
1353
1354   if (!dev->txer[vc].tx_present) {
1355     PRINTK (KERN_ERR, "attempt to send on RX-only VC %x", vc);
1356     return -EBADFD;
1357   }
1358
1359   // this is a driver private field so we have to set it ourselves,
1360   // despite the fact that we are _required_ to use it to check for a
1361   // pop function
1362   ATM_SKB(skb)->vcc = atm_vcc;
1363
1364   if (skb->len > (size_t) atm_vcc->qos.txtp.max_sdu) {
1365     PRINTK (KERN_ERR, "sk_buff length greater than agreed max_sdu, dropping...");
1366     return -EIO;
1367   }
1368
1369   if (check_area (skb->data, skb->len)) {
1370     atomic_inc(&atm_vcc->stats->tx_err);
1371     return -ENOMEM; // ?
1372   }
1373
1374   // allocate memory for fragments
1375   tx_descr = kmalloc (sizeof(tx_simple), GFP_KERNEL);
1376   if (!tx_descr) {
1377     PRINTK (KERN_ERR, "could not allocate TX descriptor");
1378     return -ENOMEM;
1379   }
1380   if (check_area (tx_descr, sizeof(tx_simple))) {
1381     kfree (tx_descr);
1382     return -ENOMEM;
1383   }
1384   PRINTD (DBG_TX, "fragment list allocated at %p", tx_descr);
1385
1386   tx_descr->skb = skb;
1387
1388   tx_descr->tx_frag.bytes = cpu_to_be32 (tx_len);
1389   tx_descr->tx_frag.address = cpu_to_be32 (virt_to_bus (tx_data));
1390
1391   tx_descr->tx_frag_end.handle = virt_to_bus (tx_descr);
1392   tx_descr->tx_frag_end.vc = 0;
1393   tx_descr->tx_frag_end.next_descriptor_length = 0;
1394   tx_descr->tx_frag_end.next_descriptor = 0;
1395 #ifdef AMB_NEW_MICROCODE
1396   tx_descr->tx_frag_end.cpcs_uu = 0;
1397   tx_descr->tx_frag_end.cpi = 0;
1398   tx_descr->tx_frag_end.pad = 0;
1399 #endif
1400
1401   tx.vc = cpu_to_be16 (vcc->tx_frame_bits | vc);
1402   tx.tx_descr_length = cpu_to_be16 (sizeof(tx_frag)+sizeof(tx_frag_end));
1403   tx.tx_descr_addr = cpu_to_be32 (virt_to_bus (&tx_descr->tx_frag));
1404
1405   while (tx_give (dev, &tx))
1406     schedule();
1407   return 0;
1408 }
1409
1410 /********** Change QoS on a VC **********/
1411
1412 // int amb_change_qos (struct atm_vcc * atm_vcc, struct atm_qos * qos, int flags);
1413
1414 /********** Free RX Socket Buffer **********/
1415
1416 #if 0
// Currently compiled out by the enclosing "#if 0".
//
// Tries to recycle a received skb into the RX pool recorded in the
// VC's amb_vcc instead of freeing it: the bus addresses of the skb and
// of its data are captured, the skb is reset to empty (data and tail
// back at head, len 0) and offered to the adapter with rx_give. If
// rx_give does not return zero the skb is freed with dev_kfree_skb_any.
static void amb_free_rx_skb (struct atm_vcc * atm_vcc, struct sk_buff * skb) {
  amb_dev * dev = AMB_DEV (atm_vcc->dev);
  amb_vcc * vcc = AMB_VCC (atm_vcc);
  unsigned char pool = vcc->rx_info.pool;
  rx_in rx;

  // This may be unsafe for various reasons that I cannot really guess
  // at. However, I note that the ATM layer calls kfree_skb rather
  // than dev_kfree_skb at this point so we are least covered as far
  // as buffer locking goes. There may be bugs if pcap clones RX skbs.

  PRINTD (DBG_FLOW|DBG_SKB, "amb_rx_free skb %p (atm_vcc %p, vcc %p)",
          skb, atm_vcc, vcc);

  // NOTE(review): host_address is taken from skb->data before
  // skb->data is reset to skb->head just below - confirm intended
  rx.handle = virt_to_bus (skb);
  rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));

  skb->data = skb->head;
  skb->tail = skb->head;
  skb->len = 0;

  if (!rx_give (dev, &rx, pool)) {
    // success
    PRINTD (DBG_SKB|DBG_POOL, "recycled skb for pool %hu", pool);
    return;
  }

  // just do what the ATM layer would have done
  dev_kfree_skb_any (skb);

  return;
}
1449 #endif
1450
1451 /********** Proc File Output **********/
1452
1453 static int amb_proc_read (struct atm_dev * atm_dev, loff_t * pos, char * page) {
1454   amb_dev * dev = AMB_DEV (atm_dev);
1455   int left = *pos;
1456   unsigned char pool;
1457
1458   PRINTD (DBG_FLOW, "amb_proc_read");
1459
1460   /* more diagnostics here? */
1461
1462   if (!left--) {
1463     amb_stats * s = &dev->stats;
1464     return sprintf (page,
1465                     "frames: TX OK %lu, RX OK %lu, RX bad %lu "
1466                     "(CRC %lu, long %lu, aborted %lu, unused %lu).\n",
1467                     s->tx_ok, s->rx.ok, s->rx.error,
1468                     s->rx.badcrc, s->rx.toolong,
1469                     s->rx.aborted, s->rx.unused);
1470   }
1471
1472   if (!left--) {
1473     amb_cq * c = &dev->cq;
1474     return sprintf (page, "cmd queue [cur/hi/max]: %u/%u/%u. ",
1475                     c->pending, c->high, c->maximum);
1476   }
1477
1478   if (!left--) {
1479     amb_txq * t = &dev->txq;
1480     return sprintf (page, "TX queue [cur/max high full]: %u/%u %u %u.\n",
1481                     t->pending, t->maximum, t->high, t->filled);
1482   }
1483
1484   if (!left--) {
1485     unsigned int count = sprintf (page, "RX queues [cur/max/req low empty]:");
1486     for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1487       amb_rxq * r = &dev->rxq[pool];
1488       count += sprintf (page+count, " %u/%u/%u %u %u",
1489                         r->pending, r->maximum, r->buffers_wanted, r->low, r->emptied);
1490     }
1491     count += sprintf (page+count, ".\n");
1492     return count;
1493   }
1494
1495   if (!left--) {
1496     unsigned int count = sprintf (page, "RX buffer sizes:");
1497     for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
1498       amb_rxq * r = &dev->rxq[pool];
1499       count += sprintf (page+count, " %u", r->buffer_size);
1500     }
1501     count += sprintf (page+count, ".\n");
1502     return count;
1503   }
1504
1505 #if 0
1506   if (!left--) {
1507     // suni block etc?
1508   }
1509 #endif
1510
1511   return 0;
1512 }
1513
1514 /********** Operation Structure **********/
1515
// Operations table for this driver. Only open, close, send and
// proc_read are filled in; the getsockopt/setsockopt/change_qos
// prototypes earlier in this file are commented out.
static const struct atmdev_ops amb_ops = {
  .open         = amb_open,
  .close        = amb_close,
  .send         = amb_send,
  .proc_read    = amb_proc_read,
  .owner        = THIS_MODULE,
};
1523
1524 /********** housekeeping **********/
1525 static void do_housekeeping (unsigned long arg) {
1526   amb_dev * dev = (amb_dev *) arg;
1527
1528   // could collect device-specific (not driver/atm-linux) stats here
1529
1530   // last resort refill once every ten seconds
1531   fill_rx_pools (dev);
1532   mod_timer(&dev->housekeeping, jiffies + 10*HZ);
1533
1534   return;
1535 }
1536
1537 /********** creation of communication queues **********/
1538
// Allocate and lay out all host<->adapter communication queues.
//
//   cmds             number of entries in the command queue
//   txs              number of entries in each of the TX in and out queues
//   rxs              per-pool number of entries in the RX in/out queues
//   rx_buffer_sizes  per-pool RX buffer size, copied into rxq->buffer_size
//
// A single kmalloc'd region is carved up, in order, into the command
// queue, the TX in queue, the TX out queue and then an in/out queue
// pair for each RX pool; the bookkeeping in dev->cq, dev->txq and
// dev->rxq[] is initialised to "empty". The region starts at
// dev->cq.ptrs.start, which is what destroy_queues later frees.
//
// Returns 0 on success, -ENOMEM if the allocation fails, if check_area
// rejects the region, or if the carve-up does not end exactly at the
// end of the region (in which case the region is freed).
static int __devinit create_queues (amb_dev * dev, unsigned int cmds,
                                 unsigned int txs, unsigned int * rxs,
                                 unsigned int * rx_buffer_sizes) {
  unsigned char pool;
  size_t total = 0;
  void * memory;
  void * limit;

  PRINTD (DBG_FLOW, "create_queues %p", dev);

  // work out how much memory all the queues need together
  total += cmds * sizeof(command);

  total += txs * (sizeof(tx_in) + sizeof(tx_out));

  for (pool = 0; pool < NUM_RX_POOLS; ++pool)
    total += rxs[pool] * (sizeof(rx_in) + sizeof(rx_out));

  memory = kmalloc (total, GFP_KERNEL);
  if (!memory) {
    PRINTK (KERN_ERR, "could not allocate queues");
    return -ENOMEM;
  }
  if (check_area (memory, total)) {
    PRINTK (KERN_ERR, "queues allocated in nasty area");
    kfree (memory);
    return -ENOMEM;
  }

  // from here on "memory" is a cursor that advances through the region
  limit = memory + total;
  PRINTD (DBG_INIT, "queues from %p to %p", memory, limit);

  PRINTD (DBG_CMD, "command queue at %p", memory);

  {
    command * cmd = memory;
    amb_cq * cq = &dev->cq;

    cq->pending = 0;
    cq->high = 0;
    cq->maximum = cmds - 1;

    cq->ptrs.start = cmd;
    cq->ptrs.in = cmd;
    cq->ptrs.out = cmd;
    cq->ptrs.limit = cmd + cmds;

    memory = cq->ptrs.limit;
  }

  PRINTD (DBG_TX, "TX queue pair at %p", memory);

  {
    tx_in * in = memory;
    tx_out * out;
    amb_txq * txq = &dev->txq;

    txq->pending = 0;
    txq->high = 0;
    txq->filled = 0;
    txq->maximum = txs - 1;

    txq->in.start = in;
    txq->in.ptr = in;
    txq->in.limit = in + txs;

    // the out queue follows the in queue directly
    memory = txq->in.limit;
    out = memory;

    txq->out.start = out;
    txq->out.ptr = out;
    txq->out.limit = out + txs;

    memory = txq->out.limit;
  }

  PRINTD (DBG_RX, "RX queue pairs at %p", memory);

  for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
    rx_in * in = memory;
    rx_out * out;
    amb_rxq * rxq = &dev->rxq[pool];

    rxq->buffer_size = rx_buffer_sizes[pool];
    // no buffers are wanted until a VC is opened on this pool
    rxq->buffers_wanted = 0;

    rxq->pending = 0;
    rxq->low = rxs[pool] - 1;
    rxq->emptied = 0;
    rxq->maximum = rxs[pool] - 1;

    rxq->in.start = in;
    rxq->in.ptr = in;
    rxq->in.limit = in + rxs[pool];

    // the out queue follows the in queue directly
    memory = rxq->in.limit;
    out = memory;

    rxq->out.start = out;
    rxq->out.ptr = out;
    rxq->out.limit = out + rxs[pool];

    memory = rxq->out.limit;
  }

  // sanity check: the cursor must have landed exactly on the end
  if (memory == limit) {
    return 0;
  } else {
    PRINTK (KERN_ERR, "bad queue alloc %p != %p (tell maintainer)", memory, limit);
    // limit - total is the start of the region returned by kmalloc
    kfree (limit - total);
    return -ENOMEM;
  }

}
1652
1653 /********** destruction of communication queues **********/
1654
1655 static void destroy_queues (amb_dev * dev) {
1656   // all queues assumed empty
1657   void * memory = dev->cq.ptrs.start;
1658   // includes txq.in, txq.out, rxq[].in and rxq[].out
1659
1660   PRINTD (DBG_FLOW, "destroy_queues %p", dev);
1661
1662   PRINTD (DBG_INIT, "freeing queues at %p", memory);
1663   kfree (memory);
1664
1665   return;
1666 }
1667
1668 /********** basic loader commands and error handling **********/
1669 // centisecond timeouts - guessing away here
// Per-command timeout, indexed by loader_command. do_loader_command
// multiplies the entry by ten before passing it to
// msleep_interruptible, i.e. the values here are centiseconds.
static unsigned int command_timeouts [] = {
        [host_memory_test]     = 15,
        [read_adapter_memory]  = 2,
        [write_adapter_memory] = 2,
        [adapter_start]        = 50,
        [get_version_number]   = 10,
        [interrupt_host]       = 1,
        [flash_erase_sector]   = 1,
        [adap_download_block]  = 1,
        [adap_erase_flash]     = 1,
        [adap_run_in_iram]     = 1,
        [adap_end_download]    = 1
};
1683
1684
// Result code that counts as success for each loader_command;
// decode_loader_result returns 0 without logging when the adapter's
// result equals the entry for the command that was issued.
static unsigned int command_successes [] = {
        [host_memory_test]     = COMMAND_PASSED_TEST,
        [read_adapter_memory]  = COMMAND_READ_DATA_OK,
        [write_adapter_memory] = COMMAND_WRITE_DATA_OK,
        [adapter_start]        = COMMAND_COMPLETE,
        [get_version_number]   = COMMAND_COMPLETE,
        [interrupt_host]       = COMMAND_COMPLETE,
        [flash_erase_sector]   = COMMAND_COMPLETE,
        [adap_download_block]  = COMMAND_COMPLETE,
        [adap_erase_flash]     = COMMAND_COMPLETE,
        [adap_run_in_iram]     = COMMAND_COMPLETE,
        [adap_end_download]    = COMMAND_COMPLETE
};
1698
1699 static  int decode_loader_result (loader_command cmd, u32 result)
1700 {
1701         int res;
1702         const char *msg;
1703
1704         if (result == command_successes[cmd])
1705                 return 0;
1706
1707         switch (result) {
1708                 case BAD_COMMAND:
1709                         res = -EINVAL;
1710                         msg = "bad command";
1711                         break;
1712                 case COMMAND_IN_PROGRESS:
1713                         res = -ETIMEDOUT;
1714                         msg = "command in progress";
1715                         break;
1716                 case COMMAND_PASSED_TEST:
1717                         res = 0;
1718                         msg = "command passed test";
1719                         break;
1720                 case COMMAND_FAILED_TEST:
1721                         res = -EIO;
1722                         msg = "command failed test";
1723                         break;
1724                 case COMMAND_READ_DATA_OK:
1725                         res = 0;
1726                         msg = "command read data ok";
1727                         break;
1728                 case COMMAND_READ_BAD_ADDRESS:
1729                         res = -EINVAL;
1730                         msg = "command read bad address";
1731                         break;
1732                 case COMMAND_WRITE_DATA_OK:
1733                         res = 0;
1734                         msg = "command write data ok";
1735                         break;
1736                 case COMMAND_WRITE_BAD_ADDRESS:
1737                         res = -EINVAL;
1738                         msg = "command write bad address";
1739                         break;
1740                 case COMMAND_WRITE_FLASH_FAILURE:
1741                         res = -EIO;
1742                         msg = "command write flash failure";
1743                         break;
1744                 case COMMAND_COMPLETE:
1745                         res = 0;
1746                         msg = "command complete";
1747                         break;
1748                 case COMMAND_FLASH_ERASE_FAILURE:
1749                         res = -EIO;
1750                         msg = "command flash erase failure";
1751                         break;
1752                 case COMMAND_WRITE_BAD_DATA:
1753                         res = -EINVAL;
1754                         msg = "command write bad data";
1755                         break;
1756                 default:
1757                         res = -EINVAL;
1758                         msg = "unknown error";
1759                         PRINTD (DBG_LOAD|DBG_ERR,
1760                                 "decode_loader_result got %d=%x !",
1761                                 result, result);
1762                         break;
1763         }
1764
1765         PRINTK (KERN_ERR, "%s", msg);
1766         return res;
1767 }
1768
// Issue one command to the adapter's loader and wait for its result.
//
//   lb   loader block shared with the adapter; its payload must already
//        be filled in (and byte-ordered) by the caller
//   dev  the adapter
//   cmd  which loader command to run
//
// Returns -ETIMEDOUT if no final result appears within the command's
// timeout (or, for adapter_start, if the doorbell does not clear).
// For adapter_start a cleared doorbell yields 0; for every other
// command the adapter's result is translated by decode_loader_result.
static int __devinit do_loader_command (volatile loader_block * lb,
                                     const amb_dev * dev, loader_command cmd) {

  unsigned long timeout;

  PRINTD (DBG_FLOW|DBG_LOAD, "do_loader_command");

  /* do a command

     Set the return value to zero, set the command type and set the
     valid entry to the right magic value. The payload is already
     correctly byte-ordered so we leave it alone. Hit the doorbell
     with the bus address of this structure.

  */

  lb->result = 0;
  lb->command = cpu_to_be32 (cmd);
  lb->valid = cpu_to_be32 (DMA_VALID);
  // dump_registers (dev);
  // dump_loader_block (lb);
  wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);

  // table entries are centiseconds, msleep_interruptible takes ms
  timeout = command_timeouts[cmd] * 10;

  // poll until the result is neither zero nor "in progress"; timeout
  // is replaced by msleep_interruptible's return value each time round,
  // and the wait is abandoned once that reaches zero
  while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
    if (timeout) {
      timeout = msleep_interruptible(timeout);
    } else {
      PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
      dump_registers (dev);
      dump_loader_block (lb);
      return -ETIMEDOUT;
    }

  if (cmd == adapter_start) {
    // wait for start command to acknowledge...
    // (the doorbell register must read back as zero; lb->result is
    // not decoded on this path)
    timeout = 100;
    while (rd_plain (dev, offsetof(amb_mem, doorbell)))
      if (timeout) {
        timeout = msleep_interruptible(timeout);
      } else {
        PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
                be32_to_cpu (lb->result));
        dump_registers (dev);
        return -ETIMEDOUT;
      }
    return 0;
  } else {
    return decode_loader_result (cmd, be32_to_cpu (lb->result));
  }

}
1822
1823 /* loader: determine loader version */
1824
1825 static int __devinit get_loader_version (loader_block * lb,
1826                                       const amb_dev * dev, u32 * version) {
1827   int res;
1828
1829   PRINTD (DBG_FLOW|DBG_LOAD, "get_loader_version");
1830
1831   res = do_loader_command (lb, dev, get_version_number);
1832   if (res)
1833     return res;
1834   if (version)
1835     *version = be32_to_cpu (lb->payload.version);
1836   return 0;
1837 }
1838
1839 /* loader: write memory data blocks */
1840
1841 static int __devinit loader_write (loader_block * lb,
1842                                 const amb_dev * dev, const u32 * data,
1843                                 u32 address, unsigned int count) {
1844   unsigned int i;
1845   transfer_block * tb = &lb->payload.transfer;
1846
1847   PRINTD (DBG_FLOW|DBG_LOAD, "loader_write");
1848
1849   if (count > MAX_TRANSFER_DATA)
1850     return -EINVAL;
1851   tb->address = cpu_to_be32 (address);
1852   tb->count = cpu_to_be32 (count);
1853   for (i = 0; i < count; ++i)
1854     tb->data[i] = cpu_to_be32 (data[i]);
1855   return do_loader_command (lb, dev, write_adapter_memory);
1856 }
1857
1858 /* loader: verify memory data blocks */
1859
1860 static int __devinit loader_verify (loader_block * lb,
1861                                  const amb_dev * dev, const u32 * data,
1862                                  u32 address, unsigned int count) {
1863   unsigned int i;
1864   transfer_block * tb = &lb->payload.transfer;
1865   int res;
1866
1867   PRINTD (DBG_FLOW|DBG_LOAD, "loader_verify");
1868
1869   if (count > MAX_TRANSFER_DATA)
1870     return -EINVAL;
1871   tb->address = cpu_to_be32 (address);
1872   tb->count = cpu_to_be32 (count);
1873   res = do_loader_command (lb, dev, read_adapter_memory);
1874   if (!res)
1875     for (i = 0; i < count; ++i)
1876       if (tb->data[i] != cpu_to_be32 (data[i])) {
1877         res = -EINVAL;
1878         break;
1879       }
1880   return res;
1881 }
1882
1883 /* loader: start microcode */
1884
1885 static int __devinit loader_start (loader_block * lb,
1886                                 const amb_dev * dev, u32 address) {
1887   PRINTD (DBG_FLOW|DBG_LOAD, "loader_start");
1888
1889   lb->payload.start = cpu_to_be32 (address);
1890   return do_loader_command (lb, dev, adapter_start);
1891 }
1892
1893 /********** reset card **********/
1894
1895 static inline void sf (const char * msg)
1896 {
1897         PRINTK (KERN_ERR, "self-test failed: %s", msg);
1898 }
1899
1900 static int amb_reset (amb_dev * dev, int diags) {
1901   u32 word;
1902
1903   PRINTD (DBG_FLOW|DBG_LOAD, "amb_reset");
1904
1905   word = rd_plain (dev, offsetof(amb_mem, reset_control));
1906   // put card into reset state
1907   wr_plain (dev, offsetof(amb_mem, reset_control), word | AMB_RESET_BITS);
1908   // wait a short while
1909   udelay (10);
1910 #if 1
1911   // put card into known good state
1912   wr_plain (dev, offsetof(amb_mem, interrupt_control), AMB_DOORBELL_BITS);
1913   // clear all interrupts just in case
1914   wr_plain (dev, offsetof(amb_mem, interrupt), -1);
1915 #endif
1916   // clear self-test done flag
1917   wr_plain (dev, offsetof(amb_mem, mb.loader.ready), 0);
1918   // take card out of reset state
1919   wr_plain (dev, offsetof(amb_mem, reset_control), word &~ AMB_RESET_BITS);
1920
1921   if (diags) {
1922     unsigned long timeout;
1923     // 4.2 second wait
1924     msleep(4200);
1925     // half second time-out
1926     timeout = 500;
1927     while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
1928       if (timeout) {
1929         timeout = msleep_interruptible(timeout);
1930       } else {
1931         PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
1932         return -ETIMEDOUT;
1933       }
1934
1935     // get results of self-test
1936     // XXX double check byte-order
1937     word = rd_mem (dev, offsetof(amb_mem, mb.loader.result));
1938     if (word & SELF_TEST_FAILURE) {
1939       if (word & GPINT_TST_FAILURE)
1940         sf ("interrupt");
1941       if (word & SUNI_DATA_PATTERN_FAILURE)
1942         sf ("SUNI data pattern");
1943       if (word & SUNI_DATA_BITS_FAILURE)
1944         sf ("SUNI data bits");
1945       if (word & SUNI_UTOPIA_FAILURE)
1946         sf ("SUNI UTOPIA interface");
1947       if (word & SUNI_FIFO_FAILURE)
1948         sf ("SUNI cell buffer FIFO");
1949       if (word & SRAM_FAILURE)
1950         sf ("bad SRAM");
1951       // better return value?
1952       return -EIO;
1953     }
1954
1955   }
1956   return 0;
1957 }
1958
1959 /********** transfer and start the microcode **********/
1960
1961 static int __devinit ucode_init (loader_block * lb, amb_dev * dev) {
1962   unsigned int i = 0;
1963   unsigned int total = 0;
1964   const u32 * pointer = ucode_data;
1965   u32 address;
1966   unsigned int count;
1967   int res;
1968
1969   PRINTD (DBG_FLOW|DBG_LOAD, "ucode_init");
1970
1971   while (address = ucode_regions[i].start,
1972          count = ucode_regions[i].count) {
1973     PRINTD (DBG_LOAD, "starting region (%x, %u)", address, count);
1974     while (count) {
1975       unsigned int words;
1976       if (count <= MAX_TRANSFER_DATA)
1977         words = count;
1978       else
1979         words = MAX_TRANSFER_DATA;
1980       total += words;
1981       res = loader_write (lb, dev, pointer, address, words);
1982       if (res)
1983         return res;
1984       res = loader_verify (lb, dev, pointer, address, words);
1985       if (res)
1986         return res;
1987       count -= words;
1988       address += sizeof(u32) * words;
1989       pointer += words;
1990     }
1991     i += 1;
1992   }
1993   if (*pointer == ATM_POISON) {
1994     return loader_start (lb, dev, ucode_start);
1995   } else {
1996     // cast needed as there is no %? for pointer differnces
1997     PRINTD (DBG_LOAD|DBG_ERR,
1998             "offset=%li, *pointer=%x, address=%x, total=%u",
1999             (long) (pointer - ucode_data), *pointer, address, total);
2000     PRINTK (KERN_ERR, "incorrect microcode data");
2001     return -ENOMEM;
2002   }
2003 }
2004
2005 /********** give adapter parameters **********/
2006
2007 static inline __be32 bus_addr(void * addr) {
2008     return cpu_to_be32 (virt_to_bus (addr));
2009 }
2010
2011 static int __devinit amb_talk (amb_dev * dev) {
2012   adap_talk_block a;
2013   unsigned char pool;
2014   unsigned long timeout;
2015
2016   PRINTD (DBG_FLOW, "amb_talk %p", dev);
2017
2018   a.command_start = bus_addr (dev->cq.ptrs.start);
2019   a.command_end   = bus_addr (dev->cq.ptrs.limit);
2020   a.tx_start      = bus_addr (dev->txq.in.start);
2021   a.tx_end        = bus_addr (dev->txq.in.limit);
2022   a.txcom_start   = bus_addr (dev->txq.out.start);
2023   a.txcom_end     = bus_addr (dev->txq.out.limit);
2024
2025   for (pool = 0; pool < NUM_RX_POOLS; ++pool) {
2026     // the other "a" items are set up by the adapter
2027     a.rec_struct[pool].buffer_start = bus_addr (dev->rxq[pool].in.start);
2028     a.rec_struct[pool].buffer_end   = bus_addr (dev->rxq[pool].in.limit);
2029     a.rec_struct[pool].rx_start     = bus_addr (dev->rxq[pool].out.start);
2030     a.rec_struct[pool].rx_end       = bus_addr (dev->rxq[pool].out.limit);
2031     a.rec_struct[pool].buffer_size = cpu_to_be32 (dev->rxq[pool].buffer_size);
2032   }
2033
2034 #ifdef AMB_NEW_MICROCODE
2035   // disable fast PLX prefetching
2036   a.init_flags = 0;
2037 #endif
2038
2039   // pass the structure
2040   wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
2041
2042   // 2.2 second wait (must not touch doorbell during 2 second DMA test)
2043   msleep(2200);
2044   // give the adapter another half second?
2045   timeout = 500;
2046   while (rd_plain (dev, offsetof(amb_mem, doorbell)))
2047     if (timeout) {
2048       timeout = msleep_interruptible(timeout);
2049     } else {
2050       PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
2051       return -ETIMEDOUT;
2052     }
2053
2054   return 0;
2055 }
2056
2057 // get microcode version
2058 static void __devinit amb_ucode_version (amb_dev * dev) {
2059   u32 major;
2060   u32 minor;
2061   command cmd;
2062   cmd.request = cpu_to_be32 (SRB_GET_VERSION);
2063   while (command_do (dev, &cmd)) {
2064     set_current_state(TASK_UNINTERRUPTIBLE);
2065     schedule();
2066   }
2067   major = be32_to_cpu (cmd.args.version.major);
2068   minor = be32_to_cpu (cmd.args.version.minor);
2069   PRINTK (KERN_INFO, "microcode version is %u.%u", major, minor);
2070 }
2071
2072 // get end station address
2073 static void __devinit amb_esi (amb_dev * dev, u8 * esi) {
2074   u32 lower4;
2075   u16 upper2;
2076   command cmd;
2077
2078   cmd.request = cpu_to_be32 (SRB_GET_BIA);
2079   while (command_do (dev, &cmd)) {
2080     set_current_state(TASK_UNINTERRUPTIBLE);
2081     schedule();
2082   }
2083   lower4 = be32_to_cpu (cmd.args.bia.lower4);
2084   upper2 = be32_to_cpu (cmd.args.bia.upper2);
2085   PRINTD (DBG_LOAD, "BIA: lower4: %08x, upper2 %04x", lower4, upper2);
2086
2087   if (esi) {
2088     unsigned int i;
2089
2090     PRINTDB (DBG_INIT, "ESI:");
2091     for (i = 0; i < ESI_LEN; ++i) {
2092       if (i < 4)
2093           esi[i] = bitrev8(lower4>>(8*i));
2094       else
2095           esi[i] = bitrev8(upper2>>(8*(i-4)));
2096       PRINTDM (DBG_INIT, " %02x", esi[i]);
2097     }
2098
2099     PRINTDE (DBG_INIT, "");
2100   }
2101
2102   return;
2103 }
2104
2105 static void fixup_plx_window (amb_dev *dev, loader_block *lb)
2106 {
2107         // fix up the PLX-mapped window base address to match the block
2108         unsigned long blb;
2109         u32 mapreg;
2110         blb = virt_to_bus(lb);
2111         // the kernel stack had better not ever cross a 1Gb boundary!
2112         mapreg = rd_plain (dev, offsetof(amb_mem, stuff[10]));
2113         mapreg &= ~onegigmask;
2114         mapreg |= blb & onegigmask;
2115         wr_plain (dev, offsetof(amb_mem, stuff[10]), mapreg);
2116         return;
2117 }
2118
2119 static int __devinit amb_init (amb_dev * dev)
2120 {
2121   loader_block lb;
2122
2123   u32 version;
2124
2125   if (amb_reset (dev, 1)) {
2126     PRINTK (KERN_ERR, "card reset failed!");
2127   } else {
2128     fixup_plx_window (dev, &lb);
2129
2130     if (get_loader_version (&lb, dev, &version)) {
2131       PRINTK (KERN_INFO, "failed to get loader version");
2132     } else {
2133       PRINTK (KERN_INFO, "loader version is %08x", version);
2134
2135       if (ucode_init (&lb, dev)) {
2136         PRINTK (KERN_ERR, "microcode failure");
2137       } else if (create_queues (dev, cmds, txs, rxs, rxs_bs)) {
2138         PRINTK (KERN_ERR, "failed to get memory for queues");
2139       } else {
2140
2141         if (amb_talk (dev)) {
2142           PRINTK (KERN_ERR, "adapter did not accept queues");
2143         } else {
2144
2145           amb_ucode_version (dev);
2146           return 0;
2147
2148         } /* amb_talk */
2149
2150         destroy_queues (dev);
2151       } /* create_queues, ucode_init */
2152
2153       amb_reset (dev, 0);
2154     } /* get_loader_version */
2155
2156   } /* amb_reset */
2157
2158   return -EINVAL;
2159 }
2160
2161 static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev)
2162 {
2163       unsigned char pool;
2164       memset (dev, 0, sizeof(amb_dev));
2165
2166       // set up known dev items straight away
2167       dev->pci_dev = pci_dev;
2168       pci_set_drvdata(pci_dev, dev);
2169
2170       dev->iobase = pci_resource_start (pci_dev, 1);
2171       dev->irq = pci_dev->irq;
2172       dev->membase = bus_to_virt(pci_resource_start(pci_dev, 0));
2173
2174       // flags (currently only dead)
2175       dev->flags = 0;
2176
2177       // Allocate cell rates (fibre)
2178       // ATM_OC3_PCR = 1555200000/8/270*260/53 - 29/53
2179       // to be really pedantic, this should be ATM_OC3c_PCR
2180       dev->tx_avail = ATM_OC3_PCR;
2181       dev->rx_avail = ATM_OC3_PCR;
2182
2183 #ifdef FILL_RX_POOLS_IN_BH
2184       // initialise bottom half
2185       INIT_WORK(&dev->bh, (void (*)(void *)) fill_rx_pools, dev);
2186 #endif
2187
2188       // semaphore for txer/rxer modifications - we cannot use a
2189       // spinlock as the critical region needs to switch processes
2190       init_MUTEX (&dev->vcc_sf);
2191       // queue manipulation spinlocks; we want atomic reads and
2192       // writes to the queue descriptors (handles IRQ and SMP)
2193       // consider replacing "int pending" -> "atomic_t available"
2194       // => problem related to who gets to move queue pointers
2195       spin_lock_init (&dev->cq.lock);
2196       spin_lock_init (&dev->txq.lock);
2197       for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2198         spin_lock_init (&dev->rxq[pool].lock);
2199 }
2200
2201 static void setup_pci_dev(struct pci_dev *pci_dev)
2202 {
2203         unsigned char lat;
2204
2205         // enable bus master accesses
2206         pci_set_master(pci_dev);
2207
2208         // frobnicate latency (upwards, usually)
2209         pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
2210
2211         if (!pci_lat)
2212                 pci_lat = (lat < MIN_PCI_LATENCY) ? MIN_PCI_LATENCY : lat;
2213
2214         if (lat != pci_lat) {
2215                 PRINTK (KERN_INFO, "Changing PCI latency timer from %hu to %hu",
2216                         lat, pci_lat);
2217                 pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
2218         }
2219 }
2220
2221 static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
2222 {
2223         amb_dev * dev;
2224         int err;
2225         unsigned int irq;
2226
2227         err = pci_enable_device(pci_dev);
2228         if (err < 0) {
2229                 PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2230                 goto out;
2231         }
2232
2233         // read resources from PCI configuration space
2234         irq = pci_dev->irq;
2235
2236         if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
2237                 PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
2238                 err = -EINVAL;
2239                 goto out_disable;
2240         }
2241
2242         PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
2243                 " IO %llx, IRQ %u, MEM %p",
2244                 (unsigned long long)pci_resource_start(pci_dev, 1),
2245                 irq, bus_to_virt(pci_resource_start(pci_dev, 0)));
2246
2247         // check IO region
2248         err = pci_request_region(pci_dev, 1, DEV_LABEL);
2249         if (err < 0) {
2250                 PRINTK (KERN_ERR, "IO range already in use!");
2251                 goto out_disable;
2252         }
2253
2254         dev = kmalloc (sizeof(amb_dev), GFP_KERNEL);
2255         if (!dev) {
2256                 PRINTK (KERN_ERR, "out of memory!");
2257                 err = -ENOMEM;
2258                 goto out_release;
2259         }
2260
2261         setup_dev(dev, pci_dev);
2262
2263         err = amb_init(dev);
2264         if (err < 0) {
2265                 PRINTK (KERN_ERR, "adapter initialisation failure");
2266                 goto out_free;
2267         }
2268
2269         setup_pci_dev(pci_dev);
2270
2271         // grab (but share) IRQ and install handler
2272         err = request_irq(irq, interrupt_handler, IRQF_SHARED, DEV_LABEL, dev);
2273         if (err < 0) {
2274                 PRINTK (KERN_ERR, "request IRQ failed!");
2275                 goto out_reset;
2276         }
2277
2278         dev->atm_dev = atm_dev_register (DEV_LABEL, &amb_ops, -1, NULL);
2279         if (!dev->atm_dev) {
2280                 PRINTD (DBG_ERR, "failed to register Madge ATM adapter");
2281                 err = -EINVAL;
2282                 goto out_free_irq;
2283         }
2284
2285         PRINTD (DBG_INFO, "registered Madge ATM adapter (no. %d) (%p) at %p",
2286                 dev->atm_dev->number, dev, dev->atm_dev);
2287                 dev->atm_dev->dev_data = (void *) dev;
2288
2289         // register our address
2290         amb_esi (dev, dev->atm_dev->esi);
2291
2292         // 0 bits for vpi, 10 bits for vci
2293         dev->atm_dev->ci_range.vpi_bits = NUM_VPI_BITS;
2294         dev->atm_dev->ci_range.vci_bits = NUM_VCI_BITS;
2295
2296         init_timer(&dev->housekeeping);
2297         dev->housekeeping.function = do_housekeeping;
2298         dev->housekeeping.data = (unsigned long) dev;
2299         mod_timer(&dev->housekeeping, jiffies);
2300
2301         // enable host interrupts
2302         interrupts_on (dev);
2303
2304 out:
2305         return err;
2306
2307 out_free_irq:
2308         free_irq(irq, dev);
2309 out_reset:
2310         amb_reset(dev, 0);
2311 out_free:
2312         kfree(dev);
2313 out_release:
2314         pci_release_region(pci_dev, 1);
2315 out_disable:
2316         pci_disable_device(pci_dev);
2317         goto out;
2318 }
2319
2320
2321 static void __devexit amb_remove_one(struct pci_dev *pci_dev)
2322 {
2323         struct amb_dev *dev;
2324
2325         dev = pci_get_drvdata(pci_dev);
2326
2327         PRINTD(DBG_INFO|DBG_INIT, "closing %p (atm_dev = %p)", dev, dev->atm_dev);
2328         del_timer_sync(&dev->housekeeping);
2329         // the drain should not be necessary
2330         drain_rx_pools(dev);
2331         interrupts_off(dev);
2332         amb_reset(dev, 0);
2333         free_irq(dev->irq, dev);
2334         pci_disable_device(pci_dev);
2335         destroy_queues(dev);
2336         atm_dev_deregister(dev->atm_dev);
2337         kfree(dev);
2338         pci_release_region(pci_dev, 1);
2339 }
2340
2341 static void __init amb_check_args (void) {
2342   unsigned char pool;
2343   unsigned int max_rx_size;
2344
2345 #ifdef DEBUG_AMBASSADOR
2346   PRINTK (KERN_NOTICE, "debug bitmap is %hx", debug &= DBG_MASK);
2347 #else
2348   if (debug)
2349     PRINTK (KERN_NOTICE, "no debugging support");
2350 #endif
2351
2352   if (cmds < MIN_QUEUE_SIZE)
2353     PRINTK (KERN_NOTICE, "cmds has been raised to %u",
2354             cmds = MIN_QUEUE_SIZE);
2355
2356   if (txs < MIN_QUEUE_SIZE)
2357     PRINTK (KERN_NOTICE, "txs has been raised to %u",
2358             txs = MIN_QUEUE_SIZE);
2359
2360   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2361     if (rxs[pool] < MIN_QUEUE_SIZE)
2362       PRINTK (KERN_NOTICE, "rxs[%hu] has been raised to %u",
2363               pool, rxs[pool] = MIN_QUEUE_SIZE);
2364
2365   // buffers sizes should be greater than zero and strictly increasing
2366   max_rx_size = 0;
2367   for (pool = 0; pool < NUM_RX_POOLS; ++pool)
2368     if (rxs_bs[pool] <= max_rx_size)
2369       PRINTK (KERN_NOTICE, "useless pool (rxs_bs[%hu] = %u)",
2370               pool, rxs_bs[pool]);
2371     else
2372       max_rx_size = rxs_bs[pool];
2373
2374   if (rx_lats < MIN_RX_BUFFERS)
2375     PRINTK (KERN_NOTICE, "rx_lats has been raised to %u",
2376             rx_lats = MIN_RX_BUFFERS);
2377
2378   return;
2379 }
2380
2381 /********** module stuff **********/
2382
2383 MODULE_AUTHOR(maintainer_string);
2384 MODULE_DESCRIPTION(description_string);
2385 MODULE_LICENSE("GPL");
2386 module_param(debug,   ushort, 0644);
2387 module_param(cmds,    uint, 0);
2388 module_param(txs,     uint, 0);
2389 module_param_array(rxs,     uint, NULL, 0);
2390 module_param_array(rxs_bs,  uint, NULL, 0);
2391 module_param(rx_lats, uint, 0);
2392 module_param(pci_lat, byte, 0);
2393 MODULE_PARM_DESC(debug,   "debug bitmap, see .h file");
2394 MODULE_PARM_DESC(cmds,    "number of command queue entries");
2395 MODULE_PARM_DESC(txs,     "number of TX queue entries");
2396 MODULE_PARM_DESC(rxs,     "number of RX queue entries [" __MODULE_STRING(NUM_RX_POOLS) "]");
2397 MODULE_PARM_DESC(rxs_bs,  "size of RX buffers [" __MODULE_STRING(NUM_RX_POOLS) "]");
2398 MODULE_PARM_DESC(rx_lats, "number of extra buffers to cope with RX latencies");
2399 MODULE_PARM_DESC(pci_lat, "PCI latency in bus cycles");
2400
2401 /********** module entry **********/
2402
2403 static struct pci_device_id amb_pci_tbl[] = {
2404         { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR, PCI_ANY_ID, PCI_ANY_ID,
2405           0, 0, 0 },
2406         { PCI_VENDOR_ID_MADGE, PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD, PCI_ANY_ID, PCI_ANY_ID,
2407           0, 0, 0 },
2408         { 0, }
2409 };
2410
2411 MODULE_DEVICE_TABLE(pci, amb_pci_tbl);
2412
2413 static struct pci_driver amb_driver = {
2414         .name =         "amb",
2415         .probe =        amb_probe,
2416         .remove =       __devexit_p(amb_remove_one),
2417         .id_table =     amb_pci_tbl,
2418 };
2419
2420 static int __init amb_module_init (void)
2421 {
2422   PRINTD (DBG_FLOW|DBG_INIT, "init_module");
2423
2424   // sanity check - cast needed as printk does not support %Zu
2425   if (sizeof(amb_mem) != 4*16 + 4*12) {
2426     PRINTK (KERN_ERR, "Fix amb_mem (is %lu words).",
2427             (unsigned long) sizeof(amb_mem));
2428     return -ENOMEM;
2429   }
2430
2431   show_version();
2432
2433   amb_check_args();
2434
2435   // get the juice
2436   return pci_register_driver(&amb_driver);
2437 }
2438
2439 /********** module exit **********/
2440
2441 static void __exit amb_module_exit (void)
2442 {
2443   PRINTD (DBG_FLOW|DBG_INIT, "cleanup_module");
2444
2445   pci_unregister_driver(&amb_driver);
2446 }
2447
2448 module_init(amb_module_init);
2449 module_exit(amb_module_exit);