*  Support for audio capture
  *  PCI function #1 of the cx2388x.
  *
+ *    (c) 2007 Trent Piepho <xyzzy@speakeasy.org>
  *    (c) 2005,2006 Ricardo Cerqueira <v4l@cerqueira.org>
  *    (c) 2005 Mauro Carvalho Chehab <mchehab@infradead.org>
  *    Based on a dummy cx88 module by Gerd Knorr <kraxel@bytesex.org>
 #define dprintk_core(level,fmt, arg...)        if (debug >= level) \
        printk(KERN_DEBUG "%s/1: " fmt, chip->core->name , ## arg)
 
-
 /****************************************************************************
        Data type declarations - Can be moded to a header file later
  ****************************************************************************/
 struct cx88_audio_dev {
        struct cx88_core           *core;
        struct cx88_dmaqueue       q;
+       u64 starttime;
 
        /* pci i/o */
        struct pci_dev             *pci;
        struct snd_card            *card;
 
        spinlock_t                 reg_lock;
+       atomic_t                   count;
 
        unsigned int               dma_size;
        unsigned int               period_size;
        unsigned int               num_periods;
 
-       struct videobuf_dmabuf dma_risc;
+       struct videobuf_dmabuf     dma_risc;
 
        int                        mixer_volume[MIXER_ADDR_LAST+1][2];
        int                        capture_source[MIXER_ADDR_LAST+1][2];
 
-       long int read_count;
-       long int read_offset;
-
-       struct cx88_buffer   *buf;
-
-       long opened;
-       struct snd_pcm_substream *substream;
+       struct cx88_buffer         *buf;
 
+       struct snd_pcm_substream   *substream;
 };
 typedef struct cx88_audio_dev snd_cx88_card_t;
 
        cx_write(MO_AUDD_LNGTH, buf->bpl);
 
        /* reset counter */
-       cx_write(MO_AUDD_GPCNTRL,GP_COUNT_CONTROL_RESET);
+       cx_write(MO_AUDD_GPCNTRL, GP_COUNT_CONTROL_RESET);
+       atomic_set(&chip->count, 0);
 
-       dprintk(1, "Start audio DMA, %d B/line, %d lines/FIFO, %d lines/irq, "
-               "%d B/irq\n", buf->bpl, cx_read(audio_ch->cmds_start + 8)>>1,
+       dprintk(1, "Start audio DMA, %d B/line, %d lines/FIFO, %d periods, %d "
+               "byte buffer\n", buf->bpl, cx_read(audio_ch->cmds_start + 8)>>1,
                chip->num_periods, buf->bpl * chip->num_periods);
 
-       dprintk(1, "Enabling IRQ, setting mask from 0x%x to 0x%x\n",
-               chip->core->pci_irqmask,
-               chip->core->pci_irqmask | PCI_INT_AUDINT);
-
        /* Enables corresponding bits at AUD_INT_STAT */
        cx_write(MO_AUD_INTMSK, AUD_INT_OPC_ERR | AUD_INT_DN_SYNC |
                                AUD_INT_DN_RISCI2 | AUD_INT_DN_RISCI1);
        return 0;
 }
 
-#define MAX_IRQ_LOOP 10
+#define MAX_IRQ_LOOP 50
 
 /*
  * BOARD Specific: IRQ dma bits
 {
        struct cx88_core *core = chip->core;
        u32 status, mask;
-       u32 count;
 
        status = cx_read(MO_AUD_INTSTAT);
        mask   = cx_read(MO_AUD_INTMSK);
-       if (0 == (status & mask)) {
-               spin_unlock(&chip->reg_lock);
+       if (0 == (status & mask))
                return;
-       }
        cx_write(MO_AUD_INTSTAT, status);
        if (debug > 1  ||  (status & mask & ~0xff))
                cx88_print_irqbits(core->name, "irq aud",
                                   status, mask);
        /* risc op code error */
        if (status & AUD_INT_OPC_ERR) {
-               printk(KERN_WARNING "%s/0: audio risc op code error\n",core->name);
+               printk(KERN_WARNING "%s/1: Audio risc op code error\n",core->name);
                cx_clear(MO_AUD_DMACNTRL, 0x11);
                cx88_sram_channel_dump(core, &cx88_sram_channels[SRAM_CH25]);
        }
-
+       if (status & AUD_INT_DN_SYNC) {
+               dprintk(1, "Downstream sync error\n");
+               cx_write(MO_AUDD_GPCNTRL, GP_COUNT_CONTROL_RESET);
+               return;
+       }
        /* risc1 downstream */
        if (status & AUD_INT_DN_RISCI1) {
-               spin_lock(&chip->reg_lock);
-               count = cx_read(MO_AUDD_GPCNT);
-               spin_unlock(&chip->reg_lock);
-               if (chip->read_count == 0)
-                       chip->read_count += chip->dma_size;
-       }
-
-       if  (chip->read_count >= chip->period_size) {
-               dprintk(2, "Elapsing period\n");
+               atomic_set(&chip->count, cx_read(MO_AUDD_GPCNT));
                snd_pcm_period_elapsed(chip->substream);
        }
-
-       dprintk(3,"Leaving audio IRQ handler...\n");
-
        /* FIXME: Any other status should deserve a special handling? */
 }
 
                        (core->pci_irqmask | PCI_INT_AUDINT);
                if (0 == status)
                        goto out;
-               dprintk( 3, "cx8801_irq\n" );
-               dprintk( 3, "    loop: %d/%d\n", loop, MAX_IRQ_LOOP );
-               dprintk( 3, "    status: %d\n", status );
+               dprintk(3, "cx8801_irq loop %d/%d, status %x\n",
+                       loop, MAX_IRQ_LOOP, status);
                handled = 1;
                cx_write(MO_PCI_INTSTAT, status);
 
                if (status & core->pci_irqmask)
                        cx88_core_irq(core, status);
-               if (status & PCI_INT_AUDINT) {
-                       dprintk( 2, "    ALSA IRQ handling\n" );
+               if (status & PCI_INT_AUDINT)
                        cx8801_aud_irq(chip);
-               }
-       };
+       }
 
        if (MAX_IRQ_LOOP == loop) {
-               dprintk( 0, "clearing mask\n" );
-               dprintk(1,"%s/0: irq loop -- clearing mask\n",
+               printk(KERN_ERR
+                      "%s/1: IRQ loop detected, disabling interrupts\n",
                       core->name);
                cx_clear(MO_PCI_INTMSK, PCI_INT_AUDINT);
        }
 
        chip->dma_size = 0;
 
-       return 0;
+       return 0;
 }
 
 /****************************************************************************
 /*
  * Digital hardware definition
  */
+#define DEFAULT_FIFO_SIZE      4096
 static struct snd_pcm_hardware snd_cx88_digital_hw = {
        .info = SNDRV_PCM_INFO_MMAP |
                SNDRV_PCM_INFO_INTERLEAVED |
        .rate_max =             48000,
        .channels_min = 2,
        .channels_max = 2,
-       .buffer_bytes_max = (2*2048),
-       .period_bytes_min = 2048,
-       .period_bytes_max = 2048,
-       .periods_min = 2,
-       .periods_max = 2,
+       /* Analog audio output will be full of clicks and pops if there
+          are not exactly four lines in the SRAM FIFO buffer.  */
+       .period_bytes_min = DEFAULT_FIFO_SIZE/4,
+       .period_bytes_max = DEFAULT_FIFO_SIZE/4,
+       .periods_min = 1,
+       .periods_max = 1024,
+       .buffer_bytes_max = (1024*1024),
 };
 
-/*
- * audio pcm capture runtime free
- */
-static void snd_card_cx88_runtime_free(struct snd_pcm_runtime *runtime)
-{
-}
 /*
  * audio pcm capture open callback
  */
        struct snd_pcm_runtime *runtime = substream->runtime;
        int err;
 
-       if (test_and_set_bit(0, &chip->opened))
-               return -EBUSY;
-
-       err = snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS);
+       err = snd_pcm_hw_constraint_pow2(runtime, 0, SNDRV_PCM_HW_PARAM_PERIODS);
        if (err < 0)
                goto _error;
 
        chip->substream = substream;
 
-       chip->read_count = 0;
-       chip->read_offset = 0;
-
-       runtime->private_free = snd_card_cx88_runtime_free;
        runtime->hw = snd_cx88_digital_hw;
 
+       if (cx88_sram_channels[SRAM_CH25].fifo_size != DEFAULT_FIFO_SIZE) {
+               unsigned int bpl = cx88_sram_channels[SRAM_CH25].fifo_size / 4;
+               bpl &= ~7; /* must be multiple of 8 */
+               runtime->hw.period_bytes_min = bpl;
+               runtime->hw.period_bytes_max = bpl;
+       }
+
        return 0;
 _error:
        dprintk(1,"Error opening PCM!\n");
-       clear_bit(0, &chip->opened);
-       smp_mb__after_clear_bit();
        return err;
 }
 
  */
 static int snd_cx88_close(struct snd_pcm_substream *substream)
 {
-       snd_cx88_card_t *chip = snd_pcm_substream_chip(substream);
-
-       clear_bit(0, &chip->opened);
-       smp_mb__after_clear_bit();
-
        return 0;
 }
 
 {
        snd_cx88_card_t *chip = snd_pcm_substream_chip(substream);
        struct cx88_buffer *buf;
+       int ret;
 
        if (substream->runtime->dma_area) {
                dsp_buffer_free(chip);
                substream->runtime->dma_area = NULL;
        }
 
-
        chip->period_size = params_period_bytes(hw_params);
        chip->num_periods = params_periods(hw_params);
        chip->dma_size = chip->period_size * params_periods(hw_params);
 
        BUG_ON(!chip->dma_size);
+       BUG_ON(chip->num_periods & (chip->num_periods-1));
 
-       dprintk(1,"Setting buffer\n");
-
-       buf = kzalloc(sizeof(*buf),GFP_KERNEL);
+       buf = kzalloc(sizeof(*buf), GFP_KERNEL);
        if (NULL == buf)
                return -ENOMEM;
 
        buf->vb.memory = V4L2_MEMORY_MMAP;
+       buf->vb.field  = V4L2_FIELD_NONE;
        buf->vb.width  = chip->period_size;
+       buf->bpl       = chip->period_size;
        buf->vb.height = chip->num_periods;
        buf->vb.size   = chip->dma_size;
-       buf->vb.field  = V4L2_FIELD_NONE;
 
        videobuf_dma_init(&buf->vb.dma);
-       videobuf_dma_init_kernel(&buf->vb.dma,PCI_DMA_FROMDEVICE,
+       ret = videobuf_dma_init_kernel(&buf->vb.dma, PCI_DMA_FROMDEVICE,
                        (PAGE_ALIGN(buf->vb.size) >> PAGE_SHIFT));
+       if (ret < 0)
+               goto error;
 
-       videobuf_pci_dma_map(chip->pci,&buf->vb.dma);
-
+       ret = videobuf_pci_dma_map(chip->pci,&buf->vb.dma);
+       if (ret < 0)
+               goto error;
 
-       cx88_risc_databuffer(chip->pci, &buf->risc,
-                       buf->vb.dma.sglist,
-                       buf->vb.width, buf->vb.height);
+       ret = cx88_risc_databuffer(chip->pci, &buf->risc, buf->vb.dma.sglist,
+                                  buf->vb.width, buf->vb.height, 1);
+       if (ret < 0)
+               goto error;
 
-       buf->risc.jmp[0] = cpu_to_le32(RISC_JUMP | RISC_IRQ1 | RISC_CNT_INC);
+       /* Loop back to start of program */
+       buf->risc.jmp[0] = cpu_to_le32(RISC_JUMP|RISC_IRQ1|RISC_CNT_INC);
        buf->risc.jmp[1] = cpu_to_le32(buf->risc.dma);
 
        buf->vb.state = STATE_PREPARED;
 
-       buf->bpl = chip->period_size;
        chip->buf = buf;
        chip->dma_risc = buf->vb.dma;
 
-       dprintk(1,"Buffer ready at %u\n",chip->dma_risc.nr_pages);
        substream->runtime->dma_area = chip->dma_risc.vmalloc;
        return 0;
+
+error:
+       kfree(buf);
+       return ret;
 }
 
 /*
        return 0;
 }
 
-
 /*
  * trigger callback
  */
        snd_cx88_card_t *chip = snd_pcm_substream_chip(substream);
        int err;
 
+       /* Local interrupts are already disabled by ALSA */
        spin_lock(&chip->reg_lock);
 
        switch (cmd) {
 {
        snd_cx88_card_t *chip = snd_pcm_substream_chip(substream);
        struct snd_pcm_runtime *runtime = substream->runtime;
+       u16 count;
 
-       if (chip->read_count) {
-               chip->read_count -= snd_pcm_lib_period_bytes(substream);
-               chip->read_offset += snd_pcm_lib_period_bytes(substream);
-               if (chip->read_offset == chip->dma_size)
-                       chip->read_offset = 0;
-       }
-
-       dprintk(2, "Pointer time, will return %li, read %li\n",chip->read_offset,chip->read_count);
-       return bytes_to_frames(runtime, chip->read_offset);
+       count = atomic_read(&chip->count);
 
+//     dprintk(2, "%s - count %d (+%u), period %d, frame %lu\n", __FUNCTION__,
+//             count, new, count & (runtime->periods-1),
+//             runtime->period_size * (count & (runtime->periods-1)));
+       return runtime->period_size * (count & (runtime->periods-1));
 }
 
 /*
        int v;
        u32 old_control;
 
+       /* Do we really know this will always be called with IRQs on? */
        spin_lock_irq(&chip->reg_lock);
+
        old_control = 0x3f - (cx_read(AUD_VOL_CTL) & 0x3f);
        v = 0x3f - (value->value.integer.value[0] & 0x3f);
        cx_andor(AUD_VOL_CTL, 0x3f, v);
+
        spin_unlock_irq(&chip->reg_lock);
 
        return v != old_control;
 
 
 #define NO_SYNC_LINE (-1U)
 
+/* @lpi: lines per IRQ, or 0 to generate no IRQs. Note: the IRQ is
+        generated _after_ lpi lines have been transferred. */
 static u32* cx88_risc_field(u32 *rp, struct scatterlist *sglist,
                            unsigned int offset, u32 sync_line,
                            unsigned int bpl, unsigned int padding,
-                           unsigned int lines)
+                           unsigned int lines, unsigned int lpi)
 {
        struct scatterlist *sg;
-       unsigned int line,todo;
+       unsigned int line,todo,sol;
 
        /* sync instruction */
        if (sync_line != NO_SYNC_LINE)
                        offset -= sg_dma_len(sg);
                        sg++;
                }
+               if (lpi && line>0 && !(line % lpi))
+                       sol = RISC_SOL | RISC_IRQ1 | RISC_CNT_INC;
+               else
+                       sol = RISC_SOL;
                if (bpl <= sg_dma_len(sg)-offset) {
                        /* fits into current chunk */
-                       *(rp++)=cpu_to_le32(RISC_WRITE|RISC_SOL|RISC_EOL|bpl);
+                       *(rp++)=cpu_to_le32(RISC_WRITE|sol|RISC_EOL|bpl);
                        *(rp++)=cpu_to_le32(sg_dma_address(sg)+offset);
                        offset+=bpl;
                } else {
                        /* scanline needs to be split */
                        todo = bpl;
-                       *(rp++)=cpu_to_le32(RISC_WRITE|RISC_SOL|
+                       *(rp++)=cpu_to_le32(RISC_WRITE|sol|
                                            (sg_dma_len(sg)-offset));
                        *(rp++)=cpu_to_le32(sg_dma_address(sg)+offset);
                        todo -= (sg_dma_len(sg)-offset);
        rp = risc->cpu;
        if (UNSET != top_offset)
                rp = cx88_risc_field(rp, sglist, top_offset, 0,
-                                    bpl, padding, lines);
+                                    bpl, padding, lines, 0);
        if (UNSET != bottom_offset)
                rp = cx88_risc_field(rp, sglist, bottom_offset, 0x200,
-                                    bpl, padding, lines);
+                                    bpl, padding, lines, 0);
 
        /* save pointer to jmp instruction address */
        risc->jmp = rp;
 
 int cx88_risc_databuffer(struct pci_dev *pci, struct btcx_riscmem *risc,
                         struct scatterlist *sglist, unsigned int bpl,
-                        unsigned int lines)
+                        unsigned int lines, unsigned int lpi)
 {
        u32 instructions;
        u32 *rp;
 
        /* write risc instructions */
        rp = risc->cpu;
-       rp = cx88_risc_field(rp, sglist, 0, NO_SYNC_LINE, bpl, 0, lines);
+       rp = cx88_risc_field(rp, sglist, 0, NO_SYNC_LINE, bpl, 0, lines, lpi);
 
        /* save pointer to jmp instruction address */
        risc->jmp = rp;