pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - drivers/md/bitmap.c
[PATCH] md: use queue_hardsect_size instead of block_size for md superblock size...
[linux-2.6-omap-h63xx.git] / drivers / md / bitmap.c
index 204564dc6a0d9f111d73bfe04e00a5154122b89e..2fba2bbe72d8f627136d5983b40c421c55220f6d 100644 (file)
@@ -108,7 +108,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
 {
        unsigned char *page;
 
-#if INJECT_FAULTS_1
+#ifdef INJECT_FAULTS_1
        page = NULL;
 #else
        page = kmalloc(PAGE_SIZE, GFP_NOIO);
@@ -116,7 +116,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
        if (!page)
                printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
        else
-               printk("%s: bitmap_alloc_page: allocated page at %p\n",
+               PRINTK("%s: bitmap_alloc_page: allocated page at %p\n",
                        bmname(bitmap), page);
        return page;
 }
@@ -258,14 +258,72 @@ char *file_path(struct file *file, char *buf, int count)
  * basic page I/O operations
  */
 
+/* IO operations when bitmap is stored near all superblocks */
+/*
+ * Read bitmap page 'index' from the first in-sync, non-faulty rdev for
+ * which sync_page_io() returns non-zero.  Returns the page with page->index
+ * set, ERR_PTR(-ENOMEM) if no page was allocated, or ERR_PTR(-EIO) otherwise.
+ */
+static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index)
+{
+       mdk_rdev_t *rdev;
+       struct list_head *tmp;
+       struct page *page = alloc_page(GFP_KERNEL);
+       sector_t target;
+
+       if (!page)
+               return ERR_PTR(-ENOMEM);
+       ITERATE_RDEV(mddev, rdev, tmp) {
+               if (! rdev->in_sync || rdev->faulty)
+                       continue;
+               target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);
+               if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
+                       page->index = index;
+                       return page;
+               }
+       }
+       page_cache_release(page); /* no rdev could be read: don't leak the page */
+       return ERR_PTR(-EIO);
+}
+
+/* Pass 'page' to md_super_write() for each in-sync, non-faulty rdev; when
+ * 'wait' is set, sleep until mddev->pending_writes is zero.  Returns 0. */
+static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wait)
+{
+       struct list_head *pos;
+       mdk_rdev_t *dev;
+       ITERATE_RDEV(mddev, dev, pos) {
+               sector_t where;
+               if (!dev->in_sync || dev->faulty)
+                       continue;
+               where = (dev->sb_offset<<1) + offset + page->index * (PAGE_SIZE/512);
+               md_super_write(mddev, dev, where, PAGE_SIZE, page);
+       }
+       if (wait)
+               wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+       return 0;
+}
+
 /*
- * write out a page
+ * write out a page to a file
  */
 static int write_page(struct bitmap *bitmap, struct page *page, int wait)
 {
        int ret = -ENOMEM;
 
-       lock_page(page);
+       if (bitmap->file == NULL)
+               return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);
+
+       if (wait)
+               lock_page(page);
+       else {
+               if (TestSetPageLocked(page))
+                       return -EAGAIN; /* already locked */
+               if (PageWriteback(page)) {
+                       unlock_page(page);
+                       return -EAGAIN;
+               }
+       }
 
        ret = page->mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
        if (!ret)
@@ -352,7 +410,7 @@ int bitmap_update_sb(struct bitmap *bitmap)
        if (!bitmap->mddev->degraded)
                sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
        kunmap(bitmap->sb_page);
-       return write_page(bitmap, bitmap->sb_page, 0);
+       return write_page(bitmap, bitmap->sb_page, 1);
 }
 
 /* print out the bitmap file superblock */
@@ -380,6 +438,7 @@ void bitmap_print_sb(struct bitmap *bitmap)
        printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
        printk(KERN_DEBUG "     sync size: %llu KB\n",
                        (unsigned long long)le64_to_cpu(sb->sync_size)/2);
+       printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
        kunmap(bitmap->sb_page);
 }
 
@@ -388,13 +447,18 @@ static int bitmap_read_sb(struct bitmap *bitmap)
 {
        char *reason = NULL;
        bitmap_super_t *sb;
-       unsigned long chunksize, daemon_sleep;
+       unsigned long chunksize, daemon_sleep, write_behind;
        unsigned long bytes_read;
        unsigned long long events;
        int err = -EINVAL;
 
        /* page 0 is the superblock, read it... */
-       bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read);
+       if (bitmap->file)
+               bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read);
+       else {
+               bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0);
+               bytes_read = PAGE_SIZE;
+       }
        if (IS_ERR(bitmap->sb_page)) {
                err = PTR_ERR(bitmap->sb_page);
                bitmap->sb_page = NULL;
@@ -412,6 +476,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
 
        chunksize = le32_to_cpu(sb->chunksize);
        daemon_sleep = le32_to_cpu(sb->daemon_sleep);
+       write_behind = le32_to_cpu(sb->write_behind);
 
        /* verify that the bitmap-specific fields are valid */
        if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -423,7 +488,9 @@ static int bitmap_read_sb(struct bitmap *bitmap)
        else if ((1 << ffz(~chunksize)) != chunksize)
                reason = "bitmap chunksize not a power of 2";
        else if (daemon_sleep < 1 || daemon_sleep > 15)
-               reason = "daemon sleep period out of range";
+               reason = "daemon sleep period out of range (1-15s)";
+       else if (write_behind > COUNTER_MAX)
+               reason = "write-behind limit out of range (0 - 16383)";
        if (reason) {
                printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
                        bmname(bitmap), reason);
@@ -456,8 +523,12 @@ success:
        /* assign fields using values from superblock */
        bitmap->chunksize = chunksize;
        bitmap->daemon_sleep = daemon_sleep;
+       bitmap->daemon_lastrun = jiffies;
+       bitmap->max_write_behind = write_behind;
        bitmap->flags |= sb->state;
        bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
+       if (sb->state & BITMAP_STALE)
+               bitmap->events_cleared = bitmap->mddev->events;
        err = 0;
 out:
        kunmap(bitmap->sb_page);
@@ -555,7 +626,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
                page_cache_release(sb_page);
 }
 
-static void bitmap_stop_daemons(struct bitmap *bitmap);
+static void bitmap_stop_daemon(struct bitmap *bitmap);
 
 /* dequeue the next item in a page list -- don't call from irq context */
 static struct page_list *dequeue_page(struct bitmap *bitmap)
@@ -597,7 +668,7 @@ static void bitmap_file_put(struct bitmap *bitmap)
        bitmap->file = NULL;
        spin_unlock_irqrestore(&bitmap->lock, flags);
 
-       bitmap_stop_daemons(bitmap);
+       bitmap_stop_daemon(bitmap);
 
        drain_write_queues(bitmap);
 
@@ -625,14 +696,16 @@ static void bitmap_file_kick(struct bitmap *bitmap)
        bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET);
        bitmap_update_sb(bitmap);
 
-       path = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (path)
-               ptr = file_path(bitmap->file, path, PAGE_SIZE);
+       if (bitmap->file) {
+               path = kmalloc(PAGE_SIZE, GFP_KERNEL);
+               if (path)
+                       ptr = file_path(bitmap->file, path, PAGE_SIZE);
 
-       printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n",
-               bmname(bitmap), ptr ? ptr : "");
+               printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n",
+                      bmname(bitmap), ptr ? ptr : "");
 
-       kfree(path);
+               kfree(path);
+       }
 
        bitmap_file_put(bitmap);
 
@@ -676,7 +749,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
        void *kaddr;
        unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
 
-       if (!bitmap->file || !bitmap->filemap) {
+       if (!bitmap->filemap) {
                return;
        }
 
@@ -707,6 +780,7 @@ int bitmap_unplug(struct bitmap *bitmap)
        unsigned long i, attr, flags;
        struct page *page;
        int wait = 0;
+       int err;
 
        if (!bitmap)
                return 0;
@@ -715,7 +789,7 @@ int bitmap_unplug(struct bitmap *bitmap)
         * flushed out to disk */
        for (i = 0; i < bitmap->file_pages; i++) {
                spin_lock_irqsave(&bitmap->lock, flags);
-               if (!bitmap->file || !bitmap->filemap) {
+               if (!bitmap->filemap) {
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        return 0;
                }
@@ -727,22 +801,33 @@ int bitmap_unplug(struct bitmap *bitmap)
                        wait = 1;
                spin_unlock_irqrestore(&bitmap->lock, flags);
 
-               if (attr & (BITMAP_PAGE_DIRTY | BITMAP_PAGE_NEEDWRITE))
-                       if (write_page(bitmap, page, 0))
+               if (attr & (BITMAP_PAGE_DIRTY | BITMAP_PAGE_NEEDWRITE)) {
+                       err = write_page(bitmap, page, 0);
+                       if (err == -EAGAIN) {
+                               if (attr & BITMAP_PAGE_DIRTY)
+                                       err = write_page(bitmap, page, 1);
+                               else
+                                       err = 0;
+                       }
+                       if (err)
                                return 1;
+               }
        }
        if (wait) { /* if any writes were performed, we need to wait on them */
-               spin_lock_irq(&bitmap->write_lock);
-               wait_event_lock_irq(bitmap->write_wait,
-                       list_empty(&bitmap->complete_pages), bitmap->write_lock,
-                       wake_up_process(bitmap->writeback_daemon->tsk));
-               spin_unlock_irq(&bitmap->write_lock);
+               if (bitmap->file) {
+                       spin_lock_irq(&bitmap->write_lock);
+                       wait_event_lock_irq(bitmap->write_wait,
+                                           list_empty(&bitmap->complete_pages), bitmap->write_lock,
+                                           wake_up_process(bitmap->writeback_daemon->tsk));
+                       spin_unlock_irq(&bitmap->write_lock);
+               } else
+                       wait_event(bitmap->mddev->sb_wait,
+                                  atomic_read(&bitmap->mddev->pending_writes)==0);
        }
        return 0;
 }
 
-static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
-       unsigned long sectors, int in_sync);
+static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
 /* * bitmap_init_from_disk -- called at bitmap_create time to initialize
  * the in-memory bitmap from the on-disk bitmap -- also, sets up the
  * memory mapping of the bitmap file
@@ -750,8 +835,11 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
  *   if there's no bitmap file, or if the bitmap file had been
  *   previously kicked from the array, we mark all the bits as
  *   1's in order to cause a full resync.
+ *
+ * We ignore all bits for sectors that end earlier than 'start'.
+ * This is used when reading an out-of-date bitmap...
  */
-static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
+static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 {
        unsigned long i, chunks, index, oldindex, bit;
        struct page *page = NULL, *oldpage = NULL;
@@ -764,9 +852,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
        chunks = bitmap->chunks;
        file = bitmap->file;
 
-       BUG_ON(!file);
+       BUG_ON(!file && !bitmap->offset);
 
-#if INJECT_FAULTS_3
+#ifdef INJECT_FAULTS_3
        outofdate = 1;
 #else
        outofdate = bitmap->flags & BITMAP_STALE;
@@ -779,7 +867,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
 
        num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
 
-       if (i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
+       if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
                printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
                        bmname(bitmap),
                        (unsigned long) i_size_read(file->f_mapping->host),
@@ -816,14 +904,18 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
                                 */
                                page = bitmap->sb_page;
                                offset = sizeof(bitmap_super_t);
-                       } else {
+                       } else if (file) {
                                page = read_page(file, index, &dummy);
-                               if (IS_ERR(page)) { /* read error */
-                                       ret = PTR_ERR(page);
-                                       goto out;
-                               }
                                offset = 0;
+                       } else {
+                               page = read_sb_page(bitmap->mddev, bitmap->offset, index);
+                               offset = 0;
+                       }
+                       if (IS_ERR(page)) { /* read error */
+                               ret = PTR_ERR(page);
+                               goto out;
                        }
+
                        oldindex = index;
                        oldpage = page;
                        kmap(page);
@@ -834,7 +926,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
                                 * whole page and write it out
                                 */
                                memset(page_address(page) + offset, 0xff,
-                                       PAGE_SIZE - offset);
+                                      PAGE_SIZE - offset);
                                ret = write_page(bitmap, page, 1);
                                if (ret) {
                                        kunmap(page);
@@ -848,9 +940,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
                }
                if (test_bit(bit, page_address(page))) {
                        /* if the disk bit is set, set the memory bit */
-                       bitmap_set_memory_bits(bitmap,
-                                       i << CHUNK_BLOCK_SHIFT(bitmap), 1, in_sync);
+                       bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
+                                              ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
+                               );
                        bit_cnt++;
+                       set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
                }
        }
 
@@ -874,6 +968,19 @@ out:
        return ret;
 }
 
+/*
+ * Flag every page of the stored bitmap (the byte count includes the
+ * bitmap_super_t) as BITMAP_PAGE_NEEDWRITE.  Nothing is written here;
+ * only the per-page attribute is set.
+ */
+void bitmap_write_all(struct bitmap *bitmap)
+{
+       unsigned long nbytes = (bitmap->chunks + 7) / 8 + sizeof(bitmap_super_t);
+       unsigned long pg = (nbytes + PAGE_SIZE - 1) / PAGE_SIZE;
+       for (; pg > 0; pg--)
+               bitmap->filemap_attr[pg - 1] |= BITMAP_PAGE_NEEDWRITE;
+}
+
 
 static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
 {
@@ -913,7 +1020,7 @@ int bitmap_daemon_work(struct bitmap *bitmap)
        for (j = 0; j < bitmap->chunks; j++) {
                bitmap_counter_t *bmc;
                spin_lock_irqsave(&bitmap->lock, flags);
-               if (!bitmap->file || !bitmap->filemap) {
+               if (!bitmap->filemap) {
                        /* error or shutdown */
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        break;
@@ -930,8 +1037,15 @@ int bitmap_daemon_work(struct bitmap *bitmap)
                                }
                                spin_unlock_irqrestore(&bitmap->lock, flags);
                                if (attr & BITMAP_PAGE_NEEDWRITE) {
-                                       if (write_page(bitmap, page, 0))
+                                       switch (write_page(bitmap, page, 0)) {
+                                       case -EAGAIN:
+                                               set_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
+                                               break;
+                                       case 0:
+                                               break;
+                                       default:
                                                bitmap_file_kick(bitmap);
+                                       }
                                        page_cache_release(page);
                                }
                                continue;
@@ -944,6 +1058,10 @@ int bitmap_daemon_work(struct bitmap *bitmap)
                                        clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
                                        spin_unlock_irqrestore(&bitmap->lock, flags);
                                        err = write_page(bitmap, lastpage, 0);
+                                       if (err == -EAGAIN) {
+                                               err = 0;
+                                               set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
+                                       }
                                } else {
                                        set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
                                        spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -992,6 +1110,10 @@ int bitmap_daemon_work(struct bitmap *bitmap)
                        clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
                        spin_unlock_irqrestore(&bitmap->lock, flags);
                        err = write_page(bitmap, lastpage, 0);
+                       if (err == -EAGAIN) {
+                               set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
+                               err = 0;
+                       }
                } else {
                        set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
                        spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -1034,6 +1156,9 @@ static void bitmap_writeback_daemon(mddev_t *mddev)
                err = -EINTR;
                goto out;
        }
+       if (bitmap == NULL)
+               /* about to be stopped. */
+               return;
 
        PRINTK("%s: bitmap writeback daemon woke up...\n", bmname(bitmap));
        /* wait on bitmap page writebacks */
@@ -1063,21 +1188,13 @@ static void bitmap_writeback_daemon(mddev_t *mddev)
        }
 }
 
-static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr,
+static mdk_thread_t *bitmap_start_daemon(struct bitmap *bitmap,
                                void (*func)(mddev_t *), char *name)
 {
        mdk_thread_t *daemon;
-       unsigned long flags;
        char namebuf[32];
 
-       spin_lock_irqsave(&bitmap->lock, flags);
-       *ptr = NULL;
-       if (!bitmap->file) /* no need for daemon if there's no backing file */
-               goto out_unlock;
-
-       spin_unlock_irqrestore(&bitmap->lock, flags);
-
-#if INJECT_FATAL_FAULT_2
+#ifdef INJECT_FATAL_FAULT_2
        daemon = NULL;
 #else
        sprintf(namebuf, "%%s_%s", name);
@@ -1086,47 +1203,32 @@ static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr,
        if (!daemon) {
                printk(KERN_ERR "%s: failed to start bitmap daemon\n",
                        bmname(bitmap));
-               return -ECHILD;
+               return ERR_PTR(-ECHILD);
        }
 
-       spin_lock_irqsave(&bitmap->lock, flags);
-       *ptr = daemon;
-
        md_wakeup_thread(daemon); /* start it running */
 
        PRINTK("%s: %s daemon (pid %d) started...\n",
                bmname(bitmap), name, daemon->tsk->pid);
-out_unlock:
-       spin_unlock_irqrestore(&bitmap->lock, flags);
-       return 0;
-}
 
-static int bitmap_start_daemons(struct bitmap *bitmap)
-{
-       int err = bitmap_start_daemon(bitmap, &bitmap->writeback_daemon,
-                                       bitmap_writeback_daemon, "bitmap_wb");
-       return err;
+       return daemon;
 }
 
-static void bitmap_stop_daemon(struct bitmap *bitmap, mdk_thread_t **ptr)
+static void bitmap_stop_daemon(struct bitmap *bitmap)
 {
-       mdk_thread_t *daemon;
-       unsigned long flags;
-
-       spin_lock_irqsave(&bitmap->lock, flags);
-       daemon = *ptr;
-       *ptr = NULL;
-       spin_unlock_irqrestore(&bitmap->lock, flags);
-       if (daemon)
-               md_unregister_thread(daemon); /* destroy the thread */
-}
+       /* the daemon can't stop itself... it'll just exit instead... */
+       if (bitmap->writeback_daemon && ! IS_ERR(bitmap->writeback_daemon) &&
+           current->pid != bitmap->writeback_daemon->tsk->pid) {
+               mdk_thread_t *daemon;
+               unsigned long flags;
 
-static void bitmap_stop_daemons(struct bitmap *bitmap)
-{
-       /* the daemons can't stop themselves... they'll just exit instead... */
-       if (bitmap->writeback_daemon &&
-           current->pid != bitmap->writeback_daemon->tsk->pid)
-               bitmap_stop_daemon(bitmap, &bitmap->writeback_daemon);
+               spin_lock_irqsave(&bitmap->lock, flags);
+               daemon = bitmap->writeback_daemon;
+               bitmap->writeback_daemon = NULL;
+               spin_unlock_irqrestore(&bitmap->lock, flags);
+               if (daemon && ! IS_ERR(daemon))
+                       md_unregister_thread(daemon); /* destroy the thread */
+       }
 }
 
 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
@@ -1166,9 +1268,16 @@ static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
        }
 }
 
-int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors)
+int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
 {
        if (!bitmap) return 0;
+
+       if (behind) {
+               atomic_inc(&bitmap->behind_writes);
+               PRINTK(KERN_DEBUG "inc write-behind count %d/%d\n",
+                 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
+       }
+
        while (sectors) {
                int blocks;
                bitmap_counter_t *bmc;
@@ -1203,9 +1312,15 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
 }
 
 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
-                    int success)
+                    int success, int behind)
 {
        if (!bitmap) return;
+       if (behind) {
+               atomic_dec(&bitmap->behind_writes);
+               PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
+                 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
+       }
+
        while (sectors) {
                int blocks;
                unsigned long flags;
@@ -1235,7 +1350,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
        }
 }
 
-int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
+int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
+                       int degraded)
 {
        bitmap_counter_t *bmc;
        int rv;
@@ -1252,8 +1368,10 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
                        rv = 1;
                else if (NEEDED(*bmc)) {
                        rv = 1;
-                       *bmc |= RESYNC_MASK;
-                       *bmc &= ~NEEDED_MASK;
+                       if (!degraded) { /* don't set/clear bits if degraded */
+                               *bmc |= RESYNC_MASK;
+                               *bmc &= ~NEEDED_MASK;
+                       }
                }
        }
        spin_unlock_irq(&bitmap->lock);
@@ -1313,76 +1431,66 @@ void bitmap_close_sync(struct bitmap *bitmap)
        }
 }
 
-static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
-                                  unsigned long sectors, int in_sync)
+static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
 {
        /* For each chunk covered by any of these sectors, set the
-        * counter to 1 and set resync_needed unless in_sync.  They should all
+        * counter to 1 and set resync_needed.  They should all
         * be 0 at this point
         */
-       while (sectors) {
-               int secs;
-               bitmap_counter_t *bmc;
-               spin_lock_irq(&bitmap->lock);
-               bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
-               if (!bmc) {
-                       spin_unlock_irq(&bitmap->lock);
-                       return;
-               }
-               if (! *bmc) {
-                       struct page *page;
-                       *bmc = 1 | (in_sync? 0 : NEEDED_MASK);
-                       bitmap_count_page(bitmap, offset, 1);
-                       page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
-                       set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
-               }
+
+       int secs;
+       bitmap_counter_t *bmc;
+       spin_lock_irq(&bitmap->lock);
+       bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
+       if (!bmc) {
                spin_unlock_irq(&bitmap->lock);
-               if (sectors > secs)
-                       sectors -= secs;
-               else
-                       sectors = 0;
+               return;
        }
+       if (! *bmc) {
+               struct page *page;
+               *bmc = 1 | (needed?NEEDED_MASK:0);
+               bitmap_count_page(bitmap, offset, 1);
+               page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
+               set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
+       }
+       spin_unlock_irq(&bitmap->lock);
+
 }
 
-/* dirty the entire bitmap */
-int bitmap_setallbits(struct bitmap *bitmap)
+/*
+ * flush out any pending updates
+ */
+void bitmap_flush(mddev_t *mddev)
 {
-       unsigned long flags;
-       unsigned long j;
-
-       /* dirty the in-memory bitmap */
-       bitmap_set_memory_bits(bitmap, 0, bitmap->chunks << CHUNK_BLOCK_SHIFT(bitmap), 1);
-
-       /* dirty the bitmap file */
-       for (j = 0; j < bitmap->file_pages; j++) {
-               struct page *page = bitmap->filemap[j];
+       struct bitmap *bitmap = mddev->bitmap;
+       int sleep;
 
-               spin_lock_irqsave(&bitmap->lock, flags);
-               page_cache_get(page);
-               spin_unlock_irqrestore(&bitmap->lock, flags);
-               memset(kmap(page), 0xff, PAGE_SIZE);
-               kunmap(page);
-               if (write_page(bitmap, page, 0))
-                       return 1;
-       }
+       if (!bitmap) /* there was no bitmap */
+               return;
 
-       return 0;
+       /* run the daemon_work three times to ensure everything is flushed
+        * that can be
+        */
+       sleep = bitmap->daemon_sleep;
+       bitmap->daemon_sleep = 0;
+       bitmap_daemon_work(bitmap);
+       bitmap_daemon_work(bitmap);
+       bitmap_daemon_work(bitmap);
+       bitmap->daemon_sleep = sleep;
+       bitmap_update_sb(bitmap);
 }
 
 /*
  * free memory that was allocated
  */
-void bitmap_destroy(mddev_t *mddev)
+static void bitmap_free(struct bitmap *bitmap)
 {
        unsigned long k, pages;
        struct bitmap_page *bp;
-       struct bitmap *bitmap = mddev->bitmap;
 
        if (!bitmap) /* there was no bitmap */
                return;
 
-       mddev->bitmap = NULL; /* disconnect from the md device */
-
        /* release the bitmap file and kill the daemon */
        bitmap_file_put(bitmap);
 
@@ -1400,6 +1508,17 @@ void bitmap_destroy(mddev_t *mddev)
        kfree(bp);
        kfree(bitmap);
 }
+/*
+ * Detach the bitmap (if any) from 'mddev' and hand it to bitmap_free().
+ * mddev->bitmap is cleared before the bitmap is freed.
+ */
+void bitmap_destroy(mddev_t *mddev)
+{
+       struct bitmap *old = mddev->bitmap;
+       if (old == NULL)
+               return; /* there was no bitmap */
+       mddev->bitmap = NULL; /* disconnect from the md device */
+       bitmap_free(old);
+}
 
 /*
  * initialize the bitmap structure
@@ -1413,12 +1532,15 @@ int bitmap_create(mddev_t *mddev)
        unsigned long pages;
        struct file *file = mddev->bitmap_file;
        int err;
+       sector_t start;
 
        BUG_ON(sizeof(bitmap_super_t) != 256);
 
-       if (!file) /* bitmap disabled, nothing to do */
+       if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */
                return 0;
 
+       BUG_ON(file && mddev->bitmap_offset);
+
        bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL);
        if (!bitmap)
                return -ENOMEM;
@@ -1427,22 +1549,23 @@ int bitmap_create(mddev_t *mddev)
 
        spin_lock_init(&bitmap->lock);
        bitmap->mddev = mddev;
-       mddev->bitmap = bitmap;
 
        spin_lock_init(&bitmap->write_lock);
        INIT_LIST_HEAD(&bitmap->complete_pages);
        init_waitqueue_head(&bitmap->write_wait);
        bitmap->write_pool = mempool_create(WRITE_POOL_SIZE, write_pool_alloc,
                                write_pool_free, NULL);
+       err = -ENOMEM;
        if (!bitmap->write_pool)
-               return -ENOMEM;
+               goto error;
 
        bitmap->file = file;
-       get_file(file);
+       bitmap->offset = mddev->bitmap_offset;
+       if (file) get_file(file);
        /* read superblock from bitmap file (this sets bitmap->chunksize) */
        err = bitmap_read_sb(bitmap);
        if (err)
-               return err;
+               goto error;
 
        bitmap->chunkshift = find_first_bit(&bitmap->chunksize,
                                        sizeof(bitmap->chunksize));
@@ -1461,31 +1584,49 @@ int bitmap_create(mddev_t *mddev)
 
        bitmap->syncchunk = ~0UL;
 
-#if INJECT_FATAL_FAULT_1
+#ifdef INJECT_FATAL_FAULT_1
        bitmap->bp = NULL;
 #else
        bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
 #endif
+       err = -ENOMEM;
        if (!bitmap->bp)
-               return -ENOMEM;
+               goto error;
        memset(bitmap->bp, 0, pages * sizeof(*bitmap->bp));
 
        bitmap->flags |= BITMAP_ACTIVE;
 
        /* now that we have some pages available, initialize the in-memory
         * bitmap from the on-disk bitmap */
-       err = bitmap_init_from_disk(bitmap, mddev->recovery_cp == MaxSector);
+       start = 0;
+       if (mddev->degraded == 0
+           || bitmap->events_cleared == mddev->events)
+               /* no need to keep dirty bits to optimise a re-add of a missing device */
+               start = mddev->recovery_cp;
+       err = bitmap_init_from_disk(bitmap, start);
+
        if (err)
-               return err;
+               goto error;
 
        printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
                pages, bmname(bitmap));
 
-       /* kick off the bitmap daemons */
-       err = bitmap_start_daemons(bitmap);
-       if (err)
-               return err;
+       mddev->bitmap = bitmap;
+
+       if (file)
+               /* kick off the bitmap writeback daemon */
+               bitmap->writeback_daemon =
+                       bitmap_start_daemon(bitmap,
+                                           bitmap_writeback_daemon,
+                                           "bitmap_wb");
+
+       if (IS_ERR(bitmap->writeback_daemon))
+               return PTR_ERR(bitmap->writeback_daemon);
        return bitmap_update_sb(bitmap);
+
+ error:
+       bitmap_free(bitmap);
+       return err;
 }
 
 /* the bitmap API -- for raid personalities */