]> pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - drivers/md/md.c
Merge branch 'devel' of master.kernel.org:/home/rmk/linux-2.6-arm
[linux-2.6-omap-h63xx.git] / drivers / md / md.c
index 2ce750d4be023922bcba40753c4897a64a736c90..2fe32c2619227721468cd1c8e0843bb4b122d0b2 100644 (file)
 */
 
 #include <linux/module.h>
-#include <linux/config.h>
 #include <linux/kthread.h>
 #include <linux/linkage.h>
 #include <linux/raid/md.h>
 #include <linux/raid/bitmap.h>
 #include <linux/sysctl.h>
-#include <linux/devfs_fs_kernel.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/suspend.h>
 #include <linux/poll.h>
@@ -73,6 +71,10 @@ static void autostart_arrays (int part);
 static LIST_HEAD(pers_list);
 static DEFINE_SPINLOCK(pers_lock);
 
+static void md_print_devices(void);
+
+/*
+ * Report an internal md inconsistency: print the source file and line,
+ * then call md_print_devices().  Wrapped in do { } while (0) so that
+ * "MD_BUG();" is exactly one statement and stays safe in an unbraced
+ * if/else.
+ */
+#define MD_BUG(x...) do { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } while (0)
+
 /*
  * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
  * is 1000 KB/sec, so the extra system load does not show up that much.
@@ -171,7 +173,7 @@ EXPORT_SYMBOL_GPL(md_new_event);
 /* Alternate version that can be called from interrupts
  * when calling sysfs_notify isn't needed.
  */
-void md_new_event_inintr(mddev_t *mddev)
+static void md_new_event_inintr(mddev_t *mddev)
 {
        atomic_inc(&md_event_count);
        wake_up(&md_event_waiters);
@@ -733,6 +735,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        mdp_disk_t *desc;
        mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
+       __u64 ev1 = md_event(sb);
 
        rdev->raid_disk = -1;
        rdev->flags = 0;
@@ -749,7 +752,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                mddev->layout = sb->layout;
                mddev->raid_disks = sb->raid_disks;
                mddev->size = sb->size;
-               mddev->events = md_event(sb);
+               mddev->events = ev1;
                mddev->bitmap_offset = 0;
                mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
 
@@ -798,7 +801,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 
        } else if (mddev->pers == NULL) {
                /* Insist on good event counter while assembling */
-               __u64 ev1 = md_event(sb);
                ++ev1;
                if (ev1 < mddev->events) 
                        return -EINVAL;
@@ -806,11 +808,13 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                /* if adding to array with a bitmap, then we can accept an
                 * older device ... but not too old.
                 */
-               __u64 ev1 = md_event(sb);
                if (ev1 < mddev->bitmap->events_cleared)
                        return 0;
-       } else /* just a hot-add of a new device, leave raid_disk at -1 */
-               return 0;
+       } else {
+               if (ev1 < mddev->events)
+                       /* just a hot-add of a new device, leave raid_disk at -1 */
+                       return 0;
+       }
 
        if (mddev->level != LEVEL_MULTIPATH) {
                desc = sb->disks + rdev->desc_nr;
@@ -1101,6 +1105,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+       __u64 ev1 = le64_to_cpu(sb->events);
 
        rdev->raid_disk = -1;
        rdev->flags = 0;
@@ -1116,7 +1121,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                mddev->layout = le32_to_cpu(sb->layout);
                mddev->raid_disks = le32_to_cpu(sb->raid_disks);
                mddev->size = le64_to_cpu(sb->size)/2;
-               mddev->events = le64_to_cpu(sb->events);
+               mddev->events = ev1;
                mddev->bitmap_offset = 0;
                mddev->default_bitmap_offset = 1024 >> 9;
                
@@ -1150,7 +1155,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 
        } else if (mddev->pers == NULL) {
                /* Insist of good event counter while assembling */
-               __u64 ev1 = le64_to_cpu(sb->events);
                ++ev1;
                if (ev1 < mddev->events)
                        return -EINVAL;
@@ -1158,12 +1162,13 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                /* If adding to array with a bitmap, then we can accept an
                 * older device, but not too old.
                 */
-               __u64 ev1 = le64_to_cpu(sb->events);
                if (ev1 < mddev->bitmap->events_cleared)
                        return 0;
-       } else /* just a hot-add of a new device, leave raid_disk at -1 */
-               return 0;
-
+       } else {
+               if (ev1 < mddev->events)
+                       /* just a hot-add of a new device, leave raid_disk at -1 */
+                       return 0;
+       }
        if (mddev->level != LEVEL_MULTIPATH) {
                int role;
                rdev->desc_nr = le32_to_cpu(sb->dev_number);
@@ -1522,7 +1527,7 @@ static void print_rdev(mdk_rdev_t *rdev)
                printk(KERN_INFO "md: no rdev superblock!\n");
 }
 
-void md_print_devices(void)
+static void md_print_devices(void)
 {
        struct list_head *tmp, *tmp2;
        mdk_rdev_t *rdev;
@@ -1551,15 +1556,30 @@ void md_print_devices(void)
 }
 
 
-static void sync_sbs(mddev_t * mddev)
+static void sync_sbs(mddev_t * mddev, int nospares)
 {
+       /* Update each superblock (in-memory image), but
+        * if we are allowed to, skip spares which already
+        * have the right event counter, or have one earlier
+        * (which would mean they aren't being marked as dirty
+        * with the rest of the array)
+        */
        mdk_rdev_t *rdev;
        struct list_head *tmp;
 
        ITERATE_RDEV(mddev,rdev,tmp) {
-               super_types[mddev->major_version].
-                       sync_super(mddev, rdev);
-               rdev->sb_loaded = 1;
+               if (rdev->sb_events == mddev->events ||
+                   (nospares &&
+                    rdev->raid_disk < 0 &&
+                    (rdev->sb_events&1)==0 &&
+                    rdev->sb_events+1 == mddev->events)) {
+                       /* Don't update this superblock */
+                       rdev->sb_loaded = 2;
+               } else {
+                       super_types[mddev->major_version].
+                               sync_super(mddev, rdev);
+                       rdev->sb_loaded = 1;
+               }
        }
 }
 
@@ -1569,12 +1589,42 @@ void md_update_sb(mddev_t * mddev)
        struct list_head *tmp;
        mdk_rdev_t *rdev;
        int sync_req;
+       int nospares = 0;
 
 repeat:
        spin_lock_irq(&mddev->write_lock);
        sync_req = mddev->in_sync;
        mddev->utime = get_seconds();
-       mddev->events ++;
+       if (mddev->sb_dirty == 3)
+               /* just a clean<-> dirty transition, possibly leave spares alone,
+                * though if events isn't the right even/odd, we will have to do
+                * spares after all
+                */
+               nospares = 1;
+
+       /* If this is just a dirty<->clean transition, and the array is clean
+        * and 'events' is odd, we can roll back to the previous clean state */
+       if (mddev->sb_dirty == 3
+           && (mddev->in_sync && mddev->recovery_cp == MaxSector)
+           && (mddev->events & 1))
+               mddev->events--;
+       else {
+               /* otherwise we have to go forward and ... */
+               mddev->events ++;
+               if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
+                       /* .. if the array isn't clean, insist on an odd 'events' */
+                       if ((mddev->events&1)==0) {
+                               mddev->events++;
+                               nospares = 0;
+                       }
+               } else {
+                       /* otherwise insist on an even 'events' (for clean states) */
+                       if ((mddev->events&1)) {
+                               mddev->events++;
+                               nospares = 0;
+                       }
+               }
+       }
 
        if (!mddev->events) {
                /*
@@ -1586,7 +1636,7 @@ repeat:
                mddev->events --;
        }
        mddev->sb_dirty = 2;
-       sync_sbs(mddev);
+       sync_sbs(mddev, nospares);
 
        /*
         * do not write anything to disk if using
@@ -1608,6 +1658,8 @@ repeat:
        ITERATE_RDEV(mddev,rdev,tmp) {
                char b[BDEVNAME_SIZE];
                dprintk(KERN_INFO "md: ");
+               if (rdev->sb_loaded != 1)
+                       continue; /* no noise on spare devices */
                if (test_bit(Faulty, &rdev->flags))
                        dprintk("(skipping faulty ");
 
@@ -1619,6 +1671,7 @@ repeat:
                        dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
                                bdevname(rdev->bdev,b),
                                (unsigned long long)rdev->sb_offset);
+                       rdev->sb_events = mddev->events;
 
                } else
                        dprintk(")\n");
@@ -1682,6 +1735,10 @@ state_show(mdk_rdev_t *rdev, char *page)
                len += sprintf(page+len, "%sin_sync",sep);
                sep = ",";
        }
+       if (test_bit(WriteMostly, &rdev->flags)) {
+               len += sprintf(page+len, "%swrite_mostly",sep);
+               sep = ",";
+       }
        if (!test_bit(Faulty, &rdev->flags) &&
            !test_bit(In_sync, &rdev->flags)) {
                len += sprintf(page+len, "%sspare", sep);
@@ -1690,8 +1747,40 @@ state_show(mdk_rdev_t *rdev, char *page)
        return len+sprintf(page+len, "\n");
 }
 
+/*
+ * sysfs 'state' store handler for a component device.
+ * The written string is compared (via cmd_match) against the commands
+ * listed below.  Returns len on success, -EBUSY when "remove" is asked
+ * for a device whose raid_disk is >= 0, and -EINVAL when nothing matched
+ * (which includes "faulty" while the array has no personality).
+ */
+static ssize_t
+state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+       /* can write
+        *  faulty  - simulates an error
+        *  remove  - disconnects the device
+        *  writemostly - sets write_mostly
+        *  -writemostly - clears write_mostly
+        */
+       int err = -EINVAL;
+       if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
+               md_error(rdev->mddev, rdev);
+               err = 0;
+       } else if (cmd_match(buf, "remove")) {
+               if (rdev->raid_disk >= 0)
+                       err = -EBUSY;
+               else {
+                       /* keep our own pointer to the array; presumably rdev
+                        * must not be used after the kick - TODO confirm */
+                       mddev_t *mddev = rdev->mddev;
+                       kick_rdev_from_array(rdev);
+                       md_update_sb(mddev);
+                       md_new_event(mddev);
+                       err = 0;
+               }
+       } else if (cmd_match(buf, "writemostly")) {
+               set_bit(WriteMostly, &rdev->flags);
+               err = 0;
+       } else if (cmd_match(buf, "-writemostly")) {
+               clear_bit(WriteMostly, &rdev->flags);
+               err = 0;
+       }
+       /* sysfs convention used here: whole buffer consumed on success */
+       return err ? err : len;
+}
 static struct rdev_sysfs_entry
-rdev_state = __ATTR_RO(state);
+rdev_state = __ATTR(state, 0644, state_show, state_store);
 
 static ssize_t
 super_show(mdk_rdev_t *rdev, char *page)
@@ -1888,6 +1977,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
        rdev->desc_nr = -1;
        rdev->flags = 0;
        rdev->data_offset = 0;
+       rdev->sb_events = 0;
        atomic_set(&rdev->nr_pending, 0);
        atomic_set(&rdev->read_errors, 0);
        atomic_set(&rdev->corrected_errors, 0);
@@ -2075,6 +2165,32 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 static struct md_sysfs_entry md_level =
 __ATTR(level, 0644, level_show, level_store);
 
+
+/* sysfs 'layout' show handler: print mddev->layout as one decimal line. */
+static ssize_t
+layout_show(mddev_t *mddev, char *page)
+{
+       /* a plain number; it does not carry a meaning for every level */
+       int layout = mddev->layout;
+
+       return sprintf(page, "%d\n", layout);
+}
+
+/*
+ * sysfs 'layout' store handler.
+ * Parses a decimal number (one trailing newline is tolerated) and stores
+ * it in mddev->layout.  Returns len on success, -EBUSY when a personality
+ * is already attached, -EINVAL when the input is not a number.
+ */
+static ssize_t
+layout_store(mddev_t *mddev, const char *buf, size_t len)
+{
+       char *e;
+       unsigned long n = simple_strtoul(buf, &e, 10);
+       if (mddev->pers)
+               return -EBUSY;
+
+       /* e == buf means no digit was consumed: empty input or a bare "\n",
+        * which must not be silently taken as 0 */
+       if (e == buf || (*e && *e != '\n'))
+               return -EINVAL;
+
+       mddev->layout = n;
+       return len;
+}
+/* mode 0644 like the neighbouring attributes (level, chunk_size); the
+ * attribute is a data file and has no use for execute bits */
+static struct md_sysfs_entry md_layout =
+__ATTR(layout, 0644, layout_show, layout_store);
+
+
 static ssize_t
 raid_disks_show(mddev_t *mddev, char *page)
 {
@@ -2129,6 +2245,200 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
 static struct md_sysfs_entry md_chunk_size =
 __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store);
 
+/* sysfs 'resync_start' show handler: print mddev->recovery_cp in decimal. */
+static ssize_t
+resync_start_show(mddev_t *mddev, char *page)
+{
+       unsigned long long start = (unsigned long long)mddev->recovery_cp;
+
+       return sprintf(page, "%llu\n", start);
+}
+
+/*
+ * sysfs 'resync_start' store handler.
+ * Parses a decimal number (one trailing newline is tolerated) and stores
+ * it in mddev->recovery_cp.  Returns len on success, -EBUSY when a
+ * personality is already attached, -EINVAL when the input is not a number.
+ */
+static ssize_t
+resync_start_store(mddev_t *mddev, const char *buf, size_t len)
+{
+       /* can only set resync_start if array is not yet active */
+       char *e;
+       unsigned long long n = simple_strtoull(buf, &e, 10);
+
+       if (mddev->pers)
+               return -EBUSY;
+       /* e == buf means no digit was consumed: empty input or a bare "\n",
+        * which must not be silently taken as 0 */
+       if (e == buf || (*e && *e != '\n'))
+               return -EINVAL;
+
+       mddev->recovery_cp = n;
+       return len;
+}
+static struct md_sysfs_entry md_resync_start =
+__ATTR(resync_start, 0644, resync_start_show, resync_start_store);
+
+/*
+ * The array state can be:
+ *
+ * clear
+ *     No devices, no size, no level
+ *     Equivalent to STOP_ARRAY ioctl
+ * inactive
+ *     May have some settings, but array is not active
+ *        all IO results in error
+ *     When written, doesn't tear down array, but just stops it
+ * suspended (not supported yet)
+ *     All IO requests will block. The array can be reconfigured.
+ *     Writing this, if accepted, will block until array is quiescent
+ * readonly
+ *     no resync can happen.  no superblocks get written.
+ *     write requests fail
+ * read-auto
+ *     like readonly, but behaves like 'clean' on a write request.
+ *
+ * clean - no pending writes, but otherwise active.
+ *     When written to inactive array, starts without resync
+ *     If a write request arrives then
+ *       if metadata is known, mark 'dirty' and switch to 'active'.
+ *       if not known, block and switch to write-pending
+ *     If written to an active array that has pending writes, then fails.
+ * active
+ *     fully active: IO and resync can be happening.
+ *     When written to inactive array, starts with resync
+ *
+ * write-pending
+ *     clean, but writes are blocked waiting for 'active' to be written.
+ *
+ * active-idle
+ *     like active, but no writes have been seen for a while (100msec).
+ *
+ */
+/*
+ * array_states[] is indexed by enum array_state, so both lists must be
+ * kept in the same order.  bad_word has the index of the NULL terminator,
+ * which is the value match_word() returns when no name matches.
+ */
+enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
+                  write_pending, active_idle, bad_word};
+static char *array_states[] = {
+       "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
+       "write-pending", "active-idle", NULL };
+
+/*
+ * Look 'word' up in the NULL-terminated string array 'list' using
+ * cmd_match().  Returns the index of the first matching entry, or the
+ * index of the NULL terminator when nothing matches.
+ */
+static int match_word(const char *word, char **list)
+{
+       int idx = 0;
+
+       while (list[idx] && !cmd_match(word, list[idx]))
+               idx++;
+       return idx;
+}
+
+/*
+ * sysfs 'array_state' show handler: derive the current state from
+ * mddev->pers, ->ro, ->in_sync and ->safemode and print its name.
+ */
+static ssize_t
+array_state_show(mddev_t *mddev, char *page)
+{
+       enum array_state state = inactive;
+
+       if (!mddev->pers) {
+               /* not running: "clear" only when nothing is configured */
+               if (list_empty(&mddev->disks) &&
+                   mddev->raid_disks == 0 &&
+                   mddev->size == 0)
+                       state = clear;
+               else
+                       state = inactive;
+       } else {
+               int ro = mddev->ro;
+
+               if (ro == 1)
+                       state = readonly;
+               else if (ro == 2)
+                       state = read_auto;
+               else if (ro == 0) {
+                       if (mddev->in_sync)
+                               state = clean;
+                       else if (mddev->safemode)
+                               state = active_idle;
+                       else
+                               state = active;
+               }
+               /* any other ->ro value keeps the initial "inactive" */
+       }
+       return sprintf(page, "%s\n", array_states[state]);
+}
+
+static int do_md_stop(mddev_t * mddev, int ro);
+static int do_md_run(mddev_t * mddev);
+static int restart_array(mddev_t *mddev);
+
+/*
+ * sysfs 'array_state' store handler.
+ * Maps the written word to an enum array_state with match_word() and
+ * performs the matching transition.  Returns len on success or a negative
+ * errno: -EINVAL for unknown, unsupported or not-applicable words, -EBUSY
+ * when the array is still in use (or, for "clean", has writes pending),
+ * or whatever do_md_stop()/do_md_run() reported.
+ */
+static ssize_t
+array_state_store(mddev_t *mddev, const char *buf, size_t len)
+{
+       int err = -EINVAL;
+       enum array_state st = match_word(buf, array_states);
+       switch(st) {
+       case bad_word:
+               break;
+       case clear:
+               /* stopping an active array */
+               if (mddev->pers) {
+                       if (atomic_read(&mddev->active) > 1)
+                               return -EBUSY;
+                       err = do_md_stop(mddev, 0);
+               }
+               break;
+       case inactive:
+               /* stopping an active array */
+               if (mddev->pers) {
+                       if (atomic_read(&mddev->active) > 1)
+                               return -EBUSY;
+                       err = do_md_stop(mddev, 2);
+               }
+               break;
+       case suspended:
+               break; /* not supported yet */
+       case readonly:
+               if (mddev->pers)
+                       err = do_md_stop(mddev, 1);
+               else {
+                       mddev->ro = 1;
+                       err = do_md_run(mddev);
+               }
+               break;
+       case read_auto:
+               /* stopping an active array */
+               if (mddev->pers) {
+                       err = do_md_stop(mddev, 1);
+                       if (err == 0)
+                               mddev->ro = 2; /* FIXME mark devices writable */
+               } else {
+                       mddev->ro = 2;
+                       err = do_md_run(mddev);
+               }
+               break;
+       case clean:
+               if (mddev->pers) {
+                       /* NOTE(review): restart_array()'s result is ignored */
+                       restart_array(mddev);
+                       spin_lock_irq(&mddev->write_lock);
+                       if (atomic_read(&mddev->writes_pending) == 0) {
+                               mddev->in_sync = 1;
+                               mddev->sb_dirty = 1;
+                               /* report success; err used to stay -EINVAL here */
+                               err = 0;
+                       } else
+                               /* cannot be marked clean with writes in flight */
+                               err = -EBUSY;
+                       spin_unlock_irq(&mddev->write_lock);
+               } else {
+                       mddev->ro = 0;
+                       /* MaxSector: start without a resync */
+                       mddev->recovery_cp = MaxSector;
+                       err = do_md_run(mddev);
+               }
+               break;
+       case active:
+               if (mddev->pers) {
+                       /* NOTE(review): restart_array()'s result is ignored */
+                       restart_array(mddev);
+                       mddev->sb_dirty = 0;
+                       wake_up(&mddev->sb_wait);
+                       err = 0;
+               } else {
+                       mddev->ro = 0;
+                       err = do_md_run(mddev);
+               }
+               break;
+       case write_pending:
+       case active_idle:
+               /* these cannot be set */
+               break;
+       }
+       if (err)
+               return err;
+       else
+               return len;
+}
+static struct md_sysfs_entry md_array_state = __ATTR(array_state, 0644, array_state_show, array_state_store);
+
 static ssize_t
 null_show(mddev_t *mddev, char *page)
 {
@@ -2491,12 +2801,15 @@ __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
 
 static struct attribute *md_default_attrs[] = {
        &md_level.attr,
+       &md_layout.attr,
        &md_raid_disks.attr,
        &md_chunk_size.attr,
        &md_size.attr,
+       &md_resync_start.attr,
        &md_metadata.attr,
        &md_new_device.attr,
        &md_safe_delay.attr,
+       &md_array_state.attr,
        NULL,
 };
 
@@ -2596,13 +2909,10 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
        }
        disk->major = MAJOR(dev);
        disk->first_minor = unit << shift;
-       if (partitioned) {
+       if (partitioned)
                sprintf(disk->disk_name, "md_d%d", unit);
-               sprintf(disk->devfs_name, "md/d%d", unit);
-       } else {
+       else
                sprintf(disk->disk_name, "md%d", unit);
-               sprintf(disk->devfs_name, "md/%d", unit);
-       }
        disk->fops = &md_fops;
        disk->private_data = mddev;
        disk->queue = mddev->queue;
@@ -2863,17 +3173,45 @@ static int restart_array(mddev_t *mddev)
                md_wakeup_thread(mddev->thread);
                md_wakeup_thread(mddev->sync_thread);
                err = 0;
-       } else {
-               printk(KERN_ERR "md: %s has no personality assigned.\n",
-                       mdname(mddev));
+       } else
                err = -EINVAL;
-       }
 
 out:
        return err;
 }
 
-static int do_md_stop(mddev_t * mddev, int ro)
+/* similar to deny_write_access, but accounts for our holding a reference
+ * to the file ourselves.
+ * Under inode->i_lock: returns -ETXTBSY if i_writecount is greater than 1
+ * (presumably a writer other than ourselves - TODO confirm), otherwise
+ * sets i_writecount to -1 and returns 0.
+ */
+static int deny_bitmap_write_access(struct file * file)
+{
+       struct inode *inode = file->f_mapping->host;
+
+       spin_lock(&inode->i_lock);
+       if (atomic_read(&inode->i_writecount) > 1) {
+               spin_unlock(&inode->i_lock);
+               return -ETXTBSY;
+       }
+       atomic_set(&inode->i_writecount, -1);
+       spin_unlock(&inode->i_lock);
+
+       return 0;
+}
+
+/* Counterpart of deny_bitmap_write_access(), which sets i_writecount to -1:
+ * set the inode's i_writecount to 1 again, under inode->i_lock.
+ */
+static void restore_bitmap_write_access(struct file *file)
+{
+       struct inode *inode = file->f_mapping->host;
+
+       spin_lock(&inode->i_lock);
+       atomic_set(&inode->i_writecount, 1);
+       spin_unlock(&inode->i_lock);
+}
+
+/* mode:
+ *   0 - completely stop and dis-assemble array
+ *   1 - switch to readonly
+ *   2 - stop but do not disassemble array
+ */
+static int do_md_stop(mddev_t * mddev, int mode)
 {
        int err = 0;
        struct gendisk *disk = mddev->gendisk;
@@ -2895,12 +3233,15 @@ static int do_md_stop(mddev_t * mddev, int ro)
 
                invalidate_partition(disk, 0);
 
-               if (ro) {
+               switch(mode) {
+               case 1: /* readonly */
                        err  = -ENXIO;
                        if (mddev->ro==1)
                                goto out;
                        mddev->ro = 1;
-               } else {
+                       break;
+               case 0: /* disassemble */
+               case 2: /* stop */
                        bitmap_flush(mddev);
                        md_super_wait(mddev);
                        if (mddev->ro)
@@ -2920,7 +3261,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
                        mddev->in_sync = 1;
                        md_update_sb(mddev);
                }
-               if (ro)
+               if (mode == 1)
                        set_disk_ro(disk, 1);
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        }
@@ -2928,7 +3269,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
        /*
         * Free resources if final stop
         */
-       if (!ro) {
+       if (mode == 0) {
                mdk_rdev_t *rdev;
                struct list_head *tmp;
                struct gendisk *disk;
@@ -2936,7 +3277,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
 
                bitmap_destroy(mddev);
                if (mddev->bitmap_file) {
-                       atomic_set(&mddev->bitmap_file->f_dentry->d_inode->i_writecount, 1);
+                       restore_bitmap_write_access(mddev->bitmap_file);
                        fput(mddev->bitmap_file);
                        mddev->bitmap_file = NULL;
                }
@@ -2952,6 +3293,10 @@ static int do_md_stop(mddev_t * mddev, int ro)
                export_array(mddev);
 
                mddev->array_size = 0;
+               mddev->size = 0;
+               mddev->raid_disks = 0;
+               mddev->recovery_cp = 0;
+
                disk = mddev->gendisk;
                if (disk)
                        set_capacity(disk, 0);
@@ -3540,23 +3885,6 @@ abort_export:
        return err;
 }
 
-/* similar to deny_write_access, but accounts for our holding a reference
- * to the file ourselves */
-static int deny_bitmap_write_access(struct file * file)
-{
-       struct inode *inode = file->f_mapping->host;
-
-       spin_lock(&inode->i_lock);
-       if (atomic_read(&inode->i_writecount) > 1) {
-               spin_unlock(&inode->i_lock);
-               return -ETXTBSY;
-       }
-       atomic_set(&inode->i_writecount, -1);
-       spin_unlock(&inode->i_lock);
-
-       return 0;
-}
-
 static int set_bitmap_file(mddev_t *mddev, int fd)
 {
        int err;
@@ -3597,12 +3925,17 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
                mddev->pers->quiesce(mddev, 1);
                if (fd >= 0)
                        err = bitmap_create(mddev);
-               if (fd < 0 || err)
+               if (fd < 0 || err) {
                        bitmap_destroy(mddev);
+                       fd = -1; /* make sure to put the file */
+               }
                mddev->pers->quiesce(mddev, 0);
-       } else if (fd < 0) {
-               if (mddev->bitmap_file)
+       }
+       if (fd < 0) {
+               if (mddev->bitmap_file) {
+                       restore_bitmap_write_access(mddev->bitmap_file);
                        fput(mddev->bitmap_file);
+               }
                mddev->bitmap_file = NULL;
        }
 
@@ -4687,7 +5020,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
                spin_lock_irq(&mddev->write_lock);
                if (mddev->in_sync) {
                        mddev->in_sync = 0;
-                       mddev->sb_dirty = 1;
+                       mddev->sb_dirty = 3;
                        md_wakeup_thread(mddev->thread);
                }
                spin_unlock_irq(&mddev->write_lock);
@@ -5034,7 +5367,7 @@ void md_check_recovery(mddev_t *mddev)
                if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
                    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
                        mddev->in_sync = 1;
-                       mddev->sb_dirty = 1;
+                       mddev->sb_dirty = 3;
                }
                if (mddev->safemode == 1)
                        mddev->safemode = 0;
@@ -5200,8 +5533,6 @@ static void md_geninit(void)
 
 static int __init md_init(void)
 {
-       int minor;
-
        printk(KERN_INFO "md: md driver %d.%d.%d MAX_MD_DEVS=%d,"
                        " MD_SB_DISKS=%d\n",
                        MD_MAJOR_VERSION, MD_MINOR_VERSION,
@@ -5215,23 +5546,11 @@ static int __init md_init(void)
                unregister_blkdev(MAJOR_NR, "md");
                return -1;
        }
-       devfs_mk_dir("md");
        blk_register_region(MKDEV(MAJOR_NR, 0), MAX_MD_DEVS, THIS_MODULE,
                                md_probe, NULL, NULL);
        blk_register_region(MKDEV(mdp_major, 0), MAX_MD_DEVS<<MdpMinorShift, THIS_MODULE,
                            md_probe, NULL, NULL);
 
-       for (minor=0; minor < MAX_MD_DEVS; ++minor)
-               devfs_mk_bdev(MKDEV(MAJOR_NR, minor),
-                               S_IFBLK|S_IRUSR|S_IWUSR,
-                               "md/%d", minor);
-
-       for (minor=0; minor < MAX_MD_DEVS; ++minor)
-               devfs_mk_bdev(MKDEV(mdp_major, minor<<MdpMinorShift),
-                             S_IFBLK|S_IRUSR|S_IWUSR,
-                             "md/mdp%d", minor);
-
-
        register_reboot_notifier(&md_notifier);
        raid_table_header = register_sysctl_table(raid_root_table, 1);
 
@@ -5287,15 +5606,9 @@ static __exit void md_exit(void)
 {
        mddev_t *mddev;
        struct list_head *tmp;
-       int i;
+
        blk_unregister_region(MKDEV(MAJOR_NR,0), MAX_MD_DEVS);
        blk_unregister_region(MKDEV(mdp_major,0), MAX_MD_DEVS << MdpMinorShift);
-       for (i=0; i < MAX_MD_DEVS; i++)
-               devfs_remove("md/%d", i);
-       for (i=0; i < MAX_MD_DEVS; i++)
-               devfs_remove("md/d%d", i);
-
-       devfs_remove("md");
 
        unregister_blkdev(MAJOR_NR,"md");
        unregister_blkdev(mdp_major, "mdp");
@@ -5345,7 +5658,6 @@ EXPORT_SYMBOL(md_write_end);
 EXPORT_SYMBOL(md_register_thread);
 EXPORT_SYMBOL(md_unregister_thread);
 EXPORT_SYMBOL(md_wakeup_thread);
-EXPORT_SYMBOL(md_print_devices);
 EXPORT_SYMBOL(md_check_recovery);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md");