*/
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/kthread.h>
#include <linux/linkage.h>
#include <linux/raid/md.h>
#include <linux/raid/bitmap.h>
#include <linux/sysctl.h>
-#include <linux/devfs_fs_kernel.h>
#include <linux/buffer_head.h> /* for invalidate_bdev */
#include <linux/suspend.h>
#include <linux/poll.h>
/* Alternate version that can be called from interrupts
* when calling sysfs_notify isn't needed.
*/
-void md_new_event_inintr(mddev_t *mddev)
+static void md_new_event_inintr(mddev_t *mddev)
{
atomic_inc(&md_event_count);
wake_up(&md_event_waiters);
len += sprintf(page+len, "%sin_sync",sep);
sep = ",";
}
+ if (test_bit(WriteMostly, &rdev->flags)) {
+ len += sprintf(page+len, "%swrite_mostly",sep);
+ sep = ",";
+ }
if (!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags)) {
len += sprintf(page+len, "%sspare", sep);
return len+sprintf(page+len, "\n");
}
+static ssize_t
+state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+ /* can write
+ * faulty - simulates and error
+ * remove - disconnects the device
+ * writemostly - sets write_mostly
+ * -writemostly - clears write_mostly
+ */
+ int err = -EINVAL;
+ if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
+ md_error(rdev->mddev, rdev);
+ err = 0;
+ } else if (cmd_match(buf, "remove")) {
+ if (rdev->raid_disk >= 0)
+ err = -EBUSY;
+ else {
+ mddev_t *mddev = rdev->mddev;
+ kick_rdev_from_array(rdev);
+ md_update_sb(mddev);
+ md_new_event(mddev);
+ err = 0;
+ }
+ } else if (cmd_match(buf, "writemostly")) {
+ set_bit(WriteMostly, &rdev->flags);
+ err = 0;
+ } else if (cmd_match(buf, "-writemostly")) {
+ clear_bit(WriteMostly, &rdev->flags);
+ err = 0;
+ }
+ return err ? err : len;
+}
static struct rdev_sysfs_entry
-rdev_state = __ATTR_RO(state);
+rdev_state = __ATTR(state, 0644, state_show, state_store);
static ssize_t
super_show(mdk_rdev_t *rdev, char *page)
static struct md_sysfs_entry md_level =
__ATTR(level, 0644, level_show, level_store);
+
+static ssize_t
+layout_show(mddev_t *mddev, char *page)
+{
+ /* just a number, not meaningful for all levels */
+ return sprintf(page, "%d\n", mddev->layout);
+}
+
+static ssize_t
+layout_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long n = simple_strtoul(buf, &e, 10);
+ if (mddev->pers)
+ return -EBUSY;
+
+ if (!*buf || (*e && *e != '\n'))
+ return -EINVAL;
+
+ mddev->layout = n;
+ return len;
+}
+static struct md_sysfs_entry md_layout =
+__ATTR(layout, 0655, layout_show, layout_store);
+
+
static ssize_t
raid_disks_show(mddev_t *mddev, char *page)
{
static struct md_sysfs_entry md_chunk_size =
__ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store);
+static ssize_t
+resync_start_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)mddev->recovery_cp);
+}
+
+static ssize_t
+resync_start_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ /* can only set chunk_size if array is not yet active */
+ char *e;
+ unsigned long long n = simple_strtoull(buf, &e, 10);
+
+ if (mddev->pers)
+ return -EBUSY;
+ if (!*buf || (*e && *e != '\n'))
+ return -EINVAL;
+
+ mddev->recovery_cp = n;
+ return len;
+}
+static struct md_sysfs_entry md_resync_start =
+__ATTR(resync_start, 0644, resync_start_show, resync_start_store);
+
+/*
+ * The array state can be:
+ *
+ * clear
+ * No devices, no size, no level
+ * Equivalent to STOP_ARRAY ioctl
+ * inactive
+ * May have some settings, but array is not active
+ * all IO results in error
+ * When written, doesn't tear down array, but just stops it
+ * suspended (not supported yet)
+ * All IO requests will block. The array can be reconfigured.
+ * Writing this, if accepted, will block until array is quiessent
+ * readonly
+ * no resync can happen. no superblocks get written.
+ * write requests fail
+ * read-auto
+ * like readonly, but behaves like 'clean' on a write request.
+ *
+ * clean - no pending writes, but otherwise active.
+ * When written to inactive array, starts without resync
+ * If a write request arrives then
+ * if metadata is known, mark 'dirty' and switch to 'active'.
+ * if not known, block and switch to write-pending
+ * If written to an active array that has pending writes, then fails.
+ * active
+ * fully active: IO and resync can be happening.
+ * When written to inactive array, starts with resync
+ *
+ * write-pending
+ * clean, but writes are blocked waiting for 'active' to be written.
+ *
+ * active-idle
+ * like active, but no writes have been seen for a while (100msec).
+ *
+ */
+enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
+ write_pending, active_idle, bad_word};
+static char *array_states[] = {
+ "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active",
+ "write-pending", "active-idle", NULL };
+
+static int match_word(const char *word, char **list)
+{
+ int n;
+ for (n=0; list[n]; n++)
+ if (cmd_match(word, list[n]))
+ break;
+ return n;
+}
+
+static ssize_t
+array_state_show(mddev_t *mddev, char *page)
+{
+ enum array_state st = inactive;
+
+ if (mddev->pers)
+ switch(mddev->ro) {
+ case 1:
+ st = readonly;
+ break;
+ case 2:
+ st = read_auto;
+ break;
+ case 0:
+ if (mddev->in_sync)
+ st = clean;
+ else if (mddev->safemode)
+ st = active_idle;
+ else
+ st = active;
+ }
+ else {
+ if (list_empty(&mddev->disks) &&
+ mddev->raid_disks == 0 &&
+ mddev->size == 0)
+ st = clear;
+ else
+ st = inactive;
+ }
+ return sprintf(page, "%s\n", array_states[st]);
+}
+
+static int do_md_stop(mddev_t * mddev, int ro);
+static int do_md_run(mddev_t * mddev);
+static int restart_array(mddev_t *mddev);
+
+static ssize_t
+array_state_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ int err = -EINVAL;
+ enum array_state st = match_word(buf, array_states);
+ switch(st) {
+ case bad_word:
+ break;
+ case clear:
+ /* stopping an active array */
+ if (mddev->pers) {
+ if (atomic_read(&mddev->active) > 1)
+ return -EBUSY;
+ err = do_md_stop(mddev, 0);
+ }
+ break;
+ case inactive:
+ /* stopping an active array */
+ if (mddev->pers) {
+ if (atomic_read(&mddev->active) > 1)
+ return -EBUSY;
+ err = do_md_stop(mddev, 2);
+ }
+ break;
+ case suspended:
+ break; /* not supported yet */
+ case readonly:
+ if (mddev->pers)
+ err = do_md_stop(mddev, 1);
+ else {
+ mddev->ro = 1;
+ err = do_md_run(mddev);
+ }
+ break;
+ case read_auto:
+ /* stopping an active array */
+ if (mddev->pers) {
+ err = do_md_stop(mddev, 1);
+ if (err == 0)
+ mddev->ro = 2; /* FIXME mark devices writable */
+ } else {
+ mddev->ro = 2;
+ err = do_md_run(mddev);
+ }
+ break;
+ case clean:
+ if (mddev->pers) {
+ restart_array(mddev);
+ spin_lock_irq(&mddev->write_lock);
+ if (atomic_read(&mddev->writes_pending) == 0) {
+ mddev->in_sync = 1;
+ mddev->sb_dirty = 1;
+ }
+ spin_unlock_irq(&mddev->write_lock);
+ } else {
+ mddev->ro = 0;
+ mddev->recovery_cp = MaxSector;
+ err = do_md_run(mddev);
+ }
+ break;
+ case active:
+ if (mddev->pers) {
+ restart_array(mddev);
+ mddev->sb_dirty = 0;
+ wake_up(&mddev->sb_wait);
+ err = 0;
+ } else {
+ mddev->ro = 0;
+ err = do_md_run(mddev);
+ }
+ break;
+ case write_pending:
+ case active_idle:
+ /* these cannot be set */
+ break;
+ }
+ if (err)
+ return err;
+ else
+ return len;
+}
+static struct md_sysfs_entry md_array_state = __ATTR(array_state, 0644, array_state_show, array_state_store);
+
static ssize_t
null_show(mddev_t *mddev, char *page)
{
static struct attribute *md_default_attrs[] = {
&md_level.attr,
+ &md_layout.attr,
&md_raid_disks.attr,
&md_chunk_size.attr,
&md_size.attr,
+ &md_resync_start.attr,
&md_metadata.attr,
&md_new_device.attr,
&md_safe_delay.attr,
+ &md_array_state.attr,
NULL,
};
}
disk->major = MAJOR(dev);
disk->first_minor = unit << shift;
- if (partitioned) {
+ if (partitioned)
sprintf(disk->disk_name, "md_d%d", unit);
- sprintf(disk->devfs_name, "md/d%d", unit);
- } else {
+ else
sprintf(disk->disk_name, "md%d", unit);
- sprintf(disk->devfs_name, "md/%d", unit);
- }
disk->fops = &md_fops;
disk->private_data = mddev;
disk->queue = mddev->queue;
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread);
err = 0;
- } else {
- printk(KERN_ERR "md: %s has no personality assigned.\n",
- mdname(mddev));
+ } else
err = -EINVAL;
- }
out:
return err;
spin_unlock(&inode->i_lock);
}
-static int do_md_stop(mddev_t * mddev, int ro)
+/* mode:
+ * 0 - completely stop and dis-assemble array
+ * 1 - switch to readonly
+ * 2 - stop but do not disassemble array
+ */
+static int do_md_stop(mddev_t * mddev, int mode)
{
int err = 0;
struct gendisk *disk = mddev->gendisk;
invalidate_partition(disk, 0);
- if (ro) {
+ switch(mode) {
+ case 1: /* readonly */
err = -ENXIO;
if (mddev->ro==1)
goto out;
mddev->ro = 1;
- } else {
+ break;
+ case 0: /* disassemble */
+ case 2: /* stop */
bitmap_flush(mddev);
md_super_wait(mddev);
if (mddev->ro)
mddev->in_sync = 1;
md_update_sb(mddev);
}
- if (ro)
+ if (mode == 1)
set_disk_ro(disk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
/*
* Free resources if final stop
*/
- if (!ro) {
+ if (mode == 0) {
mdk_rdev_t *rdev;
struct list_head *tmp;
struct gendisk *disk;
export_array(mddev);
mddev->array_size = 0;
+ mddev->size = 0;
+ mddev->raid_disks = 0;
+ mddev->recovery_cp = 0;
+
disk = mddev->gendisk;
if (disk)
set_capacity(disk, 0);
static int __init md_init(void)
{
- int minor;
-
printk(KERN_INFO "md: md driver %d.%d.%d MAX_MD_DEVS=%d,"
" MD_SB_DISKS=%d\n",
MD_MAJOR_VERSION, MD_MINOR_VERSION,
unregister_blkdev(MAJOR_NR, "md");
return -1;
}
- devfs_mk_dir("md");
blk_register_region(MKDEV(MAJOR_NR, 0), MAX_MD_DEVS, THIS_MODULE,
md_probe, NULL, NULL);
blk_register_region(MKDEV(mdp_major, 0), MAX_MD_DEVS<<MdpMinorShift, THIS_MODULE,
md_probe, NULL, NULL);
- for (minor=0; minor < MAX_MD_DEVS; ++minor)
- devfs_mk_bdev(MKDEV(MAJOR_NR, minor),
- S_IFBLK|S_IRUSR|S_IWUSR,
- "md/%d", minor);
-
- for (minor=0; minor < MAX_MD_DEVS; ++minor)
- devfs_mk_bdev(MKDEV(mdp_major, minor<<MdpMinorShift),
- S_IFBLK|S_IRUSR|S_IWUSR,
- "md/mdp%d", minor);
-
-
register_reboot_notifier(&md_notifier);
raid_table_header = register_sysctl_table(raid_root_table, 1);
{
mddev_t *mddev;
struct list_head *tmp;
- int i;
+
blk_unregister_region(MKDEV(MAJOR_NR,0), MAX_MD_DEVS);
blk_unregister_region(MKDEV(mdp_major,0), MAX_MD_DEVS << MdpMinorShift);
- for (i=0; i < MAX_MD_DEVS; i++)
- devfs_remove("md/%d", i);
- for (i=0; i < MAX_MD_DEVS; i++)
- devfs_remove("md/d%d", i);
-
- devfs_remove("md");
unregister_blkdev(MAJOR_NR,"md");
unregister_blkdev(mdp_major, "mdp");