#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
+#include <linux/workqueue.h>
#include "dm-exception-store.h"
-#include "dm-snap.h"
#include "dm-bio-list.h"
#define DM_MSG_PREFIX "snapshots"
*/
#define MIN_IOS 256
+#define DM_TRACKED_CHUNK_HASH_SIZE 16 /* bucket count of tracked_chunk_hash[]; a power of two so the mask below selects a valid bucket */
+#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
+ (DM_TRACKED_CHUNK_HASH_SIZE - 1)) /* bucket index = low bits of x */
+
+struct exception_table { /* hash table of exceptions; dm_snapshot embeds one for pending and one for complete */
+ uint32_t hash_mask;
+ unsigned hash_shift;
+ struct list_head *table; /* presumably the array of bucket lists - TODO confirm against the allocation code */
+};
+
+struct dm_snapshot { /* state of one snapshot; the constructor stores it in ti->private */
+ struct rw_semaphore lock; /* held for write around pending/complete table lookups in the map paths */
+
+ struct dm_dev *origin; /* origin device; its size bounds the hash table size */
+
+ /* List of snapshots per Origin */
+ struct list_head list;
+
+ /* You can't use a snapshot if this is 0 (e.g. if full) */
+ int valid;
+
+ /* Origin writes don't trigger exceptions until this is set */
+ int active;
+
+ /* Used for display of table */
+ char type;
+
+ mempool_t *pending_pool; /* presumably backs alloc_pending_exception() - TODO confirm; destroyed in snapshot_dtr() */
+
+ atomic_t pending_exceptions_count; /* set to 0 by the constructor */
+
+ struct exception_table pending; /* searched by __lookup_pending_exception() */
+ struct exception_table complete; /* checked before a new pending exception is inserted */
+
+ /*
+ * pe_lock protects all pending_exception operations and access
+ * as well as the snapshot_bios list.
+ */
+ spinlock_t pe_lock;
+
+ /* The on disk metadata handler */
+ struct dm_exception_store *store;
+
+ struct dm_kcopyd_client *kcopyd_client;
+
+ /* Queue of snapshot writes for ksnapd to flush */
+ struct bio_list queued_bios;
+ struct work_struct queued_bios_work; /* presumably runs flush_queued_bios() - TODO confirm where it is initialised */
+
+ /* Chunks with outstanding reads */
+ mempool_t *tracked_chunk_pool;
+ spinlock_t tracked_chunk_lock; /* initialised by the constructor */
+ struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+};
+
static struct workqueue_struct *ksnapd;
static void flush_queued_bios(struct work_struct *work);
+static sector_t chunk_to_sector(struct dm_exception_store *store,
+ chunk_t chunk)
+{
+ return chunk << store->chunk_shift; /* chunk number -> sector: scale by the store's chunk_shift */
+}
+
+static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
+{
+ /*
+ * There is only ever one instance of a particular block
+ * device so we can compare pointers safely.
+ *
+ * Returns non-zero when lhs and rhs are the same pointer,
+ * zero otherwise.
+ */
+ return lhs == rhs;
+}
+
struct dm_snap_pending_exception {
struct dm_snap_exception e;
/*
* Allocate room for a suitable hash table.
*/
-static int init_hash_tables(struct dm_snapshot *s)
+static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift,
+ struct dm_dev *cow)
{
sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
* Calculate based on the size of the original volume or
* the COW volume...
*/
- cow_dev_size = get_dev_size(s->cow->bdev);
+ cow_dev_size = get_dev_size(cow->bdev);
origin_dev_size = get_dev_size(s->origin->bdev);
max_buckets = calc_max_buckets();
- hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
+ hash_size = min(origin_dev_size, cow_dev_size) >> chunk_shift;
hash_size = min(hash_size, max_buckets);
hash_size = rounddown_pow_of_two(hash_size);
}
static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
+ chunk_t *chunk_size, chunk_t *chunk_mask,
+ chunk_t *chunk_shift, struct dm_dev *cow,
char **error)
{
- unsigned long chunk_size;
+ unsigned long chunk_size_ulong;
char *value;
- chunk_size = simple_strtoul(chunk_size_arg, &value, 10);
+ chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
if (*chunk_size_arg == '\0' || *value != '\0') {
*error = "Invalid chunk size";
return -EINVAL;
}
- if (!chunk_size) {
- s->chunk_size = s->chunk_mask = s->chunk_shift = 0;
+ if (!chunk_size_ulong) {
+ *chunk_size = *chunk_mask = *chunk_shift = 0;
return 0;
}
* Chunk size must be multiple of page size. Silently
* round up if it's not.
*/
- chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
+ chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
/* Check chunk_size is a power of 2 */
- if (!is_power_of_2(chunk_size)) {
+ if (!is_power_of_2(chunk_size_ulong)) {
*error = "Chunk size is not a power of 2";
return -EINVAL;
}
/* Validate the chunk size against the device block size */
- if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) {
+ if (chunk_size_ulong % (bdev_hardsect_size(cow->bdev) >> 9)) {
*error = "Chunk size is not a multiple of device blocksize";
return -EINVAL;
}
- s->chunk_size = chunk_size;
- s->chunk_mask = chunk_size - 1;
- s->chunk_shift = ffs(chunk_size) - 1;
+ *chunk_size = chunk_size_ulong;
+ *chunk_mask = chunk_size_ulong - 1;
+ *chunk_shift = ffs(chunk_size_ulong) - 1;
return 0;
}
char persistent;
char *origin_path;
char *cow_path;
+ chunk_t chunk_size, chunk_mask, chunk_shift;
+ struct dm_dev *cow;
if (argc != 4) {
ti->error = "requires exactly 4 arguments";
}
r = dm_get_device(ti, cow_path, 0, 0,
- FMODE_READ | FMODE_WRITE, &s->cow);
+ FMODE_READ | FMODE_WRITE, &cow);
if (r) {
dm_put_device(ti, s->origin);
ti->error = "Cannot get COW device";
goto bad2;
}
- r = set_chunk_size(s, argv[3], &ti->error);
+ r = set_chunk_size(s, argv[3], &chunk_size, &chunk_mask, &chunk_shift,
+ cow, &ti->error);
if (r)
goto bad3;
- s->type = persistent;
-
s->valid = 1;
s->active = 0;
atomic_set(&s->pending_exceptions_count, 0);
init_rwsem(&s->lock);
spin_lock_init(&s->pe_lock);
- s->ti = ti;
/* Allocate hash table for COW data */
- if (init_hash_tables(s)) {
+ if (init_hash_tables(s, chunk_shift, cow)) {
ti->error = "Unable to allocate hash table space";
r = -ENOMEM;
goto bad3;
}
- s->store.snap = s;
-
- if (persistent == 'P')
- r = dm_create_persistent(&s->store);
- else
- r = dm_create_transient(&s->store);
-
+ r = dm_exception_store_create(argv[2], ti, chunk_size, chunk_mask,
+ chunk_shift, cow, &s->store);
if (r) {
ti->error = "Couldn't create exception store";
r = -EINVAL;
spin_lock_init(&s->tracked_chunk_lock);
/* Metadata must only be loaded into one table at once */
- r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s);
+ r = s->store->type->read_metadata(s->store, dm_add_exception,
+ (void *)s);
if (r < 0) {
ti->error = "Failed to read snapshot metadata";
goto bad_load_and_register;
}
ti->private = s;
- ti->split_io = s->chunk_size;
+ ti->split_io = s->store->chunk_size;
return 0;
dm_kcopyd_client_destroy(s->kcopyd_client);
bad5:
- s->store.destroy(&s->store);
+ s->store->type->dtr(s->store);
bad4:
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
bad3:
- dm_put_device(ti, s->cow);
+ dm_put_device(ti, cow);
dm_put_device(ti, s->origin);
bad2:
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
- s->store.destroy(&s->store);
+ s->store->type->dtr(s->store);
}
static void snapshot_dtr(struct dm_target *ti)
int i;
#endif
struct dm_snapshot *s = ti->private;
+ struct dm_dev *cow = s->store->cow;
flush_workqueue(ksnapd);
mempool_destroy(s->pending_pool);
dm_put_device(ti, s->origin);
- dm_put_device(ti, s->cow);
+ dm_put_device(ti, cow);
kfree(s);
}
else if (err == -ENOMEM)
DMERR("Invalidating snapshot: Unable to allocate exception.");
- if (s->store.drop_snapshot)
- s->store.drop_snapshot(&s->store);
+ if (s->store->type->drop_snapshot)
+ s->store->type->drop_snapshot(s->store);
s->valid = 0;
- dm_table_event(s->ti->table);
+ dm_table_event(s->store->ti->table);
}
static void get_pending_exception(struct dm_snap_pending_exception *pe)
else
/* Update the metadata if we are persistent */
- s->store.commit_exception(&s->store, &pe->e, commit_callback,
- pe);
+ s->store->type->commit_exception(s->store, &pe->e,
+ commit_callback, pe);
}
/*
dev_size = get_dev_size(bdev);
src.bdev = bdev;
- src.sector = chunk_to_sector(s, pe->e.old_chunk);
- src.count = min(s->chunk_size, dev_size - src.sector);
+ src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
+ src.count = min(s->store->chunk_size, dev_size - src.sector);
- dest.bdev = s->cow->bdev;
- dest.sector = chunk_to_sector(s, pe->e.new_chunk);
+ dest.bdev = s->store->cow->bdev;
+ dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
dest.count = src.count;
/* Hand over to kcopyd */
&src, 1, &dest, 0, copy_callback, pe);
}
+static struct dm_snap_pending_exception *
+__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
+{
+ struct dm_snap_exception *e = lookup_exception(&s->pending, chunk); /* search only the pending table */
+
+ if (!e)
+ return NULL; /* no pending exception recorded for this chunk */
+
+ return container_of(e, struct dm_snap_pending_exception, e); /* e is embedded in the pending exception; recover the container */
+}
+
/*
* Looks to see if this snapshot already has a pending exception
* for this chunk, otherwise it allocates a new one and inserts
* this.
*/
static struct dm_snap_pending_exception *
-__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
+__find_pending_exception(struct dm_snapshot *s,
+ struct dm_snap_pending_exception *pe, chunk_t chunk)
{
- struct dm_snap_exception *e;
- struct dm_snap_pending_exception *pe;
- chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
-
- /*
- * Is there a pending exception for this already ?
- */
- e = lookup_exception(&s->pending, chunk);
- if (e) {
- /* cast the exception to a pending exception */
- pe = container_of(e, struct dm_snap_pending_exception, e);
- goto out;
- }
+ struct dm_snap_pending_exception *pe2;
- /*
- * Create a new pending exception, we don't want
- * to hold the lock while we do this.
- */
- up_write(&s->lock);
- pe = alloc_pending_exception(s);
- down_write(&s->lock);
-
- if (!s->valid) {
+ pe2 = __lookup_pending_exception(s, chunk);
+ if (pe2) {
free_pending_exception(pe);
- return NULL;
- }
-
- e = lookup_exception(&s->pending, chunk);
- if (e) {
- free_pending_exception(pe);
- pe = container_of(e, struct dm_snap_pending_exception, e);
- goto out;
+ return pe2;
}
pe->e.old_chunk = chunk;
atomic_set(&pe->ref_count, 0);
pe->started = 0;
- if (s->store.prepare_exception(&s->store, &pe->e)) {
+ if (s->store->type->prepare_exception(s->store, &pe->e)) {
free_pending_exception(pe);
return NULL;
}
get_pending_exception(pe);
insert_exception(&s->pending, &pe->e);
- out:
return pe;
}
static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
struct bio *bio, chunk_t chunk)
{
- bio->bi_bdev = s->cow->bdev;
- bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) +
- (chunk - e->old_chunk)) +
- (bio->bi_sector & s->chunk_mask);
+ bio->bi_bdev = s->store->cow->bdev;
+ bio->bi_sector = chunk_to_sector(s->store,
+ dm_chunk_number(e->new_chunk) +
+ (chunk - e->old_chunk)) +
+ (bio->bi_sector &
+ s->store->chunk_mask);
}
static int snapshot_map(struct dm_target *ti, struct bio *bio,
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
- chunk = sector_to_chunk(s, bio->bi_sector);
+ chunk = sector_to_chunk(s->store, bio->bi_sector);
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
* writeable.
*/
if (bio_rw(bio) == WRITE) {
- pe = __find_pending_exception(s, bio);
+ pe = __lookup_pending_exception(s, chunk);
if (!pe) {
- __invalidate_snapshot(s, -ENOMEM);
- r = -EIO;
- goto out_unlock;
+ up_write(&s->lock);
+ pe = alloc_pending_exception(s);
+ down_write(&s->lock);
+
+ if (!s->valid) {
+ free_pending_exception(pe);
+ r = -EIO;
+ goto out_unlock;
+ }
+
+ e = lookup_exception(&s->complete, chunk);
+ if (e) {
+ free_pending_exception(pe);
+ remap_exception(s, e, bio, chunk);
+ goto out_unlock;
+ }
+
+ pe = __find_pending_exception(s, pe, chunk);
+ if (!pe) {
+ __invalidate_snapshot(s, -ENOMEM);
+ r = -EIO;
+ goto out_unlock;
+ }
}
remap_exception(s, &pe->e, bio, chunk);
if (!snap->valid)
snprintf(result, maxlen, "Invalid");
else {
- if (snap->store.fraction_full) {
+ if (snap->store->type->fraction_full) {
sector_t numerator, denominator;
- snap->store.fraction_full(&snap->store,
- &numerator,
- &denominator);
+ snap->store->type->fraction_full(snap->store,
+ &numerator,
+ &denominator);
snprintf(result, maxlen, "%llu/%llu",
(unsigned long long)numerator,
(unsigned long long)denominator);
* to make private copies if the output is to
* make sense.
*/
- snprintf(result, maxlen, "%s %s %c %llu",
- snap->origin->name, snap->cow->name,
- snap->type,
- (unsigned long long)snap->chunk_size);
+ snprintf(result, maxlen, "%s %s %s %llu",
+ snap->origin->name, snap->store->cow->name,
+ snap->store->type->name,
+ (unsigned long long)snap->store->chunk_size);
break;
}
goto next_snapshot;
/* Nothing to do if writing beyond end of snapshot */
- if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
+ if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
goto next_snapshot;
/*
* Remember, different snapshots can have
* different chunk sizes.
*/
- chunk = sector_to_chunk(snap, bio->bi_sector);
+ chunk = sector_to_chunk(snap->store, bio->bi_sector);
/*
* Check exception table to see if block
if (e)
goto next_snapshot;
- pe = __find_pending_exception(snap, bio);
+ pe = __lookup_pending_exception(snap, chunk);
if (!pe) {
- __invalidate_snapshot(snap, -ENOMEM);
- goto next_snapshot;
+ up_write(&snap->lock);
+ pe = alloc_pending_exception(snap);
+ down_write(&snap->lock);
+
+ if (!snap->valid) {
+ free_pending_exception(pe);
+ goto next_snapshot;
+ }
+
+ e = lookup_exception(&snap->complete, chunk);
+ if (e) {
+ free_pending_exception(pe);
+ goto next_snapshot;
+ }
+
+ pe = __find_pending_exception(snap, pe, chunk);
+ if (!pe) {
+ __invalidate_snapshot(snap, -ENOMEM);
+ goto next_snapshot;
+ }
}
if (!primary_pe) {
o = __lookup_origin(dev->bdev);
if (o)
list_for_each_entry (snap, &o->snapshots, list)
- chunk_size = min_not_zero(chunk_size, snap->chunk_size);
+ chunk_size = min_not_zero(chunk_size,
+ snap->store->chunk_size);
up_read(&_origins_lock);
ti->split_io = chunk_size;