X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=fs%2Fbtrfs%2Fordered-data.c;h=a2094017027489a16473e03db2cddaad41c048cc;hb=b64dc5a4842c3420d7a093bf5e8979c57ceb789c;hp=676e4bd65c529cb3332da5a62679a44ed5439bcd;hpb=3eaa2885276fd6dac7b076a793932428b7168e74;p=linux-2.6-omap-h63xx.git diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 676e4bd65c5..a2094017027 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -26,7 +26,6 @@ #include "btrfs_inode.h" #include "extent_io.h" - static u64 entry_end(struct btrfs_ordered_extent *entry) { if (entry->file_offset + entry->len < entry->file_offset) @@ -34,14 +33,17 @@ static u64 entry_end(struct btrfs_ordered_extent *entry) return entry->file_offset + entry->len; } +/* returns NULL if the insertion worked, or it returns the node it did find + * in the tree + */ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, struct rb_node *node) { - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; struct btrfs_ordered_extent *entry; - while(*p) { + while (*p) { parent = *p; entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); @@ -58,16 +60,20 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, return NULL; } +/* + * look for a given offset in the tree, and if it can't be found return the + * first lesser offset + */ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, struct rb_node **prev_ret) { - struct rb_node * n = root->rb_node; + struct rb_node *n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *test; struct btrfs_ordered_extent *entry; struct btrfs_ordered_extent *prev_entry = NULL; - while(n) { + while (n) { entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); prev = n; prev_entry = entry; @@ -82,7 +88,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, if (!prev_ret) return NULL; - while(prev && file_offset >= entry_end(prev_entry)) { + while (prev && file_offset >= entry_end(prev_entry)) { test = rb_next(prev); if (!test) break; @@ -96,7 +102,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, if (prev) prev_entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node); - while(prev && file_offset < entry_end(prev_entry)) { + while (prev && file_offset < entry_end(prev_entry)) { test = rb_prev(prev); if (!test) break; @@ -108,6 +114,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, return NULL; } +/* + * helper to check if a given offset is inside a given entry + */ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) { if (file_offset < entry->file_offset || @@ -116,6 +125,10 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) return 1; } +/* + * look find the first ordered struct that has this offset, otherwise + * the first one less than this offset + */ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, u64 file_offset) { @@ -152,7 +165,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, * inserted. */ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, - u64 start, u64 len) + u64 start, u64 len, u64 disk_len, int type) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -167,7 +180,10 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, entry->file_offset = file_offset; entry->start = start; entry->len = len; + entry->disk_len = disk_len; entry->inode = inode; + if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) + set_bit(type, &entry->flags); /* one ref for the tree */ atomic_set(&entry->refs, 1); @@ -177,10 +193,8 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, node = tree_insert(&tree->tree, file_offset, &entry->rb_node); - if (node) { - printk("warning dup entry from add_ordered_extent\n"); - BUG(); - } + BUG_ON(node); + set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, entry_end(entry) - 1, GFP_NOFS); @@ -266,7 +280,7 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) struct btrfs_ordered_sum *sum; if (atomic_dec_and_test(&entry->refs)) { - while(!list_empty(&entry->list)) { + while (!list_empty(&entry->list)) { cur = entry->list.next; sum = list_entry(cur, struct btrfs_ordered_sum, list); list_del(&sum->list); @@ -303,7 +317,11 @@ int btrfs_remove_ordered_extent(struct inode *inode, return 0; } -int btrfs_wait_ordered_extents(struct btrfs_root *root) +/* + * wait for all the ordered extents in a root. This is done when balancing + * space between drives. + */ +int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) { struct list_head splice; struct list_head *cur; @@ -314,27 +332,36 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root) spin_lock(&root->fs_info->ordered_extent_lock); list_splice_init(&root->fs_info->ordered_extents, &splice); - while(!list_empty(&splice)) { + while (!list_empty(&splice)) { cur = splice.next; ordered = list_entry(cur, struct btrfs_ordered_extent, root_extent_list); + if (nocow_only && + !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && + !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { + list_move(&ordered->root_extent_list, + &root->fs_info->ordered_extents); + cond_resched_lock(&root->fs_info->ordered_extent_lock); + continue; + } + list_del_init(&ordered->root_extent_list); atomic_inc(&ordered->refs); - inode = ordered->inode; /* - * the inode can't go away until all the pages are gone - * and the pages won't go away while there is still - * an ordered extent and the ordered extent won't go - * away until it is off this list. So, we can safely - * increment i_count here and call iput later + * the inode may be getting freed (in sys_unlink path). */ - atomic_inc(&inode->i_count); + inode = igrab(ordered->inode); + spin_unlock(&root->fs_info->ordered_extent_lock); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); - iput(inode); + if (inode) { + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + iput(inode); + } else { + btrfs_put_ordered_extent(ordered); + } spin_lock(&root->fs_info->ordered_extent_lock); } @@ -361,16 +388,17 @@ void btrfs_start_ordered_extent(struct inode *inode, * start IO on any dirty ones so the wait doesn't stall waiting * for pdflush to find them */ - btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_NONE); - if (wait) + btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); + if (wait) { wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); + } } /* * Used to wait on ordered extents across a large range of bytes. */ -void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) +int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { u64 end; u64 orig_end; @@ -391,16 +419,21 @@ again: */ btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); + /* The compression code will leave pages locked but return from + * writepage without setting the page writeback. Starting again + * with WB_SYNC_ALL will end up waiting for the IO to actually start. + */ + btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); + btrfs_wait_on_page_writeback_range(inode->i_mapping, start >> PAGE_CACHE_SHIFT, orig_end >> PAGE_CACHE_SHIFT); end = orig_end; - while(1) { + while (1) { ordered = btrfs_lookup_first_ordered_extent(inode, end); - if (!ordered) { + if (!ordered) break; - } if (ordered->file_offset > orig_end) { btrfs_put_ordered_extent(ordered); break; @@ -418,12 +451,10 @@ again: } if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { - printk("inode %lu still ordered or delalloc after wait " - "%llu %llu\n", inode->i_ino, - (unsigned long long)start, - (unsigned long long)orig_end); + schedule_timeout(1); goto again; } + return 0; } /* @@ -458,7 +489,7 @@ out: * if none is found */ struct btrfs_ordered_extent * -btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) +btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; @@ -519,7 +550,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, * yet */ node = &ordered->rb_node; - while(1) { + while (1) { node = rb_prev(node); if (!node) break; @@ -547,9 +578,8 @@ int btrfs_ordered_update_i_size(struct inode *inode, * between our ordered extent and the next one. */ test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - if (test->file_offset > entry_end(ordered)) { - i_size_test = test->file_offset - 1; - } + if (test->file_offset > entry_end(ordered)) + i_size_test = test->file_offset; } else { i_size_test = i_size_read(inode); } @@ -561,7 +591,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, * disk_i_size to the end of the region. */ if (i_size_test > entry_end(ordered) && - !test_range_bit(io_tree, entry_end(ordered), i_size_test, + !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, EXTENT_DELALLOC, 0)) { new_i_size = min_t(u64, i_size_test, i_size_read(inode)); } @@ -576,7 +606,8 @@ out: * try to find a checksum. This is used because we allow pages to * be reclaimed before their checksum is actually put into the btree */ -int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) +int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, + u32 *sum) { struct btrfs_ordered_sum *ordered_sum; struct btrfs_sector_sum *sector_sums; @@ -595,11 +626,11 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) mutex_lock(&tree->mutex); list_for_each_prev(cur, &ordered->list) { ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); - if (offset >= ordered_sum->file_offset) { + if (disk_bytenr >= ordered_sum->bytenr) { num_sectors = ordered_sum->len / sectorsize; sector_sums = ordered_sum->sums; for (i = 0; i < num_sectors; i++) { - if (sector_sums[i].offset == offset) { + if (sector_sums[i].bytenr == disk_bytenr) { *sum = sector_sums[i].sum; ret = 0; goto out;