#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
+#include <linux/sort.h>
+#include <linux/rcupdate.h>
+#include "compat.h"
#include "hash.h"
#include "crc32c.h"
#include "ctree.h"
u64 generation;
u64 orig_generation;
int level;
+ struct list_head list;
+ int del;
};
-static int finish_current_insert(struct btrfs_trans_handle *trans, struct
- btrfs_root *extent_root);
-static int del_pending_extents(struct btrfs_trans_handle *trans, struct
- btrfs_root *extent_root);
-static struct btrfs_block_group_cache *
-__btrfs_find_block_group(struct btrfs_root *root,
- struct btrfs_block_group_cache *hint,
- u64 search_start, int data, int owner);
-
-void maybe_lock_mutex(struct btrfs_root *root)
-{
- if (root != root->fs_info->extent_root &&
- root != root->fs_info->chunk_root &&
- root != root->fs_info->dev_root) {
- mutex_lock(&root->fs_info->alloc_mutex);
- }
-}
+static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, struct btrfs_key *ins,
+ int ref_mod);
+static int update_reserved_extents(struct btrfs_root *root,
+ u64 bytenr, u64 num, int reserve);
+static int update_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int alloc,
+ int mark_free);
+static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner_objectid, int pin,
+ int ref_to_drop);
-void maybe_unlock_mutex(struct btrfs_root *root)
-{
- if (root != root->fs_info->extent_root &&
- root != root->fs_info->chunk_root &&
- root != root->fs_info->dev_root) {
- mutex_unlock(&root->fs_info->alloc_mutex);
- }
-}
+static int do_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *extent_root, u64 alloc_bytes,
+ u64 flags, int force);
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
* this adds the block group to the fs_info rb tree for the block group
* cache
*/
-int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
+static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
struct btrfs_block_group_cache *block_group)
{
struct rb_node **p;
break;
}
}
+ if (ret)
+ atomic_inc(&ret->count);
spin_unlock(&info->block_group_cache_lock);
return ret;
u64 extent_start, extent_end, size;
int ret;
+ mutex_lock(&info->pinned_mutex);
while (start < end) {
ret = find_first_extent_bit(&info->pinned_extents, start,
&extent_start, &extent_end,
start = extent_end + 1;
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
- ret = btrfs_add_free_space(block_group, start, size);
+ ret = btrfs_add_free_space(block_group, start,
+ size);
BUG_ON(ret);
start = extent_end + 1;
} else {
ret = btrfs_add_free_space(block_group, start, size);
BUG_ON(ret);
}
+ mutex_unlock(&info->pinned_mutex);
+
+ return 0;
+}
+static int remove_sb_from_cache(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ u64 bytenr;
+ u64 *logical;
+ int stripe_len;
+ int i, nr, ret;
+
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+ cache->key.objectid, bytenr, 0,
+ &logical, &nr, &stripe_len);
+ BUG_ON(ret);
+ while (nr--) {
+ btrfs_remove_free_space(cache, logical[nr],
+ stripe_len);
+ }
+ kfree(logical);
+ }
return 0;
}
struct btrfs_key key;
struct extent_buffer *leaf;
int slot;
- u64 last = 0;
- u64 first_free;
- int found = 0;
+ u64 last;
if (!block_group)
return 0;
* skip the locking here
*/
path->skip_locking = 1;
- first_free = max_t(u64, block_group->key.objectid,
- BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
- key.objectid = block_group->key.objectid;
+ last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+ key.objectid = last;
key.offset = 0;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto err;
- ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
- if (ret < 0)
- goto err;
- if (ret == 0) {
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid + key.offset > first_free)
- first_free = key.objectid + key.offset;
- }
- while(1) {
+
+ while (1) {
leaf = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
break;
if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
- if (!found) {
- last = first_free;
- found = 1;
- }
-
add_new_free_space(block_group, root->fs_info, last,
key.objectid);
path->slots[0]++;
}
- if (!found)
- last = first_free;
-
add_new_free_space(block_group, root->fs_info, last,
block_group->key.objectid +
block_group->key.offset);
+ remove_sb_from_cache(root, block_group);
block_group->cached = 1;
ret = 0;
err:
/*
* return the block group that starts at or after bytenr
*/
-struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
- btrfs_fs_info *info,
- u64 bytenr)
+static struct btrfs_block_group_cache *
+btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
struct btrfs_block_group_cache *cache;
/*
* return the block group that contains teh given bytenr
*/
-struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
- btrfs_fs_info *info,
- u64 bytenr)
+struct btrfs_block_group_cache *btrfs_lookup_block_group(
+ struct btrfs_fs_info *info,
+ u64 bytenr)
{
struct btrfs_block_group_cache *cache;
return cache;
}
-static int noinline find_free_space(struct btrfs_root *root,
- struct btrfs_block_group_cache **cache_ret,
- u64 *start_ret, u64 num, int data)
+static inline void put_block_group(struct btrfs_block_group_cache *cache)
{
- int ret;
- struct btrfs_block_group_cache *cache = *cache_ret;
- struct btrfs_free_space *info = NULL;
- u64 last;
- u64 search_start = *start_ret;
-
- WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
- if (!cache)
- goto out;
-
- last = max(search_start, cache->key.objectid);
-
-again:
- ret = cache_block_group(root, cache);
- if (ret)
- goto out;
+ if (atomic_dec_and_test(&cache->count))
+ kfree(cache);
+}
- if (cache->ro || !block_group_bits(cache, data))
- goto new_group;
+static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
+ u64 flags)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
- info = btrfs_find_free_space(cache, last, num);
- if (info) {
- *start_ret = info->offset;
- return 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags == flags) {
+ rcu_read_unlock();
+ return found;
+ }
}
+ rcu_read_unlock();
+ return NULL;
+}
-new_group:
- last = cache->key.objectid + cache->key.offset;
-
- cache = btrfs_lookup_first_block_group(root->fs_info, last);
- if (!cache)
- goto out;
-
- *cache_ret = cache;
- goto again;
+/*
+ * after adding space to the filesystem, we need to clear the full flags
+ * on all the space infos.
+ */
+void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
-out:
- return -ENOSPC;
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list)
+ found->full = 0;
+ rcu_read_unlock();
}
static u64 div_factor(u64 num, int factor)
return num;
}
-static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
- u64 flags)
-{
- struct list_head *head = &info->space_info;
- struct list_head *cur;
- struct btrfs_space_info *found;
- list_for_each(cur, head) {
- found = list_entry(cur, struct btrfs_space_info, list);
- if (found->flags == flags)
- return found;
- }
- return NULL;
-}
-
-static struct btrfs_block_group_cache *
-__btrfs_find_block_group(struct btrfs_root *root,
- struct btrfs_block_group_cache *hint,
- u64 search_start, int data, int owner)
+u64 btrfs_find_block_group(struct btrfs_root *root,
+ u64 search_start, u64 search_hint, int owner)
{
struct btrfs_block_group_cache *cache;
- struct btrfs_block_group_cache *found_group = NULL;
- struct btrfs_fs_info *info = root->fs_info;
u64 used;
- u64 last = 0;
- u64 free_check;
+ u64 last = max(search_hint, search_start);
+ u64 group_start = 0;
int full_search = 0;
- int factor = 10;
+ int factor = 9;
int wrapped = 0;
-
- if (data & BTRFS_BLOCK_GROUP_METADATA)
- factor = 9;
-
- if (search_start) {
- struct btrfs_block_group_cache *shint;
- shint = btrfs_lookup_first_block_group(info, search_start);
- if (shint && block_group_bits(shint, data) && !shint->ro) {
- spin_lock(&shint->lock);
- used = btrfs_block_group_used(&shint->item);
- if (used + shint->pinned + shint->reserved <
- div_factor(shint->key.offset, factor)) {
- spin_unlock(&shint->lock);
- return shint;
- }
- spin_unlock(&shint->lock);
- }
- }
- if (hint && !hint->ro && block_group_bits(hint, data)) {
- spin_lock(&hint->lock);
- used = btrfs_block_group_used(&hint->item);
- if (used + hint->pinned + hint->reserved <
- div_factor(hint->key.offset, factor)) {
- spin_unlock(&hint->lock);
- return hint;
- }
- spin_unlock(&hint->lock);
- last = hint->key.objectid + hint->key.offset;
- } else {
- if (hint)
- last = max(hint->key.objectid, search_start);
- else
- last = search_start;
- }
again:
while (1) {
cache = btrfs_lookup_first_block_group(root->fs_info, last);
last = cache->key.objectid + cache->key.offset;
used = btrfs_block_group_used(&cache->item);
- if (!cache->ro && block_group_bits(cache, data)) {
- free_check = div_factor(cache->key.offset, factor);
+ if ((full_search || !cache->ro) &&
+ block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
if (used + cache->pinned + cache->reserved <
- free_check) {
- found_group = cache;
+ div_factor(cache->key.offset, factor)) {
+ group_start = cache->key.objectid;
spin_unlock(&cache->lock);
+ put_block_group(cache);
goto found;
}
}
spin_unlock(&cache->lock);
+ put_block_group(cache);
cond_resched();
}
if (!wrapped) {
goto again;
}
found:
- return found_group;
-}
-
-struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
- struct btrfs_block_group_cache
- *hint, u64 search_start,
- int data, int owner)
-{
-
- struct btrfs_block_group_cache *ret;
- ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
- return ret;
+ return group_start;
}
/* simple helper to search for an existing extent at a given offset */
path = btrfs_alloc_path();
BUG_ON(!path);
- maybe_lock_mutex(root);
key.objectid = start;
key.offset = len;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
0, 0);
- maybe_unlock_mutex(root);
btrfs_free_path(path);
return ret;
}
* to the key objectid.
*/
-static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
+static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 bytenr, u64 parent,
return ret;
}
-static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
+static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 bytenr, u64 parent,
u64 ref_root, u64 ref_generation,
- u64 owner_objectid)
+ u64 owner_objectid,
+ int refs_to_add)
{
struct btrfs_key key;
struct extent_buffer *leaf;
btrfs_set_ref_root(leaf, ref, ref_root);
btrfs_set_ref_generation(leaf, ref, ref_generation);
btrfs_set_ref_objectid(leaf, ref, owner_objectid);
- btrfs_set_ref_num_refs(leaf, ref, 1);
+ btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
} else if (ret == -EEXIST) {
u64 existing_owner;
+
BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
num_refs = btrfs_ref_num_refs(leaf, ref);
BUG_ON(num_refs == 0);
- btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);
+ btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);
existing_owner = btrfs_ref_objectid(leaf, ref);
if (existing_owner != owner_objectid &&
} else {
goto out;
}
+ btrfs_unlock_up_safe(path, 1);
btrfs_mark_buffer_dirty(path->nodes[0]);
out:
btrfs_release_path(root, path);
return ret;
}
-static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
+static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ int refs_to_drop)
{
struct extent_buffer *leaf;
struct btrfs_extent_ref *ref;
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
num_refs = btrfs_ref_num_refs(leaf, ref);
- BUG_ON(num_refs == 0);
- num_refs -= 1;
+ BUG_ON(num_refs < refs_to_drop);
+ num_refs -= refs_to_drop;
if (num_refs == 0) {
ret = btrfs_del_item(trans, root, path);
} else {
return ret;
}
+#ifdef BIO_RW_DISCARD
+static void btrfs_issue_discard(struct block_device *bdev,
+ u64 start, u64 len)
+{
+ blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
+}
+#endif
+
+static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes)
+{
+#ifdef BIO_RW_DISCARD
+ int ret;
+ u64 map_length = num_bytes;
+ struct btrfs_multi_bio *multi = NULL;
+
+ /* Tell the block device(s) that the sectors can be discarded */
+ ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
+ bytenr, &map_length, &multi, 0);
+ if (!ret) {
+ struct btrfs_bio_stripe *stripe = multi->stripes;
+ int i;
+
+ if (map_length > num_bytes)
+ map_length = num_bytes;
+
+ for (i = 0; i < multi->num_stripes; i++, stripe++) {
+ btrfs_issue_discard(stripe->dev->bdev,
+ stripe->physical,
+ map_length);
+ }
+ kfree(multi);
+ }
+
+ return ret;
+#else
+ return 0;
+#endif
+}
+
static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes,
u64 orig_parent, u64 parent,
u64 orig_root, u64 ref_root,
u64 orig_generation, u64 ref_generation,
u64 owner_objectid)
{
int ret;
- struct btrfs_root *extent_root = root->fs_info->extent_root;
- struct btrfs_path *path;
-
- if (root == root->fs_info->extent_root) {
- struct pending_extent_op *extent_op;
- u64 num_bytes;
-
- BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
- num_bytes = btrfs_level_size(root, (int)owner_objectid);
- if (test_range_bit(&root->fs_info->extent_ins, bytenr,
- bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) {
- u64 priv;
- ret = get_state_private(&root->fs_info->extent_ins,
- bytenr, &priv);
- BUG_ON(ret);
- extent_op = (struct pending_extent_op *)
- (unsigned long)priv;
- BUG_ON(extent_op->parent != orig_parent);
- BUG_ON(extent_op->generation != orig_generation);
- extent_op->parent = parent;
- extent_op->generation = ref_generation;
- } else {
- extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
- BUG_ON(!extent_op);
-
- extent_op->type = PENDING_BACKREF_UPDATE;
- extent_op->bytenr = bytenr;
- extent_op->num_bytes = num_bytes;
- extent_op->parent = parent;
- extent_op->orig_parent = orig_parent;
- extent_op->generation = ref_generation;
- extent_op->orig_generation = orig_generation;
- extent_op->level = (int)owner_objectid;
-
- set_extent_bits(&root->fs_info->extent_ins,
- bytenr, bytenr + num_bytes - 1,
- EXTENT_LOCKED, GFP_NOFS);
- set_state_private(&root->fs_info->extent_ins,
- bytenr, (unsigned long)extent_op);
- }
- return 0;
- }
+ int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- ret = lookup_extent_backref(trans, extent_root, path,
- bytenr, orig_parent, orig_root,
- orig_generation, owner_objectid, 1);
- if (ret)
- goto out;
- ret = remove_extent_backref(trans, extent_root, path);
- if (ret)
- goto out;
- ret = insert_extent_backref(trans, extent_root, path, bytenr,
- parent, ref_root, ref_generation,
- owner_objectid);
+ ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes,
+ orig_parent, parent, orig_root,
+ ref_root, orig_generation,
+ ref_generation, owner_objectid, pin);
BUG_ON(ret);
- finish_current_insert(trans, extent_root);
- del_pending_extents(trans, extent_root);
-out:
- btrfs_free_path(path);
return ret;
}
int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
- u64 orig_parent, u64 parent,
+ u64 num_bytes, u64 orig_parent, u64 parent,
u64 ref_root, u64 ref_generation,
u64 owner_objectid)
{
if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
return 0;
- maybe_lock_mutex(root);
- ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
- parent, ref_root, ref_root,
- ref_generation, ref_generation,
- owner_objectid);
- maybe_unlock_mutex(root);
+
+ ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
+ orig_parent, parent, ref_root,
+ ref_root, ref_generation,
+ ref_generation, owner_objectid);
return ret;
}
-
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes,
u64 orig_parent, u64 parent,
u64 orig_root, u64 ref_root,
u64 orig_generation, u64 ref_generation,
u64 owner_objectid)
+{
+ int ret;
+
+ ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
+ ref_generation, owner_objectid,
+ BTRFS_ADD_DELAYED_REF, 0);
+ BUG_ON(ret);
+ return ret;
+}
+
+static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, u64 parent, u64 ref_root,
+ u64 ref_generation, u64 owner_objectid,
+ int refs_to_add)
{
struct btrfs_path *path;
int ret;
return -ENOMEM;
path->reada = 1;
+ path->leave_spinning = 1;
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = (u64)-1;
+ key.offset = num_bytes;
- ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
- 0, 1);
- if (ret < 0)
+ /* first find the extent item and update its reference count */
+ ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
+ path, 0, 1);
+ if (ret < 0) {
+ btrfs_set_path_blocking(path);
return ret;
- BUG_ON(ret == 0 || path->slots[0] == 0);
+ }
- path->slots[0]--;
+ if (ret > 0) {
+ WARN_ON(1);
+ btrfs_free_path(path);
+ return -EIO;
+ }
l = path->nodes[0];
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
- BUG_ON(key.objectid != bytenr);
+ if (key.objectid != bytenr) {
+ btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
+ printk(KERN_ERR "btrfs wanted %llu found %llu\n",
+ (unsigned long long)bytenr,
+ (unsigned long long)key.objectid);
+ BUG();
+ }
BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);
item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
+
refs = btrfs_extent_refs(l, item);
- btrfs_set_extent_refs(l, item, refs + 1);
+ btrfs_set_extent_refs(l, item, refs + refs_to_add);
+ btrfs_unlock_up_safe(path, 1);
+
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_release_path(root->fs_info->extent_root, path);
path->reada = 1;
+ path->leave_spinning = 1;
+
+ /* now insert the actual backref */
ret = insert_extent_backref(trans, root->fs_info->extent_root,
path, bytenr, parent,
ref_root, ref_generation,
- owner_objectid);
+ owner_objectid, refs_to_add);
BUG_ON(ret);
- finish_current_insert(trans, root->fs_info->extent_root);
- del_pending_extents(trans, root->fs_info->extent_root);
-
btrfs_free_path(path);
return 0;
}
if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
return 0;
- maybe_lock_mutex(root);
- ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
+
+ ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,
0, ref_root, 0, ref_generation,
owner_objectid);
- maybe_unlock_mutex(root);
return ret;
}
-int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+static int drop_delayed_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_delayed_ref_node *node)
{
- finish_current_insert(trans, root->fs_info->extent_root);
- del_pending_extents(trans, root->fs_info->extent_root);
- return 0;
-}
+ int ret = 0;
+ struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node);
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u32 *refs)
-{
- struct btrfs_path *path;
- int ret;
- struct btrfs_key key;
- struct extent_buffer *l;
- struct btrfs_extent_item *item;
+ BUG_ON(node->ref_mod == 0);
+ ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes,
+ node->parent, ref->root, ref->generation,
+ ref->owner_objectid, ref->pin, node->ref_mod);
- WARN_ON(num_bytes < root->sectorsize);
- path = btrfs_alloc_path();
- path->reada = 1;
- key.objectid = bytenr;
- key.offset = num_bytes;
- btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
- ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
- 0, 0);
- if (ret < 0)
- goto out;
- if (ret != 0) {
- btrfs_print_leaf(root, path->nodes[0]);
- printk("failed to find block number %Lu\n", bytenr);
- BUG();
- }
- l = path->nodes[0];
- item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
- *refs = btrfs_extent_refs(l, item);
-out:
- btrfs_free_path(path);
- return 0;
+ return ret;
}
-static int get_reference_status(struct btrfs_root *root, u64 bytenr,
- u64 parent_gen, u64 ref_objectid,
- u64 *min_generation, u32 *ref_count)
+/* helper function to actually process a single delayed ref entry */
+static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_delayed_ref_node *node,
+ int insert_reserved)
{
- struct btrfs_root *extent_root = root->fs_info->extent_root;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_extent_ref *ref_item;
- struct btrfs_key key;
- struct btrfs_key found_key;
- u64 root_objectid = root->root_key.objectid;
- u64 ref_generation;
- u32 nritems;
int ret;
+ struct btrfs_delayed_ref *ref;
- key.objectid = bytenr;
- key.offset = (u64)-1;
- key.type = BTRFS_EXTENT_ITEM_KEY;
+ if (node->parent == (u64)-1) {
+ struct btrfs_delayed_ref_head *head;
+ /*
+ * we've hit the end of the chain and we were supposed
+ * to insert this extent into the tree. But, it got
+ * deleted before we ever needed to insert it, so all
+ * we have to do is clean up the accounting
+ */
+ if (insert_reserved) {
+ update_reserved_extents(root, node->bytenr,
+ node->num_bytes, 0);
+ }
+ head = btrfs_delayed_node_to_head(node);
+ mutex_unlock(&head->mutex);
+ return 0;
+ }
- path = btrfs_alloc_path();
- mutex_lock(&root->fs_info->alloc_mutex);
- ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- BUG_ON(ret == 0);
- if (ret < 0 || path->slots[0] == 0)
- goto out;
+ ref = btrfs_delayed_node_to_ref(node);
+ if (ref->action == BTRFS_ADD_DELAYED_REF) {
+ if (insert_reserved) {
+ struct btrfs_key ins;
- path->slots[0]--;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ ins.objectid = node->bytenr;
+ ins.offset = node->num_bytes;
+ ins.type = BTRFS_EXTENT_ITEM_KEY;
- if (found_key.objectid != bytenr ||
- found_key.type != BTRFS_EXTENT_ITEM_KEY) {
- ret = 1;
- goto out;
+ /* record the full extent allocation */
+ ret = __btrfs_alloc_reserved_extent(trans, root,
+ node->parent, ref->root,
+ ref->generation, ref->owner_objectid,
+ &ins, node->ref_mod);
+ update_reserved_extents(root, node->bytenr,
+ node->num_bytes, 0);
+ } else {
+ /* just add one backref */
+ ret = add_extent_ref(trans, root, node->bytenr,
+ node->num_bytes,
+ node->parent, ref->root, ref->generation,
+ ref->owner_objectid, node->ref_mod);
+ }
+ BUG_ON(ret);
+ } else if (ref->action == BTRFS_DROP_DELAYED_REF) {
+ WARN_ON(insert_reserved);
+ ret = drop_delayed_ref(trans, root, node);
}
+ return 0;
+}
- *ref_count = 0;
- *min_generation = (u64)-1;
-
+static noinline struct btrfs_delayed_ref_node *
+select_delayed_ref(struct btrfs_delayed_ref_head *head)
+{
+ struct rb_node *node;
+ struct btrfs_delayed_ref_node *ref;
+ int action = BTRFS_ADD_DELAYED_REF;
+again:
+ /*
+ * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
+ * this prevents ref count from going down to zero when
+ * there still are pending delayed ref.
+ */
+ node = rb_prev(&head->node.rb_node);
while (1) {
- leaf = path->nodes[0];
- nritems = btrfs_header_nritems(leaf);
- if (path->slots[0] >= nritems) {
- ret = btrfs_next_leaf(extent_root, path);
- if (ret < 0)
- goto out;
- if (ret == 0)
- continue;
+ if (!node)
break;
- }
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (found_key.objectid != bytenr)
+ ref = rb_entry(node, struct btrfs_delayed_ref_node,
+ rb_node);
+ if (ref->bytenr != head->node.bytenr)
break;
+ if (btrfs_delayed_node_to_ref(ref)->action == action)
+ return ref;
+ node = rb_prev(node);
+ }
+ if (action == BTRFS_ADD_DELAYED_REF) {
+ action = BTRFS_DROP_DELAYED_REF;
+ goto again;
+ }
+ return NULL;
+}
- if (found_key.type != BTRFS_EXTENT_REF_KEY) {
- path->slots[0]++;
- continue;
- }
+static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct list_head *cluster)
+{
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_delayed_ref_node *ref;
+ struct btrfs_delayed_ref_head *locked_ref = NULL;
+ int ret;
+ int count = 0;
+ int must_insert_reserved = 0;
- ref_item = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_extent_ref);
- ref_generation = btrfs_ref_generation(leaf, ref_item);
- /*
- * For (parent_gen > 0 && parent_gen > ref_generation):
- *
- * we reach here through the oldest root, therefore
- * all other reference from same snapshot should have
- * a larger generation.
+ delayed_refs = &trans->transaction->delayed_refs;
+ while (1) {
+ if (!locked_ref) {
+ /* pick a new head ref from the cluster list */
+ if (list_empty(cluster))
+ break;
+
+ locked_ref = list_entry(cluster->next,
+ struct btrfs_delayed_ref_head, cluster);
+
+ /* grab the lock that says we are going to process
+ * all the refs for this head */
+ ret = btrfs_delayed_ref_lock(trans, locked_ref);
+
+ /*
+ * we may have dropped the spin lock to get the head
+ * mutex lock, and that might have given someone else
+ * time to free the head. If that's true, it has been
+ * removed from our list and we can move on.
+ */
+ if (ret == -EAGAIN) {
+ locked_ref = NULL;
+ count++;
+ continue;
+ }
+ }
+
+ /*
+ * record the must insert reserved flag before we
+ * drop the spin lock.
*/
- if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
- (parent_gen > 0 && parent_gen > ref_generation) ||
- (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
- ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
- *ref_count = 2;
- break;
+ must_insert_reserved = locked_ref->must_insert_reserved;
+ locked_ref->must_insert_reserved = 0;
+
+ /*
+ * locked_ref is the head node, so we have to go one
+ * node back for any delayed ref updates
+ */
+ ref = select_delayed_ref(locked_ref);
+ if (!ref) {
+ /* All delayed refs have been processed, Go ahead
+ * and send the head node to run_one_delayed_ref,
+ * so that any accounting fixes can happen
+ */
+ ref = &locked_ref->node;
+ list_del_init(&locked_ref->cluster);
+ locked_ref = NULL;
}
- *ref_count = 1;
- if (*min_generation > ref_generation)
- *min_generation = ref_generation;
+ ref->in_tree = 0;
+ rb_erase(&ref->rb_node, &delayed_refs->root);
+ delayed_refs->num_entries--;
+ spin_unlock(&delayed_refs->lock);
- path->slots[0]++;
+ ret = run_one_delayed_ref(trans, root, ref,
+ must_insert_reserved);
+ BUG_ON(ret);
+ btrfs_put_delayed_ref(ref);
+
+ count++;
+ cond_resched();
+ spin_lock(&delayed_refs->lock);
}
- ret = 0;
-out:
- mutex_unlock(&root->fs_info->alloc_mutex);
- btrfs_free_path(path);
- return ret;
+ return count;
}
-int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_key *key, u64 bytenr)
+/*
+ * this starts processing the delayed reference count updates and
+ * extent insertions we have queued up so far. count can be
+ * 0, which means to process everything in the tree at the start
+ * of the run (but not newly added entries), or it can be some target
+ * number you'd like to process.
+ */
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, unsigned long count)
{
- struct btrfs_root *old_root;
- struct btrfs_path *path = NULL;
- struct extent_buffer *eb;
- struct btrfs_file_extent_item *item;
- u64 ref_generation;
- u64 min_generation;
- u64 extent_start;
- u32 ref_count;
- int level;
+ struct rb_node *node;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_delayed_ref_node *ref;
+ struct list_head cluster;
int ret;
+ int run_all = count == (unsigned long)-1;
+ int run_most = 0;
- BUG_ON(trans == NULL);
- BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
- ret = get_reference_status(root, bytenr, 0, key->objectid,
- &min_generation, &ref_count);
- if (ret)
- return ret;
+ if (root == root->fs_info->extent_root)
+ root = root->fs_info->tree_root;
- if (ref_count != 1)
- return 1;
+ delayed_refs = &trans->transaction->delayed_refs;
+ INIT_LIST_HEAD(&cluster);
+again:
+ spin_lock(&delayed_refs->lock);
+ if (count == 0) {
+ count = delayed_refs->num_entries * 2;
+ run_most = 1;
+ }
+ while (1) {
+ if (!(run_all || run_most) &&
+ delayed_refs->num_heads_ready < 64)
+ break;
+
+ /*
+ * go find something we can process in the rbtree. We start at
+ * the beginning of the tree, and then build a cluster
+ * of refs to process starting at the first one we are able to
+ * lock
+ */
+ ret = btrfs_find_ref_cluster(trans, &cluster,
+ delayed_refs->run_delayed_start);
+ if (ret)
+ break;
- old_root = root->dirty_root->root;
- ref_generation = old_root->root_key.offset;
+ ret = run_clustered_refs(trans, root, &cluster);
+ BUG_ON(ret < 0);
- /* all references are created in running transaction */
- if (min_generation > ref_generation) {
- ret = 0;
- goto out;
+ count -= min_t(unsigned long, ret, count);
+
+ if (count == 0)
+ break;
+ }
+
+ if (run_all) {
+ node = rb_first(&delayed_refs->root);
+ if (!node)
+ goto out;
+ count = (unsigned long)-1;
+
+ while (node) {
+ ref = rb_entry(node, struct btrfs_delayed_ref_node,
+ rb_node);
+ if (btrfs_delayed_ref_is_head(ref)) {
+ struct btrfs_delayed_ref_head *head;
+
+ head = btrfs_delayed_node_to_head(ref);
+ atomic_inc(&ref->refs);
+
+ spin_unlock(&delayed_refs->lock);
+ mutex_lock(&head->mutex);
+ mutex_unlock(&head->mutex);
+
+ btrfs_put_delayed_ref(ref);
+ cond_resched();
+ goto again;
+ }
+ node = rb_next(node);
+ }
+ spin_unlock(&delayed_refs->lock);
+ schedule_timeout(1);
+ goto again;
}
+out:
+ spin_unlock(&delayed_refs->lock);
+ return 0;
+}
+
+int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid, u64 bytenr)
+{
+ struct btrfs_root *extent_root = root->fs_info->extent_root;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_extent_ref *ref_item;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ u64 ref_root;
+ u64 last_snapshot;
+ u32 nritems;
+ int ret;
+
+ key.objectid = bytenr;
+ key.offset = (u64)-1;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
+ if (ret < 0)
goto out;
- }
+ BUG_ON(ret == 0);
- path->skip_locking = 1;
- /* if no item found, the extent is referenced by other snapshot */
- ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
- if (ret)
+ ret = -ENOENT;
+ if (path->slots[0] == 0)
goto out;
- eb = path->nodes[0];
- item = btrfs_item_ptr(eb, path->slots[0],
- struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
- btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
- ret = 1;
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.objectid != bytenr ||
+ found_key.type != BTRFS_EXTENT_ITEM_KEY)
goto out;
- }
- for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
- if (level >= 0) {
- eb = path->nodes[level];
- if (!eb)
+ last_snapshot = btrfs_root_last_snapshot(&root->root_item);
+ while (1) {
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ if (path->slots[0] >= nritems) {
+ ret = btrfs_next_leaf(extent_root, path);
+ if (ret < 0)
+ goto out;
+ if (ret == 0)
continue;
- extent_start = eb->start;
- } else
- extent_start = bytenr;
+ break;
+ }
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid != bytenr)
+ break;
- ret = get_reference_status(root, extent_start, ref_generation,
- 0, &min_generation, &ref_count);
- if (ret)
- goto out;
+ if (found_key.type != BTRFS_EXTENT_REF_KEY) {
+ path->slots[0]++;
+ continue;
+ }
- if (ref_count != 1) {
+ ref_item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_extent_ref);
+ ref_root = btrfs_ref_root(leaf, ref_item);
+ if ((ref_root != root->root_key.objectid &&
+ ref_root != BTRFS_TREE_LOG_OBJECTID) ||
+ objectid != btrfs_ref_objectid(leaf, ref_item)) {
ret = 1;
goto out;
}
- if (level >= 0)
- ref_generation = btrfs_header_generation(eb);
+ if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
+ ret = 1;
+ goto out;
+ }
+
+ path->slots[0]++;
}
ret = 0;
out:
- if (path)
- btrfs_free_path(path);
+ btrfs_free_path(path);
return ret;
}
return ret;
}
-int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct extent_buffer *orig_buf, struct extent_buffer *buf,
- u32 *nr_extents)
+/* when a block goes through cow, we update the reference counts of
+ * everything that block points to. The internal pointers of the block
+ * can be in just about any order, and it is likely to have clusters of
+ * things that are close together and clusters of things that are not.
+ *
+ * To help reduce the seeks that come with updating all of these reference
+ * counts, sort them by byte number before actual updates are done.
+ *
+ * struct refsort is used to match byte number to slot in the btree block.
+ * we sort based on the byte number and then use the slot to actually
+ * find the item.
+ *
+ * struct refsort is smaller than strcut btrfs_item and smaller than
+ * struct btrfs_key_ptr. Since we're currently limited to the page size
+ * for a btree block, there's no way for a kmalloc of refsorts for a
+ * single node to be bigger than a page.
+ */
+struct refsort {
+ u64 bytenr;
+ u32 slot;
+};
+
+/*
+ * for passing into sort()
+ */
+static int refsort_cmp(const void *a_void, const void *b_void)
+{
+ const struct refsort *a = a_void;
+ const struct refsort *b = b_void;
+
+ if (a->bytenr < b->bytenr)
+ return -1;
+ if (a->bytenr > b->bytenr)
+ return 1;
+ return 0;
+}
+
+
+noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *orig_buf,
+ struct extent_buffer *buf, u32 *nr_extents)
{
u64 bytenr;
u64 ref_root;
u64 orig_root;
u64 ref_generation;
u64 orig_generation;
+ struct refsort *sorted;
u32 nritems;
u32 nr_file_extents = 0;
struct btrfs_key key;
int level;
int ret = 0;
int faili = 0;
+ int refi = 0;
+ int slot;
int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
- u64, u64, u64, u64, u64, u64, u64, u64);
+ u64, u64, u64, u64, u64, u64, u64, u64, u64);
ref_root = btrfs_header_owner(buf);
ref_generation = btrfs_header_generation(buf);
nritems = btrfs_header_nritems(buf);
level = btrfs_header_level(buf);
+ sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS);
+ BUG_ON(!sorted);
+
if (root->ref_cows) {
process_func = __btrfs_inc_extent_ref;
} else {
process_func = __btrfs_update_extent_ref;
}
+ /*
+ * we make two passes through the items. In the first pass we
+ * only record the byte number and slot. Then we sort based on
+ * byte number and do the actual work based on the sorted results
+ */
for (i = 0; i < nritems; i++) {
cond_resched();
if (level == 0) {
continue;
nr_file_extents++;
+ sorted[refi].bytenr = bytenr;
+ sorted[refi].slot = i;
+ refi++;
+ } else {
+ bytenr = btrfs_node_blockptr(buf, i);
+ sorted[refi].bytenr = bytenr;
+ sorted[refi].slot = i;
+ refi++;
+ }
+ }
+ /*
+ * if refi == 0, we didn't actually put anything into the sorted
+ * array and we're done
+ */
+ if (refi == 0)
+ goto out;
+
+ sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
+
+ for (i = 0; i < refi; i++) {
+ cond_resched();
+ slot = sorted[i].slot;
+ bytenr = sorted[i].bytenr;
+
+ if (level == 0) {
+ btrfs_item_key_to_cpu(buf, &key, slot);
+ fi = btrfs_item_ptr(buf, slot,
+ struct btrfs_file_extent_item);
+
+ bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
+ if (bytenr == 0)
+ continue;
- maybe_lock_mutex(root);
ret = process_func(trans, root, bytenr,
- orig_buf->start, buf->start,
- orig_root, ref_root,
- orig_generation, ref_generation,
- key.objectid);
- maybe_unlock_mutex(root);
+ btrfs_file_extent_disk_num_bytes(buf, fi),
+ orig_buf->start, buf->start,
+ orig_root, ref_root,
+ orig_generation, ref_generation,
+ key.objectid);
if (ret) {
- faili = i;
+ faili = slot;
WARN_ON(1);
goto fail;
}
} else {
- bytenr = btrfs_node_blockptr(buf, i);
- maybe_lock_mutex(root);
- ret = process_func(trans, root, bytenr,
+ ret = process_func(trans, root, bytenr, buf->len,
orig_buf->start, buf->start,
orig_root, ref_root,
orig_generation, ref_generation,
level - 1);
- maybe_unlock_mutex(root);
if (ret) {
- faili = i;
+ faili = slot;
WARN_ON(1);
goto fail;
}
}
}
out:
+ kfree(sorted);
if (nr_extents) {
if (level == 0)
*nr_extents = nr_file_extents;
}
return 0;
fail:
+ kfree(sorted);
WARN_ON(1);
return ret;
}
bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
if (bytenr == 0)
continue;
- maybe_lock_mutex(root);
ret = __btrfs_update_extent_ref(trans, root, bytenr,
- orig_buf->start, buf->start,
- orig_root, ref_root,
- orig_generation, ref_generation,
- key.objectid);
- maybe_unlock_mutex(root);
+ btrfs_file_extent_disk_num_bytes(buf, fi),
+ orig_buf->start, buf->start,
+ orig_root, ref_root, orig_generation,
+ ref_generation, key.objectid);
if (ret)
goto fail;
} else {
bytenr = btrfs_node_blockptr(buf, slot);
- maybe_lock_mutex(root);
ret = __btrfs_update_extent_ref(trans, root, bytenr,
- orig_buf->start, buf->start,
- orig_root, ref_root,
+ buf->len, orig_buf->start,
+ buf->start, orig_root, ref_root,
orig_generation, ref_generation,
level - 1);
- maybe_unlock_mutex(root);
if (ret)
goto fail;
}
struct btrfs_block_group_cache *cache)
{
int ret;
- int pending_ret;
struct btrfs_root *extent_root = root->fs_info->extent_root;
unsigned long bi;
struct extent_buffer *leaf;
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(extent_root, path);
fail:
- finish_current_insert(trans, extent_root);
- pending_ret = del_pending_extents(trans, extent_root);
if (ret)
return ret;
- if (pending_ret)
- return pending_ret;
return 0;
}
if (!path)
return -ENOMEM;
- mutex_lock(&root->fs_info->alloc_mutex);
- while(1) {
+ while (1) {
cache = NULL;
spin_lock(&root->fs_info->block_group_cache_lock);
for (n = rb_first(&root->fs_info->block_group_cache_tree);
}
}
btrfs_free_path(path);
- mutex_unlock(&root->fs_info->alloc_mutex);
return werr;
}
+int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
+{
+ struct btrfs_block_group_cache *block_group;
+ int readonly = 0;
+
+ block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
+ if (!block_group || block_group->ro)
+ readonly = 1;
+ if (block_group)
+ put_block_group(block_group);
+ return readonly;
+}
+
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
struct btrfs_space_info **space_info)
found = __find_space_info(info, flags);
if (found) {
+ spin_lock(&found->lock);
found->total_bytes += total_bytes;
found->bytes_used += bytes_used;
found->full = 0;
+ spin_unlock(&found->lock);
*space_info = found;
return 0;
}
- found = kmalloc(sizeof(*found), GFP_NOFS);
+ found = kzalloc(sizeof(*found), GFP_NOFS);
if (!found)
return -ENOMEM;
- list_add(&found->list, &info->space_info);
INIT_LIST_HEAD(&found->block_groups);
+ init_rwsem(&found->groups_sem);
spin_lock_init(&found->lock);
found->flags = flags;
found->total_bytes = total_bytes;
found->bytes_used = bytes_used;
found->bytes_pinned = 0;
found->bytes_reserved = 0;
+ found->bytes_readonly = 0;
+ found->bytes_delalloc = 0;
found->full = 0;
found->force_alloc = 0;
*space_info = found;
+ list_add_rcu(&found->list, &info->space_info);
return 0;
}
}
}
-static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
+static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
+{
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ if (!cache->ro) {
+ cache->space_info->bytes_readonly += cache->key.offset -
+ btrfs_block_group_used(&cache->item);
+ cache->ro = 1;
+ }
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
+}
+
+u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
- u64 num_devices = root->fs_info->fs_devices->num_devices;
+ u64 num_devices = root->fs_info->fs_devices->rw_devices;
if (num_devices == 1)
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
return flags;
}
+static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ u64 alloc_profile;
+
+ if (data) {
+ alloc_profile = info->avail_data_alloc_bits &
+ info->data_alloc_profile;
+ data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
+ } else if (root == root->fs_info->chunk_root) {
+ alloc_profile = info->avail_system_alloc_bits &
+ info->system_alloc_profile;
+ data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
+ } else {
+ alloc_profile = info->avail_metadata_alloc_bits &
+ info->metadata_alloc_profile;
+ data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
+ }
+
+ return btrfs_reduce_alloc_profile(root, data);
+}
+
+void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
+{
+ u64 alloc_target;
+
+ alloc_target = btrfs_get_alloc_profile(root, 1);
+ BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
+ alloc_target);
+}
+
+/*
+ * for now this just makes sure we have at least 5% of our metadata space free
+ * for use.
+ */
+int btrfs_check_metadata_free_space(struct btrfs_root *root)
+{
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_space_info *meta_sinfo;
+ u64 alloc_target, thresh;
+ int committed = 0, ret;
+
+ /* get the space info for where the metadata will live */
+ alloc_target = btrfs_get_alloc_profile(root, 0);
+ meta_sinfo = __find_space_info(info, alloc_target);
+
+again:
+ spin_lock(&meta_sinfo->lock);
+ if (!meta_sinfo->full)
+ thresh = meta_sinfo->total_bytes * 80;
+ else
+ thresh = meta_sinfo->total_bytes * 95;
+
+ do_div(thresh, 100);
+
+ if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
+ meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
+ struct btrfs_trans_handle *trans;
+ if (!meta_sinfo->full) {
+ meta_sinfo->force_alloc = 1;
+ spin_unlock(&meta_sinfo->lock);
+
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
+
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ 2 * 1024 * 1024, alloc_target, 0);
+ btrfs_end_transaction(trans, root);
+ goto again;
+ }
+ spin_unlock(&meta_sinfo->lock);
+
+ if (!committed) {
+ committed = 1;
+ trans = btrfs_join_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
+ goto again;
+ }
+ return -ENOSPC;
+ }
+ spin_unlock(&meta_sinfo->lock);
+
+ return 0;
+}
+
+/*
+ * This will check the space that the inode allocates from to make sure we have
+ * enough space for bytes.
+ */
+int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+ int ret = 0, committed = 0;
+
+ /* make sure bytes are sectorsize aligned */
+ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+
+ data_sinfo = BTRFS_I(inode)->space_info;
+again:
+ /* make sure we have enough space to handle the data first */
+ spin_lock(&data_sinfo->lock);
+ if (data_sinfo->total_bytes - data_sinfo->bytes_used -
+ data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
+ data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
+ data_sinfo->bytes_may_use < bytes) {
+ struct btrfs_trans_handle *trans;
+
+ /*
+ * if we don't have enough free bytes in this space then we need
+ * to alloc a new chunk.
+ */
+ if (!data_sinfo->full) {
+ u64 alloc_target;
+
+ data_sinfo->force_alloc = 1;
+ spin_unlock(&data_sinfo->lock);
+
+ alloc_target = btrfs_get_alloc_profile(root, 1);
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
+
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ bytes + 2 * 1024 * 1024,
+ alloc_target, 0);
+ btrfs_end_transaction(trans, root);
+ if (ret)
+ return ret;
+ goto again;
+ }
+ spin_unlock(&data_sinfo->lock);
+
+ /* commit the current transaction and try again */
+ if (!committed) {
+ committed = 1;
+ trans = btrfs_join_transaction(root, 1);
+ if (!trans)
+ return -ENOMEM;
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret)
+ return ret;
+ goto again;
+ }
+
+ printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
+ ", %llu bytes_used, %llu bytes_reserved, "
+ "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
+ "%llu total\n", bytes, data_sinfo->bytes_delalloc,
+ data_sinfo->bytes_used, data_sinfo->bytes_reserved,
+ data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
+ data_sinfo->bytes_may_use, data_sinfo->total_bytes);
+ return -ENOSPC;
+ }
+ data_sinfo->bytes_may_use += bytes;
+ BTRFS_I(inode)->reserved_bytes += bytes;
+ spin_unlock(&data_sinfo->lock);
+
+ return btrfs_check_metadata_free_space(root);
+}
+
+/*
+ * if there was an error for whatever reason after calling
+ * btrfs_check_data_free_space, call this so we can cleanup the counters.
+ */
+void btrfs_free_reserved_data_space(struct btrfs_root *root,
+ struct inode *inode, u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+
+ /* make sure bytes are sectorsize aligned */
+ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
+
+ data_sinfo = BTRFS_I(inode)->space_info;
+ spin_lock(&data_sinfo->lock);
+ data_sinfo->bytes_may_use -= bytes;
+ BTRFS_I(inode)->reserved_bytes -= bytes;
+ spin_unlock(&data_sinfo->lock);
+}
+
+/* called when we are adding a delalloc extent to the inode's io_tree */
+void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *data_sinfo;
+
+ /* get the space info for where this inode will be storing its data */
+ data_sinfo = BTRFS_I(inode)->space_info;
+
+ /* make sure we have enough space to handle the data first */
+ spin_lock(&data_sinfo->lock);
+ data_sinfo->bytes_delalloc += bytes;
+
+ /*
+ * we are adding a delalloc extent without calling
+ * btrfs_check_data_free_space first. This happens on a weird
+ * writepage condition, but shouldn't hurt our accounting
+ */
+ if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
+ data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
+ BTRFS_I(inode)->reserved_bytes = 0;
+ } else {
+ data_sinfo->bytes_may_use -= bytes;
+ BTRFS_I(inode)->reserved_bytes -= bytes;
+ }
+
+ spin_unlock(&data_sinfo->lock);
+}
+
+/* called when we are clearing an delalloc extent from the inode's io_tree */
+void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
+ u64 bytes)
+{
+ struct btrfs_space_info *info;
+
+ info = BTRFS_I(inode)->space_info;
+
+ spin_lock(&info->lock);
+ info->bytes_delalloc -= bytes;
+ spin_unlock(&info->lock);
+}
+
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force)
{
struct btrfs_space_info *space_info;
u64 thresh;
- u64 start;
- u64 num_bytes;
- int ret = 0, waited = 0;
+ int ret = 0;
+
+ mutex_lock(&extent_root->fs_info->chunk_mutex);
- flags = reduce_alloc_profile(extent_root, flags);
+ flags = btrfs_reduce_alloc_profile(extent_root, flags);
space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
}
BUG_ON(!space_info);
+ spin_lock(&space_info->lock);
if (space_info->force_alloc) {
force = 1;
space_info->force_alloc = 0;
}
- if (space_info->full)
+ if (space_info->full) {
+ spin_unlock(&space_info->lock);
goto out;
+ }
- thresh = div_factor(space_info->total_bytes, 6);
+ thresh = space_info->total_bytes - space_info->bytes_readonly;
+ thresh = div_factor(thresh, 6);
if (!force &&
(space_info->bytes_used + space_info->bytes_pinned +
- space_info->bytes_reserved + alloc_bytes) < thresh)
+ space_info->bytes_reserved + alloc_bytes) < thresh) {
+ spin_unlock(&space_info->lock);
goto out;
-
- while (!mutex_trylock(&extent_root->fs_info->chunk_mutex)) {
- if (!force)
- goto out;
- mutex_unlock(&extent_root->fs_info->alloc_mutex);
- cond_resched();
- mutex_lock(&extent_root->fs_info->alloc_mutex);
- waited = 1;
}
+ spin_unlock(&space_info->lock);
- if (waited && space_info->full)
- goto out_unlock;
-
- ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
- if (ret == -ENOSPC) {
-printk("space info full %Lu\n", flags);
+ ret = btrfs_alloc_chunk(trans, extent_root, flags);
+ if (ret)
space_info->full = 1;
- goto out_unlock;
- }
- BUG_ON(ret);
-
- ret = btrfs_make_block_group(trans, extent_root, 0, flags,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
- BUG_ON(ret);
-
-out_unlock:
- mutex_unlock(&extent_root->fs_info->chunk_mutex);
out:
+ mutex_unlock(&extent_root->fs_info->chunk_mutex);
return ret;
}
u64 old_val;
u64 byte_in_group;
- WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
- while(total) {
+ while (total) {
cache = btrfs_lookup_block_group(info, bytenr);
- if (!cache) {
+ if (!cache)
return -1;
- }
byte_in_group = bytenr - cache->key.objectid;
WARN_ON(byte_in_group > cache->key.offset);
+ spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->dirty = 1;
old_val = btrfs_block_group_used(&cache->item);
if (alloc) {
old_val += num_bytes;
cache->space_info->bytes_used += num_bytes;
+ if (cache->ro)
+ cache->space_info->bytes_readonly -= num_bytes;
btrfs_set_block_group_used(&cache->item, old_val);
spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
} else {
old_val -= num_bytes;
cache->space_info->bytes_used -= num_bytes;
+ if (cache->ro)
+ cache->space_info->bytes_readonly += num_bytes;
btrfs_set_block_group_used(&cache->item, old_val);
spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
if (mark_free) {
int ret;
+
+ ret = btrfs_discard_extent(root, bytenr,
+ num_bytes);
+ WARN_ON(ret);
+
ret = btrfs_add_free_space(cache, bytenr,
num_bytes);
- if (ret)
- return -1;
+ WARN_ON(ret);
}
}
+ put_block_group(cache);
total -= num_bytes;
bytenr += num_bytes;
}
static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
{
struct btrfs_block_group_cache *cache;
+ u64 bytenr;
cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
if (!cache)
return 0;
- return cache->key.objectid;
+ bytenr = cache->key.objectid;
+ put_block_group(cache);
+
+ return bytenr;
}
int btrfs_update_pinned_extents(struct btrfs_root *root,
struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
- WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
+ WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex));
if (pin) {
set_extent_dirty(&fs_info->pinned_extents,
bytenr, bytenr + num - 1, GFP_NOFS);
clear_extent_dirty(&fs_info->pinned_extents,
bytenr, bytenr + num - 1, GFP_NOFS);
}
+ mutex_unlock(&root->fs_info->pinned_mutex);
+
while (num > 0) {
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache);
len = min(num, cache->key.offset -
(bytenr - cache->key.objectid));
if (pin) {
+ spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned += len;
cache->space_info->bytes_pinned += len;
spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
fs_info->total_pinned += len;
} else {
+ spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned -= len;
cache->space_info->bytes_pinned -= len;
spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
fs_info->total_pinned -= len;
+ if (cache->cached)
+ btrfs_add_free_space(cache, bytenr, len);
}
+ put_block_group(cache);
bytenr += len;
num -= len;
}
struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
- WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
while (num > 0) {
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache);
len = min(num, cache->key.offset -
(bytenr - cache->key.objectid));
+
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
if (reserve) {
- spin_lock(&cache->lock);
cache->reserved += len;
cache->space_info->bytes_reserved += len;
- spin_unlock(&cache->lock);
} else {
- spin_lock(&cache->lock);
cache->reserved -= len;
cache->space_info->bytes_reserved -= len;
- spin_unlock(&cache->lock);
}
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
+ put_block_group(cache);
bytenr += len;
num -= len;
}
struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
int ret;
- while(1) {
+ mutex_lock(&root->fs_info->pinned_mutex);
+ while (1) {
ret = find_first_extent_bit(pinned_extents, last,
&start, &end, EXTENT_DIRTY);
if (ret)
set_extent_dirty(copy, start, end, GFP_NOFS);
last = end + 1;
}
+ mutex_unlock(&root->fs_info->pinned_mutex);
return 0;
}
u64 start;
u64 end;
int ret;
- struct btrfs_block_group_cache *cache;
- mutex_lock(&root->fs_info->alloc_mutex);
- while(1) {
+ while (1) {
+ mutex_lock(&root->fs_info->pinned_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY);
if (ret)
break;
- btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
- clear_extent_dirty(unpin, start, end, GFP_NOFS);
- cache = btrfs_lookup_block_group(root->fs_info, start);
- if (cache->cached)
- btrfs_add_free_space(cache, start, end - start + 1);
- if (need_resched()) {
- mutex_unlock(&root->fs_info->alloc_mutex);
- cond_resched();
- mutex_lock(&root->fs_info->alloc_mutex);
- }
- }
- mutex_unlock(&root->fs_info->alloc_mutex);
- return 0;
-}
-
-static int finish_current_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *extent_root)
-{
- u64 start;
- u64 end;
- u64 priv;
- struct btrfs_fs_info *info = extent_root->fs_info;
- struct btrfs_path *path;
- struct btrfs_extent_ref *ref;
- struct pending_extent_op *extent_op;
- struct btrfs_key key;
- struct btrfs_extent_item extent_item;
- int ret;
- int err = 0;
-
- WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
- btrfs_set_stack_extent_refs(&extent_item, 1);
- path = btrfs_alloc_path();
- while(1) {
- ret = find_first_extent_bit(&info->extent_ins, 0, &start,
- &end, EXTENT_LOCKED);
- if (ret)
- break;
+ ret = btrfs_discard_extent(root, start, end + 1 - start);
- ret = get_state_private(&info->extent_ins, start, &priv);
- BUG_ON(ret);
- extent_op = (struct pending_extent_op *)(unsigned long)priv;
-
- if (extent_op->type == PENDING_EXTENT_INSERT) {
- key.objectid = start;
- key.offset = end + 1 - start;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- err = btrfs_insert_item(trans, extent_root, &key,
- &extent_item, sizeof(extent_item));
- BUG_ON(err);
-
- clear_extent_bits(&info->extent_ins, start, end,
- EXTENT_LOCKED, GFP_NOFS);
-
- err = insert_extent_backref(trans, extent_root, path,
- start, extent_op->parent,
- extent_root->root_key.objectid,
- extent_op->generation,
- extent_op->level);
- BUG_ON(err);
- } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
- err = lookup_extent_backref(trans, extent_root, path,
- start, extent_op->orig_parent,
- extent_root->root_key.objectid,
- extent_op->orig_generation,
- extent_op->level, 0);
- BUG_ON(err);
-
- clear_extent_bits(&info->extent_ins, start, end,
- EXTENT_LOCKED, GFP_NOFS);
-
- key.objectid = start;
- key.offset = extent_op->parent;
- key.type = BTRFS_EXTENT_REF_KEY;
- err = btrfs_set_item_key_safe(trans, extent_root, path,
- &key);
- BUG_ON(err);
- ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_extent_ref);
- btrfs_set_ref_generation(path->nodes[0], ref,
- extent_op->generation);
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_release_path(extent_root, path);
- } else {
- BUG_ON(1);
- }
- kfree(extent_op);
+ /* unlocks the pinned mutex */
+ btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
+ clear_extent_dirty(unpin, start, end, GFP_NOFS);
- if (need_resched()) {
- mutex_unlock(&extent_root->fs_info->alloc_mutex);
- cond_resched();
- mutex_lock(&extent_root->fs_info->alloc_mutex);
- }
+ cond_resched();
}
- btrfs_free_path(path);
- return 0;
+ mutex_unlock(&root->fs_info->pinned_mutex);
+ return ret;
}
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, int is_data)
+ struct btrfs_path *path,
+ u64 bytenr, u64 num_bytes, int is_data,
+ struct extent_buffer **must_clean)
{
int err = 0;
struct extent_buffer *buf;
- WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
if (is_data)
goto pinit;
u64 header_transid = btrfs_header_generation(buf);
if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
header_owner != BTRFS_TREE_RELOC_OBJECTID &&
+ header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
header_transid == trans->transid &&
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
- clean_tree_block(NULL, root, buf);
- btrfs_tree_unlock(buf);
- free_extent_buffer(buf);
+ *must_clean = buf;
return 1;
}
btrfs_tree_unlock(buf);
}
free_extent_buffer(buf);
pinit:
+ btrfs_set_path_blocking(path);
+ mutex_lock(&root->fs_info->pinned_mutex);
+ /* unlocks the pinned mutex */
btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
BUG_ON(err < 0);
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 ref_generation,
- u64 owner_objectid, int pin, int mark_free)
+ u64 owner_objectid, int pin, int mark_free,
+ int refs_to_drop)
{
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_extent_item *ei;
u32 refs;
- WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
key.objectid = bytenr;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
key.offset = num_bytes;
return -ENOMEM;
path->reada = 1;
+ path->leave_spinning = 1;
ret = lookup_extent_backref(trans, extent_root, path,
bytenr, parent, root_objectid,
ref_generation, owner_objectid, 1);
if (ret == 0) {
struct btrfs_key found_key;
extent_slot = path->slots[0];
- while(extent_slot > 0) {
+ while (extent_slot > 0) {
extent_slot--;
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
extent_slot);
break;
}
if (!found_extent) {
- ret = remove_extent_backref(trans, extent_root, path);
+ ret = remove_extent_backref(trans, extent_root, path,
+ refs_to_drop);
BUG_ON(ret);
btrfs_release_path(extent_root, path);
+ path->leave_spinning = 1;
ret = btrfs_search_slot(trans, extent_root,
&key, path, -1, 1);
+ if (ret) {
+ printk(KERN_ERR "umm, got %d back from search"
+ ", was looking for %llu\n", ret,
+ (unsigned long long)bytenr);
+ btrfs_print_leaf(extent_root, path->nodes[0]);
+ }
BUG_ON(ret);
extent_slot = path->slots[0];
}
} else {
btrfs_print_leaf(extent_root, path->nodes[0]);
WARN_ON(1);
- printk("Unable to find ref byte nr %Lu root %Lu "
- "gen %Lu owner %Lu\n", bytenr,
- root_objectid, ref_generation, owner_objectid);
+ printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
+ "parent %llu root %llu gen %llu owner %llu\n",
+ (unsigned long long)bytenr,
+ (unsigned long long)parent,
+ (unsigned long long)root_objectid,
+ (unsigned long long)ref_generation,
+ (unsigned long long)owner_objectid);
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, extent_slot,
struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
- BUG_ON(refs == 0);
- refs -= 1;
- btrfs_set_extent_refs(leaf, ei, refs);
+ /*
+ * we're not allowed to delete the extent item if there
+ * are other delayed ref updates pending
+ */
+
+ BUG_ON(refs < refs_to_drop);
+ refs -= refs_to_drop;
+ btrfs_set_extent_refs(leaf, ei, refs);
btrfs_mark_buffer_dirty(leaf);
- if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
+ if (refs == 0 && found_extent &&
+ path->slots[0] == extent_slot + 1) {
struct btrfs_extent_ref *ref;
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_ref);
- BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
+ BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop);
/* if the back ref and the extent are next to each other
* they get deleted below in one shot
*/
num_to_del = 2;
} else if (found_extent) {
/* otherwise delete the extent back ref */
- ret = remove_extent_backref(trans, extent_root, path);
+ ret = remove_extent_backref(trans, extent_root, path,
+ refs_to_drop);
BUG_ON(ret);
/* if refs are 0, we need to setup the path for deletion */
if (refs == 0) {
btrfs_release_path(extent_root, path);
+ path->leave_spinning = 1;
ret = btrfs_search_slot(trans, extent_root, &key, path,
-1, 1);
BUG_ON(ret);
if (refs == 0) {
u64 super_used;
u64 root_used;
-#ifdef BIO_RW_DISCARD
- u64 map_length = num_bytes;
- struct btrfs_multi_bio *multi = NULL;
-#endif
+ struct extent_buffer *must_clean = NULL;
if (pin) {
- ret = pin_down_bytes(trans, root, bytenr, num_bytes,
- owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
+ ret = pin_down_bytes(trans, root, path,
+ bytenr, num_bytes,
+ owner_objectid >= BTRFS_FIRST_FREE_OBJECTID,
+ &must_clean);
if (ret > 0)
mark_free = 1;
BUG_ON(ret < 0);
}
/* block accounting for super block */
- spin_lock_irq(&info->delalloc_lock);
+ spin_lock(&info->delalloc_lock);
super_used = btrfs_super_bytes_used(&info->super_copy);
btrfs_set_super_bytes_used(&info->super_copy,
super_used - num_bytes);
- spin_unlock_irq(&info->delalloc_lock);
/* block accounting for root item */
root_used = btrfs_root_used(&root->root_item);
btrfs_set_root_used(&root->root_item,
root_used - num_bytes);
+ spin_unlock(&info->delalloc_lock);
+
+ /*
+ * it is going to be very rare for someone to be waiting
+ * on the block we're freeing. del_items might need to
+ * schedule, so rather than get fancy, just force it
+ * to blocking here
+ */
+ if (must_clean)
+ btrfs_set_lock_blocking(must_clean);
+
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
BUG_ON(ret);
- ret = update_block_group(trans, root, bytenr, num_bytes, 0,
- mark_free);
- BUG_ON(ret);
-
-#ifdef BIO_RW_DISCARD
- /* Tell the block device(s) that the sectors can be discarded */
- ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
- bytenr, &map_length, &multi, 0);
- if (!ret) {
- struct btrfs_bio_stripe *stripe = multi->stripes;
- int i;
+ btrfs_release_path(extent_root, path);
- if (map_length > num_bytes)
- map_length = num_bytes;
+ if (must_clean) {
+ clean_tree_block(NULL, root, must_clean);
+ btrfs_tree_unlock(must_clean);
+ free_extent_buffer(must_clean);
+ }
- for (i = 0; i < multi->num_stripes; i++, stripe++) {
- blkdev_issue_discard(stripe->dev->bdev,
- stripe->physical >> 9,
- map_length >> 9);
- }
- kfree(multi);
+ if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+ ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
+ BUG_ON(ret);
}
-#endif
+
+ ret = update_block_group(trans, root, bytenr, num_bytes, 0,
+ mark_free);
+ BUG_ON(ret);
}
btrfs_free_path(path);
- finish_current_insert(trans, extent_root);
return ret;
}
/*
- * find all the blocks marked as pending in the radix tree and remove
- * them from the extent map
+ * remove an extent from the root, returns 0 on success
*/
-static int del_pending_extents(struct btrfs_trans_handle *trans, struct
- btrfs_root *extent_root)
+static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, u64 parent,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner_objectid, int pin,
+ int refs_to_drop)
{
- int ret;
- int err = 0;
- int mark_free = 0;
- u64 start;
- u64 end;
- u64 priv;
- struct extent_io_tree *pending_del;
- struct extent_io_tree *extent_ins;
- struct pending_extent_op *extent_op;
-
- WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
- extent_ins = &extent_root->fs_info->extent_ins;
- pending_del = &extent_root->fs_info->pending_del;
-
- while(1) {
- ret = find_first_extent_bit(pending_del, 0, &start, &end,
- EXTENT_LOCKED);
- if (ret)
- break;
-
- ret = get_state_private(pending_del, start, &priv);
- BUG_ON(ret);
- extent_op = (struct pending_extent_op *)(unsigned long)priv;
-
- clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
- GFP_NOFS);
-
- ret = pin_down_bytes(trans, extent_root, start,
- end + 1 - start, 0);
- mark_free = ret > 0;
- if (!test_range_bit(extent_ins, start, end,
- EXTENT_LOCKED, 0)) {
-free_extent:
- ret = __free_extent(trans, extent_root,
- start, end + 1 - start,
- extent_op->orig_parent,
- extent_root->root_key.objectid,
- extent_op->orig_generation,
- extent_op->level, 0, mark_free);
- kfree(extent_op);
- } else {
- kfree(extent_op);
- ret = get_state_private(extent_ins, start, &priv);
- BUG_ON(ret);
- extent_op = (struct pending_extent_op *)
- (unsigned long)priv;
-
- clear_extent_bits(extent_ins, start, end,
- EXTENT_LOCKED, GFP_NOFS);
+ WARN_ON(num_bytes < root->sectorsize);
- if (extent_op->type == PENDING_BACKREF_UPDATE)
- goto free_extent;
+ /*
+ * if metadata always pin
+ * if data pin when any transaction has committed this
+ */
+ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
+ ref_generation != trans->transid)
+ pin = 1;
- ret = update_block_group(trans, extent_root, start,
- end + 1 - start, 0, mark_free);
- BUG_ON(ret);
- kfree(extent_op);
- }
- if (ret)
- err = ret;
+ if (ref_generation != trans->transid)
+ pin = 1;
- if (need_resched()) {
- mutex_unlock(&extent_root->fs_info->alloc_mutex);
- cond_resched();
- mutex_lock(&extent_root->fs_info->alloc_mutex);
- }
- }
- return err;
+ return __free_extent(trans, root, bytenr, num_bytes, parent,
+ root_objectid, ref_generation,
+ owner_objectid, pin, pin == 0, refs_to_drop);
}
/*
- * remove an extent from the root, returns 0 on success
+ * when we free an extent, it is possible (and likely) that we free the last
+ * delayed ref for that extent as well. This searches the delayed ref tree for
+ * a given extent, and if there are no other delayed refs to be processed, it
+ * removes it from the tree.
*/
-static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u64 num_bytes, u64 parent,
- u64 root_objectid, u64 ref_generation,
- u64 owner_objectid, int pin)
+static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 bytenr)
{
- struct btrfs_root *extent_root = root->fs_info->extent_root;
- int pending_ret;
+ struct btrfs_delayed_ref_head *head;
+ struct btrfs_delayed_ref_root *delayed_refs;
+ struct btrfs_delayed_ref_node *ref;
+ struct rb_node *node;
int ret;
- WARN_ON(num_bytes < root->sectorsize);
- if (root == extent_root) {
- struct pending_extent_op *extent_op;
-
- extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
- BUG_ON(!extent_op);
-
- extent_op->type = PENDING_EXTENT_DELETE;
- extent_op->bytenr = bytenr;
- extent_op->num_bytes = num_bytes;
- extent_op->parent = parent;
- extent_op->orig_parent = parent;
- extent_op->generation = ref_generation;
- extent_op->orig_generation = ref_generation;
- extent_op->level = (int)owner_objectid;
-
- set_extent_bits(&root->fs_info->pending_del,
- bytenr, bytenr + num_bytes - 1,
- EXTENT_LOCKED, GFP_NOFS);
- set_state_private(&root->fs_info->pending_del,
- bytenr, (unsigned long)extent_op);
- return 0;
- }
- /* if metadata always pin */
- if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
- if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
- struct btrfs_block_group_cache *cache;
+ delayed_refs = &trans->transaction->delayed_refs;
+ spin_lock(&delayed_refs->lock);
+ head = btrfs_find_delayed_ref_head(trans, bytenr);
+ if (!head)
+ goto out;
- /* btrfs_free_reserved_extent */
- cache = btrfs_lookup_block_group(root->fs_info, bytenr);
- BUG_ON(!cache);
- btrfs_add_free_space(cache, bytenr, num_bytes);
- update_reserved_extents(root, bytenr, num_bytes, 0);
- return 0;
- }
- pin = 1;
- }
+ node = rb_prev(&head->node.rb_node);
+ if (!node)
+ goto out;
- /* if data pin when any transaction has committed this */
- if (ref_generation != trans->transid)
- pin = 1;
+ ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- ret = __free_extent(trans, root, bytenr, num_bytes, parent,
- root_objectid, ref_generation,
- owner_objectid, pin, pin == 0);
+ /* there are still entries for this ref, we can't drop it */
+ if (ref->bytenr == bytenr)
+ goto out;
- finish_current_insert(trans, root->fs_info->extent_root);
- pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
- return ret ? ret : pending_ret;
+ /*
+ * waiting for the lock here would deadlock. If someone else has it
+ * locked they are already in the process of dropping it anyway
+ */
+ if (!mutex_trylock(&head->mutex))
+ goto out;
+
+ /*
+ * at this point we have a head with no other entries. Go
+ * ahead and process it.
+ */
+ head->node.in_tree = 0;
+ rb_erase(&head->node.rb_node, &delayed_refs->root);
+
+ delayed_refs->num_entries--;
+
+ /*
+ * we don't take a ref on the node because we're removing it from the
+ * tree, so we just steal the ref the tree was holding.
+ */
+ delayed_refs->num_heads--;
+ if (list_empty(&head->cluster))
+ delayed_refs->num_heads_ready--;
+
+ list_del_init(&head->cluster);
+ spin_unlock(&delayed_refs->lock);
+
+ ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
+ &head->node, head->must_insert_reserved);
+ BUG_ON(ret);
+ btrfs_put_delayed_ref(&head->node);
+ return 0;
+out:
+ spin_unlock(&delayed_refs->lock);
+ return 0;
}
int btrfs_free_extent(struct btrfs_trans_handle *trans,
{
int ret;
- maybe_lock_mutex(root);
- ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
- root_objectid, ref_generation,
- owner_objectid, pin);
- maybe_unlock_mutex(root);
+ /*
+ * tree log blocks never actually go into the extent allocation
+ * tree, just update pinning info and exit early.
+ *
+ * data extents referenced by the tree log do need to have
+ * their reference counts bumped.
+ */
+ if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
+ owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+ mutex_lock(&root->fs_info->pinned_mutex);
+
+ /* unlocks the pinned mutex */
+ btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+ update_reserved_extents(root, bytenr, num_bytes, 0);
+ ret = 0;
+ } else {
+ ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
+ root_objectid, ref_generation,
+ owner_objectid,
+ BTRFS_DROP_DELAYED_REF, 1);
+ BUG_ON(ret);
+ ret = check_ref_cleanup(trans, root, bytenr);
+ BUG_ON(ret);
+ }
return ret;
}
* ins->offset == number of blocks
* Any available blocks before search_start are skipped.
*/
-static int noinline find_free_extent(struct btrfs_trans_handle *trans,
+static noinline int find_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
u64 search_start, u64 search_end,
u64 exclude_start, u64 exclude_nr,
int data)
{
- int ret;
- u64 orig_search_start;
- struct btrfs_root * root = orig_root->fs_info->extent_root;
- struct btrfs_fs_info *info = root->fs_info;
+ int ret = 0;
+ struct btrfs_root *root = orig_root->fs_info->extent_root;
u64 total_needed = num_bytes;
u64 *last_ptr = NULL;
- struct btrfs_block_group_cache *block_group;
+ u64 last_wanted = 0;
+ struct btrfs_block_group_cache *block_group = NULL;
int chunk_alloc_done = 0;
int empty_cluster = 2 * 1024 * 1024;
int allowed_chunk_alloc = 0;
+ struct list_head *head = NULL, *cur = NULL;
+ int loop = 0;
+ int extra_loop = 0;
+ struct btrfs_space_info *space_info;
WARN_ON(num_bytes < root->sectorsize);
btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
+ ins->objectid = 0;
+ ins->offset = 0;
if (orig_root->ref_cows || empty_size)
allowed_chunk_alloc = 1;
if (data & BTRFS_BLOCK_GROUP_METADATA) {
last_ptr = &root->fs_info->last_alloc;
- empty_cluster = 256 * 1024;
+ if (!btrfs_test_opt(root, SSD))
+ empty_cluster = 64 * 1024;
}
if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
last_ptr = &root->fs_info->last_data_alloc;
if (last_ptr) {
- if (*last_ptr)
+ if (*last_ptr) {
hint_byte = *last_ptr;
- else
+ last_wanted = *last_ptr;
+ } else
empty_size += empty_cluster;
+ } else {
+ empty_cluster = 0;
}
-
search_start = max(search_start, first_logical_byte(root, 0));
- orig_search_start = search_start;
-
search_start = max(search_start, hint_byte);
- total_needed += empty_size;
-new_group:
- block_group = btrfs_lookup_block_group(info, search_start);
+ if (last_wanted && search_start != last_wanted) {
+ last_wanted = 0;
+ empty_size += empty_cluster;
+ }
+
+ total_needed += empty_size;
+ block_group = btrfs_lookup_block_group(root->fs_info, search_start);
if (!block_group)
- block_group = btrfs_lookup_first_block_group(info,
+ block_group = btrfs_lookup_first_block_group(root->fs_info,
search_start);
+ space_info = __find_space_info(root->fs_info, data);
- /*
- * Ok this looks a little tricky, buts its really simple. First if we
- * didn't find a block group obviously we want to start over.
- * Secondly, if the block group we found does not match the type we
- * need, and we have a last_ptr and its not 0, chances are the last
- * allocation we made was at the end of the block group, so lets go
- * ahead and skip the looking through the rest of the block groups and
- * start at the beginning. This helps with metadata allocations,
- * since you are likely to have a bunch of data block groups to search
- * through first before you realize that you need to start over, so go
- * ahead and start over and save the time.
- */
- if (!block_group || (!block_group_bits(block_group, data) &&
- last_ptr && *last_ptr)) {
- if (search_start != orig_search_start) {
- if (last_ptr && *last_ptr) {
- total_needed += empty_cluster;
- *last_ptr = 0;
- }
- search_start = orig_search_start;
- goto new_group;
- } else if (!chunk_alloc_done && allowed_chunk_alloc) {
- ret = do_chunk_alloc(trans, root,
- num_bytes + 2 * 1024 * 1024,
- data, 1);
- if (ret < 0)
- goto error;
- BUG_ON(ret);
- chunk_alloc_done = 1;
- search_start = orig_search_start;
- goto new_group;
- } else {
- ret = -ENOSPC;
- goto error;
- }
- }
-
- /*
- * this is going to seach through all of the existing block groups it
- * can find, so if we don't find something we need to see if we can
- * allocate what we need.
- */
- ret = find_free_space(root, &block_group, &search_start,
- total_needed, data);
- if (ret == -ENOSPC) {
+ down_read(&space_info->groups_sem);
+ while (1) {
+ struct btrfs_free_space *free_space;
/*
- * instead of allocating, start at the original search start
- * and see if there is something to be found, if not then we
- * allocate
+ * the only way this happens if our hint points to a block
+ * group thats not of the proper type, while looping this
+ * should never happen
*/
- if (search_start != orig_search_start) {
- if (last_ptr && *last_ptr) {
- *last_ptr = 0;
- total_needed += empty_cluster;
- }
- search_start = orig_search_start;
- goto new_group;
- }
+ if (empty_size)
+ extra_loop = 1;
- /*
- * we've already allocated, we're pretty screwed
- */
- if (chunk_alloc_done) {
- goto error;
- } else if (!allowed_chunk_alloc && block_group &&
- block_group_bits(block_group, data)) {
- block_group->space_info->force_alloc = 1;
- goto error;
- } else if (!allowed_chunk_alloc) {
- goto error;
+ if (!block_group)
+ goto new_group_no_lock;
+
+ if (unlikely(!block_group->cached)) {
+ mutex_lock(&block_group->cache_mutex);
+ ret = cache_block_group(root, block_group);
+ mutex_unlock(&block_group->cache_mutex);
+ if (ret)
+ break;
}
- ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024,
- data, 1);
- if (ret < 0)
- goto error;
+ mutex_lock(&block_group->alloc_mutex);
+ if (unlikely(!block_group_bits(block_group, data)))
+ goto new_group;
- BUG_ON(ret);
- chunk_alloc_done = 1;
- if (block_group)
- search_start = block_group->key.objectid +
+ if (unlikely(block_group->ro))
+ goto new_group;
+
+ free_space = btrfs_find_free_space(block_group, search_start,
+ total_needed);
+ if (free_space) {
+ u64 start = block_group->key.objectid;
+ u64 end = block_group->key.objectid +
block_group->key.offset;
- else
- search_start = orig_search_start;
- goto new_group;
- }
- if (ret)
- goto error;
+ search_start = stripe_align(root, free_space->offset);
- search_start = stripe_align(root, search_start);
- ins->objectid = search_start;
- ins->offset = num_bytes;
+ /* move on to the next group */
+ if (search_start + num_bytes >= search_end)
+ goto new_group;
- if (ins->objectid + num_bytes >= search_end) {
- search_start = orig_search_start;
- if (chunk_alloc_done) {
- ret = -ENOSPC;
- goto error;
- }
- goto new_group;
- }
+ /* move on to the next group */
+ if (search_start + num_bytes > end)
+ goto new_group;
- if (ins->objectid + num_bytes >
- block_group->key.objectid + block_group->key.offset) {
- if (search_start == orig_search_start && chunk_alloc_done) {
- ret = -ENOSPC;
- goto error;
+ if (last_wanted && search_start != last_wanted) {
+ total_needed += empty_cluster;
+ empty_size += empty_cluster;
+ last_wanted = 0;
+ /*
+ * if search_start is still in this block group
+ * then we just re-search this block group
+ */
+ if (search_start >= start &&
+ search_start < end) {
+ mutex_unlock(&block_group->alloc_mutex);
+ continue;
+ }
+
+ /* else we go to the next block group */
+ goto new_group;
+ }
+
+ if (exclude_nr > 0 &&
+ (search_start + num_bytes > exclude_start &&
+ search_start < exclude_start + exclude_nr)) {
+ search_start = exclude_start + exclude_nr;
+ /*
+ * if search_start is still in this block group
+ * then we just re-search this block group
+ */
+ if (search_start >= start &&
+ search_start < end) {
+ mutex_unlock(&block_group->alloc_mutex);
+ last_wanted = 0;
+ continue;
+ }
+
+ /* else we go to the next block group */
+ goto new_group;
+ }
+
+ ins->objectid = search_start;
+ ins->offset = num_bytes;
+
+ btrfs_remove_free_space_lock(block_group, search_start,
+ num_bytes);
+ /* we are all good, lets return */
+ mutex_unlock(&block_group->alloc_mutex);
+ break;
}
- search_start = block_group->key.objectid +
- block_group->key.offset;
- goto new_group;
- }
+new_group:
+ mutex_unlock(&block_group->alloc_mutex);
+ put_block_group(block_group);
+ block_group = NULL;
+new_group_no_lock:
+ /* don't try to compare new allocations against the
+ * last allocation any more
+ */
+ last_wanted = 0;
- if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
- ins->objectid < exclude_start + exclude_nr)) {
- search_start = exclude_start + exclude_nr;
- goto new_group;
- }
+ /*
+ * Here's how this works.
+ * loop == 0: we were searching a block group via a hint
+ * and didn't find anything, so we start at
+ * the head of the block groups and keep searching
+ * loop == 1: we're searching through all of the block groups
+ * if we hit the head again we have searched
+ * all of the block groups for this space and we
+ * need to try and allocate, if we cant error out.
+ * loop == 2: we allocated more space and are looping through
+ * all of the block groups again.
+ */
+ if (loop == 0) {
+ head = &space_info->block_groups;
+ cur = head->next;
+ loop++;
+ } else if (loop == 1 && cur == head) {
+ int keep_going;
+
+ /* at this point we give up on the empty_size
+ * allocations and just try to allocate the min
+ * space.
+ *
+ * The extra_loop field was set if an empty_size
+ * allocation was attempted above, and if this
+ * is try we need to try the loop again without
+ * the additional empty_size.
+ */
+ total_needed -= empty_size;
+ empty_size = 0;
+ keep_going = extra_loop;
+ loop++;
+
+ if (allowed_chunk_alloc && !chunk_alloc_done) {
+ up_read(&space_info->groups_sem);
+ ret = do_chunk_alloc(trans, root, num_bytes +
+ 2 * 1024 * 1024, data, 1);
+ down_read(&space_info->groups_sem);
+ if (ret < 0)
+ goto loop_check;
+ head = &space_info->block_groups;
+ /*
+ * we've allocated a new chunk, keep
+ * trying
+ */
+ keep_going = 1;
+ chunk_alloc_done = 1;
+ } else if (!allowed_chunk_alloc) {
+ space_info->force_alloc = 1;
+ }
+loop_check:
+ if (keep_going) {
+ cur = head->next;
+ extra_loop = 0;
+ } else {
+ break;
+ }
+ } else if (cur == head) {
+ break;
+ }
- if (!(data & BTRFS_BLOCK_GROUP_DATA))
- trans->block_group = block_group;
+ block_group = list_entry(cur, struct btrfs_block_group_cache,
+ list);
+ atomic_inc(&block_group->count);
- ins->offset = num_bytes;
- if (last_ptr) {
- *last_ptr = ins->objectid + ins->offset;
- if (*last_ptr ==
- btrfs_super_total_bytes(&root->fs_info->super_copy))
- *last_ptr = 0;
+ search_start = block_group->key.objectid;
+ cur = cur->next;
}
- ret = 0;
-error:
+ /* we found what we needed */
+ if (ins->objectid) {
+ if (!(data & BTRFS_BLOCK_GROUP_DATA))
+ trans->block_group = block_group->key.objectid;
+
+ if (last_ptr)
+ *last_ptr = ins->objectid + ins->offset;
+ ret = 0;
+ } else if (!ret) {
+ printk(KERN_ERR "btrfs searching for %llu bytes, "
+ "num_bytes %llu, loop %d, allowed_alloc %d\n",
+ (unsigned long long)total_needed,
+ (unsigned long long)num_bytes,
+ loop, allowed_chunk_alloc);
+ ret = -ENOSPC;
+ }
+ if (block_group)
+ put_block_group(block_group);
+
+ up_read(&space_info->groups_sem);
return ret;
}
static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
{
struct btrfs_block_group_cache *cache;
- struct list_head *l;
- printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
- info->total_bytes - info->bytes_used - info->bytes_pinned -
- info->bytes_reserved, (info->full) ? "" : "not ");
-
- spin_lock(&info->lock);
- list_for_each(l, &info->block_groups) {
- cache = list_entry(l, struct btrfs_block_group_cache, list);
+ printk(KERN_INFO "space_info has %llu free, is %sfull\n",
+ (unsigned long long)(info->total_bytes - info->bytes_used -
+ info->bytes_pinned - info->bytes_reserved),
+ (info->full) ? "" : "not ");
+ printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
+ " may_use=%llu, used=%llu\n", info->total_bytes,
+ info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
+ info->bytes_used);
+
+ down_read(&info->groups_sem);
+ list_for_each_entry(cache, &info->block_groups, list) {
spin_lock(&cache->lock);
- printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
- "%Lu pinned %Lu reserved\n",
- cache->key.objectid, cache->key.offset,
- btrfs_block_group_used(&cache->item),
- cache->pinned, cache->reserved);
+ printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
+ "%llu pinned %llu reserved\n",
+ (unsigned long long)cache->key.objectid,
+ (unsigned long long)cache->key.offset,
+ (unsigned long long)btrfs_block_group_used(&cache->item),
+ (unsigned long long)cache->pinned,
+ (unsigned long long)cache->reserved);
btrfs_dump_free_space(cache, bytes);
spin_unlock(&cache->lock);
}
- spin_unlock(&info->lock);
+ up_read(&info->groups_sem);
}
static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
{
int ret;
u64 search_start = 0;
- u64 alloc_profile;
struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_block_group_cache *cache;
- if (data) {
- alloc_profile = info->avail_data_alloc_bits &
- info->data_alloc_profile;
- data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
- } else if (root == root->fs_info->chunk_root) {
- alloc_profile = info->avail_system_alloc_bits &
- info->system_alloc_profile;
- data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
- } else {
- alloc_profile = info->avail_metadata_alloc_bits &
- info->metadata_alloc_profile;
- data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
- }
+ data = btrfs_get_alloc_profile(root, data);
again:
- data = reduce_alloc_profile(root, data);
/*
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, data);
- printk("allocation failed flags %Lu, wanted %Lu\n",
- data, num_bytes);
+ printk(KERN_ERR "btrfs allocation failed flags %llu, "
+ "wanted %llu\n", (unsigned long long)data,
+ (unsigned long long)num_bytes);
dump_space_info(sinfo, num_bytes);
BUG();
}
- cache = btrfs_lookup_block_group(root->fs_info, ins->objectid);
- if (!cache) {
- printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid);
- return -ENOSPC;
- }
-
- ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset);
return ret;
}
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
{
struct btrfs_block_group_cache *cache;
+ int ret = 0;
- maybe_lock_mutex(root);
cache = btrfs_lookup_block_group(root->fs_info, start);
if (!cache) {
- printk(KERN_ERR "Unable to find block group for %Lu\n", start);
- maybe_unlock_mutex(root);
+ printk(KERN_ERR "Unable to find block group for %llu\n",
+ (unsigned long long)start);
return -ENOSPC;
}
+
+ ret = btrfs_discard_extent(root, start, len);
+
btrfs_add_free_space(cache, start, len);
+ put_block_group(cache);
update_reserved_extents(root, start, len, 0);
- maybe_unlock_mutex(root);
- return 0;
+
+ return ret;
}
int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
u64 data)
{
int ret;
- maybe_lock_mutex(root);
ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
empty_size, hint_byte, search_end, ins,
data);
update_reserved_extents(root, ins->objectid, ins->offset, 1);
- maybe_unlock_mutex(root);
return ret;
}
static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 parent,
u64 root_objectid, u64 ref_generation,
- u64 owner, struct btrfs_key *ins)
+ u64 owner, struct btrfs_key *ins,
+ int ref_mod)
{
int ret;
- int pending_ret;
u64 super_used;
u64 root_used;
u64 num_bytes = ins->offset;
parent = ins->objectid;
/* block accounting for super block */
- spin_lock_irq(&info->delalloc_lock);
+ spin_lock(&info->delalloc_lock);
super_used = btrfs_super_bytes_used(&info->super_copy);
btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
- spin_unlock_irq(&info->delalloc_lock);
/* block accounting for root item */
root_used = btrfs_root_used(&root->root_item);
btrfs_set_root_used(&root->root_item, root_used + num_bytes);
-
- if (root == extent_root) {
- struct pending_extent_op *extent_op;
-
- extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
- BUG_ON(!extent_op);
-
- extent_op->type = PENDING_EXTENT_INSERT;
- extent_op->bytenr = ins->objectid;
- extent_op->num_bytes = ins->offset;
- extent_op->parent = parent;
- extent_op->orig_parent = 0;
- extent_op->generation = ref_generation;
- extent_op->orig_generation = 0;
- extent_op->level = (int)owner;
-
- set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
- ins->objectid + ins->offset - 1,
- EXTENT_LOCKED, GFP_NOFS);
- set_state_private(&root->fs_info->extent_ins,
- ins->objectid, (unsigned long)extent_op);
- goto update_block;
- }
+ spin_unlock(&info->delalloc_lock);
memcpy(&keys[0], ins, sizeof(*ins));
keys[1].objectid = ins->objectid;
path = btrfs_alloc_path();
BUG_ON(!path);
+ path->leave_spinning = 1;
ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
sizes, 2);
BUG_ON(ret);
extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_extent_item);
- btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
+ btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod);
ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
struct btrfs_extent_ref);
btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
btrfs_set_ref_objectid(path->nodes[0], ref, owner);
- btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
+ btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod);
btrfs_mark_buffer_dirty(path->nodes[0]);
trans->alloc_exclude_start = 0;
trans->alloc_exclude_nr = 0;
btrfs_free_path(path);
- finish_current_insert(trans, extent_root);
- pending_ret = del_pending_extents(trans, extent_root);
if (ret)
goto out;
- if (pending_ret) {
- ret = pending_ret;
- goto out;
- }
-update_block:
- ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
+ ret = update_block_group(trans, root, ins->objectid,
+ ins->offset, 1, 0);
if (ret) {
- printk("update block group failed for %Lu %Lu\n",
- ins->objectid, ins->offset);
+ printk(KERN_ERR "btrfs update block group failed for %llu "
+ "%llu\n", (unsigned long long)ins->objectid,
+ (unsigned long long)ins->offset);
BUG();
}
out:
if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
return 0;
- maybe_lock_mutex(root);
- ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
- ref_generation, owner, ins);
- update_reserved_extents(root, ins->objectid, ins->offset, 0);
- maybe_unlock_mutex(root);
+
+ ret = btrfs_add_delayed_ref(trans, ins->objectid,
+ ins->offset, parent, root_objectid,
+ ref_generation, owner,
+ BTRFS_ADD_DELAYED_EXTENT, 0);
+ BUG_ON(ret);
return ret;
}
int ret;
struct btrfs_block_group_cache *block_group;
- maybe_lock_mutex(root);
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
+ mutex_lock(&block_group->cache_mutex);
cache_block_group(root, block_group);
+ mutex_unlock(&block_group->cache_mutex);
- ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset);
+ ret = btrfs_remove_free_space(block_group, ins->objectid,
+ ins->offset);
BUG_ON(ret);
+ put_block_group(block_group);
ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
- ref_generation, owner, ins);
- maybe_unlock_mutex(root);
+ ref_generation, owner, ins, 1);
return ret;
}
u64 search_end, struct btrfs_key *ins, u64 data)
{
int ret;
-
- maybe_lock_mutex(root);
-
ret = __btrfs_reserve_extent(trans, root, num_bytes,
min_alloc_size, empty_size, hint_byte,
search_end, ins, data);
BUG_ON(ret);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
- ret = __btrfs_alloc_reserved_extent(trans, root, parent,
- root_objectid, ref_generation,
- owner_objectid, ins);
+ ret = btrfs_add_delayed_ref(trans, ins->objectid,
+ ins->offset, parent, root_objectid,
+ ref_generation, owner_objectid,
+ BTRFS_ADD_DELAYED_EXTENT, 0);
BUG_ON(ret);
-
- } else {
- update_reserved_extents(root, ins->objectid, ins->offset, 1);
}
- maybe_unlock_mutex(root);
+ update_reserved_extents(root, ins->objectid, ins->offset, 1);
return ret;
}
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- u64 bytenr, u32 blocksize)
+ u64 bytenr, u32 blocksize,
+ int level)
{
struct extent_buffer *buf;
if (!buf)
return ERR_PTR(-ENOMEM);
btrfs_set_header_generation(buf, trans->transid);
+ btrfs_set_buffer_lockdep_class(buf, level);
btrfs_tree_lock(buf);
clean_tree_block(trans, root, buf);
+
+ btrfs_set_lock_blocking(buf);
btrfs_set_buffer_uptodate(buf);
+
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
set_extent_dirty(&root->dirty_log_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
buf->start + buf->len - 1, GFP_NOFS);
}
trans->blocks_used++;
+ /* this returns a buffer locked for blocking */
return buf;
}
return ERR_PTR(ret);
}
- buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
+ buf = btrfs_init_new_buffer(trans, root, ins.objectid,
+ blocksize, level);
return buf;
}
{
u64 leaf_owner;
u64 leaf_generation;
+ struct refsort *sorted;
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
int i;
int nritems;
int ret;
+ int refi = 0;
+ int slot;
BUG_ON(!btrfs_is_leaf(leaf));
nritems = btrfs_header_nritems(leaf);
leaf_owner = btrfs_header_owner(leaf);
leaf_generation = btrfs_header_generation(leaf);
+ sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
+ /* we do this loop twice. The first time we build a list
+ * of the extents we have a reference on, then we sort the list
+ * by bytenr. The second time around we actually do the
+ * extent freeing.
+ */
for (i = 0; i < nritems; i++) {
u64 disk_bytenr;
cond_resched();
btrfs_item_key_to_cpu(leaf, &key, i);
+
+ /* only extents have references, skip everything else */
if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
continue;
+
fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
+
+ /* inline extents live in the btree, they don't have refs */
if (btrfs_file_extent_type(leaf, fi) ==
BTRFS_FILE_EXTENT_INLINE)
continue;
- /*
- * FIXME make sure to insert a trans record that
- * repeats the snapshot del on crash
- */
+
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+
+ /* holes don't have refs */
if (disk_bytenr == 0)
continue;
- mutex_lock(&root->fs_info->alloc_mutex);
- ret = __btrfs_free_extent(trans, root, disk_bytenr,
+ sorted[refi].bytenr = disk_bytenr;
+ sorted[refi].slot = i;
+ refi++;
+ }
+
+ if (refi == 0)
+ goto out;
+
+ sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
+
+ for (i = 0; i < refi; i++) {
+ u64 disk_bytenr;
+
+ disk_bytenr = sorted[i].bytenr;
+ slot = sorted[i].slot;
+
+ cond_resched();
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
+ continue;
+
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+ ret = btrfs_free_extent(trans, root, disk_bytenr,
btrfs_file_extent_disk_num_bytes(leaf, fi),
leaf->start, leaf_owner, leaf_generation,
key.objectid, 0);
- mutex_unlock(&root->fs_info->alloc_mutex);
BUG_ON(ret);
atomic_inc(&root->fs_info->throttle_gen);
wake_up(&root->fs_info->transaction_throttle);
cond_resched();
}
+out:
+ kfree(sorted);
return 0;
}
-static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
+static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_leaf_ref *ref)
{
int i;
int ret;
- struct btrfs_extent_info *info = ref->extents;
+ struct btrfs_extent_info *info;
+ struct refsort *sorted;
+
+ if (ref->nritems == 0)
+ return 0;
+ sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
for (i = 0; i < ref->nritems; i++) {
- mutex_lock(&root->fs_info->alloc_mutex);
- ret = __btrfs_free_extent(trans, root, info->bytenr,
+ sorted[i].bytenr = ref->extents[i].bytenr;
+ sorted[i].slot = i;
+ }
+ sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
+
+ /*
+ * the items in the ref were sorted when the ref was inserted
+ * into the ref cache, so this is already in order
+ */
+ for (i = 0; i < ref->nritems; i++) {
+ info = ref->extents + sorted[i].slot;
+ ret = btrfs_free_extent(trans, root, info->bytenr,
info->num_bytes, ref->bytenr,
ref->owner, ref->generation,
info->objectid, 0);
- mutex_unlock(&root->fs_info->alloc_mutex);
atomic_inc(&root->fs_info->throttle_gen);
wake_up(&root->fs_info->transaction_throttle);
info++;
}
+ kfree(sorted);
return 0;
}
-int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
- u32 *refs)
+static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 start,
+ u64 len, u32 *refs)
{
int ret;
- ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
+ ret = btrfs_lookup_extent_ref(trans, root, start, len, refs);
BUG_ON(ret);
-#if 0 // some debugging code in case we see problems here
+#if 0 /* some debugging code in case we see problems here */
/* if the refs count is one, it won't get increased again. But
* if the ref count is > 1, someone may be decreasing it at
* the same time we are.
free_extent_buffer(eb);
}
if (*refs == 1) {
- printk("block %llu went down to one during drop_snap\n",
- (unsigned long long)start);
+ printk(KERN_ERR "btrfs block %llu went down to one "
+ "during drop_snap\n", (unsigned long long)start);
}
}
return ret;
}
+/*
+ * this is used while deleting old snapshots, and it drops the refs
+ * on a whole subtree starting from a level 1 node.
+ *
+ * The idea is to sort all the leaf pointers, and then drop the
+ * ref on all the leaves in order. Most of the time the leaves
+ * will have ref cache entries, so no leaf IOs will be required to
+ * find the extents they have references on.
+ *
+ * For each leaf, any references it has are also dropped in order
+ *
+ * This ends up dropping the references in something close to optimal
+ * order for reading and modifying the extent allocation tree.
+ */
+static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path)
+{
+ u64 bytenr;
+ u64 root_owner;
+ u64 root_gen;
+ struct extent_buffer *eb = path->nodes[1];
+ struct extent_buffer *leaf;
+ struct btrfs_leaf_ref *ref;
+ struct refsort *sorted = NULL;
+ int nritems = btrfs_header_nritems(eb);
+ int ret;
+ int i;
+ int refi = 0;
+ int slot = path->slots[1];
+ u32 blocksize = btrfs_level_size(root, 0);
+ u32 refs;
+
+ if (nritems == 0)
+ goto out;
+
+ root_owner = btrfs_header_owner(eb);
+ root_gen = btrfs_header_generation(eb);
+ sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
+
+ /*
+ * step one, sort all the leaf pointers so we don't scribble
+ * randomly into the extent allocation tree
+ */
+ for (i = slot; i < nritems; i++) {
+ sorted[refi].bytenr = btrfs_node_blockptr(eb, i);
+ sorted[refi].slot = i;
+ refi++;
+ }
+
+ /*
+ * nritems won't be zero, but if we're picking up drop_snapshot
+ * after a crash, slot might be > 0, so double check things
+ * just in case.
+ */
+ if (refi == 0)
+ goto out;
+
+ sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
+
+ /*
+ * the first loop frees everything the leaves point to
+ */
+ for (i = 0; i < refi; i++) {
+ u64 ptr_gen;
+
+ bytenr = sorted[i].bytenr;
+
+ /*
+ * check the reference count on this leaf. If it is > 1
+ * we just decrement it below and don't update any
+ * of the refs the leaf points to.
+ */
+ ret = drop_snap_lookup_refcount(trans, root, bytenr,
+ blocksize, &refs);
+ BUG_ON(ret);
+ if (refs != 1)
+ continue;
+
+ ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot);
+
+ /*
+ * the leaf only had one reference, which means the
+ * only thing pointing to this leaf is the snapshot
+ * we're deleting. It isn't possible for the reference
+ * count to increase again later
+ *
+ * The reference cache is checked for the leaf,
+ * and if found we'll be able to drop any refs held by
+ * the leaf without needing to read it in.
+ */
+ ref = btrfs_lookup_leaf_ref(root, bytenr);
+ if (ref && ref->generation != ptr_gen) {
+ btrfs_free_leaf_ref(root, ref);
+ ref = NULL;
+ }
+ if (ref) {
+ ret = cache_drop_leaf_ref(trans, root, ref);
+ BUG_ON(ret);
+ btrfs_remove_leaf_ref(root, ref);
+ btrfs_free_leaf_ref(root, ref);
+ } else {
+ /*
+ * the leaf wasn't in the reference cache, so
+ * we have to read it.
+ */
+ leaf = read_tree_block(root, bytenr, blocksize,
+ ptr_gen);
+ ret = btrfs_drop_leaf_ref(trans, root, leaf);
+ BUG_ON(ret);
+ free_extent_buffer(leaf);
+ }
+ atomic_inc(&root->fs_info->throttle_gen);
+ wake_up(&root->fs_info->transaction_throttle);
+ cond_resched();
+ }
+
+ /*
+ * run through the loop again to free the refs on the leaves.
+ * This is faster than doing it in the loop above because
+ * the leaves are likely to be clustered together. We end up
+ * working in nice chunks on the extent allocation tree.
+ */
+ for (i = 0; i < refi; i++) {
+ bytenr = sorted[i].bytenr;
+ ret = btrfs_free_extent(trans, root, bytenr,
+ blocksize, eb->start,
+ root_owner, root_gen, 0, 1);
+ BUG_ON(ret);
+
+ atomic_inc(&root->fs_info->throttle_gen);
+ wake_up(&root->fs_info->transaction_throttle);
+ cond_resched();
+ }
+out:
+ kfree(sorted);
+
+ /*
+ * update the path to show we've processed the entire level 1
+ * node. This will get saved into the root's drop_snapshot_progress
+ * field so these drops are not repeated again if this transaction
+ * commits.
+ */
+ path->slots[1] = nritems;
+ return 0;
+}
+
/*
* helper function for drop_snapshot, this walks down the tree dropping ref
* counts as it goes.
*/
-static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
+static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int *level)
{
struct extent_buffer *next;
struct extent_buffer *cur;
struct extent_buffer *parent;
- struct btrfs_leaf_ref *ref;
u32 blocksize;
int ret;
u32 refs;
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
- ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
+ ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
path->nodes[*level]->len, &refs);
BUG_ON(ret);
if (refs > 1)
/*
* walk down to the last node level and free all the leaves
*/
- while(*level >= 0) {
+ while (*level >= 0) {
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
if (path->slots[*level] >=
btrfs_header_nritems(cur))
break;
+
+ /* the new code goes down to level 1 and does all the
+ * leaves pointed to that node in bulk. So, this check
+ * for level 0 will always be false.
+ *
+ * But, the disk format allows the drop_snapshot_progress
+ * field in the root to leave things in a state where
+ * a leaf will need cleaning up here. If someone crashes
+ * with the old code and then boots with the new code,
+ * we might find a leaf here.
+ */
if (*level == 0) {
ret = btrfs_drop_leaf_ref(trans, root, cur);
BUG_ON(ret);
break;
}
+
+ /*
+ * once we get to level one, process the whole node
+ * at once, including everything below it.
+ */
+ if (*level == 1) {
+ ret = drop_level_one_refs(trans, root, path);
+ BUG_ON(ret);
+ break;
+ }
+
bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
blocksize = btrfs_level_size(root, *level - 1);
- ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
+ ret = drop_snap_lookup_refcount(trans, root, bytenr,
+ blocksize, &refs);
BUG_ON(ret);
+
+ /*
+ * if there is more than one reference, we don't need
+ * to read that node to drop any references it has. We
+ * just drop the ref we hold on that node and move on to the
+ * next slot in this level.
+ */
if (refs != 1) {
parent = path->nodes[*level];
root_owner = btrfs_header_owner(parent);
root_gen = btrfs_header_generation(parent);
path->slots[*level]++;
- mutex_lock(&root->fs_info->alloc_mutex);
- ret = __btrfs_free_extent(trans, root, bytenr,
+ ret = btrfs_free_extent(trans, root, bytenr,
blocksize, parent->start,
root_owner, root_gen,
*level - 1, 1);
BUG_ON(ret);
- mutex_unlock(&root->fs_info->alloc_mutex);
atomic_inc(&root->fs_info->throttle_gen);
wake_up(&root->fs_info->transaction_throttle);
continue;
}
+
/*
- * at this point, we have a single ref, and since the
- * only place referencing this extent is a dead root
- * the reference count should never go higher.
- * So, we don't need to check it again
+ * we need to keep freeing things in the next level down.
+ * read the block and loop around to process it
*/
- if (*level == 1) {
- ref = btrfs_lookup_leaf_ref(root, bytenr);
- if (ref && ref->generation != ptr_gen) {
- btrfs_free_leaf_ref(root, ref);
- ref = NULL;
- }
- if (ref) {
- ret = cache_drop_leaf_ref(trans, root, ref);
- BUG_ON(ret);
- btrfs_remove_leaf_ref(root, ref);
- btrfs_free_leaf_ref(root, ref);
- *level = 0;
- break;
- }
- if (printk_ratelimit()) {
- printk("leaf ref miss for bytenr %llu\n",
- (unsigned long long)bytenr);
- }
- }
- next = btrfs_find_tree_block(root, bytenr, blocksize);
- if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
- free_extent_buffer(next);
-
- next = read_tree_block(root, bytenr, blocksize,
- ptr_gen);
- cond_resched();
-#if 0
- /*
- * this is a debugging check and can go away
- * the ref should never go all the way down to 1
- * at this point
- */
- ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
- &refs);
- BUG_ON(ret);
- WARN_ON(refs != 1);
-#endif
- }
+ next = read_tree_block(root, bytenr, blocksize, ptr_gen);
WARN_ON(*level <= 0);
if (path->nodes[*level-1])
free_extent_buffer(path->nodes[*level-1]);
cond_resched();
}
out:
- WARN_ON(*level < 0);
- WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
- if (path->nodes[*level] == root->node) {
- parent = path->nodes[*level];
- bytenr = path->nodes[*level]->start;
- } else {
- parent = path->nodes[*level + 1];
- bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
- }
+ WARN_ON(*level < 0);
+ WARN_ON(*level >= BTRFS_MAX_LEVEL);
+
+ if (path->nodes[*level] == root->node) {
+ parent = path->nodes[*level];
+ bytenr = path->nodes[*level]->start;
+ } else {
+ parent = path->nodes[*level + 1];
+ bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
+ }
+
+ blocksize = btrfs_level_size(root, *level);
+ root_owner = btrfs_header_owner(parent);
+ root_gen = btrfs_header_generation(parent);
+
+ /*
+ * cleanup and free the reference on the last node
+ * we processed
+ */
+ ret = btrfs_free_extent(trans, root, bytenr, blocksize,
+ parent->start, root_owner, root_gen,
+ *level, 1);
+ free_extent_buffer(path->nodes[*level]);
+ path->nodes[*level] = NULL;
+
+ *level += 1;
+ BUG_ON(ret);
+
+ cond_resched();
+ return 0;
+}
+
+/*
+ * helper function for drop_subtree, this function is similar to
+ * walk_down_tree. The main difference is that it checks reference
+ * counts while tree blocks are locked.
+ */
+static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int *level)
+{
+ struct extent_buffer *next;
+ struct extent_buffer *cur;
+ struct extent_buffer *parent;
+ u64 bytenr;
+ u64 ptr_gen;
+ u32 blocksize;
+ u32 refs;
+ int ret;
+
+ cur = path->nodes[*level];
+ ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
+ &refs);
+ BUG_ON(ret);
+ if (refs > 1)
+ goto out;
+
+ while (*level >= 0) {
+ cur = path->nodes[*level];
+ if (*level == 0) {
+ ret = btrfs_drop_leaf_ref(trans, root, cur);
+ BUG_ON(ret);
+ clean_tree_block(trans, root, cur);
+ break;
+ }
+ if (path->slots[*level] >= btrfs_header_nritems(cur)) {
+ clean_tree_block(trans, root, cur);
+ break;
+ }
+
+ bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
+ blocksize = btrfs_level_size(root, *level - 1);
+ ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
+
+ next = read_tree_block(root, bytenr, blocksize, ptr_gen);
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+
+ ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
+ &refs);
+ BUG_ON(ret);
+ if (refs > 1) {
+ parent = path->nodes[*level];
+ ret = btrfs_free_extent(trans, root, bytenr,
+ blocksize, parent->start,
+ btrfs_header_owner(parent),
+ btrfs_header_generation(parent),
+ *level - 1, 1);
+ BUG_ON(ret);
+ path->slots[*level]++;
+ btrfs_tree_unlock(next);
+ free_extent_buffer(next);
+ continue;
+ }
+
+ *level = btrfs_header_level(next);
+ path->nodes[*level] = next;
+ path->slots[*level] = 0;
+ path->locks[*level] = 1;
+ cond_resched();
+ }
+out:
+ parent = path->nodes[*level + 1];
+ bytenr = path->nodes[*level]->start;
+ blocksize = path->nodes[*level]->len;
- blocksize = btrfs_level_size(root, *level);
- root_owner = btrfs_header_owner(parent);
- root_gen = btrfs_header_generation(parent);
+ ret = btrfs_free_extent(trans, root, bytenr, blocksize,
+ parent->start, btrfs_header_owner(parent),
+ btrfs_header_generation(parent), *level, 1);
+ BUG_ON(ret);
- mutex_lock(&root->fs_info->alloc_mutex);
- ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
- parent->start, root_owner, root_gen,
- *level, 1);
- mutex_unlock(&root->fs_info->alloc_mutex);
+ if (path->locks[*level]) {
+ btrfs_tree_unlock(path->nodes[*level]);
+ path->locks[*level] = 0;
+ }
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
*level += 1;
- BUG_ON(ret);
-
cond_resched();
return 0;
}
* to find the first node higher up where we haven't yet gone through
* all the slots
*/
-static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
+static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct btrfs_path *path, int *level)
+ struct btrfs_path *path,
+ int *level, int max_level)
{
u64 root_owner;
u64 root_gen;
int slot;
int ret;
- for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
+ for (i = *level; i < max_level && path->nodes[i]; i++) {
slot = path->slots[i];
if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
struct extent_buffer *node;
struct btrfs_disk_key disk_key;
+
+ /*
+ * there is more work to do in this level.
+ * Update the drop_progress marker to reflect
+ * the work we've done so far, and then bump
+ * the slot number
+ */
node = path->nodes[i];
path->slots[i]++;
*level = i;
return 0;
} else {
struct extent_buffer *parent;
+
+ /*
+ * this whole node is done, free our reference
+ * on it and go up one level
+ */
if (path->nodes[*level] == root->node)
parent = path->nodes[*level];
else
root_owner = btrfs_header_owner(parent);
root_gen = btrfs_header_generation(parent);
+
+ clean_tree_block(trans, root, path->nodes[*level]);
ret = btrfs_free_extent(trans, root,
path->nodes[*level]->start,
path->nodes[*level]->len,
parent->start, root_owner,
root_gen, *level, 1);
BUG_ON(ret);
+ if (path->locks[*level]) {
+ btrfs_tree_unlock(path->nodes[*level]);
+ path->locks[*level] = 0;
+ }
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
*level = i + 1;
struct btrfs_path *path;
int i;
int orig_level;
+ int update_count;
struct btrfs_root_item *root_item = &root->root_item;
WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
}
}
}
- while(1) {
+ while (1) {
+ unsigned long update;
wret = walk_down_tree(trans, root, path, &level);
if (wret > 0)
break;
if (wret < 0)
ret = wret;
- wret = walk_up_tree(trans, root, path, &level);
+ wret = walk_up_tree(trans, root, path, &level,
+ BTRFS_MAX_LEVEL);
if (wret > 0)
break;
if (wret < 0)
ret = wret;
- if (trans->transaction->in_commit) {
+ if (trans->transaction->in_commit ||
+ trans->transaction->delayed_refs.flushing) {
ret = -EAGAIN;
break;
}
atomic_inc(&root->fs_info->throttle_gen);
wake_up(&root->fs_info->transaction_throttle);
+ for (update_count = 0; update_count < 16; update_count++) {
+ update = trans->delayed_ref_updates;
+ trans->delayed_ref_updates = 0;
+ if (update)
+ btrfs_run_delayed_refs(trans, root, update);
+ else
+ break;
+ }
}
for (i = 0; i <= orig_level; i++) {
if (path->nodes[i]) {
return ret;
}
+int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct extent_buffer *node,
+ struct extent_buffer *parent)
+{
+ struct btrfs_path *path;
+ int level;
+ int parent_level;
+ int ret = 0;
+ int wret;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+
+ btrfs_assert_tree_locked(parent);
+ parent_level = btrfs_header_level(parent);
+ extent_buffer_get(parent);
+ path->nodes[parent_level] = parent;
+ path->slots[parent_level] = btrfs_header_nritems(parent);
+
+ btrfs_assert_tree_locked(node);
+ level = btrfs_header_level(node);
+ extent_buffer_get(node);
+ path->nodes[level] = node;
+ path->slots[level] = 0;
+
+ while (1) {
+ wret = walk_down_subtree(trans, root, path, &level);
+ if (wret < 0)
+ ret = wret;
+ if (wret != 0)
+ break;
+
+ wret = walk_up_tree(trans, root, path, &level, parent_level);
+ if (wret < 0)
+ ret = wret;
+ if (wret != 0)
+ break;
+ }
+
+ btrfs_free_path(path);
+ return ret;
+}
+
static unsigned long calc_ra(unsigned long start, unsigned long last,
unsigned long nr)
{
return min(last, start + nr - 1);
}
-static int noinline relocate_inode_pages(struct inode *inode, u64 start,
+static noinline int relocate_inode_pages(struct inode *inode, u64 start,
u64 len)
{
u64 page_start;
}
set_page_extent_mapped(page);
- btrfs_set_extent_delalloc(inode, page_start, page_end);
if (i == first_index)
set_extent_bits(io_tree, page_start, page_end,
EXTENT_BOUNDARY, GFP_NOFS);
+ btrfs_set_extent_delalloc(inode, page_start, page_end);
set_page_dirty(page);
total_dirty++;
return ret;
}
-static int noinline relocate_data_extent(struct inode *reloc_inode,
+static noinline int relocate_data_extent(struct inode *reloc_inode,
struct btrfs_key *extent_key,
u64 offset)
{
struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
struct extent_map *em;
+ u64 start = extent_key->objectid - offset;
+ u64 end = start + extent_key->offset - 1;
em = alloc_extent_map(GFP_NOFS);
BUG_ON(!em || IS_ERR(em));
- em->start = extent_key->objectid - offset;
+ em->start = start;
em->len = extent_key->offset;
em->block_len = extent_key->offset;
em->block_start = extent_key->objectid;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
/* setup extent map to cheat btrfs_readpage */
- mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex);
+ lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
while (1) {
int ret;
spin_lock(&em_tree->lock);
free_extent_map(em);
break;
}
- btrfs_drop_extent_cache(reloc_inode, em->start,
- em->start + em->len - 1, 0);
+ btrfs_drop_extent_cache(reloc_inode, start, end, 0);
}
- mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex);
+ unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
- return relocate_inode_pages(reloc_inode, extent_key->objectid - offset,
- extent_key->offset);
+ return relocate_inode_pages(reloc_inode, start, extent_key->offset);
}
struct btrfs_ref_path {
u32 num_refs;
int lowest_level;
int current_level;
+ int shared_level;
+
+ struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
+ u64 new_nodes[BTRFS_MAX_LEVEL];
};
struct disk_extent {
root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
root_objectid == BTRFS_DEV_TREE_OBJECTID ||
- root_objectid == BTRFS_TREE_LOG_OBJECTID)
+ root_objectid == BTRFS_TREE_LOG_OBJECTID ||
+ root_objectid == BTRFS_CSUM_TREE_OBJECTID)
return 1;
return 0;
}
-static int noinline __next_ref_path(struct btrfs_trans_handle *trans,
+static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root,
struct btrfs_ref_path *ref_path,
int first_time)
if (!path)
return -ENOMEM;
- mutex_lock(&extent_root->fs_info->alloc_mutex);
-
if (first_time) {
ref_path->lowest_level = -1;
ref_path->current_level = -1;
+ ref_path->shared_level = -1;
goto walk_up;
}
walk_down:
if (level < ref_path->lowest_level)
break;
- if (level >= 0) {
+ if (level >= 0)
bytenr = ref_path->nodes[level];
- } else {
+ else
bytenr = ref_path->extent_start;
- }
BUG_ON(bytenr == 0);
parent = ref_path->nodes[level + 1];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid == bytenr &&
- found_key.type == BTRFS_EXTENT_REF_KEY)
+ found_key.type == BTRFS_EXTENT_REF_KEY) {
+ if (level < ref_path->shared_level)
+ ref_path->shared_level = level;
goto found;
+ }
next:
level--;
btrfs_release_path(extent_root, path);
- if (need_resched()) {
- mutex_unlock(&extent_root->fs_info->alloc_mutex);
- cond_resched();
- mutex_lock(&extent_root->fs_info->alloc_mutex);
- }
+ cond_resched();
}
/* reached lowest level */
ret = 1;
level = ref_path->current_level;
while (level < BTRFS_MAX_LEVEL - 1) {
u64 ref_objectid;
- if (level >= 0) {
+
+ if (level >= 0)
bytenr = ref_path->nodes[level];
- } else {
+ else
bytenr = ref_path->extent_start;
- }
+
BUG_ON(bytenr == 0);
key.objectid = bytenr;
}
btrfs_release_path(extent_root, path);
- if (need_resched()) {
- mutex_unlock(&extent_root->fs_info->alloc_mutex);
- cond_resched();
- mutex_lock(&extent_root->fs_info->alloc_mutex);
- }
+ cond_resched();
}
/* reached max tree level, but no tree root found. */
BUG();
out:
- mutex_unlock(&extent_root->fs_info->alloc_mutex);
btrfs_free_path(path);
return ret;
}
return __next_ref_path(trans, extent_root, ref_path, 0);
}
-static int noinline get_new_locations(struct inode *reloc_inode,
+static noinline int get_new_locations(struct inode *reloc_inode,
struct btrfs_key *extent_key,
u64 offset, int no_fragment,
struct disk_extent **extents,
exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
fi);
- WARN_ON(exts[nr].offset > 0);
- WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
+ BUG_ON(exts[nr].offset > 0);
+ BUG_ON(exts[nr].compression || exts[nr].encryption);
+ BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
cur_pos += exts[nr].num_bytes;
nr++;
path->slots[0]++;
}
- WARN_ON(cur_pos + offset > last_byte);
+ BUG_ON(cur_pos + offset > last_byte);
if (cur_pos + offset < last_byte) {
ret = -ENOENT;
goto out;
return ret;
}
-static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
+static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_key *extent_key,
u64 lock_end = 0;
u64 num_bytes;
u64 ext_offset;
- u64 first_pos;
+ u64 search_end = (u64)-1;
u32 nritems;
int nr_scaned = 0;
int extent_locked = 0;
+ int extent_type;
int ret;
memcpy(&key, leaf_key, sizeof(key));
- first_pos = INT_LIMIT(loff_t) - extent_key->offset;
if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
if (key.objectid < ref_path->owner_objectid ||
(key.objectid == ref_path->owner_objectid &&
* the file extent item was modified by someone
* before the extent got locked.
*/
- mutex_unlock(&BTRFS_I(inode)->extent_mutex);
unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
lock_end, GFP_NOFS);
extent_locked = 0;
if ((key.objectid > ref_path->owner_objectid) ||
(key.objectid == ref_path->owner_objectid &&
key.type > BTRFS_EXTENT_DATA_KEY) ||
- (key.offset >= first_pos + extent_key->offset))
+ key.offset >= search_end)
break;
}
}
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- if ((btrfs_file_extent_type(leaf, fi) !=
- BTRFS_FILE_EXTENT_REG) ||
+ extent_type = btrfs_file_extent_type(leaf, fi);
+ if ((extent_type != BTRFS_FILE_EXTENT_REG &&
+ extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
(btrfs_file_extent_disk_bytenr(leaf, fi) !=
extent_key->objectid)) {
path->slots[0]++;
num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
ext_offset = btrfs_file_extent_offset(leaf, fi);
- if (first_pos > key.offset - ext_offset)
- first_pos = key.offset - ext_offset;
+ if (search_end == (u64)-1) {
+ search_end = key.offset - ext_offset +
+ btrfs_file_extent_ram_bytes(leaf, fi);
+ }
if (!extent_locked) {
lock_start = key.offset;
lock_end = lock_start + num_bytes - 1;
} else {
- BUG_ON(lock_start != key.offset);
- BUG_ON(lock_end - lock_start + 1 < num_bytes);
+ if (lock_start > key.offset ||
+ lock_end + 1 < key.offset + num_bytes) {
+ unlock_extent(&BTRFS_I(inode)->io_tree,
+ lock_start, lock_end, GFP_NOFS);
+ extent_locked = 0;
+ }
}
if (!inode) {
if (ordered)
btrfs_put_ordered_extent(ordered);
- mutex_lock(&BTRFS_I(inode)->extent_mutex);
extent_locked = 1;
continue;
}
if (nr_extents == 1) {
/* update extent pointer in place */
- btrfs_set_file_extent_generation(leaf, fi,
- trans->transid);
btrfs_set_file_extent_disk_bytenr(leaf, fi,
new_extents[0].disk_bytenr);
btrfs_set_file_extent_disk_num_bytes(leaf, fi,
new_extents[0].disk_num_bytes);
- btrfs_set_file_extent_ram_bytes(leaf, fi,
- new_extents[0].ram_bytes);
- ext_offset += new_extents[0].offset;
- btrfs_set_file_extent_offset(leaf, fi, ext_offset);
btrfs_mark_buffer_dirty(leaf);
btrfs_drop_extent_cache(inode, key.offset,
btrfs_release_path(root, path);
key.offset += num_bytes;
} else {
+ BUG_ON(1);
+#if 0
u64 alloc_hint;
u64 extent_len;
int i;
break;
}
BUG_ON(i >= nr_extents);
+#endif
}
if (extent_locked) {
- mutex_unlock(&BTRFS_I(inode)->extent_mutex);
unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
lock_end, GFP_NOFS);
extent_locked = 0;
}
skip:
if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
- key.offset >= first_pos + extent_key->offset)
+ key.offset >= search_end)
break;
cond_resched();
if (inode) {
mutex_unlock(&inode->i_mutex);
if (extent_locked) {
- mutex_unlock(&BTRFS_I(inode)->extent_mutex);
unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
lock_end, GFP_NOFS);
}
return ret;
}
-int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
- u64 num_bytes, u64 new_bytenr)
-{
- set_extent_bits(&root->fs_info->reloc_mapping_tree,
- orig_bytenr, orig_bytenr + num_bytes - 1,
- EXTENT_LOCKED, GFP_NOFS);
- set_state_private(&root->fs_info->reloc_mapping_tree,
- orig_bytenr, new_bytenr);
- return 0;
-}
-
-int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
- u64 num_bytes, u64 *new_bytenr)
-{
- u64 bytenr;
- u64 cur_bytenr = orig_bytenr;
- u64 prev_bytenr = orig_bytenr;
- int ret;
-
- while (1) {
- ret = get_state_private(&root->fs_info->reloc_mapping_tree,
- cur_bytenr, &bytenr);
- if (ret)
- break;
- prev_bytenr = cur_bytenr;
- cur_bytenr = bytenr;
- }
-
- if (orig_bytenr == cur_bytenr)
- return -ENOENT;
-
- if (prev_bytenr != orig_bytenr) {
- set_state_private(&root->fs_info->reloc_mapping_tree,
- orig_bytenr, cur_bytenr);
- }
- *new_bytenr = cur_bytenr;
- return 0;
-}
-
-void btrfs_free_reloc_mappings(struct btrfs_root *root)
-{
- clear_extent_bits(&root->fs_info->reloc_mapping_tree,
- 0, (u64)-1, -1, GFP_NOFS);
-}
-
int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf, u64 orig_start)
ref->bytenr = buf->start;
ref->owner = btrfs_header_owner(buf);
ref->generation = btrfs_header_generation(buf);
+
ret = btrfs_add_leaf_ref(root, ref, 0);
WARN_ON(ret);
btrfs_free_leaf_ref(root, ref);
return 0;
}
-static int noinline invalidate_extent_cache(struct btrfs_root *root,
+static noinline int invalidate_extent_cache(struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_block_group_cache *group,
struct btrfs_root *target_root)
lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
key.offset + num_bytes - 1, GFP_NOFS);
- mutex_lock(&BTRFS_I(inode)->extent_mutex);
btrfs_drop_extent_cache(inode, key.offset,
key.offset + num_bytes - 1, 1);
- mutex_unlock(&BTRFS_I(inode)->extent_mutex);
unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
key.offset + num_bytes - 1, GFP_NOFS);
cond_resched();
return 0;
}
-static int noinline replace_extents_in_leaf(struct btrfs_trans_handle *trans,
+static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *leaf,
struct btrfs_block_group_cache *group,
ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
- btrfs_set_file_extent_generation(leaf, fi, trans->transid);
- btrfs_set_file_extent_ram_bytes(leaf, fi,
- new_extent->ram_bytes);
btrfs_set_file_extent_disk_bytenr(leaf, fi,
new_extent->disk_bytenr);
btrfs_set_file_extent_disk_num_bytes(leaf, fi,
new_extent->disk_num_bytes);
- new_extent->offset += btrfs_file_extent_offset(leaf, fi);
- btrfs_set_file_extent_offset(leaf, fi, new_extent->offset);
btrfs_mark_buffer_dirty(leaf);
ret = btrfs_inc_extent_ref(trans, root,
root->root_key.objectid,
trans->transid, key.objectid);
BUG_ON(ret);
+
ret = btrfs_free_extent(trans, root,
bytenr, num_bytes, leaf->start,
btrfs_header_owner(leaf),
return 0;
}
-int btrfs_free_reloc_root(struct btrfs_root *root)
+int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
{
struct btrfs_root *reloc_root;
+ int ret;
if (root->reloc_root) {
reloc_root = root->reloc_root;
root->reloc_root = NULL;
list_add(&reloc_root->dead_list,
&root->fs_info->dead_reloc_roots);
+
+ btrfs_set_root_bytenr(&reloc_root->root_item,
+ reloc_root->node->start);
+ btrfs_set_root_level(&root->root_item,
+ btrfs_header_level(reloc_root->node));
+ memset(&reloc_root->root_item.drop_progress, 0,
+ sizeof(struct btrfs_disk_key));
+ reloc_root->root_item.drop_level = 0;
+
+ ret = btrfs_update_root(trans, root->fs_info->tree_root,
+ &reloc_root->root_key,
+ &reloc_root->root_item);
+ BUG_ON(ret);
}
return 0;
}
return 0;
}
-static int noinline init_reloc_tree(struct btrfs_trans_handle *trans,
+static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_root *reloc_root;
btrfs_set_root_refs(root_item, 0);
btrfs_set_root_bytenr(root_item, eb->start);
btrfs_set_root_level(root_item, btrfs_header_level(eb));
- memset(&root_item->drop_progress, 0, sizeof(root_item->drop_progress));
- root_item->drop_level = 0;
+ btrfs_set_root_generation(root_item, trans->transid);
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
* Core function of space balance.
*
* The idea is using reloc trees to relocate tree blocks in reference
- * counted roots. There is one reloc tree for each subvol, all reloc
- * trees share same key objectid. Reloc trees are snapshots of the
- * latest committed roots (subvol root->commit_root). To relocate a tree
- * block referenced by a subvol, the code COW the block through the reloc
- * tree, then update pointer in the subvol to point to the new block.
- * Since all reloc trees share same key objectid, we can easily do special
- * handing to share tree blocks between reloc trees. Once a tree block has
- * been COWed in one reloc tree, we can use the result when the same block
- * is COWed again through other reloc trees.
+ * counted roots. There is one reloc tree for each subvol, and all
+ * reloc trees share same root key objectid. Reloc trees are snapshots
+ * of the latest committed roots of subvols (root->commit_root).
+ *
+ * To relocate a tree block referenced by a subvol, there are two steps.
+ * COW the block through subvol's reloc tree, then update block pointer
+ * in the subvol to point to the new block. Since all reloc trees share
+ * same root key objectid, doing special handing for tree blocks owned
+ * by them is easy. Once a tree block has been COWed in one reloc tree,
+ * we can use the resulting new block directly when the same block is
+ * required to COW again through other reloc trees. By this way, relocated
+ * tree blocks are shared between reloc trees, so they are also shared
+ * between subvols.
*/
-static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
+static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_key *first_key,
struct btrfs_key *keys;
u64 *nodes;
int level;
- int lowest_merge;
+ int shared_level;
int lowest_level = 0;
- int update_refs;
int ret;
if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
lowest_level = ref_path->owner_objectid;
- if (is_cowonly_root(ref_path->root_objectid)) {
+ if (!root->ref_cows) {
path->lowest_level = lowest_level;
ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
BUG_ON(ret < 0);
return 0;
}
- keys = kzalloc(sizeof(*keys) * BTRFS_MAX_LEVEL, GFP_NOFS);
- BUG_ON(!keys);
- nodes = kzalloc(sizeof(*nodes) * BTRFS_MAX_LEVEL, GFP_NOFS);
- BUG_ON(!nodes);
-
mutex_lock(&root->fs_info->tree_reloc_mutex);
ret = init_reloc_tree(trans, root);
BUG_ON(ret);
reloc_root = root->reloc_root;
- path->lowest_level = lowest_level;
- ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 0);
- BUG_ON(ret);
- /*
- * get relocation mapping for tree blocks in the path
- */
- lowest_merge = BTRFS_MAX_LEVEL;
- for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
- u64 new_bytenr;
- eb = path->nodes[level];
- if (!eb || eb == reloc_root->node)
- continue;
- ret = btrfs_get_reloc_mapping(reloc_root, eb->start, eb->len,
- &new_bytenr);
- if (ret)
- continue;
- if (level == 0)
- btrfs_item_key_to_cpu(eb, &keys[level], 0);
- else
- btrfs_node_key_to_cpu(eb, &keys[level], 0);
- nodes[level] = new_bytenr;
- lowest_merge = level;
- }
+ shared_level = ref_path->shared_level;
+ ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
- update_refs = 0;
- if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
- eb = path->nodes[0];
- if (btrfs_header_generation(eb) < trans->transid)
- update_refs = 1;
- }
+ keys = ref_path->node_keys;
+ nodes = ref_path->new_nodes;
+ memset(&keys[shared_level + 1], 0,
+ sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
+ memset(&nodes[shared_level + 1], 0,
+ sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
- btrfs_release_path(reloc_root, path);
- /*
- * merge tree blocks that already relocated in other reloc trees
- */
- if (lowest_merge != BTRFS_MAX_LEVEL) {
+ if (nodes[lowest_level] == 0) {
+ path->lowest_level = lowest_level;
+ ret = btrfs_search_slot(trans, reloc_root, first_key, path,
+ 0, 1);
+ BUG_ON(ret);
+ for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
+ eb = path->nodes[level];
+ if (!eb || eb == reloc_root->node)
+ break;
+ nodes[level] = eb->start;
+ if (level == 0)
+ btrfs_item_key_to_cpu(eb, &keys[level], 0);
+ else
+ btrfs_node_key_to_cpu(eb, &keys[level], 0);
+ }
+ if (nodes[0] &&
+ ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+ eb = path->nodes[0];
+ ret = replace_extents_in_leaf(trans, reloc_root, eb,
+ group, reloc_inode);
+ BUG_ON(ret);
+ }
+ btrfs_release_path(reloc_root, path);
+ } else {
ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
- lowest_merge);
- BUG_ON(ret < 0);
- }
- /*
- * cow any tree blocks that still haven't been relocated
- */
- ret = btrfs_search_slot(trans, reloc_root, first_key, path, 0, 1);
- BUG_ON(ret);
- /*
- * if we are relocating data block group, update extent pointers
- * in the newly created tree leaf.
- */
- eb = path->nodes[0];
- if (update_refs && nodes[0] != eb->start) {
- ret = replace_extents_in_leaf(trans, reloc_root, eb, group,
- reloc_inode);
+ lowest_level);
BUG_ON(ret);
}
- memset(keys, 0, sizeof(*keys) * BTRFS_MAX_LEVEL);
- memset(nodes, 0, sizeof(*nodes) * BTRFS_MAX_LEVEL);
- for (level = BTRFS_MAX_LEVEL - 1; level >= lowest_level; level--) {
- eb = path->nodes[level];
- if (!eb || eb == reloc_root->node)
- continue;
- BUG_ON(btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID);
- nodes[level] = eb->start;
- if (level == 0)
- btrfs_item_key_to_cpu(eb, &keys[level], 0);
- else
- btrfs_node_key_to_cpu(eb, &keys[level], 0);
- }
-
- if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
- eb = path->nodes[0];
- extent_buffer_get(eb);
- }
- btrfs_release_path(reloc_root, path);
/*
* replace tree blocks in the fs tree with tree blocks in
* the reloc tree.
BUG_ON(ret < 0);
if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+ ret = btrfs_search_slot(trans, reloc_root, first_key, path,
+ 0, 0);
+ BUG_ON(ret);
+ extent_buffer_get(path->nodes[0]);
+ eb = path->nodes[0];
+ btrfs_release_path(reloc_root, path);
ret = invalidate_extent_cache(reloc_root, eb, group, root);
BUG_ON(ret);
free_extent_buffer(eb);
}
- mutex_unlock(&root->fs_info->tree_reloc_mutex);
+ mutex_unlock(&root->fs_info->tree_reloc_mutex);
path->lowest_level = 0;
- kfree(nodes);
- kfree(keys);
return 0;
}
-static int noinline relocate_tree_block(struct btrfs_trans_handle *trans,
+static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_key *first_key,
struct btrfs_ref_path *ref_path)
{
int ret;
- int needs_lock = 0;
-
- if (root == root->fs_info->extent_root ||
- root == root->fs_info->chunk_root ||
- root == root->fs_info->dev_root) {
- needs_lock = 1;
- mutex_lock(&root->fs_info->alloc_mutex);
- }
ret = relocate_one_path(trans, root, path, first_key,
ref_path, NULL, NULL);
BUG_ON(ret);
- if (root == root->fs_info->extent_root)
- btrfs_extent_post_op(trans, root);
- if (needs_lock)
- mutex_unlock(&root->fs_info->alloc_mutex);
-
return 0;
}
-static int noinline del_extent_zero(struct btrfs_trans_handle *trans,
+static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root,
struct btrfs_path *path,
struct btrfs_key *extent_key)
{
int ret;
- mutex_lock(&extent_root->fs_info->alloc_mutex);
ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
if (ret)
goto out;
ret = btrfs_del_item(trans, extent_root, path);
out:
btrfs_release_path(extent_root, path);
- mutex_unlock(&extent_root->fs_info->alloc_mutex);
return ret;
}
-static struct btrfs_root noinline *read_ref_root(struct btrfs_fs_info *fs_info,
+static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
struct btrfs_ref_path *ref_path)
{
struct btrfs_key root_key;
return btrfs_read_fs_root_no_name(fs_info, &root_key);
}
-static int noinline relocate_one_extent(struct btrfs_root *extent_root,
+static noinline int relocate_one_extent(struct btrfs_root *extent_root,
struct btrfs_path *path,
struct btrfs_key *extent_key,
struct btrfs_block_group_cache *group,
struct btrfs_key first_key;
u64 prev_block = 0;
- mutex_unlock(&extent_root->fs_info->alloc_mutex);
trans = btrfs_start_transaction(extent_root, 1);
BUG_ON(!trans);
ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
if (!ref_path) {
- ret = -ENOMEM;
- goto out;
+ ret = -ENOMEM;
+ goto out;
}
for (loops = 0; ; loops++) {
prev_block = block_start;
}
- if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
- pass >= 2) {
+ mutex_lock(&extent_root->fs_info->trans_mutex);
+ btrfs_record_root_in_trans(found_root);
+ mutex_unlock(&extent_root->fs_info->trans_mutex);
+ if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * try to update data extent references while
+ * keeping metadata shared between snapshots.
+ */
+ if (pass == 1) {
+ ret = relocate_one_path(trans, found_root,
+ path, &first_key, ref_path,
+ group, reloc_inode);
+ if (ret < 0)
+ goto out;
+ continue;
+ }
/*
* use fallback method to process the remaining
* references.
*/
if (!new_extents) {
u64 group_start = group->key.objectid;
+ new_extents = kmalloc(sizeof(*new_extents),
+ GFP_NOFS);
+ nr_extents = 1;
ret = get_new_locations(reloc_inode,
extent_key,
- group_start, 0,
+ group_start, 1,
&new_extents,
&nr_extents);
- if (ret < 0)
+ if (ret)
goto out;
}
- btrfs_record_root_in_trans(found_root);
ret = replace_one_extent(trans, found_root,
path, extent_key,
&first_key, ref_path,
new_extents, nr_extents);
- if (ret < 0)
- goto out;
- continue;
- }
-
- btrfs_record_root_in_trans(found_root);
- if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+ } else {
ret = relocate_tree_block(trans, found_root, path,
&first_key, ref_path);
- } else {
- /*
- * try to update data extent references while
- * keeping metadata shared between snapshots.
- */
- ret = relocate_one_path(trans, found_root, path,
- &first_key, ref_path,
- group, reloc_inode);
}
if (ret < 0)
goto out;
btrfs_end_transaction(trans, extent_root);
kfree(new_extents);
kfree(ref_path);
- mutex_lock(&extent_root->fs_info->alloc_mutex);
return ret;
}
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
- num_devices = root->fs_info->fs_devices->num_devices;
+ num_devices = root->fs_info->fs_devices->rw_devices;
if (num_devices == 1) {
stripped |= BTRFS_BLOCK_GROUP_DUP;
stripped = flags & ~stripped;
return flags;
}
-int __alloc_chunk_for_shrink(struct btrfs_root *root,
+static int __alloc_chunk_for_shrink(struct btrfs_root *root,
struct btrfs_block_group_cache *shrink_block_group,
int force)
{
spin_lock(&shrink_block_group->lock);
if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
spin_unlock(&shrink_block_group->lock);
- mutex_unlock(&root->fs_info->alloc_mutex);
trans = btrfs_start_transaction(root, 1);
- mutex_lock(&root->fs_info->alloc_mutex);
spin_lock(&shrink_block_group->lock);
new_alloc_flags = update_block_group_flags(root,
do_chunk_alloc(trans, root->fs_info->extent_root,
calc + 2 * 1024 * 1024, new_alloc_flags, force);
- mutex_unlock(&root->fs_info->alloc_mutex);
btrfs_end_transaction(trans, root);
- mutex_lock(&root->fs_info->alloc_mutex);
} else
spin_unlock(&shrink_block_group->lock);
return 0;
if (!path)
return -ENOMEM;
+ path->leave_spinning = 1;
ret = btrfs_insert_empty_inode(trans, root, path, objectid);
if (ret)
goto out;
btrfs_set_inode_generation(leaf, item, 1);
btrfs_set_inode_size(leaf, item, size);
btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
- btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NODATASUM);
+ btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(root, path);
out:
return ret;
}
-static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
+static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *group)
{
struct inode *inode = NULL;
} else {
BUG_ON(1);
}
+ BTRFS_I(inode)->index_cnt = group->key.objectid;
err = btrfs_orphan_add(trans, inode);
out:
return inode;
}
+int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
+{
+
+ struct btrfs_ordered_sum *sums;
+ struct btrfs_sector_sum *sector_sum;
+ struct btrfs_ordered_extent *ordered;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct list_head list;
+ size_t offset;
+ int ret;
+ u64 disk_bytenr;
+
+ INIT_LIST_HEAD(&list);
+
+ ordered = btrfs_lookup_ordered_extent(inode, file_pos);
+ BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
+
+ disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
+ ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
+ disk_bytenr + len - 1, &list);
+
+ while (!list_empty(&list)) {
+ sums = list_entry(list.next, struct btrfs_ordered_sum, list);
+ list_del_init(&sums->list);
+
+ sector_sum = sums->sums;
+ sums->bytenr = ordered->start;
+
+ offset = 0;
+ while (offset < sums->len) {
+ sector_sum->bytenr += ordered->start - disk_bytenr;
+ sector_sum++;
+ offset += root->sectorsize;
+ }
+
+ btrfs_add_ordered_sum(inode, ordered, sums);
+ }
+ btrfs_put_ordered_extent(ordered);
+ return 0;
+}
+
int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
{
struct btrfs_trans_handle *trans;
struct inode *reloc_inode;
struct btrfs_block_group_cache *block_group;
struct btrfs_key key;
+ u64 skipped;
u64 cur_byte;
u64 total_found;
u32 nritems;
block_group = btrfs_lookup_block_group(info, group_start);
BUG_ON(!block_group);
- printk("btrfs relocating block group %llu flags %llu\n",
+ printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
(unsigned long long)block_group->key.objectid,
(unsigned long long)block_group->flags);
reloc_inode = create_reloc_inode(info, block_group);
BUG_ON(IS_ERR(reloc_inode));
- mutex_lock(&root->fs_info->alloc_mutex);
-
__alloc_chunk_for_shrink(root, block_group, 1);
- block_group->ro = 1;
- block_group->space_info->total_bytes -= block_group->key.offset;
-
- mutex_unlock(&root->fs_info->alloc_mutex);
+ set_block_group_readonly(block_group);
btrfs_start_delalloc_inodes(info->tree_root);
btrfs_wait_ordered_extents(info->tree_root, 0);
again:
+ skipped = 0;
total_found = 0;
progress = 0;
key.objectid = block_group->key.objectid;
btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
mutex_unlock(&root->fs_info->cleaner_mutex);
- mutex_lock(&root->fs_info->alloc_mutex);
+ trans = btrfs_start_transaction(info->tree_root, 1);
+ btrfs_commit_transaction(trans, info->tree_root);
- while(1) {
+ while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
if (progress && need_resched()) {
btrfs_release_path(root, path);
- mutex_unlock(&root->fs_info->alloc_mutex);
cond_resched();
- mutex_lock(&root->fs_info->alloc_mutex);
progress = 0;
continue;
}
ret = relocate_one_extent(root, path, &key, block_group,
reloc_inode, pass);
BUG_ON(ret < 0);
+ if (ret > 0)
+ skipped++;
key.objectid = cur_byte;
key.type = 0;
}
btrfs_release_path(root, path);
- mutex_unlock(&root->fs_info->alloc_mutex);
if (pass == 0) {
btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
- WARN_ON(reloc_inode->i_mapping->nrpages);
}
if (total_found > 0) {
- printk("btrfs found %llu extents in pass %d\n",
+ printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
(unsigned long long)total_found, pass);
pass++;
+ if (total_found == skipped && pass > 2) {
+ iput(reloc_inode);
+ reloc_inode = create_reloc_inode(info, block_group);
+ pass = 0;
+ }
goto again;
}
trans = btrfs_start_transaction(info->tree_root, 1);
btrfs_commit_transaction(trans, info->tree_root);
- mutex_lock(&root->fs_info->alloc_mutex);
-
spin_lock(&block_group->lock);
WARN_ON(block_group->pinned > 0);
WARN_ON(block_group->reserved > 0);
WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
spin_unlock(&block_group->lock);
+ put_block_group(block_group);
ret = 0;
out:
- mutex_unlock(&root->fs_info->alloc_mutex);
btrfs_free_path(path);
return ret;
}
-int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *key)
+static int find_first_block_group(struct btrfs_root *root,
+ struct btrfs_path *path, struct btrfs_key *key)
{
int ret = 0;
struct btrfs_key found_key;
if (ret < 0)
goto out;
- while(1) {
+ while (1) {
slot = path->slots[0];
leaf = path->nodes[0];
if (slot >= btrfs_header_nritems(leaf)) {
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_block_group_cache *block_group;
+ struct btrfs_space_info *space_info;
struct rb_node *n;
- mutex_lock(&info->alloc_mutex);
spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
block_group = rb_entry(n, struct btrfs_block_group_cache,
cache_node);
-
- spin_unlock(&info->block_group_cache_lock);
- btrfs_remove_free_space_cache(block_group);
- spin_lock(&info->block_group_cache_lock);
-
rb_erase(&block_group->cache_node,
&info->block_group_cache_tree);
- spin_lock(&block_group->space_info->lock);
+ spin_unlock(&info->block_group_cache_lock);
+
+ btrfs_remove_free_space_cache(block_group);
+ down_write(&block_group->space_info->groups_sem);
list_del(&block_group->list);
- spin_unlock(&block_group->space_info->lock);
+ up_write(&block_group->space_info->groups_sem);
+
+ WARN_ON(atomic_read(&block_group->count) != 1);
kfree(block_group);
+
+ spin_lock(&info->block_group_cache_lock);
}
spin_unlock(&info->block_group_cache_lock);
- mutex_unlock(&info->alloc_mutex);
+
+ /* now that all the block groups are freed, go through and
+ * free all the space_info structs. This is only called during
+ * the final stages of unmount, and so we know nobody is
+ * using them. We call synchronize_rcu() once before we start,
+ * just to be on the safe side.
+ */
+ synchronize_rcu();
+
+ while(!list_empty(&info->space_info)) {
+ space_info = list_entry(info->space_info.next,
+ struct btrfs_space_info,
+ list);
+
+ list_del(&space_info->list);
+ kfree(space_info);
+ }
return 0;
}
if (!path)
return -ENOMEM;
- mutex_lock(&root->fs_info->alloc_mutex);
- while(1) {
+ while (1) {
ret = find_first_block_group(root, path, &key);
if (ret > 0) {
ret = 0;
break;
}
+ atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
+ mutex_init(&cache->alloc_mutex);
+ mutex_init(&cache->cache_mutex);
INIT_LIST_HEAD(&cache->list);
read_extent_buffer(leaf, &cache->item,
btrfs_item_ptr_offset(leaf, path->slots[0]),
&space_info);
BUG_ON(ret);
cache->space_info = space_info;
- spin_lock(&space_info->lock);
- list_add(&cache->list, &space_info->block_groups);
- spin_unlock(&space_info->lock);
+ down_write(&space_info->groups_sem);
+ list_add_tail(&cache->list, &space_info->block_groups);
+ up_write(&space_info->groups_sem);
ret = btrfs_add_block_group_cache(root->fs_info, cache);
BUG_ON(ret);
set_avail_alloc_bits(root->fs_info, cache->flags);
+ if (btrfs_chunk_readonly(root, cache->key.objectid))
+ set_block_group_readonly(cache);
}
ret = 0;
error:
btrfs_free_path(path);
- mutex_unlock(&root->fs_info->alloc_mutex);
return ret;
}
struct btrfs_root *extent_root;
struct btrfs_block_group_cache *cache;
- WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
extent_root = root->fs_info->extent_root;
root->fs_info->last_trans_new_blockgroup = trans->transid;
cache->key.objectid = chunk_offset;
cache->key.offset = size;
+ cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
+ mutex_init(&cache->alloc_mutex);
+ mutex_init(&cache->cache_mutex);
INIT_LIST_HEAD(&cache->list);
- btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
btrfs_set_block_group_used(&cache->item, bytes_used);
btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
BUG_ON(ret);
- spin_lock(&cache->space_info->lock);
- list_add(&cache->list, &cache->space_info->block_groups);
- spin_unlock(&cache->space_info->lock);
+ down_write(&cache->space_info->groups_sem);
+ list_add_tail(&cache->list, &cache->space_info->block_groups);
+ up_write(&cache->space_info->groups_sem);
ret = btrfs_add_block_group_cache(root->fs_info, cache);
BUG_ON(ret);
sizeof(cache->item));
BUG_ON(ret);
- finish_current_insert(trans, extent_root);
- ret = del_pending_extents(trans, extent_root);
- BUG_ON(ret);
set_avail_alloc_bits(extent_root->fs_info, type);
return 0;
struct btrfs_key key;
int ret;
- BUG_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
root = root->fs_info->extent_root;
block_group = btrfs_lookup_block_group(root->fs_info, group_start);
BUG_ON(!block_group);
+ BUG_ON(!block_group->ro);
memcpy(&key, &block_group->key, sizeof(key));
path = btrfs_alloc_path();
BUG_ON(!path);
- btrfs_remove_free_space_cache(block_group);
+ spin_lock(&root->fs_info->block_group_cache_lock);
rb_erase(&block_group->cache_node,
&root->fs_info->block_group_cache_tree);
- spin_lock(&block_group->space_info->lock);
+ spin_unlock(&root->fs_info->block_group_cache_lock);
+ btrfs_remove_free_space_cache(block_group);
+ down_write(&block_group->space_info->groups_sem);
list_del(&block_group->list);
+ up_write(&block_group->space_info->groups_sem);
+
+ spin_lock(&block_group->space_info->lock);
+ block_group->space_info->total_bytes -= block_group->key.offset;
+ block_group->space_info->bytes_readonly -= block_group->key.offset;
spin_unlock(&block_group->space_info->lock);
+ block_group->space_info->full = 0;
- /*
- memset(shrink_block_group, 0, sizeof(*shrink_block_group));
- kfree(shrink_block_group);
- */
+ put_block_group(block_group);
+ put_block_group(block_group);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)