1 #include <linux/module.h>
5 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
6 *root, struct btrfs_path *path, int level);
7 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
8 *root, struct btrfs_path *path, int data_size);
9 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
10 *root, struct buffer_head *dst, struct buffer_head
12 static int balance_node_right(struct btrfs_trans_handle *trans, struct
13 btrfs_root *root, struct buffer_head *dst_buf,
14 struct buffer_head *src_buf);
15 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
16 struct btrfs_path *path, int level, int slot);
18 inline void btrfs_init_path(struct btrfs_path *p)
20 memset(p, 0, sizeof(*p));
23 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
26 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
29 btrfs_block_release(root, p->nodes[i]);
31 memset(p, 0, sizeof(*p));
34 static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
35 *root, struct buffer_head *buf, struct buffer_head
36 *parent, int parent_slot, struct buffer_head
39 struct buffer_head *cow;
40 struct btrfs_node *cow_node;
42 if (buffer_dirty(buf)) {
46 cow = btrfs_alloc_free_block(trans, root);
47 cow_node = btrfs_buffer_node(cow);
48 memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
49 btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr);
51 mark_buffer_dirty(cow);
52 btrfs_inc_ref(trans, root, buf);
53 if (buf == root->node) {
56 if (buf != root->commit_root)
57 btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
58 btrfs_block_release(root, buf);
60 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
62 mark_buffer_dirty(parent);
63 btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
65 btrfs_block_release(root, buf);
70 * The leaf data grows from end-to-front in the node.
71 * this returns the offset of the start of the last item,
72 * which is the top of the leaf data stack
74 static inline unsigned int leaf_data_end(struct btrfs_root *root,
75 struct btrfs_leaf *leaf)
77 u32 nr = btrfs_header_nritems(&leaf->header);
79 return BTRFS_LEAF_DATA_SIZE(root);
80 return btrfs_item_offset(leaf->items + nr - 1);
84 * The space between the end of the leaf items and
85 * the start of the leaf data. IOW, how much room
86 * the leaf has left for both items and data
88 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
90 int data_end = leaf_data_end(root, leaf);
91 int nritems = btrfs_header_nritems(&leaf->header);
92 char *items_end = (char *)(leaf->items + nritems + 1);
93 return (char *)(btrfs_leaf_data(leaf) + data_end) - (char *)items_end;
97 * compare two keys in a memcmp fashion
99 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
103 btrfs_disk_key_to_cpu(&k1, disk);
105 if (k1.objectid > k2->objectid)
107 if (k1.objectid < k2->objectid)
109 if (k1.flags > k2->flags)
111 if (k1.flags < k2->flags)
113 if (k1.offset > k2->offset)
115 if (k1.offset < k2->offset)
120 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
124 struct btrfs_node *parent = NULL;
125 struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
127 u32 nritems = btrfs_header_nritems(&node->header);
129 if (path->nodes[level + 1])
130 parent = btrfs_buffer_node(path->nodes[level + 1]);
131 parent_slot = path->slots[level + 1];
132 BUG_ON(nritems == 0);
134 struct btrfs_disk_key *parent_key;
135 parent_key = &parent->ptrs[parent_slot].key;
136 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
137 sizeof(struct btrfs_disk_key)));
138 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
139 btrfs_header_blocknr(&node->header));
141 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
142 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
143 struct btrfs_key cpukey;
144 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
145 BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
150 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
154 struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]);
155 struct btrfs_node *parent = NULL;
157 u32 nritems = btrfs_header_nritems(&leaf->header);
159 if (path->nodes[level + 1])
160 parent = btrfs_buffer_node(path->nodes[level + 1]);
161 parent_slot = path->slots[level + 1];
162 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
168 struct btrfs_disk_key *parent_key;
169 parent_key = &parent->ptrs[parent_slot].key;
170 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
171 sizeof(struct btrfs_disk_key)));
172 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
173 btrfs_header_blocknr(&leaf->header));
175 for (i = 0; nritems > 1 && i < nritems - 2; i++) {
176 struct btrfs_key cpukey;
177 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
178 BUG_ON(comp_keys(&leaf->items[i].key,
180 BUG_ON(btrfs_item_offset(leaf->items + i) !=
181 btrfs_item_end(leaf->items + i + 1));
183 BUG_ON(btrfs_item_offset(leaf->items + i) +
184 btrfs_item_size(leaf->items + i) !=
185 BTRFS_LEAF_DATA_SIZE(root));
191 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
195 return check_leaf(root, path, level);
196 return check_node(root, path, level);
200 * search for key in the array p. items in p are item_size apart
201 * and there are 'max' items in p
202 * the slot in the array is returned via slot, and it points to
203 * the place where you would insert key if it is not found in
206 * slot may point to max if the key is bigger than all of the keys
208 static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
215 struct btrfs_disk_key *tmp;
218 mid = (low + high) / 2;
219 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
220 ret = comp_keys(tmp, key);
236 * simple bin_search frontend that does the right thing for
239 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
241 if (btrfs_is_leaf(c)) {
242 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
243 return generic_bin_search((void *)l->items,
244 sizeof(struct btrfs_item),
245 key, btrfs_header_nritems(&c->header),
248 return generic_bin_search((void *)c->ptrs,
249 sizeof(struct btrfs_key_ptr),
250 key, btrfs_header_nritems(&c->header),
256 static struct buffer_head *read_node_slot(struct btrfs_root *root,
257 struct buffer_head *parent_buf,
260 struct btrfs_node *node = btrfs_buffer_node(parent_buf);
263 if (slot >= btrfs_header_nritems(&node->header))
265 return read_tree_block(root, btrfs_node_blockptr(node, slot));
268 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
269 *root, struct btrfs_path *path, int level)
271 struct buffer_head *right_buf;
272 struct buffer_head *mid_buf;
273 struct buffer_head *left_buf;
274 struct buffer_head *parent_buf = NULL;
275 struct btrfs_node *right = NULL;
276 struct btrfs_node *mid;
277 struct btrfs_node *left = NULL;
278 struct btrfs_node *parent = NULL;
282 int orig_slot = path->slots[level];
288 mid_buf = path->nodes[level];
289 mid = btrfs_buffer_node(mid_buf);
290 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
292 if (level < BTRFS_MAX_LEVEL - 1)
293 parent_buf = path->nodes[level + 1];
294 pslot = path->slots[level + 1];
297 * deal with the case where there is only one pointer in the root
298 * by promoting the node below to a root
301 struct buffer_head *child;
302 u64 blocknr = mid_buf->b_blocknr;
304 if (btrfs_header_nritems(&mid->header) != 1)
307 /* promote the child to a root */
308 child = read_node_slot(root, mid_buf, 0);
311 path->nodes[level] = NULL;
312 /* once for the path */
313 btrfs_block_release(root, mid_buf);
314 /* once for the root ptr */
315 btrfs_block_release(root, mid_buf);
316 clean_tree_block(trans, root, mid_buf);
317 return btrfs_free_extent(trans, root, blocknr, 1, 1);
319 parent = btrfs_buffer_node(parent_buf);
321 if (btrfs_header_nritems(&mid->header) >
322 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
325 left_buf = read_node_slot(root, parent_buf, pslot - 1);
326 right_buf = read_node_slot(root, parent_buf, pslot + 1);
328 /* first, try to make some room in the middle buffer */
330 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
332 left = btrfs_buffer_node(left_buf);
333 orig_slot += btrfs_header_nritems(&left->header);
334 wret = push_node_left(trans, root, left_buf, mid_buf);
340 * then try to empty the right most buffer into the middle
343 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
345 right = btrfs_buffer_node(right_buf);
346 wret = push_node_left(trans, root, mid_buf, right_buf);
349 if (btrfs_header_nritems(&right->header) == 0) {
350 u64 blocknr = right_buf->b_blocknr;
351 btrfs_block_release(root, right_buf);
352 clean_tree_block(trans, root, right_buf);
355 wret = del_ptr(trans, root, path, level + 1, pslot +
359 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
363 memcpy(&parent->ptrs[pslot + 1].key,
365 sizeof(struct btrfs_disk_key));
366 mark_buffer_dirty(parent_buf);
369 if (btrfs_header_nritems(&mid->header) == 1) {
371 * we're not allowed to leave a node with one item in the
372 * tree during a delete. A deletion from lower in the tree
373 * could try to delete the only pointer in this node.
374 * So, pull some keys from the left.
375 * There has to be a left pointer at this point because
376 * otherwise we would have pulled some pointers from the
380 wret = balance_node_right(trans, root, mid_buf, left_buf);
385 if (btrfs_header_nritems(&mid->header) == 0) {
386 /* we've managed to empty the middle node, drop it */
387 u64 blocknr = mid_buf->b_blocknr;
388 btrfs_block_release(root, mid_buf);
389 clean_tree_block(trans, root, mid_buf);
392 wret = del_ptr(trans, root, path, level + 1, pslot);
395 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
399 /* update the parent key to reflect our changes */
400 memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
401 sizeof(struct btrfs_disk_key));
402 mark_buffer_dirty(parent_buf);
405 /* update the path */
407 if (btrfs_header_nritems(&left->header) > orig_slot) {
409 path->nodes[level] = left_buf;
410 path->slots[level + 1] -= 1;
411 path->slots[level] = orig_slot;
413 btrfs_block_release(root, mid_buf);
415 orig_slot -= btrfs_header_nritems(&left->header);
416 path->slots[level] = orig_slot;
419 /* double check we haven't messed things up */
420 check_block(root, path, level);
422 btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]),
427 btrfs_block_release(root, right_buf);
429 btrfs_block_release(root, left_buf);
434 * look for key in the tree. path is filled in with nodes along the way
435 * if key is found, we return zero and you can find the item in the leaf
436 * level of the path (level 0)
438 * If the key isn't found, the path points to the slot where it should
439 * be inserted, and 1 is returned. If there are other errors during the
440 * search a negative error number is returned.
442 * if ins_len > 0, nodes and leaves will be split as we walk down the
443 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
446 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
447 *root, struct btrfs_key *key, struct btrfs_path *p, int
450 struct buffer_head *b;
451 struct buffer_head *cow_buf;
452 struct btrfs_node *c;
461 c = btrfs_buffer_node(b);
462 level = btrfs_header_level(&c->header);
465 wret = btrfs_cow_block(trans, root, b,
471 BUG_ON(!cow && ins_len);
472 c = btrfs_buffer_node(b);
474 ret = check_block(root, p, level);
477 ret = bin_search(c, key, &slot);
478 if (!btrfs_is_leaf(c)) {
481 p->slots[level] = slot;
482 if (ins_len > 0 && btrfs_header_nritems(&c->header) ==
483 BTRFS_NODEPTRS_PER_BLOCK(root)) {
484 int sret = split_node(trans, root, p, level);
489 c = btrfs_buffer_node(b);
490 slot = p->slots[level];
491 } else if (ins_len < 0) {
492 int sret = balance_level(trans, root, p,
499 c = btrfs_buffer_node(b);
500 slot = p->slots[level];
501 BUG_ON(btrfs_header_nritems(&c->header) == 1);
503 b = read_tree_block(root, btrfs_node_blockptr(c, slot));
505 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
506 p->slots[level] = slot;
507 if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
508 sizeof(struct btrfs_item) + ins_len) {
509 int sret = split_leaf(trans, root, p, ins_len);
521 * adjust the pointers going up the tree, starting at level
522 * making sure the right key of each node points to 'key'.
523 * This is used after shifting pointers to the left, so it stops
524 * fixing up pointers when a given leaf/node is not in slot 0 of the
527 * If this fails to write a tree block, it returns -1, but continues
528 * fixing up the blocks in ram so the tree is consistent.
530 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
531 *root, struct btrfs_path *path, struct btrfs_disk_key
536 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
537 struct btrfs_node *t;
538 int tslot = path->slots[i];
541 t = btrfs_buffer_node(path->nodes[i]);
542 memcpy(&t->ptrs[tslot].key, key, sizeof(*key));
543 mark_buffer_dirty(path->nodes[i]);
551 * try to push data from one node into the next node left in the
554 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
555 * error, and > 0 if there was no room in the left hand block.
557 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
558 *root, struct buffer_head *dst_buf, struct
559 buffer_head *src_buf)
561 struct btrfs_node *src = btrfs_buffer_node(src_buf);
562 struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
568 src_nritems = btrfs_header_nritems(&src->header);
569 dst_nritems = btrfs_header_nritems(&dst->header);
570 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
571 if (push_items <= 0) {
575 if (src_nritems < push_items)
576 push_items = src_nritems;
578 memcpy(dst->ptrs + dst_nritems, src->ptrs,
579 push_items * sizeof(struct btrfs_key_ptr));
580 if (push_items < src_nritems) {
581 memmove(src->ptrs, src->ptrs + push_items,
582 (src_nritems - push_items) *
583 sizeof(struct btrfs_key_ptr));
585 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
586 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
587 mark_buffer_dirty(src_buf);
588 mark_buffer_dirty(dst_buf);
593 * try to push data from one node into the next node right in the
596 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
597 * error, and > 0 if there was no room in the right hand block.
599 * this will only push up to 1/2 the contents of the left node over
601 static int balance_node_right(struct btrfs_trans_handle *trans, struct
602 btrfs_root *root, struct buffer_head *dst_buf,
603 struct buffer_head *src_buf)
605 struct btrfs_node *src = btrfs_buffer_node(src_buf);
606 struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
613 src_nritems = btrfs_header_nritems(&src->header);
614 dst_nritems = btrfs_header_nritems(&dst->header);
615 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
616 if (push_items <= 0) {
620 max_push = src_nritems / 2 + 1;
621 /* don't try to empty the node */
622 if (max_push > src_nritems)
624 if (max_push < push_items)
625 push_items = max_push;
627 memmove(dst->ptrs + push_items, dst->ptrs,
628 dst_nritems * sizeof(struct btrfs_key_ptr));
629 memcpy(dst->ptrs, src->ptrs + src_nritems - push_items,
630 push_items * sizeof(struct btrfs_key_ptr));
632 btrfs_set_header_nritems(&src->header, src_nritems - push_items);
633 btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
635 mark_buffer_dirty(src_buf);
636 mark_buffer_dirty(dst_buf);
641 * helper function to insert a new root level in the tree.
642 * A new node is allocated, and a single item is inserted to
643 * point to the existing root
645 * returns zero on success or < 0 on failure.
647 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
648 *root, struct btrfs_path *path, int level)
650 struct buffer_head *t;
651 struct btrfs_node *lower;
652 struct btrfs_node *c;
653 struct btrfs_disk_key *lower_key;
655 BUG_ON(path->nodes[level]);
656 BUG_ON(path->nodes[level-1] != root->node);
658 t = btrfs_alloc_free_block(trans, root);
659 c = btrfs_buffer_node(t);
660 memset(c, 0, root->blocksize);
661 btrfs_set_header_nritems(&c->header, 1);
662 btrfs_set_header_level(&c->header, level);
663 btrfs_set_header_blocknr(&c->header, t->b_blocknr);
664 btrfs_set_header_parentid(&c->header,
665 btrfs_header_parentid(btrfs_buffer_header(root->node)));
666 lower = btrfs_buffer_node(path->nodes[level-1]);
667 if (btrfs_is_leaf(lower))
668 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
670 lower_key = &lower->ptrs[0].key;
671 memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key));
672 btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr);
674 mark_buffer_dirty(t);
676 /* the super has an extra ref to root->node */
677 btrfs_block_release(root, root->node);
680 path->nodes[level] = t;
681 path->slots[level] = 0;
686 * worker function to insert a single pointer in a node.
687 * the node should have enough room for the pointer already
689 * slot and level indicate where you want the key to go, and
690 * blocknr is the block the key points to.
692 * returns zero on success and < 0 on any error
694 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
695 *root, struct btrfs_path *path, struct btrfs_disk_key
696 *key, u64 blocknr, int slot, int level)
698 struct btrfs_node *lower;
701 BUG_ON(!path->nodes[level]);
702 lower = btrfs_buffer_node(path->nodes[level]);
703 nritems = btrfs_header_nritems(&lower->header);
706 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
708 if (slot != nritems) {
709 memmove(lower->ptrs + slot + 1, lower->ptrs + slot,
710 (nritems - slot) * sizeof(struct btrfs_key_ptr));
712 memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key));
713 btrfs_set_node_blockptr(lower, slot, blocknr);
714 btrfs_set_header_nritems(&lower->header, nritems + 1);
715 mark_buffer_dirty(path->nodes[level]);
720 * split the node at the specified level in path in two.
721 * The path is corrected to point to the appropriate node after the split
723 * Before splitting this tries to make some room in the node by pushing
724 * left and right, if either one works, it returns right away.
726 * returns 0 on success and < 0 on failure
728 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
729 *root, struct btrfs_path *path, int level)
731 struct buffer_head *t;
732 struct btrfs_node *c;
733 struct buffer_head *split_buffer;
734 struct btrfs_node *split;
740 t = path->nodes[level];
741 c = btrfs_buffer_node(t);
742 if (t == root->node) {
743 /* trying to split the root, lets make a new one */
744 ret = insert_new_root(trans, root, path, level + 1);
748 c_nritems = btrfs_header_nritems(&c->header);
749 split_buffer = btrfs_alloc_free_block(trans, root);
750 split = btrfs_buffer_node(split_buffer);
751 btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
752 btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr);
753 btrfs_set_header_parentid(&split->header,
754 btrfs_header_parentid(btrfs_buffer_header(root->node)));
755 mid = (c_nritems + 1) / 2;
756 memcpy(split->ptrs, c->ptrs + mid,
757 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
758 btrfs_set_header_nritems(&split->header, c_nritems - mid);
759 btrfs_set_header_nritems(&c->header, mid);
762 mark_buffer_dirty(t);
763 mark_buffer_dirty(split_buffer);
764 wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
765 split_buffer->b_blocknr, path->slots[level + 1] + 1,
770 if (path->slots[level] >= mid) {
771 path->slots[level] -= mid;
772 btrfs_block_release(root, t);
773 path->nodes[level] = split_buffer;
774 path->slots[level + 1] += 1;
776 btrfs_block_release(root, split_buffer);
782 * how many bytes are required to store the items in a leaf. start
783 * and nr indicate which items in the leaf to check. This totals up the
784 * space used both by the item structs and the item data
786 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
789 int end = start + nr - 1;
793 data_len = btrfs_item_end(l->items + start);
794 data_len = data_len - btrfs_item_offset(l->items + end);
795 data_len += sizeof(struct btrfs_item) * nr;
800 * push some data in the path leaf to the right, trying to free up at
801 * least data_size bytes. returns zero if the push worked, nonzero otherwise
803 * returns 1 if the push failed because the other node didn't have enough
804 * room, 0 if everything worked out and < 0 if there were major errors.
806 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
807 *root, struct btrfs_path *path, int data_size)
809 struct buffer_head *left_buf = path->nodes[0];
810 struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf);
811 struct btrfs_leaf *right;
812 struct buffer_head *right_buf;
813 struct buffer_head *upper;
814 struct btrfs_node *upper_node;
820 struct btrfs_item *item;
824 slot = path->slots[1];
825 if (!path->nodes[1]) {
828 upper = path->nodes[1];
829 upper_node = btrfs_buffer_node(upper);
830 if (slot >= btrfs_header_nritems(&upper_node->header) - 1) {
833 right_buf = read_tree_block(root,
834 btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1));
835 right = btrfs_buffer_leaf(right_buf);
836 free_space = btrfs_leaf_free_space(root, right);
837 if (free_space < data_size + sizeof(struct btrfs_item)) {
838 btrfs_block_release(root, right_buf);
841 /* cow and double check */
842 btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
843 right = btrfs_buffer_leaf(right_buf);
844 free_space = btrfs_leaf_free_space(root, right);
845 if (free_space < data_size + sizeof(struct btrfs_item)) {
846 btrfs_block_release(root, right_buf);
850 left_nritems = btrfs_header_nritems(&left->header);
851 for (i = left_nritems - 1; i >= 0; i--) {
852 item = left->items + i;
853 if (path->slots[0] == i)
854 push_space += data_size + sizeof(*item);
855 if (btrfs_item_size(item) + sizeof(*item) + push_space >
859 push_space += btrfs_item_size(item) + sizeof(*item);
861 if (push_items == 0) {
862 btrfs_block_release(root, right_buf);
865 right_nritems = btrfs_header_nritems(&right->header);
866 /* push left to right */
867 push_space = btrfs_item_end(left->items + left_nritems - push_items);
868 push_space -= leaf_data_end(root, left);
869 /* make room in the right data area */
870 memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) -
871 push_space, btrfs_leaf_data(right) + leaf_data_end(root, right),
872 BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right));
873 /* copy from the left data area */
874 memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space,
875 btrfs_leaf_data(left) + leaf_data_end(root, left), push_space);
876 memmove(right->items + push_items, right->items,
877 right_nritems * sizeof(struct btrfs_item));
878 /* copy the items from left to right */
879 memcpy(right->items, left->items + left_nritems - push_items,
880 push_items * sizeof(struct btrfs_item));
882 /* update the item pointers */
883 right_nritems += push_items;
884 btrfs_set_header_nritems(&right->header, right_nritems);
885 push_space = BTRFS_LEAF_DATA_SIZE(root);
886 for (i = 0; i < right_nritems; i++) {
887 btrfs_set_item_offset(right->items + i, push_space -
888 btrfs_item_size(right->items + i));
889 push_space = btrfs_item_offset(right->items + i);
891 left_nritems -= push_items;
892 btrfs_set_header_nritems(&left->header, left_nritems);
894 mark_buffer_dirty(left_buf);
895 mark_buffer_dirty(right_buf);
896 memcpy(&upper_node->ptrs[slot + 1].key,
897 &right->items[0].key, sizeof(struct btrfs_disk_key));
898 mark_buffer_dirty(upper);
900 /* then fixup the leaf pointer in the path */
901 if (path->slots[0] >= left_nritems) {
902 path->slots[0] -= left_nritems;
903 btrfs_block_release(root, path->nodes[0]);
904 path->nodes[0] = right_buf;
907 btrfs_block_release(root, right_buf);
912 * push some data in the path leaf to the left, trying to free up at
913 * least data_size bytes. returns zero if the push worked, nonzero otherwise
915 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
916 *root, struct btrfs_path *path, int data_size)
918 struct buffer_head *right_buf = path->nodes[0];
919 struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf);
920 struct buffer_head *t;
921 struct btrfs_leaf *left;
927 struct btrfs_item *item;
928 u32 old_left_nritems;
932 slot = path->slots[1];
936 if (!path->nodes[1]) {
939 t = read_tree_block(root,
940 btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1));
941 left = btrfs_buffer_leaf(t);
942 free_space = btrfs_leaf_free_space(root, left);
943 if (free_space < data_size + sizeof(struct btrfs_item)) {
944 btrfs_block_release(root, t);
948 /* cow and double check */
949 btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
950 left = btrfs_buffer_leaf(t);
951 free_space = btrfs_leaf_free_space(root, left);
952 if (free_space < data_size + sizeof(struct btrfs_item)) {
953 btrfs_block_release(root, t);
957 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
958 item = right->items + i;
959 if (path->slots[0] == i)
960 push_space += data_size + sizeof(*item);
961 if (btrfs_item_size(item) + sizeof(*item) + push_space >
965 push_space += btrfs_item_size(item) + sizeof(*item);
967 if (push_items == 0) {
968 btrfs_block_release(root, t);
971 /* push data from right to left */
972 memcpy(left->items + btrfs_header_nritems(&left->header),
973 right->items, push_items * sizeof(struct btrfs_item));
974 push_space = BTRFS_LEAF_DATA_SIZE(root) -
975 btrfs_item_offset(right->items + push_items -1);
976 memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space,
977 btrfs_leaf_data(right) +
978 btrfs_item_offset(right->items + push_items - 1),
980 old_left_nritems = btrfs_header_nritems(&left->header);
981 BUG_ON(old_left_nritems < 0);
983 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
984 u32 ioff = btrfs_item_offset(left->items + i);
985 btrfs_set_item_offset(left->items + i, ioff -
986 (BTRFS_LEAF_DATA_SIZE(root) -
987 btrfs_item_offset(left->items +
988 old_left_nritems - 1)));
990 btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
992 /* fixup right node */
993 push_space = btrfs_item_offset(right->items + push_items - 1) -
994 leaf_data_end(root, right);
995 memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
996 push_space, btrfs_leaf_data(right) +
997 leaf_data_end(root, right), push_space);
998 memmove(right->items, right->items + push_items,
999 (btrfs_header_nritems(&right->header) - push_items) *
1000 sizeof(struct btrfs_item));
1001 btrfs_set_header_nritems(&right->header,
1002 btrfs_header_nritems(&right->header) -
1004 push_space = BTRFS_LEAF_DATA_SIZE(root);
1006 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1007 btrfs_set_item_offset(right->items + i, push_space -
1008 btrfs_item_size(right->items + i));
1009 push_space = btrfs_item_offset(right->items + i);
1012 mark_buffer_dirty(t);
1013 mark_buffer_dirty(right_buf);
1015 wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1019 /* then fixup the leaf pointer in the path */
1020 if (path->slots[0] < push_items) {
1021 path->slots[0] += old_left_nritems;
1022 btrfs_block_release(root, path->nodes[0]);
1024 path->slots[1] -= 1;
1026 btrfs_block_release(root, t);
1027 path->slots[0] -= push_items;
1029 BUG_ON(path->slots[0] < 0);
1034 * split the path's leaf in two, making sure there is at least data_size
1035 * available for the resulting leaf level of the path.
1037 * returns 0 if all went well and < 0 on failure.
1039 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1040 *root, struct btrfs_path *path, int data_size)
1042 struct buffer_head *l_buf;
1043 struct btrfs_leaf *l;
1047 struct btrfs_leaf *right;
1048 struct buffer_head *right_buffer;
1049 int space_needed = data_size + sizeof(struct btrfs_item);
1056 /* first try to make some room by pushing left and right */
1057 wret = push_leaf_left(trans, root, path, data_size);
1061 wret = push_leaf_right(trans, root, path, data_size);
1065 l_buf = path->nodes[0];
1066 l = btrfs_buffer_leaf(l_buf);
1068 /* did the pushes work? */
1069 if (btrfs_leaf_free_space(root, l) >=
1070 sizeof(struct btrfs_item) + data_size)
1073 if (!path->nodes[1]) {
1074 ret = insert_new_root(trans, root, path, 1);
1078 slot = path->slots[0];
1079 nritems = btrfs_header_nritems(&l->header);
1080 mid = (nritems + 1)/ 2;
1081 right_buffer = btrfs_alloc_free_block(trans, root);
1082 BUG_ON(!right_buffer);
1083 BUG_ON(mid == nritems);
1084 right = btrfs_buffer_leaf(right_buffer);
1085 memset(&right->header, 0, sizeof(right->header));
1087 /* FIXME, just alloc a new leaf here */
1088 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
1089 BTRFS_LEAF_DATA_SIZE(root))
1092 /* FIXME, just alloc a new leaf here */
1093 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1094 BTRFS_LEAF_DATA_SIZE(root))
1097 btrfs_set_header_nritems(&right->header, nritems - mid);
1098 btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr);
1099 btrfs_set_header_level(&right->header, 0);
1100 btrfs_set_header_parentid(&right->header,
1101 btrfs_header_parentid(btrfs_buffer_header(root->node)));
1102 data_copy_size = btrfs_item_end(l->items + mid) -
1103 leaf_data_end(root, l);
1104 memcpy(right->items, l->items + mid,
1105 (nritems - mid) * sizeof(struct btrfs_item));
1106 memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1107 data_copy_size, btrfs_leaf_data(l) +
1108 leaf_data_end(root, l), data_copy_size);
1109 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1110 btrfs_item_end(l->items + mid);
1112 for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1113 u32 ioff = btrfs_item_offset(right->items + i);
1114 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1117 btrfs_set_header_nritems(&l->header, mid);
1119 wret = insert_ptr(trans, root, path, &right->items[0].key,
1120 right_buffer->b_blocknr, path->slots[1] + 1, 1);
1123 mark_buffer_dirty(right_buffer);
1124 mark_buffer_dirty(l_buf);
1125 BUG_ON(path->slots[0] != slot);
1127 btrfs_block_release(root, path->nodes[0]);
1128 path->nodes[0] = right_buffer;
1129 path->slots[0] -= mid;
1130 path->slots[1] += 1;
1132 btrfs_block_release(root, right_buffer);
1133 BUG_ON(path->slots[0] < 0);
1138 * Given a key and a data size, insert an empty item (no data is copied) into the tree.
1139 * This does all the path init required, making room in the tree if needed.
/*
 * btrfs_insert_empty_item: open a slot for a new item in a leaf.
 *
 * Searches for cpu_key with btrfs_search_slot(), then works on the leaf at
 * path->nodes[0] and the slot at path->slots[0]: existing item headers from
 * that slot onward move up by one, their data moves data_size bytes lower in
 * the leaf data area, and a new item header is written with the key, an
 * offset and a size of data_size.  The bytes of the new item's data are not
 * written by the lines visible here.
 *
 * trans, root: passed through to the helpers called below.
 * path:        caller-supplied path; its nodes[0]/slots[0] are read after
 *              the search.
 * cpu_key:     key of the new item; converted to a btrfs_disk_key before
 *              being copied into the leaf.
 * data_size:   number of data bytes to reserve for the new item.
 *
 * NOTE(review): this copy of the function is missing lines (braces, the
 * declarations of ret/slot/slot_orig/nritems/i, several branch bodies and
 * the return statements are not visible), so the conditions guarding some
 * calls below and the return value cannot be confirmed from this excerpt.
 */
1141 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1142 *root, struct btrfs_path *path, struct btrfs_key
1143 *cpu_key, u32 data_size)
1148 struct btrfs_leaf *leaf;
1149 struct buffer_head *leaf_buf;
1151 unsigned int data_end;
1152 struct btrfs_disk_key disk_key;
/* disk_key is the form that is later copied into the leaf's item header */
1154 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1156 /* create a root if there isn't one */
/* data_size and 1 are forwarded as the last two search arguments */
1159 ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
/*
 * NOTE(review): the test on ret that guards this release is not visible
 * here; presumably it is the "key already exists" case -- confirm.
 */
1161 btrfs_release_path(root, path);
/* slot_orig is not read again in the visible lines */
1167 slot_orig = path->slots[0];
1168 leaf_buf = path->nodes[0];
1169 leaf = btrfs_buffer_leaf(leaf_buf);
1171 nritems = btrfs_header_nritems(&leaf->header);
1172 data_end = leaf_data_end(root, leaf);
/*
 * The leaf must have room for one item header plus data_size bytes.
 * NOTE(review): the statement run when it does not is not visible here.
 */
1174 if (btrfs_leaf_free_space(root, leaf) <
1175 sizeof(struct btrfs_item) + data_size)
1178 slot = path->slots[0];
/*
 * slot == nritems means the new item goes after every existing item, so
 * there is nothing to shift.  Otherwise old_data records where the data of
 * the item currently at slot ends; the new item's data is placed just
 * below that point once the existing data has been moved out of the way.
 * NOTE(review): the second argument of the btrfs_set_item_offset() call in
 * the loop below is not visible in this excerpt.
 */
1180 if (slot != nritems) {
1182 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1185 * item0..itemN ... dataN.offset..dataN.size .. data0.size
1187 /* first correct the data pointers */
1188 for (i = slot; i < nritems; i++) {
1189 u32 ioff = btrfs_item_offset(leaf->items + i);
1190 btrfs_set_item_offset(leaf->items + i,
1194 /* shift the items */
1195 memmove(leaf->items + slot + 1, leaf->items + slot,
1196 (nritems - slot) * sizeof(struct btrfs_item));
1198 /* shift the data */
1199 memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1200 btrfs_leaf_data(leaf) +
1201 data_end, old_data - data_end);
1202 data_end = old_data;
1204 /* setup the item for the new data */
1205 memcpy(&leaf->items[slot].key, &disk_key,
1206 sizeof(struct btrfs_disk_key));
1207 btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1208 btrfs_set_item_size(leaf->items + slot, data_size);
1209 btrfs_set_header_nritems(&leaf->header, nritems + 1);
1210 mark_buffer_dirty(leaf_buf);
/*
 * NOTE(review): the guard on this call is not visible here; del_ptr() in
 * this file calls fixup_low_keys() only when slot 0 changed -- confirm the
 * same condition applies.
 */
1214 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
/* sanity checks on the modified leaf; the action on failure is not visible */
1216 if (btrfs_leaf_free_space(root, leaf) < 0)
1218 check_leaf(root, path, 0);
1224 * Given a key and some data, insert an item into the tree.
1225 * This does all the path init required, making room in the tree if needed.
/*
 * btrfs_insert_item: insert an item and copy its data into the leaf.
 *
 * Uses a path on the stack: initialises it, has btrfs_insert_empty_item()
 * create the item for cpu_key with room for data_size bytes, copies
 * data_size bytes from data into the item's data area, marks the leaf
 * buffer dirty and releases the path.
 *
 * NOTE(review): the end of the parameter list, the declaration of ret/ptr,
 * the remaining btrfs_item_ptr() arguments, the test on ret that presumably
 * guards the copy, and the return statement are not visible in this copy.
 */
1227 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1228 *root, struct btrfs_key *cpu_key, void *data, u32
1232 struct btrfs_path path;
1235 btrfs_init_path(&path);
1236 ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size);
/* locate the new item's data inside the leaf found by the insert */
1238 ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
1240 memcpy(ptr, data, data_size);
1241 mark_buffer_dirty(path.nodes[0]);
/* the path is local to this function, so it is released before leaving */
1243 btrfs_release_path(root, &path);
1248 * delete the pointer from a given node.
1250 * If the delete empties a node, the node is removed from the tree,
1251 * continuing all the way to the root if required. The root is converted into
1252 * a leaf if all the nodes are emptied.
/*
 * del_ptr: remove the key/block pointer at `slot` from the node found at
 * path->nodes[level].
 *
 * Pointers after `slot` are moved down by one to close the gap and the
 * node's item count is rewritten.  If the node ends up empty and it is the
 * root, the root's header level is set to 0; otherwise, if slot 0 was
 * removed, fixup_low_keys() is called with the node's new first key.  The
 * node's buffer is marked dirty.
 *
 * NOTE(review): the declarations of nritems/wret, the statement that
 * decrements nritems before it is stored, the remaining fixup_low_keys()
 * arguments and the return statement are not visible in this copy.
 */
1254 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1255 struct btrfs_path *path, int level, int slot)
1257 struct btrfs_node *node;
1258 struct buffer_head *parent = path->nodes[level];
1263 node = btrfs_buffer_node(parent);
1264 nritems = btrfs_header_nritems(&node->header);
/* removing the last pointer needs no move; otherwise close the gap */
1265 if (slot != nritems -1) {
1266 memmove(node->ptrs + slot, node->ptrs + slot + 1,
1267 sizeof(struct btrfs_key_ptr) * (nritems - slot - 1));
/* NOTE(review): presumably nritems was decremented on an elided line -- confirm */
1270 btrfs_set_header_nritems(&node->header, nritems);
1271 if (nritems == 0 && parent == root->node) {
1272 struct btrfs_header *header = btrfs_buffer_header(root->node);
/* an emptied root is only expected directly above the leaf level */
1273 BUG_ON(btrfs_header_level(header) != 1);
1274 /* just turn the root into a leaf and break */
1275 btrfs_set_header_level(header, 0);
1276 } else if (slot == 0) {
/* the node's first key changed, so pass the new first key to fixup_low_keys() */
1277 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
1282 mark_buffer_dirty(parent);
1287 * delete the item at the leaf level in path. If that empties
1288 * the leaf, remove it from the tree
/*
 * btrfs_del_item: delete the item at path->slots[0] from the leaf at
 * path->nodes[0].
 *
 * The item's data bytes and its item header are squeezed out of the leaf
 * and the leaf's item count is reduced by one.  Afterwards the visible
 * code handles these cases:
 *   - the leaf is the root: its header level is set to 0;
 *   - the leaf is removed from its parent with del_ptr() and its block is
 *     handed to btrfs_free_extent();
 *   - the leaf uses less than a third of BTRFS_LEAF_DATA_SIZE(root): its
 *     items are pushed to the neighbouring leaves and, if that leaves it
 *     with zero items, it is removed as above;
 *   - otherwise the leaf buffer is just marked dirty.
 *
 * NOTE(review): this copy is missing lines (braces/else keywords, the
 * declarations of slot/doff/dsize/nritems/wret/i, the tests that select the
 * cases above, some call arguments and the return statement), so the exact
 * branch structure cannot be confirmed from this excerpt.
 */
1290 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1291 struct btrfs_path *path)
1294 struct btrfs_leaf *leaf;
1295 struct buffer_head *leaf_buf;
1302 leaf_buf = path->nodes[0];
1303 leaf = btrfs_buffer_leaf(leaf_buf);
1304 slot = path->slots[0];
/* doff/dsize: offset and size of the data belonging to the doomed item */
1305 doff = btrfs_item_offset(leaf->items + slot);
1306 dsize = btrfs_item_size(leaf->items + slot);
1307 nritems = btrfs_header_nritems(&leaf->header);
/*
 * Deleting the last item needs no shifting.  Otherwise: move the data that
 * starts at data_end up by dsize bytes (the length argument is not visible
 * here), add dsize to the offset of every item after slot, and move those
 * item headers down by one.
 */
1309 if (slot != nritems - 1) {
1311 int data_end = leaf_data_end(root, leaf);
1312 memmove(btrfs_leaf_data(leaf) + data_end + dsize,
1313 btrfs_leaf_data(leaf) + data_end,
1315 for (i = slot + 1; i < nritems; i++) {
1316 u32 ioff = btrfs_item_offset(leaf->items + i);
1317 btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1319 memmove(leaf->items + slot, leaf->items + slot + 1,
1320 sizeof(struct btrfs_item) *
1321 (nritems - slot - 1));
/* NOTE(review): presumably the local nritems is also decremented on an elided line -- confirm */
1323 btrfs_set_header_nritems(&leaf->header, nritems - 1);
1325 /* delete the leaf if we've emptied it */
1327 if (leaf_buf == root->node) {
1328 btrfs_set_header_level(&leaf->header, 0);
/* non-root leaf: unhook it from the parent slot, then give its block back */
1330 clean_tree_block(trans, root, leaf_buf);
1331 wret = del_ptr(trans, root, path, 1, path->slots[1]);
1334 wret = btrfs_free_extent(trans, root,
1335 leaf_buf->b_blocknr, 1, 1);
1340 int used = leaf_space_used(leaf, 0, nritems);
/* NOTE(review): the guard on this call is not visible; presumably slot == 0 -- confirm */
1342 wret = fixup_low_keys(trans, root, path,
1343 &leaf->items[0].key, 1);
1348 /* delete the leaf if it is mostly empty */
1349 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1350 /* push_leaf_left fixes the path.
1351 * make sure the path still points to our leaf
1352 * for possible call to del_ptr below
/* from here on `slot` holds the leaf's slot in its parent, not the item slot */
1354 slot = path->slots[1];
1356 wret = push_leaf_left(trans, root, path, 1);
/* only try the right neighbour if the path still points at this leaf and it still has items */
1359 if (path->nodes[0] == leaf_buf &&
1360 btrfs_header_nritems(&leaf->header)) {
1361 wret = push_leaf_right(trans, root, path, 1);
1365 if (btrfs_header_nritems(&leaf->header) == 0) {
/* blocknr is copied out because leaf_buf is released before the extent is freed */
1366 u64 blocknr = leaf_buf->b_blocknr;
1367 clean_tree_block(trans, root, leaf_buf);
1368 wret = del_ptr(trans, root, path, 1, slot);
/*
 * NOTE(review): the reference dropped by this release (and the one a few
 * lines below) is not visibly acquired in this excerpt -- confirm.
 */
1371 btrfs_block_release(root, leaf_buf);
1372 wret = btrfs_free_extent(trans, root, blocknr,
1377 mark_buffer_dirty(leaf_buf);
1378 btrfs_block_release(root, leaf_buf);
1381 mark_buffer_dirty(leaf_buf);
1388 * walk up the tree as far as required to find the next leaf.
1389 * returns 0 if it found something or 1 if there are no greater leaves.
1390 * returns < 0 on io errors.
1392 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1397 struct buffer_head *c;
1398 struct btrfs_node *c_node;
1399 struct buffer_head *next = NULL;
1401 while(level < BTRFS_MAX_LEVEL) {
1402 if (!path->nodes[level])
1404 slot = path->slots[level] + 1;
1405 c = path->nodes[level];
1406 c_node = btrfs_buffer_node(c);
1407 if (slot >= btrfs_header_nritems(&c_node->header)) {
1411 blocknr = btrfs_node_blockptr(c_node, slot);
1413 btrfs_block_release(root, next);
1414 next = read_tree_block(root, blocknr);
1417 path->slots[level] = slot;
1420 c = path->nodes[level];
1421 btrfs_block_release(root, c);
1422 path->nodes[level] = next;
1423 path->slots[level] = 0;
1426 next = read_tree_block(root,
1427 btrfs_node_blockptr(btrfs_buffer_node(next), 0));