]> pilppa.org Git - linux-2.6-omap-h63xx.git/blob - fs/btrfs/ctree.c
btrfs_create, btrfs_write_super, btrfs_sync_fs
[linux-2.6-omap-h63xx.git] / fs / btrfs / ctree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4
/*
 * Forward declarations of static helpers that are called earlier in this
 * file than the point where their definitions appear.
 */
static int split_node(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root, struct btrfs_path *path,
                      int level);
static int split_leaf(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root, struct btrfs_path *path,
                      int data_size);
static int push_node_left(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct buffer_head *dst,
                          struct buffer_head *src);
static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct buffer_head *dst_buf,
                              struct buffer_head *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_path *path, int level, int slot);
17
18 inline void btrfs_init_path(struct btrfs_path *p)
19 {
20         memset(p, 0, sizeof(*p));
21 }
22
23 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
24 {
25         int i;
26         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
27                 if (!p->nodes[i])
28                         break;
29                 btrfs_block_release(root, p->nodes[i]);
30         }
31         memset(p, 0, sizeof(*p));
32 }
33
34 static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
35                            *root, struct buffer_head *buf, struct buffer_head
36                            *parent, int parent_slot, struct buffer_head
37                            **cow_ret)
38 {
39         struct buffer_head *cow;
40         struct btrfs_node *cow_node;
41
42         if (buffer_dirty(buf)) {
43                 *cow_ret = buf;
44                 return 0;
45         }
46         cow = btrfs_alloc_free_block(trans, root);
47         cow_node = btrfs_buffer_node(cow);
48         memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
49         btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr);
50         *cow_ret = cow;
51         mark_buffer_dirty(cow);
52         btrfs_inc_ref(trans, root, buf);
53         if (buf == root->node) {
54                 root->node = cow;
55                 get_bh(cow);
56                 if (buf != root->commit_root)
57                         btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
58                 btrfs_block_release(root, buf);
59         } else {
60                 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
61                                         cow->b_blocknr);
62                 mark_buffer_dirty(parent);
63                 btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1);
64         }
65         btrfs_block_release(root, buf);
66         return 0;
67 }
68
69 /*
70  * The leaf data grows from end-to-front in the node.
71  * this returns the address of the start of the last item,
72  * which is the stop of the leaf data stack
73  */
74 static inline unsigned int leaf_data_end(struct btrfs_root *root,
75                                          struct btrfs_leaf *leaf)
76 {
77         u32 nr = btrfs_header_nritems(&leaf->header);
78         if (nr == 0)
79                 return BTRFS_LEAF_DATA_SIZE(root);
80         return btrfs_item_offset(leaf->items + nr - 1);
81 }
82
83 /*
84  * The space between the end of the leaf items and
85  * the start of the leaf data.  IOW, how much room
86  * the leaf has left for both items and data
87  */
88 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
89 {
90         int data_end = leaf_data_end(root, leaf);
91         int nritems = btrfs_header_nritems(&leaf->header);
92         char *items_end = (char *)(leaf->items + nritems + 1);
93         return (char *)(btrfs_leaf_data(leaf) + data_end) - (char *)items_end;
94 }
95
96 /*
97  * compare two keys in a memcmp fashion
98  */
99 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
100 {
101         struct btrfs_key k1;
102
103         btrfs_disk_key_to_cpu(&k1, disk);
104
105         if (k1.objectid > k2->objectid)
106                 return 1;
107         if (k1.objectid < k2->objectid)
108                 return -1;
109         if (k1.flags > k2->flags)
110                 return 1;
111         if (k1.flags < k2->flags)
112                 return -1;
113         if (k1.offset > k2->offset)
114                 return 1;
115         if (k1.offset < k2->offset)
116                 return -1;
117         return 0;
118 }
119
120 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
121                       int level)
122 {
123         int i;
124         struct btrfs_node *parent = NULL;
125         struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
126         int parent_slot;
127         u32 nritems = btrfs_header_nritems(&node->header);
128
129         if (path->nodes[level + 1])
130                 parent = btrfs_buffer_node(path->nodes[level + 1]);
131         parent_slot = path->slots[level + 1];
132         BUG_ON(nritems == 0);
133         if (parent) {
134                 struct btrfs_disk_key *parent_key;
135                 parent_key = &parent->ptrs[parent_slot].key;
136                 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
137                               sizeof(struct btrfs_disk_key)));
138                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
139                        btrfs_header_blocknr(&node->header));
140         }
141         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
142         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
143                 struct btrfs_key cpukey;
144                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
145                 BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
146         }
147         return 0;
148 }
149
150 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
151                       int level)
152 {
153         int i;
154         struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]);
155         struct btrfs_node *parent = NULL;
156         int parent_slot;
157         u32 nritems = btrfs_header_nritems(&leaf->header);
158
159         if (path->nodes[level + 1])
160                 parent = btrfs_buffer_node(path->nodes[level + 1]);
161         parent_slot = path->slots[level + 1];
162         BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
163
164         if (nritems == 0)
165                 return 0;
166
167         if (parent) {
168                 struct btrfs_disk_key *parent_key;
169                 parent_key = &parent->ptrs[parent_slot].key;
170                 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
171                        sizeof(struct btrfs_disk_key)));
172                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
173                        btrfs_header_blocknr(&leaf->header));
174         }
175         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
176                 struct btrfs_key cpukey;
177                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
178                 BUG_ON(comp_keys(&leaf->items[i].key,
179                                  &cpukey) >= 0);
180                 BUG_ON(btrfs_item_offset(leaf->items + i) !=
181                         btrfs_item_end(leaf->items + i + 1));
182                 if (i == 0) {
183                         BUG_ON(btrfs_item_offset(leaf->items + i) +
184                                btrfs_item_size(leaf->items + i) !=
185                                BTRFS_LEAF_DATA_SIZE(root));
186                 }
187         }
188         return 0;
189 }
190
/*
 * Run the sanity checker that matches the block type: check_leaf() for
 * level 0, check_node() for every other level.  Returns the checker's
 * result.
 */
static int check_block(struct btrfs_root *root, struct btrfs_path *path,
                        int level)
{
        if (level != 0)
                return check_node(root, path, level);
        return check_leaf(root, path, level);
}
198
/*
 * Binary search for 'key' in an array of records that each begin with a
 * struct btrfs_disk_key.
 *
 * p:          start of the array
 * item_size:  distance in bytes between consecutive records
 * max:        number of records in the array
 * slot:       receives the index of the match, or the index where the key
 *             would have to be inserted; this is 'max' when the key is
 *             larger than every key in the array
 *
 * Returns 0 if the key was found and 1 if it was not.
 */
static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
                       int max, int *slot)
{
        int lo = 0;
        int hi = max;

        while (lo < hi) {
                int probe = (lo + hi) / 2;
                struct btrfs_disk_key *cur;
                int cmp;

                cur = (struct btrfs_disk_key *)(p + probe * item_size);
                cmp = comp_keys(cur, key);

                if (cmp == 0) {
                        *slot = probe;
                        return 0;
                }
                if (cmp < 0)
                        lo = probe + 1;
                else
                        hi = probe;
        }
        *slot = lo;
        return 1;
}
234
235 /*
236  * simple bin_search frontend that does the right thing for
237  * leaves vs nodes
238  */
239 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
240 {
241         if (btrfs_is_leaf(c)) {
242                 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
243                 return generic_bin_search((void *)l->items,
244                                           sizeof(struct btrfs_item),
245                                           key, btrfs_header_nritems(&c->header),
246                                           slot);
247         } else {
248                 return generic_bin_search((void *)c->ptrs,
249                                           sizeof(struct btrfs_key_ptr),
250                                           key, btrfs_header_nritems(&c->header),
251                                           slot);
252         }
253         return -1;
254 }
255
256 static struct buffer_head *read_node_slot(struct btrfs_root *root,
257                                    struct buffer_head *parent_buf,
258                                    int slot)
259 {
260         struct btrfs_node *node = btrfs_buffer_node(parent_buf);
261         if (slot < 0)
262                 return NULL;
263         if (slot >= btrfs_header_nritems(&node->header))
264                 return NULL;
265         return read_tree_block(root, btrfs_node_blockptr(node, slot));
266 }
267
268 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
269                          *root, struct btrfs_path *path, int level)
270 {
271         struct buffer_head *right_buf;
272         struct buffer_head *mid_buf;
273         struct buffer_head *left_buf;
274         struct buffer_head *parent_buf = NULL;
275         struct btrfs_node *right = NULL;
276         struct btrfs_node *mid;
277         struct btrfs_node *left = NULL;
278         struct btrfs_node *parent = NULL;
279         int ret = 0;
280         int wret;
281         int pslot;
282         int orig_slot = path->slots[level];
283         u64 orig_ptr;
284
285         if (level == 0)
286                 return 0;
287
288         mid_buf = path->nodes[level];
289         mid = btrfs_buffer_node(mid_buf);
290         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
291
292         if (level < BTRFS_MAX_LEVEL - 1)
293                 parent_buf = path->nodes[level + 1];
294         pslot = path->slots[level + 1];
295
296         /*
297          * deal with the case where there is only one pointer in the root
298          * by promoting the node below to a root
299          */
300         if (!parent_buf) {
301                 struct buffer_head *child;
302                 u64 blocknr = mid_buf->b_blocknr;
303
304                 if (btrfs_header_nritems(&mid->header) != 1)
305                         return 0;
306
307                 /* promote the child to a root */
308                 child = read_node_slot(root, mid_buf, 0);
309                 BUG_ON(!child);
310                 root->node = child;
311                 path->nodes[level] = NULL;
312                 /* once for the path */
313                 btrfs_block_release(root, mid_buf);
314                 /* once for the root ptr */
315                 btrfs_block_release(root, mid_buf);
316                 clean_tree_block(trans, root, mid_buf);
317                 return btrfs_free_extent(trans, root, blocknr, 1, 1);
318         }
319         parent = btrfs_buffer_node(parent_buf);
320
321         if (btrfs_header_nritems(&mid->header) >
322             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
323                 return 0;
324
325         left_buf = read_node_slot(root, parent_buf, pslot - 1);
326         right_buf = read_node_slot(root, parent_buf, pslot + 1);
327
328         /* first, try to make some room in the middle buffer */
329         if (left_buf) {
330                 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
331                                 &left_buf);
332                 left = btrfs_buffer_node(left_buf);
333                 orig_slot += btrfs_header_nritems(&left->header);
334                 wret = push_node_left(trans, root, left_buf, mid_buf);
335                 if (wret < 0)
336                         ret = wret;
337         }
338
339         /*
340          * then try to empty the right most buffer into the middle
341          */
342         if (right_buf) {
343                 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
344                                 &right_buf);
345                 right = btrfs_buffer_node(right_buf);
346                 wret = push_node_left(trans, root, mid_buf, right_buf);
347                 if (wret < 0)
348                         ret = wret;
349                 if (btrfs_header_nritems(&right->header) == 0) {
350                         u64 blocknr = right_buf->b_blocknr;
351                         btrfs_block_release(root, right_buf);
352                         clean_tree_block(trans, root, right_buf);
353                         right_buf = NULL;
354                         right = NULL;
355                         wret = del_ptr(trans, root, path, level + 1, pslot +
356                                        1);
357                         if (wret)
358                                 ret = wret;
359                         wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
360                         if (wret)
361                                 ret = wret;
362                 } else {
363                         memcpy(&parent->ptrs[pslot + 1].key,
364                                 &right->ptrs[0].key,
365                                 sizeof(struct btrfs_disk_key));
366                         mark_buffer_dirty(parent_buf);
367                 }
368         }
369         if (btrfs_header_nritems(&mid->header) == 1) {
370                 /*
371                  * we're not allowed to leave a node with one item in the
372                  * tree during a delete.  A deletion from lower in the tree
373                  * could try to delete the only pointer in this node.
374                  * So, pull some keys from the left.
375                  * There has to be a left pointer at this point because
376                  * otherwise we would have pulled some pointers from the
377                  * right
378                  */
379                 BUG_ON(!left_buf);
380                 wret = balance_node_right(trans, root, mid_buf, left_buf);
381                 if (wret < 0)
382                         ret = wret;
383                 BUG_ON(wret == 1);
384         }
385         if (btrfs_header_nritems(&mid->header) == 0) {
386                 /* we've managed to empty the middle node, drop it */
387                 u64 blocknr = mid_buf->b_blocknr;
388                 btrfs_block_release(root, mid_buf);
389                 clean_tree_block(trans, root, mid_buf);
390                 mid_buf = NULL;
391                 mid = NULL;
392                 wret = del_ptr(trans, root, path, level + 1, pslot);
393                 if (wret)
394                         ret = wret;
395                 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
396                 if (wret)
397                         ret = wret;
398         } else {
399                 /* update the parent key to reflect our changes */
400                 memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key,
401                        sizeof(struct btrfs_disk_key));
402                 mark_buffer_dirty(parent_buf);
403         }
404
405         /* update the path */
406         if (left_buf) {
407                 if (btrfs_header_nritems(&left->header) > orig_slot) {
408                         get_bh(left_buf);
409                         path->nodes[level] = left_buf;
410                         path->slots[level + 1] -= 1;
411                         path->slots[level] = orig_slot;
412                         if (mid_buf)
413                                 btrfs_block_release(root, mid_buf);
414                 } else {
415                         orig_slot -= btrfs_header_nritems(&left->header);
416                         path->slots[level] = orig_slot;
417                 }
418         }
419         /* double check we haven't messed things up */
420         check_block(root, path, level);
421         if (orig_ptr !=
422             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]),
423                                 path->slots[level]))
424                 BUG();
425
426         if (right_buf)
427                 btrfs_block_release(root, right_buf);
428         if (left_buf)
429                 btrfs_block_release(root, left_buf);
430         return ret;
431 }
432
433 /*
434  * look for key in the tree.  path is filled in with nodes along the way
435  * if key is found, we return zero and you can find the item in the leaf
436  * level of the path (level 0)
437  *
438  * If the key isn't found, the path points to the slot where it should
439  * be inserted, and 1 is returned.  If there are other errors during the
440  * search a negative error number is returned.
441  *
442  * if ins_len > 0, nodes and leaves will be split as we walk down the
443  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
444  * possible)
445  */
446 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
447                       *root, struct btrfs_key *key, struct btrfs_path *p, int
448                       ins_len, int cow)
449 {
450         struct buffer_head *b;
451         struct buffer_head *cow_buf;
452         struct btrfs_node *c;
453         int slot;
454         int ret;
455         int level;
456
457 again:
458         b = root->node;
459         get_bh(b);
460         while (b) {
461                 c = btrfs_buffer_node(b);
462                 level = btrfs_header_level(&c->header);
463                 if (cow) {
464                         int wret;
465                         wret = btrfs_cow_block(trans, root, b,
466                                                p->nodes[level + 1],
467                                                p->slots[level + 1],
468                                                &cow_buf);
469                         b = cow_buf;
470                 }
471                 BUG_ON(!cow && ins_len);
472                 c = btrfs_buffer_node(b);
473                 p->nodes[level] = b;
474                 ret = check_block(root, p, level);
475                 if (ret)
476                         return -1;
477                 ret = bin_search(c, key, &slot);
478                 if (!btrfs_is_leaf(c)) {
479                         if (ret && slot > 0)
480                                 slot -= 1;
481                         p->slots[level] = slot;
482                         if (ins_len > 0 && btrfs_header_nritems(&c->header) ==
483                             BTRFS_NODEPTRS_PER_BLOCK(root)) {
484                                 int sret = split_node(trans, root, p, level);
485                                 BUG_ON(sret > 0);
486                                 if (sret)
487                                         return sret;
488                                 b = p->nodes[level];
489                                 c = btrfs_buffer_node(b);
490                                 slot = p->slots[level];
491                         } else if (ins_len < 0) {
492                                 int sret = balance_level(trans, root, p,
493                                                          level);
494                                 if (sret)
495                                         return sret;
496                                 b = p->nodes[level];
497                                 if (!b)
498                                         goto again;
499                                 c = btrfs_buffer_node(b);
500                                 slot = p->slots[level];
501                                 BUG_ON(btrfs_header_nritems(&c->header) == 1);
502                         }
503                         b = read_tree_block(root, btrfs_node_blockptr(c, slot));
504                 } else {
505                         struct btrfs_leaf *l = (struct btrfs_leaf *)c;
506                         p->slots[level] = slot;
507                         if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
508                             sizeof(struct btrfs_item) + ins_len) {
509                                 int sret = split_leaf(trans, root, p, ins_len);
510                                 BUG_ON(sret > 0);
511                                 if (sret)
512                                         return sret;
513                         }
514                         return ret;
515                 }
516         }
517         return 1;
518 }
519
520 /*
521  * adjust the pointers going up the tree, starting at level
522  * making sure the right key of each node is points to 'key'.
523  * This is used after shifting pointers to the left, so it stops
524  * fixing up pointers when a given leaf/node is not in slot 0 of the
525  * higher levels
526  *
527  * If this fails to write a tree block, it returns -1, but continues
528  * fixing up the blocks in ram so the tree is consistent.
529  */
530 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
531                           *root, struct btrfs_path *path, struct btrfs_disk_key
532                           *key, int level)
533 {
534         int i;
535         int ret = 0;
536         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
537                 struct btrfs_node *t;
538                 int tslot = path->slots[i];
539                 if (!path->nodes[i])
540                         break;
541                 t = btrfs_buffer_node(path->nodes[i]);
542                 memcpy(&t->ptrs[tslot].key, key, sizeof(*key));
543                 mark_buffer_dirty(path->nodes[i]);
544                 if (tslot != 0)
545                         break;
546         }
547         return ret;
548 }
549
550 /*
551  * try to push data from one node into the next node left in the
552  * tree.
553  *
554  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
555  * error, and > 0 if there was no room in the left hand block.
556  */
557 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
558                           *root, struct buffer_head *dst_buf, struct
559                           buffer_head *src_buf)
560 {
561         struct btrfs_node *src = btrfs_buffer_node(src_buf);
562         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
563         int push_items = 0;
564         int src_nritems;
565         int dst_nritems;
566         int ret = 0;
567
568         src_nritems = btrfs_header_nritems(&src->header);
569         dst_nritems = btrfs_header_nritems(&dst->header);
570         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
571         if (push_items <= 0) {
572                 return 1;
573         }
574
575         if (src_nritems < push_items)
576                 push_items = src_nritems;
577
578         memcpy(dst->ptrs + dst_nritems, src->ptrs,
579                 push_items * sizeof(struct btrfs_key_ptr));
580         if (push_items < src_nritems) {
581                 memmove(src->ptrs, src->ptrs + push_items,
582                         (src_nritems - push_items) *
583                         sizeof(struct btrfs_key_ptr));
584         }
585         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
586         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
587         mark_buffer_dirty(src_buf);
588         mark_buffer_dirty(dst_buf);
589         return ret;
590 }
591
592 /*
593  * try to push data from one node into the next node right in the
594  * tree.
595  *
596  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
597  * error, and > 0 if there was no room in the right hand block.
598  *
599  * this will  only push up to 1/2 the contents of the left node over
600  */
601 static int balance_node_right(struct btrfs_trans_handle *trans, struct
602                               btrfs_root *root, struct buffer_head *dst_buf,
603                               struct buffer_head *src_buf)
604 {
605         struct btrfs_node *src = btrfs_buffer_node(src_buf);
606         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
607         int push_items = 0;
608         int max_push;
609         int src_nritems;
610         int dst_nritems;
611         int ret = 0;
612
613         src_nritems = btrfs_header_nritems(&src->header);
614         dst_nritems = btrfs_header_nritems(&dst->header);
615         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
616         if (push_items <= 0) {
617                 return 1;
618         }
619
620         max_push = src_nritems / 2 + 1;
621         /* don't try to empty the node */
622         if (max_push > src_nritems)
623                 return 1;
624         if (max_push < push_items)
625                 push_items = max_push;
626
627         memmove(dst->ptrs + push_items, dst->ptrs,
628                 dst_nritems * sizeof(struct btrfs_key_ptr));
629         memcpy(dst->ptrs, src->ptrs + src_nritems - push_items,
630                 push_items * sizeof(struct btrfs_key_ptr));
631
632         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
633         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
634
635         mark_buffer_dirty(src_buf);
636         mark_buffer_dirty(dst_buf);
637         return ret;
638 }
639
640 /*
641  * helper function to insert a new root level in the tree.
642  * A new node is allocated, and a single item is inserted to
643  * point to the existing root
644  *
645  * returns zero on success or < 0 on failure.
646  */
647 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
648                            *root, struct btrfs_path *path, int level)
649 {
650         struct buffer_head *t;
651         struct btrfs_node *lower;
652         struct btrfs_node *c;
653         struct btrfs_disk_key *lower_key;
654
655         BUG_ON(path->nodes[level]);
656         BUG_ON(path->nodes[level-1] != root->node);
657
658         t = btrfs_alloc_free_block(trans, root);
659         c = btrfs_buffer_node(t);
660         memset(c, 0, root->blocksize);
661         btrfs_set_header_nritems(&c->header, 1);
662         btrfs_set_header_level(&c->header, level);
663         btrfs_set_header_blocknr(&c->header, t->b_blocknr);
664         btrfs_set_header_parentid(&c->header,
665               btrfs_header_parentid(btrfs_buffer_header(root->node)));
666         lower = btrfs_buffer_node(path->nodes[level-1]);
667         if (btrfs_is_leaf(lower))
668                 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
669         else
670                 lower_key = &lower->ptrs[0].key;
671         memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key));
672         btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr);
673
674         mark_buffer_dirty(t);
675
676         /* the super has an extra ref to root->node */
677         btrfs_block_release(root, root->node);
678         root->node = t;
679         get_bh(t);
680         path->nodes[level] = t;
681         path->slots[level] = 0;
682         return 0;
683 }
684
685 /*
686  * worker function to insert a single pointer in a node.
687  * the node should have enough room for the pointer already
688  *
689  * slot and level indicate where you want the key to go, and
690  * blocknr is the block the key points to.
691  *
692  * returns zero on success and < 0 on any error
693  */
694 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
695                       *root, struct btrfs_path *path, struct btrfs_disk_key
696                       *key, u64 blocknr, int slot, int level)
697 {
698         struct btrfs_node *lower;
699         int nritems;
700
701         BUG_ON(!path->nodes[level]);
702         lower = btrfs_buffer_node(path->nodes[level]);
703         nritems = btrfs_header_nritems(&lower->header);
704         if (slot > nritems)
705                 BUG();
706         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
707                 BUG();
708         if (slot != nritems) {
709                 memmove(lower->ptrs + slot + 1, lower->ptrs + slot,
710                         (nritems - slot) * sizeof(struct btrfs_key_ptr));
711         }
712         memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key));
713         btrfs_set_node_blockptr(lower, slot, blocknr);
714         btrfs_set_header_nritems(&lower->header, nritems + 1);
715         mark_buffer_dirty(path->nodes[level]);
716         return 0;
717 }
718
719 /*
720  * split the node at the specified level in path in two.
721  * The path is corrected to point to the appropriate node after the split
722  *
723  * Before splitting this tries to make some room in the node by pushing
724  * left and right, if either one works, it returns right away.
725  *
726  * returns 0 on success and < 0 on failure
727  */
728 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
729                       *root, struct btrfs_path *path, int level)
730 {
731         struct buffer_head *t;
732         struct btrfs_node *c;
733         struct buffer_head *split_buffer;
734         struct btrfs_node *split;
735         int mid;
736         int ret;
737         int wret;
738         u32 c_nritems;
739
740         t = path->nodes[level];
741         c = btrfs_buffer_node(t);
742         if (t == root->node) {
743                 /* trying to split the root, lets make a new one */
744                 ret = insert_new_root(trans, root, path, level + 1);
745                 if (ret)
746                         return ret;
747         }
748         c_nritems = btrfs_header_nritems(&c->header);
749         split_buffer = btrfs_alloc_free_block(trans, root);
750         split = btrfs_buffer_node(split_buffer);
751         btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
752         btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr);
753         btrfs_set_header_parentid(&split->header,
754               btrfs_header_parentid(btrfs_buffer_header(root->node)));
755         mid = (c_nritems + 1) / 2;
756         memcpy(split->ptrs, c->ptrs + mid,
757                 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
758         btrfs_set_header_nritems(&split->header, c_nritems - mid);
759         btrfs_set_header_nritems(&c->header, mid);
760         ret = 0;
761
762         mark_buffer_dirty(t);
763         mark_buffer_dirty(split_buffer);
764         wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
765                           split_buffer->b_blocknr, path->slots[level + 1] + 1,
766                           level + 1);
767         if (wret)
768                 ret = wret;
769
770         if (path->slots[level] >= mid) {
771                 path->slots[level] -= mid;
772                 btrfs_block_release(root, t);
773                 path->nodes[level] = split_buffer;
774                 path->slots[level + 1] += 1;
775         } else {
776                 btrfs_block_release(root, split_buffer);
777         }
778         return ret;
779 }
780
781 /*
782  * how many bytes are required to store the items in a leaf.  start
783  * and nr indicate which items in the leaf to check.  This totals up the
784  * space used both by the item structs and the item data
785  */
786 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
787 {
788         int data_len;
789         int end = start + nr - 1;
790
791         if (!nr)
792                 return 0;
793         data_len = btrfs_item_end(l->items + start);
794         data_len = data_len - btrfs_item_offset(l->items + end);
795         data_len += sizeof(struct btrfs_item) * nr;
796         return data_len;
797 }
798
799 /*
800  * push some data in the path leaf to the right, trying to free up at
801  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
802  *
803  * returns 1 if the push failed because the other node didn't have enough
804  * room, 0 if everything worked out and < 0 if there were major errors.
805  */
806 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
807                            *root, struct btrfs_path *path, int data_size)
808 {
809         struct buffer_head *left_buf = path->nodes[0];
810         struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf);
811         struct btrfs_leaf *right;
812         struct buffer_head *right_buf;
813         struct buffer_head *upper;
814         struct btrfs_node *upper_node;
815         int slot;
816         int i;
817         int free_space;
818         int push_space = 0;
819         int push_items = 0;
820         struct btrfs_item *item;
821         u32 left_nritems;
822         u32 right_nritems;
823
824         slot = path->slots[1];
825         if (!path->nodes[1]) {
826                 return 1;
827         }
828         upper = path->nodes[1];
829         upper_node = btrfs_buffer_node(upper);
830         if (slot >= btrfs_header_nritems(&upper_node->header) - 1) {
831                 return 1;
832         }
833         right_buf = read_tree_block(root,
834                     btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1));
835         right = btrfs_buffer_leaf(right_buf);
836         free_space = btrfs_leaf_free_space(root, right);
837         if (free_space < data_size + sizeof(struct btrfs_item)) {
838                 btrfs_block_release(root, right_buf);
839                 return 1;
840         }
841         /* cow and double check */
842         btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
843         right = btrfs_buffer_leaf(right_buf);
844         free_space = btrfs_leaf_free_space(root, right);
845         if (free_space < data_size + sizeof(struct btrfs_item)) {
846                 btrfs_block_release(root, right_buf);
847                 return 1;
848         }
849
850         left_nritems = btrfs_header_nritems(&left->header);
851         for (i = left_nritems - 1; i >= 0; i--) {
852                 item = left->items + i;
853                 if (path->slots[0] == i)
854                         push_space += data_size + sizeof(*item);
855                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
856                     free_space)
857                         break;
858                 push_items++;
859                 push_space += btrfs_item_size(item) + sizeof(*item);
860         }
861         if (push_items == 0) {
862                 btrfs_block_release(root, right_buf);
863                 return 1;
864         }
865         right_nritems = btrfs_header_nritems(&right->header);
866         /* push left to right */
867         push_space = btrfs_item_end(left->items + left_nritems - push_items);
868         push_space -= leaf_data_end(root, left);
869         /* make room in the right data area */
870         memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) -
871                 push_space, btrfs_leaf_data(right) + leaf_data_end(root, right),
872                 BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right));
873         /* copy from the left data area */
874         memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space,
875                 btrfs_leaf_data(left) + leaf_data_end(root, left), push_space);
876         memmove(right->items + push_items, right->items,
877                 right_nritems * sizeof(struct btrfs_item));
878         /* copy the items from left to right */
879         memcpy(right->items, left->items + left_nritems - push_items,
880                 push_items * sizeof(struct btrfs_item));
881
882         /* update the item pointers */
883         right_nritems += push_items;
884         btrfs_set_header_nritems(&right->header, right_nritems);
885         push_space = BTRFS_LEAF_DATA_SIZE(root);
886         for (i = 0; i < right_nritems; i++) {
887                 btrfs_set_item_offset(right->items + i, push_space -
888                                       btrfs_item_size(right->items + i));
889                 push_space = btrfs_item_offset(right->items + i);
890         }
891         left_nritems -= push_items;
892         btrfs_set_header_nritems(&left->header, left_nritems);
893
894         mark_buffer_dirty(left_buf);
895         mark_buffer_dirty(right_buf);
896         memcpy(&upper_node->ptrs[slot + 1].key,
897                 &right->items[0].key, sizeof(struct btrfs_disk_key));
898         mark_buffer_dirty(upper);
899
900         /* then fixup the leaf pointer in the path */
901         if (path->slots[0] >= left_nritems) {
902                 path->slots[0] -= left_nritems;
903                 btrfs_block_release(root, path->nodes[0]);
904                 path->nodes[0] = right_buf;
905                 path->slots[1] += 1;
906         } else {
907                 btrfs_block_release(root, right_buf);
908         }
909         return 0;
910 }
911 /*
912  * push some data in the path leaf to the left, trying to free up at
913  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
914  */
915 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
916                           *root, struct btrfs_path *path, int data_size)
917 {
918         struct buffer_head *right_buf = path->nodes[0];
919         struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf);
920         struct buffer_head *t;
921         struct btrfs_leaf *left;
922         int slot;
923         int i;
924         int free_space;
925         int push_space = 0;
926         int push_items = 0;
927         struct btrfs_item *item;
928         u32 old_left_nritems;
929         int ret = 0;
930         int wret;
931
932         slot = path->slots[1];
933         if (slot == 0) {
934                 return 1;
935         }
936         if (!path->nodes[1]) {
937                 return 1;
938         }
939         t = read_tree_block(root,
940             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1));
941         left = btrfs_buffer_leaf(t);
942         free_space = btrfs_leaf_free_space(root, left);
943         if (free_space < data_size + sizeof(struct btrfs_item)) {
944                 btrfs_block_release(root, t);
945                 return 1;
946         }
947
948         /* cow and double check */
949         btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
950         left = btrfs_buffer_leaf(t);
951         free_space = btrfs_leaf_free_space(root, left);
952         if (free_space < data_size + sizeof(struct btrfs_item)) {
953                 btrfs_block_release(root, t);
954                 return 1;
955         }
956
957         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
958                 item = right->items + i;
959                 if (path->slots[0] == i)
960                         push_space += data_size + sizeof(*item);
961                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
962                     free_space)
963                         break;
964                 push_items++;
965                 push_space += btrfs_item_size(item) + sizeof(*item);
966         }
967         if (push_items == 0) {
968                 btrfs_block_release(root, t);
969                 return 1;
970         }
971         /* push data from right to left */
972         memcpy(left->items + btrfs_header_nritems(&left->header),
973                 right->items, push_items * sizeof(struct btrfs_item));
974         push_space = BTRFS_LEAF_DATA_SIZE(root) -
975                      btrfs_item_offset(right->items + push_items -1);
976         memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space,
977                 btrfs_leaf_data(right) +
978                 btrfs_item_offset(right->items + push_items - 1),
979                 push_space);
980         old_left_nritems = btrfs_header_nritems(&left->header);
981         BUG_ON(old_left_nritems < 0);
982
983         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
984                 u32 ioff = btrfs_item_offset(left->items + i);
985                 btrfs_set_item_offset(left->items + i, ioff -
986                                      (BTRFS_LEAF_DATA_SIZE(root) -
987                                       btrfs_item_offset(left->items +
988                                                         old_left_nritems - 1)));
989         }
990         btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
991
992         /* fixup right node */
993         push_space = btrfs_item_offset(right->items + push_items - 1) -
994                      leaf_data_end(root, right);
995         memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
996                 push_space, btrfs_leaf_data(right) +
997                 leaf_data_end(root, right), push_space);
998         memmove(right->items, right->items + push_items,
999                 (btrfs_header_nritems(&right->header) - push_items) *
1000                 sizeof(struct btrfs_item));
1001         btrfs_set_header_nritems(&right->header,
1002                                  btrfs_header_nritems(&right->header) -
1003                                  push_items);
1004         push_space = BTRFS_LEAF_DATA_SIZE(root);
1005
1006         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1007                 btrfs_set_item_offset(right->items + i, push_space -
1008                                       btrfs_item_size(right->items + i));
1009                 push_space = btrfs_item_offset(right->items + i);
1010         }
1011
1012         mark_buffer_dirty(t);
1013         mark_buffer_dirty(right_buf);
1014
1015         wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1016         if (wret)
1017                 ret = wret;
1018
1019         /* then fixup the leaf pointer in the path */
1020         if (path->slots[0] < push_items) {
1021                 path->slots[0] += old_left_nritems;
1022                 btrfs_block_release(root, path->nodes[0]);
1023                 path->nodes[0] = t;
1024                 path->slots[1] -= 1;
1025         } else {
1026                 btrfs_block_release(root, t);
1027                 path->slots[0] -= push_items;
1028         }
1029         BUG_ON(path->slots[0] < 0);
1030         return ret;
1031 }
1032
1033 /*
1034  * split the path's leaf in two, making sure there is at least data_size
1035  * available for the resulting leaf level of the path.
1036  *
1037  * returns 0 if all went well and < 0 on failure.
1038  */
1039 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1040                       *root, struct btrfs_path *path, int data_size)
1041 {
1042         struct buffer_head *l_buf;
1043         struct btrfs_leaf *l;
1044         u32 nritems;
1045         int mid;
1046         int slot;
1047         struct btrfs_leaf *right;
1048         struct buffer_head *right_buffer;
1049         int space_needed = data_size + sizeof(struct btrfs_item);
1050         int data_copy_size;
1051         int rt_data_off;
1052         int i;
1053         int ret;
1054         int wret;
1055
1056         /* first try to make some room by pushing left and right */
1057         wret = push_leaf_left(trans, root, path, data_size);
1058         if (wret < 0)
1059                 return wret;
1060         if (wret) {
1061                 wret = push_leaf_right(trans, root, path, data_size);
1062                 if (wret < 0)
1063                         return wret;
1064         }
1065         l_buf = path->nodes[0];
1066         l = btrfs_buffer_leaf(l_buf);
1067
1068         /* did the pushes work? */
1069         if (btrfs_leaf_free_space(root, l) >=
1070             sizeof(struct btrfs_item) + data_size)
1071                 return 0;
1072
1073         if (!path->nodes[1]) {
1074                 ret = insert_new_root(trans, root, path, 1);
1075                 if (ret)
1076                         return ret;
1077         }
1078         slot = path->slots[0];
1079         nritems = btrfs_header_nritems(&l->header);
1080         mid = (nritems + 1)/ 2;
1081         right_buffer = btrfs_alloc_free_block(trans, root);
1082         BUG_ON(!right_buffer);
1083         BUG_ON(mid == nritems);
1084         right = btrfs_buffer_leaf(right_buffer);
1085         memset(&right->header, 0, sizeof(right->header));
1086         if (mid <= slot) {
1087                 /* FIXME, just alloc a new leaf here */
1088                 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
1089                         BTRFS_LEAF_DATA_SIZE(root))
1090                         BUG();
1091         } else {
1092                 /* FIXME, just alloc a new leaf here */
1093                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1094                         BTRFS_LEAF_DATA_SIZE(root))
1095                         BUG();
1096         }
1097         btrfs_set_header_nritems(&right->header, nritems - mid);
1098         btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr);
1099         btrfs_set_header_level(&right->header, 0);
1100         btrfs_set_header_parentid(&right->header,
1101               btrfs_header_parentid(btrfs_buffer_header(root->node)));
1102         data_copy_size = btrfs_item_end(l->items + mid) -
1103                          leaf_data_end(root, l);
1104         memcpy(right->items, l->items + mid,
1105                (nritems - mid) * sizeof(struct btrfs_item));
1106         memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1107                 data_copy_size, btrfs_leaf_data(l) +
1108                 leaf_data_end(root, l), data_copy_size);
1109         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1110                       btrfs_item_end(l->items + mid);
1111
1112         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1113                 u32 ioff = btrfs_item_offset(right->items + i);
1114                 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1115         }
1116
1117         btrfs_set_header_nritems(&l->header, mid);
1118         ret = 0;
1119         wret = insert_ptr(trans, root, path, &right->items[0].key,
1120                           right_buffer->b_blocknr, path->slots[1] + 1, 1);
1121         if (wret)
1122                 ret = wret;
1123         mark_buffer_dirty(right_buffer);
1124         mark_buffer_dirty(l_buf);
1125         BUG_ON(path->slots[0] != slot);
1126         if (mid <= slot) {
1127                 btrfs_block_release(root, path->nodes[0]);
1128                 path->nodes[0] = right_buffer;
1129                 path->slots[0] -= mid;
1130                 path->slots[1] += 1;
1131         } else
1132                 btrfs_block_release(root, right_buffer);
1133         BUG_ON(path->slots[0] < 0);
1134         return ret;
1135 }
1136
1137 /*
1138  * Given a key and some data, insert an item into the tree.
1139  * This does all the path init required, making room in the tree if needed.
1140  */
1141 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1142                             *root, struct btrfs_path *path, struct btrfs_key
1143                             *cpu_key, u32 data_size)
1144 {
1145         int ret = 0;
1146         int slot;
1147         int slot_orig;
1148         struct btrfs_leaf *leaf;
1149         struct buffer_head *leaf_buf;
1150         u32 nritems;
1151         unsigned int data_end;
1152         struct btrfs_disk_key disk_key;
1153
1154         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1155
1156         /* create a root if there isn't one */
1157         if (!root->node)
1158                 BUG();
1159         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1160         if (ret == 0) {
1161                 btrfs_release_path(root, path);
1162                 return -EEXIST;
1163         }
1164         if (ret < 0)
1165                 goto out;
1166
1167         slot_orig = path->slots[0];
1168         leaf_buf = path->nodes[0];
1169         leaf = btrfs_buffer_leaf(leaf_buf);
1170
1171         nritems = btrfs_header_nritems(&leaf->header);
1172         data_end = leaf_data_end(root, leaf);
1173
1174         if (btrfs_leaf_free_space(root, leaf) <
1175             sizeof(struct btrfs_item) + data_size)
1176                 BUG();
1177
1178         slot = path->slots[0];
1179         BUG_ON(slot < 0);
1180         if (slot != nritems) {
1181                 int i;
1182                 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1183
1184                 /*
1185                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
1186                  */
1187                 /* first correct the data pointers */
1188                 for (i = slot; i < nritems; i++) {
1189                         u32 ioff = btrfs_item_offset(leaf->items + i);
1190                         btrfs_set_item_offset(leaf->items + i,
1191                                               ioff - data_size);
1192                 }
1193
1194                 /* shift the items */
1195                 memmove(leaf->items + slot + 1, leaf->items + slot,
1196                         (nritems - slot) * sizeof(struct btrfs_item));
1197
1198                 /* shift the data */
1199                 memmove(btrfs_leaf_data(leaf) + data_end - data_size,
1200                         btrfs_leaf_data(leaf) +
1201                         data_end, old_data - data_end);
1202                 data_end = old_data;
1203         }
1204         /* setup the item for the new data */
1205         memcpy(&leaf->items[slot].key, &disk_key,
1206                 sizeof(struct btrfs_disk_key));
1207         btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1208         btrfs_set_item_size(leaf->items + slot, data_size);
1209         btrfs_set_header_nritems(&leaf->header, nritems + 1);
1210         mark_buffer_dirty(leaf_buf);
1211
1212         ret = 0;
1213         if (slot == 0)
1214                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
1215
1216         if (btrfs_leaf_free_space(root, leaf) < 0)
1217                 BUG();
1218         check_leaf(root, path, 0);
1219 out:
1220         return ret;
1221 }
1222
1223 /*
1224  * Given a key and some data, insert an item into the tree.
1225  * This does all the path init required, making room in the tree if needed.
1226  */
1227 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1228                       *root, struct btrfs_key *cpu_key, void *data, u32
1229                       data_size)
1230 {
1231         int ret = 0;
1232         struct btrfs_path path;
1233         u8 *ptr;
1234
1235         btrfs_init_path(&path);
1236         ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size);
1237         if (!ret) {
1238                 ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]),
1239                                      path.slots[0], u8);
1240                 memcpy(ptr, data, data_size);
1241                 mark_buffer_dirty(path.nodes[0]);
1242         }
1243         btrfs_release_path(root, &path);
1244         return ret;
1245 }
1246
1247 /*
1248  * delete the pointer from a given node.
1249  *
1250  * If the delete empties a node, the node is removed from the tree,
1251  * continuing all the way the root if required.  The root is converted into
1252  * a leaf if all the nodes are emptied.
1253  */
1254 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1255                    struct btrfs_path *path, int level, int slot)
1256 {
1257         struct btrfs_node *node;
1258         struct buffer_head *parent = path->nodes[level];
1259         u32 nritems;
1260         int ret = 0;
1261         int wret;
1262
1263         node = btrfs_buffer_node(parent);
1264         nritems = btrfs_header_nritems(&node->header);
1265         if (slot != nritems -1) {
1266                 memmove(node->ptrs + slot, node->ptrs + slot + 1,
1267                         sizeof(struct btrfs_key_ptr) * (nritems - slot - 1));
1268         }
1269         nritems--;
1270         btrfs_set_header_nritems(&node->header, nritems);
1271         if (nritems == 0 && parent == root->node) {
1272                 struct btrfs_header *header = btrfs_buffer_header(root->node);
1273                 BUG_ON(btrfs_header_level(header) != 1);
1274                 /* just turn the root into a leaf and break */
1275                 btrfs_set_header_level(header, 0);
1276         } else if (slot == 0) {
1277                 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
1278                                       level + 1);
1279                 if (wret)
1280                         ret = wret;
1281         }
1282         mark_buffer_dirty(parent);
1283         return ret;
1284 }
1285
1286 /*
1287  * delete the item at the leaf level in path.  If that empties
1288  * the leaf, remove it from the tree
1289  */
1290 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1291                    struct btrfs_path *path)
1292 {
1293         int slot;
1294         struct btrfs_leaf *leaf;
1295         struct buffer_head *leaf_buf;
1296         int doff;
1297         int dsize;
1298         int ret = 0;
1299         int wret;
1300         u32 nritems;
1301
1302         leaf_buf = path->nodes[0];
1303         leaf = btrfs_buffer_leaf(leaf_buf);
1304         slot = path->slots[0];
1305         doff = btrfs_item_offset(leaf->items + slot);
1306         dsize = btrfs_item_size(leaf->items + slot);
1307         nritems = btrfs_header_nritems(&leaf->header);
1308
1309         if (slot != nritems - 1) {
1310                 int i;
1311                 int data_end = leaf_data_end(root, leaf);
1312                 memmove(btrfs_leaf_data(leaf) + data_end + dsize,
1313                         btrfs_leaf_data(leaf) + data_end,
1314                         doff - data_end);
1315                 for (i = slot + 1; i < nritems; i++) {
1316                         u32 ioff = btrfs_item_offset(leaf->items + i);
1317                         btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1318                 }
1319                 memmove(leaf->items + slot, leaf->items + slot + 1,
1320                         sizeof(struct btrfs_item) *
1321                         (nritems - slot - 1));
1322         }
1323         btrfs_set_header_nritems(&leaf->header, nritems - 1);
1324         nritems--;
1325         /* delete the leaf if we've emptied it */
1326         if (nritems == 0) {
1327                 if (leaf_buf == root->node) {
1328                         btrfs_set_header_level(&leaf->header, 0);
1329                 } else {
1330                         clean_tree_block(trans, root, leaf_buf);
1331                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
1332                         if (wret)
1333                                 ret = wret;
1334                         wret = btrfs_free_extent(trans, root,
1335                                                  leaf_buf->b_blocknr, 1, 1);
1336                         if (wret)
1337                                 ret = wret;
1338                 }
1339         } else {
1340                 int used = leaf_space_used(leaf, 0, nritems);
1341                 if (slot == 0) {
1342                         wret = fixup_low_keys(trans, root, path,
1343                                               &leaf->items[0].key, 1);
1344                         if (wret)
1345                                 ret = wret;
1346                 }
1347
1348                 /* delete the leaf if it is mostly empty */
1349                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1350                         /* push_leaf_left fixes the path.
1351                          * make sure the path still points to our leaf
1352                          * for possible call to del_ptr below
1353                          */
1354                         slot = path->slots[1];
1355                         get_bh(leaf_buf);
1356                         wret = push_leaf_left(trans, root, path, 1);
1357                         if (wret < 0)
1358                                 ret = wret;
1359                         if (path->nodes[0] == leaf_buf &&
1360                             btrfs_header_nritems(&leaf->header)) {
1361                                 wret = push_leaf_right(trans, root, path, 1);
1362                                 if (wret < 0)
1363                                         ret = wret;
1364                         }
1365                         if (btrfs_header_nritems(&leaf->header) == 0) {
1366                                 u64 blocknr = leaf_buf->b_blocknr;
1367                                 clean_tree_block(trans, root, leaf_buf);
1368                                 wret = del_ptr(trans, root, path, 1, slot);
1369                                 if (wret)
1370                                         ret = wret;
1371                                 btrfs_block_release(root, leaf_buf);
1372                                 wret = btrfs_free_extent(trans, root, blocknr,
1373                                                          1, 1);
1374                                 if (wret)
1375                                         ret = wret;
1376                         } else {
1377                                 mark_buffer_dirty(leaf_buf);
1378                                 btrfs_block_release(root, leaf_buf);
1379                         }
1380                 } else {
1381                         mark_buffer_dirty(leaf_buf);
1382                 }
1383         }
1384         return ret;
1385 }
1386
1387 /*
1388  * walk up the tree as far as required to find the next leaf.
1389  * returns 0 if it found something or 1 if there are no greater leaves.
1390  * returns < 0 on io errors.
1391  */
1392 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1393 {
1394         int slot;
1395         int level = 1;
1396         u64 blocknr;
1397         struct buffer_head *c;
1398         struct btrfs_node *c_node;
1399         struct buffer_head *next = NULL;
1400
1401         while(level < BTRFS_MAX_LEVEL) {
1402                 if (!path->nodes[level])
1403                         return 1;
1404                 slot = path->slots[level] + 1;
1405                 c = path->nodes[level];
1406                 c_node = btrfs_buffer_node(c);
1407                 if (slot >= btrfs_header_nritems(&c_node->header)) {
1408                         level++;
1409                         continue;
1410                 }
1411                 blocknr = btrfs_node_blockptr(c_node, slot);
1412                 if (next)
1413                         btrfs_block_release(root, next);
1414                 next = read_tree_block(root, blocknr);
1415                 break;
1416         }
1417         path->slots[level] = slot;
1418         while(1) {
1419                 level--;
1420                 c = path->nodes[level];
1421                 btrfs_block_release(root, c);
1422                 path->nodes[level] = next;
1423                 path->slots[level] = 0;
1424                 if (!level)
1425                         break;
1426                 next = read_tree_block(root,
1427                        btrfs_node_blockptr(btrfs_buffer_node(next), 0));
1428         }
1429         return 0;
1430 }