]> pilppa.org Git - linux-2.6-omap-h63xx.git/blob - fs/xfs/xfs_ialloc_btree.c
[XFS] implement semi-generic xfs_btree_new_root
[linux-2.6-omap-h63xx.git] / fs / xfs / xfs_ialloc_btree.c
1 /*
2  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_btree_trace.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_alloc.h"
41 #include "xfs_error.h"
42
43 STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
44 STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
45 STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
46 STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
47
48 /*
49  * Single level of the xfs_inobt_delete record deletion routine.
50  * Delete record pointed to by cur/level.
51  * Remove the record from its block then rebalance the tree.
52  * Return 0 for error, 1 for done, 2 to go on to the next level.
53  */
54 STATIC int                              /* error */
55 xfs_inobt_delrec(
56         xfs_btree_cur_t         *cur,   /* btree cursor */
57         int                     level,  /* level removing record from */
58         int                     *stat)  /* fail/done/go-on */
59 {
60         xfs_buf_t               *agbp;  /* buffer for a.g. inode header */
61         xfs_mount_t             *mp;    /* mount structure */
62         xfs_agi_t               *agi;   /* allocation group inode header */
63         xfs_inobt_block_t       *block; /* btree block record/key lives in */
64         xfs_agblock_t           bno;    /* btree block number */
65         xfs_buf_t               *bp;    /* buffer for block */
66         int                     error;  /* error return value */
67         int                     i;      /* loop index */
68         xfs_inobt_key_t         key;    /* kp points here if block is level 0 */
69         xfs_inobt_key_t         *kp = NULL;     /* pointer to btree keys */
70         xfs_agblock_t           lbno;   /* left block's block number */
71         xfs_buf_t               *lbp;   /* left block's buffer pointer */
72         xfs_inobt_block_t       *left;  /* left btree block */
73         xfs_inobt_key_t         *lkp;   /* left block key pointer */
74         xfs_inobt_ptr_t         *lpp;   /* left block address pointer */
75         int                     lrecs = 0;      /* number of records in left block */
76         xfs_inobt_rec_t         *lrp;   /* left block record pointer */
77         xfs_inobt_ptr_t         *pp = NULL;     /* pointer to btree addresses */
78         int                     ptr;    /* index in btree block for this rec */
79         xfs_agblock_t           rbno;   /* right block's block number */
80         xfs_buf_t               *rbp;   /* right block's buffer pointer */
81         xfs_inobt_block_t       *right; /* right btree block */
82         xfs_inobt_key_t         *rkp;   /* right block key pointer */
83         xfs_inobt_rec_t         *rp;    /* pointer to btree records */
84         xfs_inobt_ptr_t         *rpp;   /* right block address pointer */
85         int                     rrecs = 0;      /* number of records in right block */
86         int                     numrecs;
87         xfs_inobt_rec_t         *rrp;   /* right block record pointer */
88         xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
89
90         mp = cur->bc_mp;
91
92         /*
93          * Get the index of the entry being deleted, check for nothing there.
94          */
95         ptr = cur->bc_ptrs[level];
96         if (ptr == 0) {
97                 *stat = 0;
98                 return 0;
99         }
100
101         /*
102          * Get the buffer & block containing the record or key/ptr.
103          */
104         bp = cur->bc_bufs[level];
105         block = XFS_BUF_TO_INOBT_BLOCK(bp);
106 #ifdef DEBUG
107         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
108                 return error;
109 #endif
110         /*
111          * Fail if we're off the end of the block.
112          */
113
114         numrecs = be16_to_cpu(block->bb_numrecs);
115         if (ptr > numrecs) {
116                 *stat = 0;
117                 return 0;
118         }
119         /*
120          * It's a nonleaf.  Excise the key and ptr being deleted, by
121          * sliding the entries past them down one.
122          * Log the changed areas of the block.
123          */
124         if (level > 0) {
125                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
126                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
127 #ifdef DEBUG
128                 for (i = ptr; i < numrecs; i++) {
129                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
130                                 return error;
131                 }
132 #endif
133                 if (ptr < numrecs) {
134                         memmove(&kp[ptr - 1], &kp[ptr],
135                                 (numrecs - ptr) * sizeof(*kp));
136                         memmove(&pp[ptr - 1], &pp[ptr],
137                                 (numrecs - ptr) * sizeof(*kp));
138                         xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
139                         xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
140                 }
141         }
142         /*
143          * It's a leaf.  Excise the record being deleted, by sliding the
144          * entries past it down one.  Log the changed areas of the block.
145          */
146         else {
147                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
148                 if (ptr < numrecs) {
149                         memmove(&rp[ptr - 1], &rp[ptr],
150                                 (numrecs - ptr) * sizeof(*rp));
151                         xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
152                 }
153                 /*
154                  * If it's the first record in the block, we'll need a key
155                  * structure to pass up to the next level (updkey).
156                  */
157                 if (ptr == 1) {
158                         key.ir_startino = rp->ir_startino;
159                         kp = &key;
160                 }
161         }
162         /*
163          * Decrement and log the number of entries in the block.
164          */
165         numrecs--;
166         block->bb_numrecs = cpu_to_be16(numrecs);
167         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
168         /*
169          * Is this the root level?  If so, we're almost done.
170          */
171         if (level == cur->bc_nlevels - 1) {
172                 /*
173                  * If this is the root level,
174                  * and there's only one entry left,
175                  * and it's NOT the leaf level,
176                  * then we can get rid of this level.
177                  */
178                 if (numrecs == 1 && level > 0) {
179                         agbp = cur->bc_private.a.agbp;
180                         agi = XFS_BUF_TO_AGI(agbp);
181                         /*
182                          * pp is still set to the first pointer in the block.
183                          * Make it the new root of the btree.
184                          */
185                         bno = be32_to_cpu(agi->agi_root);
186                         agi->agi_root = *pp;
187                         be32_add_cpu(&agi->agi_level, -1);
188                         /*
189                          * Free the block.
190                          */
191                         if ((error = xfs_free_extent(cur->bc_tp,
192                                 XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
193                                 return error;
194                         xfs_trans_binval(cur->bc_tp, bp);
195                         xfs_ialloc_log_agi(cur->bc_tp, agbp,
196                                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
197                         /*
198                          * Update the cursor so there's one fewer level.
199                          */
200                         cur->bc_bufs[level] = NULL;
201                         cur->bc_nlevels--;
202                 } else if (level > 0 &&
203                            (error = xfs_btree_decrement(cur, level, &i)))
204                         return error;
205                 *stat = 1;
206                 return 0;
207         }
208         /*
209          * If we deleted the leftmost entry in the block, update the
210          * key values above us in the tree.
211          */
212         if (ptr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)kp, level + 1)))
213                 return error;
214         /*
215          * If the number of records remaining in the block is at least
216          * the minimum, we're done.
217          */
218         if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
219                 if (level > 0 &&
220                     (error = xfs_btree_decrement(cur, level, &i)))
221                         return error;
222                 *stat = 1;
223                 return 0;
224         }
225         /*
226          * Otherwise, we have to move some records around to keep the
227          * tree balanced.  Look at the left and right sibling blocks to
228          * see if we can re-balance by moving only one record.
229          */
230         rbno = be32_to_cpu(block->bb_rightsib);
231         lbno = be32_to_cpu(block->bb_leftsib);
232         bno = NULLAGBLOCK;
233         ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
234         /*
235          * Duplicate the cursor so our btree manipulations here won't
236          * disrupt the next level up.
237          */
238         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
239                 return error;
240         /*
241          * If there's a right sibling, see if it's ok to shift an entry
242          * out of it.
243          */
244         if (rbno != NULLAGBLOCK) {
245                 /*
246                  * Move the temp cursor to the last entry in the next block.
247                  * Actually any entry but the first would suffice.
248                  */
249                 i = xfs_btree_lastrec(tcur, level);
250                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
251                 if ((error = xfs_btree_increment(tcur, level, &i)))
252                         goto error0;
253                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
254                 i = xfs_btree_lastrec(tcur, level);
255                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
256                 /*
257                  * Grab a pointer to the block.
258                  */
259                 rbp = tcur->bc_bufs[level];
260                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
261 #ifdef DEBUG
262                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
263                         goto error0;
264 #endif
265                 /*
266                  * Grab the current block number, for future use.
267                  */
268                 bno = be32_to_cpu(right->bb_leftsib);
269                 /*
270                  * If right block is full enough so that removing one entry
271                  * won't make it too empty, and left-shifting an entry out
272                  * of right to us works, we're done.
273                  */
274                 if (be16_to_cpu(right->bb_numrecs) - 1 >=
275                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
276                         if ((error = xfs_btree_lshift(tcur, level, &i)))
277                                 goto error0;
278                         if (i) {
279                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
280                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
281                                 xfs_btree_del_cursor(tcur,
282                                                      XFS_BTREE_NOERROR);
283                                 if (level > 0 &&
284                                     (error = xfs_btree_decrement(cur, level,
285                                                 &i)))
286                                         return error;
287                                 *stat = 1;
288                                 return 0;
289                         }
290                 }
291                 /*
292                  * Otherwise, grab the number of records in right for
293                  * future reference, and fix up the temp cursor to point
294                  * to our block again (last record).
295                  */
296                 rrecs = be16_to_cpu(right->bb_numrecs);
297                 if (lbno != NULLAGBLOCK) {
298                         xfs_btree_firstrec(tcur, level);
299                         if ((error = xfs_btree_decrement(tcur, level, &i)))
300                                 goto error0;
301                 }
302         }
303         /*
304          * If there's a left sibling, see if it's ok to shift an entry
305          * out of it.
306          */
307         if (lbno != NULLAGBLOCK) {
308                 /*
309                  * Move the temp cursor to the first entry in the
310                  * previous block.
311                  */
312                 xfs_btree_firstrec(tcur, level);
313                 if ((error = xfs_btree_decrement(tcur, level, &i)))
314                         goto error0;
315                 xfs_btree_firstrec(tcur, level);
316                 /*
317                  * Grab a pointer to the block.
318                  */
319                 lbp = tcur->bc_bufs[level];
320                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
321 #ifdef DEBUG
322                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
323                         goto error0;
324 #endif
325                 /*
326                  * Grab the current block number, for future use.
327                  */
328                 bno = be32_to_cpu(left->bb_rightsib);
329                 /*
330                  * If left block is full enough so that removing one entry
331                  * won't make it too empty, and right-shifting an entry out
332                  * of left to us works, we're done.
333                  */
334                 if (be16_to_cpu(left->bb_numrecs) - 1 >=
335                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
336                         if ((error = xfs_btree_rshift(tcur, level, &i)))
337                                 goto error0;
338                         if (i) {
339                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
340                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
341                                 xfs_btree_del_cursor(tcur,
342                                                      XFS_BTREE_NOERROR);
343                                 if (level == 0)
344                                         cur->bc_ptrs[0]++;
345                                 *stat = 1;
346                                 return 0;
347                         }
348                 }
349                 /*
350                  * Otherwise, grab the number of records in right for
351                  * future reference.
352                  */
353                 lrecs = be16_to_cpu(left->bb_numrecs);
354         }
355         /*
356          * Delete the temp cursor, we're done with it.
357          */
358         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
359         /*
360          * If here, we need to do a join to keep the tree balanced.
361          */
362         ASSERT(bno != NULLAGBLOCK);
363         /*
364          * See if we can join with the left neighbor block.
365          */
366         if (lbno != NULLAGBLOCK &&
367             lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
368                 /*
369                  * Set "right" to be the starting block,
370                  * "left" to be the left neighbor.
371                  */
372                 rbno = bno;
373                 right = block;
374                 rrecs = be16_to_cpu(right->bb_numrecs);
375                 rbp = bp;
376                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
377                                 cur->bc_private.a.agno, lbno, 0, &lbp,
378                                 XFS_INO_BTREE_REF)))
379                         return error;
380                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
381                 lrecs = be16_to_cpu(left->bb_numrecs);
382                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
383                         return error;
384         }
385         /*
386          * If that won't work, see if we can join with the right neighbor block.
387          */
388         else if (rbno != NULLAGBLOCK &&
389                  rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
390                 /*
391                  * Set "left" to be the starting block,
392                  * "right" to be the right neighbor.
393                  */
394                 lbno = bno;
395                 left = block;
396                 lrecs = be16_to_cpu(left->bb_numrecs);
397                 lbp = bp;
398                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
399                                 cur->bc_private.a.agno, rbno, 0, &rbp,
400                                 XFS_INO_BTREE_REF)))
401                         return error;
402                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
403                 rrecs = be16_to_cpu(right->bb_numrecs);
404                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
405                         return error;
406         }
407         /*
408          * Otherwise, we can't fix the imbalance.
409          * Just return.  This is probably a logic error, but it's not fatal.
410          */
411         else {
412                 if (level > 0 && (error = xfs_btree_decrement(cur, level, &i)))
413                         return error;
414                 *stat = 1;
415                 return 0;
416         }
417         /*
418          * We're now going to join "left" and "right" by moving all the stuff
419          * in "right" to "left" and deleting "right".
420          */
421         if (level > 0) {
422                 /*
423                  * It's a non-leaf.  Move keys and pointers.
424                  */
425                 lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
426                 lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
427                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
428                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
429 #ifdef DEBUG
430                 for (i = 0; i < rrecs; i++) {
431                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
432                                 return error;
433                 }
434 #endif
435                 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
436                 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
437                 xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
438                 xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
439         } else {
440                 /*
441                  * It's a leaf.  Move records.
442                  */
443                 lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
444                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
445                 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
446                 xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
447         }
448         /*
449          * If we joined with the left neighbor, set the buffer in the
450          * cursor to the left block, and fix up the index.
451          */
452         if (bp != lbp) {
453                 xfs_btree_setbuf(cur, level, lbp);
454                 cur->bc_ptrs[level] += lrecs;
455         }
456         /*
457          * If we joined with the right neighbor and there's a level above
458          * us, increment the cursor at that level.
459          */
460         else if (level + 1 < cur->bc_nlevels &&
461                  (error = xfs_btree_increment(cur, level + 1, &i)))
462                 return error;
463         /*
464          * Fix up the number of records in the surviving block.
465          */
466         lrecs += rrecs;
467         left->bb_numrecs = cpu_to_be16(lrecs);
468         /*
469          * Fix up the right block pointer in the surviving block, and log it.
470          */
471         left->bb_rightsib = right->bb_rightsib;
472         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
473         /*
474          * If there is a right sibling now, make it point to the
475          * remaining block.
476          */
477         if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
478                 xfs_inobt_block_t       *rrblock;
479                 xfs_buf_t               *rrbp;
480
481                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
482                                 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
483                                 &rrbp, XFS_INO_BTREE_REF)))
484                         return error;
485                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
486                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
487                         return error;
488                 rrblock->bb_leftsib = cpu_to_be32(lbno);
489                 xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
490         }
491         /*
492          * Free the deleting block.
493          */
494         if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
495                                      cur->bc_private.a.agno, rbno), 1)))
496                 return error;
497         xfs_trans_binval(cur->bc_tp, rbp);
498         /*
499          * Readjust the ptr at this level if it's not a leaf, since it's
500          * still pointing at the deletion point, which makes the cursor
501          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
502          * We can't use decrement because it would change the next level up.
503          */
504         if (level > 0)
505                 cur->bc_ptrs[level]--;
506         /*
507          * Return value means the next level up has something to do.
508          */
509         *stat = 2;
510         return 0;
511
512 error0:
513         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
514         return error;
515 }
516
517 /*
518  * Insert one record/level.  Return information to the caller
519  * allowing the next level up to proceed if necessary.
520  */
521 STATIC int                              /* error */
522 xfs_inobt_insrec(
523         xfs_btree_cur_t         *cur,   /* btree cursor */
524         int                     level,  /* level to insert record at */
525         xfs_agblock_t           *bnop,  /* i/o: block number inserted */
526         xfs_inobt_rec_t         *recp,  /* i/o: record data inserted */
527         xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
528         int                     *stat)  /* success/failure */
529 {
530         xfs_inobt_block_t       *block; /* btree block record/key lives in */
531         xfs_buf_t               *bp;    /* buffer for block */
532         int                     error;  /* error return value */
533         int                     i;      /* loop index */
534         xfs_inobt_key_t         key;    /* key value being inserted */
535         xfs_inobt_key_t         *kp=NULL;       /* pointer to btree keys */
536         xfs_agblock_t           nbno;   /* block number of allocated block */
537         xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
538         xfs_inobt_key_t         nkey;   /* new key value, from split */
539         xfs_inobt_rec_t         nrec;   /* new record value, for caller */
540         int                     numrecs;
541         int                     optr;   /* old ptr value */
542         xfs_inobt_ptr_t         *pp;    /* pointer to btree addresses */
543         int                     ptr;    /* index in btree block for this rec */
544         xfs_inobt_rec_t         *rp=NULL;       /* pointer to btree records */
545
546         /*
547          * GCC doesn't understand the (arguably complex) control flow in
548          * this function and complains about uninitialized structure fields
549          * without this.
550          */
551         memset(&nrec, 0, sizeof(nrec));
552
553         /*
554          * If we made it to the root level, allocate a new root block
555          * and we're done.
556          */
557         if (level >= cur->bc_nlevels) {
558                 error = xfs_btree_new_root(cur, &i);
559                 *bnop = NULLAGBLOCK;
560                 *stat = i;
561                 return error;
562         }
563         /*
564          * Make a key out of the record data to be inserted, and save it.
565          */
566         key.ir_startino = recp->ir_startino;
567         optr = ptr = cur->bc_ptrs[level];
568         /*
569          * If we're off the left edge, return failure.
570          */
571         if (ptr == 0) {
572                 *stat = 0;
573                 return 0;
574         }
575         /*
576          * Get pointers to the btree buffer and block.
577          */
578         bp = cur->bc_bufs[level];
579         block = XFS_BUF_TO_INOBT_BLOCK(bp);
580         numrecs = be16_to_cpu(block->bb_numrecs);
581 #ifdef DEBUG
582         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
583                 return error;
584         /*
585          * Check that the new entry is being inserted in the right place.
586          */
587         if (ptr <= numrecs) {
588                 if (level == 0) {
589                         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
590                         xfs_btree_check_rec(cur->bc_btnum, recp, rp);
591                 } else {
592                         kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
593                         xfs_btree_check_key(cur->bc_btnum, &key, kp);
594                 }
595         }
596 #endif
597         nbno = NULLAGBLOCK;
598         ncur = NULL;
599         /*
600          * If the block is full, we can't insert the new entry until we
601          * make the block un-full.
602          */
603         if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
604                 /*
605                  * First, try shifting an entry to the right neighbor.
606                  */
607                 if ((error = xfs_btree_rshift(cur, level, &i)))
608                         return error;
609                 if (i) {
610                         /* nothing */
611                 }
612                 /*
613                  * Next, try shifting an entry to the left neighbor.
614                  */
615                 else {
616                         if ((error = xfs_btree_lshift(cur, level, &i)))
617                                 return error;
618                         if (i) {
619                                 optr = ptr = cur->bc_ptrs[level];
620                         } else {
621                                 union xfs_btree_ptr bno = { .s = cpu_to_be32(nbno) };
622                                 /*
623                                  * Next, try splitting the current block
624                                  * in half. If this works we have to
625                                  * re-set our variables because
626                                  * we could be in a different block now.
627                                  */
628                                 if ((error = xfs_btree_split(cur, level, &bno,
629                                                 (union xfs_btree_key *)&nkey,
630                                                 &ncur, &i)))
631                                         return error;
632                                 nbno = be32_to_cpu(bno.s);
633                                 if (i) {
634                                         bp = cur->bc_bufs[level];
635                                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
636 #ifdef DEBUG
637                                         if ((error = xfs_btree_check_sblock(cur,
638                                                         block, level, bp)))
639                                                 return error;
640 #endif
641                                         ptr = cur->bc_ptrs[level];
642                                         nrec.ir_startino = nkey.ir_startino;
643                                 } else {
644                                         /*
645                                          * Otherwise the insert fails.
646                                          */
647                                         *stat = 0;
648                                         return 0;
649                                 }
650                         }
651                 }
652         }
653         /*
654          * At this point we know there's room for our new entry in the block
655          * we're pointing at.
656          */
657         numrecs = be16_to_cpu(block->bb_numrecs);
658         if (level > 0) {
659                 /*
660                  * It's a non-leaf entry.  Make a hole for the new data
661                  * in the key and ptr regions of the block.
662                  */
663                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
664                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
665 #ifdef DEBUG
666                 for (i = numrecs; i >= ptr; i--) {
667                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
668                                 return error;
669                 }
670 #endif
671                 memmove(&kp[ptr], &kp[ptr - 1],
672                         (numrecs - ptr + 1) * sizeof(*kp));
673                 memmove(&pp[ptr], &pp[ptr - 1],
674                         (numrecs - ptr + 1) * sizeof(*pp));
675                 /*
676                  * Now stuff the new data in, bump numrecs and log the new data.
677                  */
678 #ifdef DEBUG
679                 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
680                         return error;
681 #endif
682                 kp[ptr - 1] = key;
683                 pp[ptr - 1] = cpu_to_be32(*bnop);
684                 numrecs++;
685                 block->bb_numrecs = cpu_to_be16(numrecs);
686                 xfs_inobt_log_keys(cur, bp, ptr, numrecs);
687                 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
688         } else {
689                 /*
690                  * It's a leaf entry.  Make a hole for the new record.
691                  */
692                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
693                 memmove(&rp[ptr], &rp[ptr - 1],
694                         (numrecs - ptr + 1) * sizeof(*rp));
695                 /*
696                  * Now stuff the new record in, bump numrecs
697                  * and log the new data.
698                  */
699                 rp[ptr - 1] = *recp;
700                 numrecs++;
701                 block->bb_numrecs = cpu_to_be16(numrecs);
702                 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
703         }
704         /*
705          * Log the new number of records in the btree header.
706          */
707         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
708 #ifdef DEBUG
709         /*
710          * Check that the key/record is in the right place, now.
711          */
712         if (ptr < numrecs) {
713                 if (level == 0)
714                         xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
715                                 rp + ptr);
716                 else
717                         xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
718                                 kp + ptr);
719         }
720 #endif
721         /*
722          * If we inserted at the start of a block, update the parents' keys.
723          */
724         if (optr == 1 && (error = xfs_btree_updkey(cur, (union xfs_btree_key *)&key, level + 1)))
725                 return error;
726         /*
727          * Return the new block number, if any.
728          * If there is one, give back a record value and a cursor too.
729          */
730         *bnop = nbno;
731         if (nbno != NULLAGBLOCK) {
732                 *recp = nrec;
733                 *curp = ncur;
734         }
735         *stat = 1;
736         return 0;
737 }
738
739 /*
740  * Log header fields from a btree block.
741  */
742 STATIC void
743 xfs_inobt_log_block(
744         xfs_trans_t             *tp,    /* transaction pointer */
745         xfs_buf_t               *bp,    /* buffer containing btree block */
746         int                     fields) /* mask of fields: XFS_BB_... */
747 {
748         int                     first;  /* first byte offset logged */
749         int                     last;   /* last byte offset logged */
750         static const short      offsets[] = {   /* table of offsets */
751                 offsetof(xfs_inobt_block_t, bb_magic),
752                 offsetof(xfs_inobt_block_t, bb_level),
753                 offsetof(xfs_inobt_block_t, bb_numrecs),
754                 offsetof(xfs_inobt_block_t, bb_leftsib),
755                 offsetof(xfs_inobt_block_t, bb_rightsib),
756                 sizeof(xfs_inobt_block_t)
757         };
758
759         xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
760         xfs_trans_log_buf(tp, bp, first, last);
761 }
762
763 /*
764  * Log keys from a btree block (nonleaf).
765  */
766 STATIC void
767 xfs_inobt_log_keys(
768         xfs_btree_cur_t         *cur,   /* btree cursor */
769         xfs_buf_t               *bp,    /* buffer containing btree block */
770         int                     kfirst, /* index of first key to log */
771         int                     klast)  /* index of last key to log */
772 {
773         xfs_inobt_block_t       *block; /* btree block to log from */
774         int                     first;  /* first byte offset logged */
775         xfs_inobt_key_t         *kp;    /* key pointer in btree block */
776         int                     last;   /* last byte offset logged */
777
778         block = XFS_BUF_TO_INOBT_BLOCK(bp);
779         kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
780         first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
781         last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
782         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
783 }
784
785 /*
786  * Log block pointer fields from a btree block (nonleaf).
787  */
788 STATIC void
789 xfs_inobt_log_ptrs(
790         xfs_btree_cur_t         *cur,   /* btree cursor */
791         xfs_buf_t               *bp,    /* buffer containing btree block */
792         int                     pfirst, /* index of first pointer to log */
793         int                     plast)  /* index of last pointer to log */
794 {
795         xfs_inobt_block_t       *block; /* btree block to log from */
796         int                     first;  /* first byte offset logged */
797         int                     last;   /* last byte offset logged */
798         xfs_inobt_ptr_t         *pp;    /* block-pointer pointer in btree blk */
799
800         block = XFS_BUF_TO_INOBT_BLOCK(bp);
801         pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
802         first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
803         last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
804         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
805 }
806
807 /*
808  * Log records from a btree block (leaf).
809  */
810 STATIC void
811 xfs_inobt_log_recs(
812         xfs_btree_cur_t         *cur,   /* btree cursor */
813         xfs_buf_t               *bp,    /* buffer containing btree block */
814         int                     rfirst, /* index of first record to log */
815         int                     rlast)  /* index of last record to log */
816 {
817         xfs_inobt_block_t       *block; /* btree block to log from */
818         int                     first;  /* first byte offset logged */
819         int                     last;   /* last byte offset logged */
820         xfs_inobt_rec_t         *rp;    /* record pointer for btree block */
821
822         block = XFS_BUF_TO_INOBT_BLOCK(bp);
823         rp = XFS_INOBT_REC_ADDR(block, 1, cur);
824         first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
825         last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
826         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
827 }
828
829
830 /*
831  * Externally visible routines.
832  */
833
834 /*
835  * Delete the record pointed to by cur.
836  * The cursor refers to the place where the record was (could be inserted)
837  * when the operation returns.
838  */
839 int                                     /* error */
840 xfs_inobt_delete(
841         xfs_btree_cur_t *cur,           /* btree cursor */
842         int             *stat)          /* success/failure */
843 {
844         int             error;
845         int             i;              /* result code */
846         int             level;          /* btree level */
847
848         /*
849          * Go up the tree, starting at leaf level.
850          * If 2 is returned then a join was done; go to the next level.
851          * Otherwise we are done.
852          */
853         for (level = 0, i = 2; i == 2; level++) {
854                 if ((error = xfs_inobt_delrec(cur, level, &i)))
855                         return error;
856         }
857         if (i == 0) {
858                 for (level = 1; level < cur->bc_nlevels; level++) {
859                         if (cur->bc_ptrs[level] == 0) {
860                                 if ((error = xfs_btree_decrement(cur, level, &i)))
861                                         return error;
862                                 break;
863                         }
864                 }
865         }
866         *stat = i;
867         return 0;
868 }
869
870
871 /*
872  * Get the data from the pointed-to record.
873  */
874 int                                     /* error */
875 xfs_inobt_get_rec(
876         xfs_btree_cur_t         *cur,   /* btree cursor */
877         xfs_agino_t             *ino,   /* output: starting inode of chunk */
878         __int32_t               *fcnt,  /* output: number of free inodes */
879         xfs_inofree_t           *free,  /* output: free inode mask */
880         int                     *stat)  /* output: success/failure */
881 {
882         xfs_inobt_block_t       *block; /* btree block */
883         xfs_buf_t               *bp;    /* buffer containing btree block */
884 #ifdef DEBUG
885         int                     error;  /* error return value */
886 #endif
887         int                     ptr;    /* record number */
888         xfs_inobt_rec_t         *rec;   /* record data */
889
890         bp = cur->bc_bufs[0];
891         ptr = cur->bc_ptrs[0];
892         block = XFS_BUF_TO_INOBT_BLOCK(bp);
893 #ifdef DEBUG
894         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
895                 return error;
896 #endif
897         /*
898          * Off the right end or left end, return failure.
899          */
900         if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
901                 *stat = 0;
902                 return 0;
903         }
904         /*
905          * Point to the record and extract its data.
906          */
907         rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
908         *ino = be32_to_cpu(rec->ir_startino);
909         *fcnt = be32_to_cpu(rec->ir_freecount);
910         *free = be64_to_cpu(rec->ir_free);
911         *stat = 1;
912         return 0;
913 }
914
915 /*
916  * Insert the current record at the point referenced by cur.
917  * The cursor may be inconsistent on return if splits have been done.
918  */
919 int                                     /* error */
920 xfs_inobt_insert(
921         xfs_btree_cur_t *cur,           /* btree cursor */
922         int             *stat)          /* success/failure */
923 {
924         int             error;          /* error return value */
925         int             i;              /* result value, 0 for failure */
926         int             level;          /* current level number in btree */
927         xfs_agblock_t   nbno;           /* new block number (split result) */
928         xfs_btree_cur_t *ncur;          /* new cursor (split result) */
929         xfs_inobt_rec_t nrec;           /* record being inserted this level */
930         xfs_btree_cur_t *pcur;          /* previous level's cursor */
931
932         level = 0;
933         nbno = NULLAGBLOCK;
934         nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
935         nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
936         nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
937         ncur = NULL;
938         pcur = cur;
939         /*
940          * Loop going up the tree, starting at the leaf level.
941          * Stop when we don't get a split block, that must mean that
942          * the insert is finished with this level.
943          */
944         do {
945                 /*
946                  * Insert nrec/nbno into this level of the tree.
947                  * Note if we fail, nbno will be null.
948                  */
949                 if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
950                                 &i))) {
951                         if (pcur != cur)
952                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
953                         return error;
954                 }
955                 /*
956                  * See if the cursor we just used is trash.
957                  * Can't trash the caller's cursor, but otherwise we should
958                  * if ncur is a new cursor or we're about to be done.
959                  */
960                 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
961                         cur->bc_nlevels = pcur->bc_nlevels;
962                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
963                 }
964                 /*
965                  * If we got a new cursor, switch to it.
966                  */
967                 if (ncur) {
968                         pcur = ncur;
969                         ncur = NULL;
970                 }
971         } while (nbno != NULLAGBLOCK);
972         *stat = i;
973         return 0;
974 }
975
976 STATIC struct xfs_btree_cur *
977 xfs_inobt_dup_cursor(
978         struct xfs_btree_cur    *cur)
979 {
980         return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp,
981                         cur->bc_private.a.agbp, cur->bc_private.a.agno);
982 }
983
984 STATIC void
985 xfs_inobt_set_root(
986         struct xfs_btree_cur    *cur,
987         union xfs_btree_ptr     *nptr,
988         int                     inc)    /* level change */
989 {
990         struct xfs_buf          *agbp = cur->bc_private.a.agbp;
991         struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
992
993         agi->agi_root = nptr->s;
994         be32_add_cpu(&agi->agi_level, inc);
995         xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
996 }
997
998 STATIC int
999 xfs_inobt_alloc_block(
1000         struct xfs_btree_cur    *cur,
1001         union xfs_btree_ptr     *start,
1002         union xfs_btree_ptr     *new,
1003         int                     length,
1004         int                     *stat)
1005 {
1006         xfs_alloc_arg_t         args;           /* block allocation args */
1007         int                     error;          /* error return value */
1008         xfs_agblock_t           sbno = be32_to_cpu(start->s);
1009
1010         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1011
1012         memset(&args, 0, sizeof(args));
1013         args.tp = cur->bc_tp;
1014         args.mp = cur->bc_mp;
1015         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno);
1016         args.minlen = 1;
1017         args.maxlen = 1;
1018         args.prod = 1;
1019         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1020
1021         error = xfs_alloc_vextent(&args);
1022         if (error) {
1023                 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1024                 return error;
1025         }
1026         if (args.fsbno == NULLFSBLOCK) {
1027                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1028                 *stat = 0;
1029                 return 0;
1030         }
1031         ASSERT(args.len == 1);
1032         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1033
1034         new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno));
1035         *stat = 1;
1036         return 0;
1037 }
1038
1039
1040 STATIC int
1041 xfs_inobt_get_maxrecs(
1042         struct xfs_btree_cur    *cur,
1043         int                     level)
1044 {
1045         return cur->bc_mp->m_inobt_mxr[level != 0];
1046 }
1047
1048 STATIC void
1049 xfs_inobt_init_key_from_rec(
1050         union xfs_btree_key     *key,
1051         union xfs_btree_rec     *rec)
1052 {
1053         key->inobt.ir_startino = rec->inobt.ir_startino;
1054 }
1055
1056 /*
1057  * intial value of ptr for lookup
1058  */
1059 STATIC void
1060 xfs_inobt_init_ptr_from_cur(
1061         struct xfs_btree_cur    *cur,
1062         union xfs_btree_ptr     *ptr)
1063 {
1064         struct xfs_agi          *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
1065
1066         ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
1067
1068         ptr->s = agi->agi_root;
1069 }
1070
1071 STATIC __int64_t
1072 xfs_inobt_key_diff(
1073         struct xfs_btree_cur    *cur,
1074         union xfs_btree_key     *key)
1075 {
1076         return (__int64_t)be32_to_cpu(key->inobt.ir_startino) -
1077                           cur->bc_rec.i.ir_startino;
1078 }
1079
1080 #ifdef XFS_BTREE_TRACE
1081 ktrace_t        *xfs_inobt_trace_buf;
1082
1083 STATIC void
1084 xfs_inobt_trace_enter(
1085         struct xfs_btree_cur    *cur,
1086         const char              *func,
1087         char                    *s,
1088         int                     type,
1089         int                     line,
1090         __psunsigned_t          a0,
1091         __psunsigned_t          a1,
1092         __psunsigned_t          a2,
1093         __psunsigned_t          a3,
1094         __psunsigned_t          a4,
1095         __psunsigned_t          a5,
1096         __psunsigned_t          a6,
1097         __psunsigned_t          a7,
1098         __psunsigned_t          a8,
1099         __psunsigned_t          a9,
1100         __psunsigned_t          a10)
1101 {
1102         ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
1103                 (void *)func, (void *)s, NULL, (void *)cur,
1104                 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
1105                 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
1106                 (void *)a8, (void *)a9, (void *)a10);
1107 }
1108
1109 STATIC void
1110 xfs_inobt_trace_cursor(
1111         struct xfs_btree_cur    *cur,
1112         __uint32_t              *s0,
1113         __uint64_t              *l0,
1114         __uint64_t              *l1)
1115 {
1116         *s0 = cur->bc_private.a.agno;
1117         *l0 = cur->bc_rec.i.ir_startino;
1118         *l1 = cur->bc_rec.i.ir_free;
1119 }
1120
1121 STATIC void
1122 xfs_inobt_trace_key(
1123         struct xfs_btree_cur    *cur,
1124         union xfs_btree_key     *key,
1125         __uint64_t              *l0,
1126         __uint64_t              *l1)
1127 {
1128         *l0 = be32_to_cpu(key->inobt.ir_startino);
1129         *l1 = 0;
1130 }
1131
1132 STATIC void
1133 xfs_inobt_trace_record(
1134         struct xfs_btree_cur    *cur,
1135         union xfs_btree_rec     *rec,
1136         __uint64_t              *l0,
1137         __uint64_t              *l1,
1138         __uint64_t              *l2)
1139 {
1140         *l0 = be32_to_cpu(rec->inobt.ir_startino);
1141         *l1 = be32_to_cpu(rec->inobt.ir_freecount);
1142         *l2 = be64_to_cpu(rec->inobt.ir_free);
1143 }
1144 #endif /* XFS_BTREE_TRACE */
1145
1146 static const struct xfs_btree_ops xfs_inobt_ops = {
1147         .rec_len                = sizeof(xfs_inobt_rec_t),
1148         .key_len                = sizeof(xfs_inobt_key_t),
1149
1150         .dup_cursor             = xfs_inobt_dup_cursor,
1151         .set_root               = xfs_inobt_set_root,
1152         .alloc_block            = xfs_inobt_alloc_block,
1153         .get_maxrecs            = xfs_inobt_get_maxrecs,
1154         .init_key_from_rec      = xfs_inobt_init_key_from_rec,
1155         .init_ptr_from_cur      = xfs_inobt_init_ptr_from_cur,
1156         .key_diff               = xfs_inobt_key_diff,
1157
1158 #ifdef XFS_BTREE_TRACE
1159         .trace_enter            = xfs_inobt_trace_enter,
1160         .trace_cursor           = xfs_inobt_trace_cursor,
1161         .trace_key              = xfs_inobt_trace_key,
1162         .trace_record           = xfs_inobt_trace_record,
1163 #endif
1164 };
1165
1166 /*
1167  * Allocate a new inode btree cursor.
1168  */
1169 struct xfs_btree_cur *                          /* new inode btree cursor */
1170 xfs_inobt_init_cursor(
1171         struct xfs_mount        *mp,            /* file system mount point */
1172         struct xfs_trans        *tp,            /* transaction pointer */
1173         struct xfs_buf          *agbp,          /* buffer for agi structure */
1174         xfs_agnumber_t          agno)           /* allocation group number */
1175 {
1176         struct xfs_agi          *agi = XFS_BUF_TO_AGI(agbp);
1177         struct xfs_btree_cur    *cur;
1178
1179         cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
1180
1181         cur->bc_tp = tp;
1182         cur->bc_mp = mp;
1183         cur->bc_nlevels = be32_to_cpu(agi->agi_level);
1184         cur->bc_btnum = XFS_BTNUM_INO;
1185         cur->bc_blocklog = mp->m_sb.sb_blocklog;
1186
1187         cur->bc_ops = &xfs_inobt_ops;
1188
1189         cur->bc_private.a.agbp = agbp;
1190         cur->bc_private.a.agno = agno;
1191
1192         return cur;
1193 }