]> pilppa.org Git - linux-2.6-omap-h63xx.git/blob - fs/btrfs/inode.c
Btrfs: Enable btree balancing on old kernels again
[linux-2.6-omap-h63xx.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/buffer_head.h>
22 #include <linux/file.h>
23 #include <linux/fs.h>
24 #include <linux/pagemap.h>
25 #include <linux/highmem.h>
26 #include <linux/time.h>
27 #include <linux/init.h>
28 #include <linux/string.h>
29 #include <linux/smp_lock.h>
30 #include <linux/backing-dev.h>
31 #include <linux/mpage.h>
32 #include <linux/swap.h>
33 #include <linux/writeback.h>
34 #include <linux/statfs.h>
35 #include <linux/compat.h>
36 #include <linux/bit_spinlock.h>
37 #include <linux/version.h>
38 #include <linux/xattr.h>
39 #include "ctree.h"
40 #include "disk-io.h"
41 #include "transaction.h"
42 #include "btrfs_inode.h"
43 #include "ioctl.h"
44 #include "print-tree.h"
45 #include "volumes.h"
46
47 struct btrfs_iget_args {
48         u64 ino;
49         struct btrfs_root *root;
50 };
51
52 static struct inode_operations btrfs_dir_inode_operations;
53 static struct inode_operations btrfs_symlink_inode_operations;
54 static struct inode_operations btrfs_dir_ro_inode_operations;
55 static struct inode_operations btrfs_special_inode_operations;
56 static struct inode_operations btrfs_file_inode_operations;
57 static struct address_space_operations btrfs_aops;
58 static struct address_space_operations btrfs_symlink_aops;
59 static struct file_operations btrfs_dir_file_operations;
60 static struct extent_io_ops btrfs_extent_io_ops;
61
62 static struct kmem_cache *btrfs_inode_cachep;
63 struct kmem_cache *btrfs_trans_handle_cachep;
64 struct kmem_cache *btrfs_transaction_cachep;
65 struct kmem_cache *btrfs_bit_radix_cachep;
66 struct kmem_cache *btrfs_path_cachep;
67
68 #define S_SHIFT 12
69 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
70         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
71         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
72         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
73         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
74         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
75         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
76         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
77 };
78
79 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
80                            int for_del)
81 {
82         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
83         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
84         u64 thresh;
85         unsigned long flags;
86         int ret = 0;
87
88         if (for_del)
89                 thresh = total * 90;
90         else
91                 thresh = total * 85;
92
93         do_div(thresh, 100);
94
95         spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
96         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
97                 ret = -ENOSPC;
98         spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
99         return ret;
100 }
101
102 static int cow_file_range(struct inode *inode, u64 start, u64 end)
103 {
104         struct btrfs_root *root = BTRFS_I(inode)->root;
105         struct btrfs_trans_handle *trans;
106         u64 alloc_hint = 0;
107         u64 num_bytes;
108         u64 cur_alloc_size;
109         u64 blocksize = root->sectorsize;
110         u64 orig_start = start;
111         u64 orig_num_bytes;
112         struct btrfs_key ins;
113         int ret;
114
115         trans = btrfs_start_transaction(root, 1);
116         BUG_ON(!trans);
117         btrfs_set_trans_block_group(trans, inode);
118
119         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
120         num_bytes = max(blocksize,  num_bytes);
121         ret = btrfs_drop_extents(trans, root, inode,
122                                  start, start + num_bytes, start, &alloc_hint);
123         orig_num_bytes = num_bytes;
124
125         if (alloc_hint == EXTENT_MAP_INLINE)
126                 goto out;
127
128         BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
129
130         while(num_bytes > 0) {
131                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
132                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
133                                          root->sectorsize,
134                                          root->root_key.objectid,
135                                          trans->transid,
136                                          inode->i_ino, start, 0,
137                                          alloc_hint, (u64)-1, &ins, 1);
138                 if (ret) {
139                         WARN_ON(1);
140                         goto out;
141                 }
142                 cur_alloc_size = ins.offset;
143                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
144                                                start, ins.objectid, ins.offset,
145                                                ins.offset, 0);
146                 inode->i_blocks += ins.offset >> 9;
147                 btrfs_check_file(root, inode);
148                 if (num_bytes < cur_alloc_size) {
149                         printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
150                                cur_alloc_size);
151                         break;
152                 }
153                 num_bytes -= cur_alloc_size;
154                 alloc_hint = ins.objectid + ins.offset;
155                 start += cur_alloc_size;
156         }
157         btrfs_drop_extent_cache(inode, orig_start,
158                                 orig_start + orig_num_bytes - 1);
159         btrfs_add_ordered_inode(inode);
160         btrfs_update_inode(trans, root, inode);
161 out:
162         btrfs_end_transaction(trans, root);
163         return ret;
164 }
165
166 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
167 {
168         u64 extent_start;
169         u64 extent_end;
170         u64 bytenr;
171         u64 cow_end;
172         u64 loops = 0;
173         u64 total_fs_bytes;
174         struct btrfs_root *root = BTRFS_I(inode)->root;
175         struct btrfs_block_group_cache *block_group;
176         struct extent_buffer *leaf;
177         int found_type;
178         struct btrfs_path *path;
179         struct btrfs_file_extent_item *item;
180         int ret;
181         int err;
182         struct btrfs_key found_key;
183
184         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
185         path = btrfs_alloc_path();
186         BUG_ON(!path);
187 again:
188         ret = btrfs_lookup_file_extent(NULL, root, path,
189                                        inode->i_ino, start, 0);
190         if (ret < 0) {
191                 btrfs_free_path(path);
192                 return ret;
193         }
194
195         cow_end = end;
196         if (ret != 0) {
197                 if (path->slots[0] == 0)
198                         goto not_found;
199                 path->slots[0]--;
200         }
201
202         leaf = path->nodes[0];
203         item = btrfs_item_ptr(leaf, path->slots[0],
204                               struct btrfs_file_extent_item);
205
206         /* are we inside the extent that was found? */
207         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
208         found_type = btrfs_key_type(&found_key);
209         if (found_key.objectid != inode->i_ino ||
210             found_type != BTRFS_EXTENT_DATA_KEY)
211                 goto not_found;
212
213         found_type = btrfs_file_extent_type(leaf, item);
214         extent_start = found_key.offset;
215         if (found_type == BTRFS_FILE_EXTENT_REG) {
216                 u64 extent_num_bytes;
217
218                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
219                 extent_end = extent_start + extent_num_bytes;
220                 err = 0;
221
222                 if (loops && start != extent_start)
223                         goto not_found;
224
225                 if (start < extent_start || start >= extent_end)
226                         goto not_found;
227
228                 cow_end = min(end, extent_end - 1);
229                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
230                 if (bytenr == 0)
231                         goto not_found;
232
233                 if (btrfs_count_snapshots_in_path(root, path, inode->i_ino,
234                                                   bytenr) != 1) {
235                         goto not_found;
236                 }
237
238                 /*
239                  * we may be called by the resizer, make sure we're inside
240                  * the limits of the FS
241                  */
242                 block_group = btrfs_lookup_block_group(root->fs_info,
243                                                        bytenr);
244                 if (!block_group || block_group->ro)
245                         goto not_found;
246
247                 start = extent_end;
248         } else {
249                 goto not_found;
250         }
251 loop:
252         if (start > end) {
253                 btrfs_free_path(path);
254                 return 0;
255         }
256         btrfs_release_path(root, path);
257         loops++;
258         goto again;
259
260 not_found:
261         cow_file_range(inode, start, end);
262         start = end + 1;
263         goto loop;
264 }
265
266 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
267 {
268         struct btrfs_root *root = BTRFS_I(inode)->root;
269         int ret;
270         mutex_lock(&root->fs_info->fs_mutex);
271         if (btrfs_test_opt(root, NODATACOW) ||
272             btrfs_test_flag(inode, NODATACOW))
273                 ret = run_delalloc_nocow(inode, start, end);
274         else
275                 ret = cow_file_range(inode, start, end);
276
277         mutex_unlock(&root->fs_info->fs_mutex);
278         return ret;
279 }
280
281 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
282                        unsigned long old, unsigned long bits)
283 {
284         unsigned long flags;
285         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
286                 struct btrfs_root *root = BTRFS_I(inode)->root;
287                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
288                 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
289                 root->fs_info->delalloc_bytes += end - start + 1;
290                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
291         }
292         return 0;
293 }
294
295 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
296                          unsigned long old, unsigned long bits)
297 {
298         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
299                 struct btrfs_root *root = BTRFS_I(inode)->root;
300                 unsigned long flags;
301
302                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
303                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
304                         printk("warning: delalloc account %Lu %Lu\n",
305                                end - start + 1, root->fs_info->delalloc_bytes);
306                         root->fs_info->delalloc_bytes = 0;
307                         BTRFS_I(inode)->delalloc_bytes = 0;
308                 } else {
309                         root->fs_info->delalloc_bytes -= end - start + 1;
310                         BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
311                 }
312                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
313         }
314         return 0;
315 }
316
317 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
318                          size_t size, struct bio *bio)
319 {
320         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
321         struct btrfs_mapping_tree *map_tree;
322         u64 logical = bio->bi_sector << 9;
323         u64 length = 0;
324         u64 map_length;
325         int ret;
326
327         length = bio->bi_size;
328         map_tree = &root->fs_info->mapping_tree;
329         map_length = length;
330         ret = btrfs_map_block(map_tree, READ, logical,
331                               &map_length, NULL, 0);
332
333         if (map_length < length + size) {
334                 return 1;
335         }
336         return 0;
337 }
338
339 int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
340                           int mirror_num)
341 {
342         struct btrfs_root *root = BTRFS_I(inode)->root;
343         struct btrfs_trans_handle *trans;
344         int ret = 0;
345         char *sums = NULL;
346
347         ret = btrfs_csum_one_bio(root, bio, &sums);
348         BUG_ON(ret);
349
350         mutex_lock(&root->fs_info->fs_mutex);
351         trans = btrfs_start_transaction(root, 1);
352
353         btrfs_set_trans_block_group(trans, inode);
354         btrfs_csum_file_blocks(trans, root, inode, bio, sums);
355
356         ret = btrfs_end_transaction(trans, root);
357         BUG_ON(ret);
358         mutex_unlock(&root->fs_info->fs_mutex);
359
360         kfree(sums);
361
362         return btrfs_map_bio(root, rw, bio, mirror_num);
363 }
364
365 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
366                           int mirror_num)
367 {
368         struct btrfs_root *root = BTRFS_I(inode)->root;
369         int ret = 0;
370
371         if (!(rw & (1 << BIO_RW))) {
372                 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
373                 BUG_ON(ret);
374                 goto mapit;
375         }
376
377         if (btrfs_test_opt(root, NODATASUM) ||
378             btrfs_test_flag(inode, NODATASUM)) {
379                 goto mapit;
380         }
381
382         return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
383                                    inode, rw, bio, mirror_num,
384                                    __btrfs_submit_bio_hook);
385 mapit:
386         return btrfs_map_bio(root, rw, bio, mirror_num);
387 }
388
389 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
390 {
391         int ret = 0;
392         struct inode *inode = page->mapping->host;
393         struct btrfs_root *root = BTRFS_I(inode)->root;
394         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
395         struct btrfs_csum_item *item;
396         struct btrfs_path *path = NULL;
397         u32 csum;
398
399         if (btrfs_test_opt(root, NODATASUM) ||
400             btrfs_test_flag(inode, NODATASUM))
401                 return 0;
402
403         mutex_lock(&root->fs_info->fs_mutex);
404         path = btrfs_alloc_path();
405         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
406         if (IS_ERR(item)) {
407                 ret = PTR_ERR(item);
408                 /* a csum that isn't present is a preallocated region. */
409                 if (ret == -ENOENT || ret == -EFBIG)
410                         ret = 0;
411                 csum = 0;
412                 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
413                 goto out;
414         }
415         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
416                            BTRFS_CRC32_SIZE);
417         set_state_private(io_tree, start, csum);
418 out:
419         if (path)
420                 btrfs_free_path(path);
421         mutex_unlock(&root->fs_info->fs_mutex);
422         return ret;
423 }
424
425 struct io_failure_record {
426         struct page *page;
427         u64 start;
428         u64 len;
429         u64 logical;
430         int last_mirror;
431 };
432
433 int btrfs_io_failed_hook(struct bio *failed_bio,
434                          struct page *page, u64 start, u64 end,
435                          struct extent_state *state)
436 {
437         struct io_failure_record *failrec = NULL;
438         u64 private;
439         struct extent_map *em;
440         struct inode *inode = page->mapping->host;
441         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
442         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
443         struct bio *bio;
444         int num_copies;
445         int ret;
446         int rw;
447         u64 logical;
448
449         ret = get_state_private(failure_tree, start, &private);
450         if (ret) {
451                 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
452                 if (!failrec)
453                         return -ENOMEM;
454                 failrec->start = start;
455                 failrec->len = end - start + 1;
456                 failrec->last_mirror = 0;
457
458                 spin_lock(&em_tree->lock);
459                 em = lookup_extent_mapping(em_tree, start, failrec->len);
460                 if (em->start > start || em->start + em->len < start) {
461                         free_extent_map(em);
462                         em = NULL;
463                 }
464                 spin_unlock(&em_tree->lock);
465
466                 if (!em || IS_ERR(em)) {
467                         kfree(failrec);
468                         return -EIO;
469                 }
470                 logical = start - em->start;
471                 logical = em->block_start + logical;
472                 failrec->logical = logical;
473                 free_extent_map(em);
474                 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
475                                 EXTENT_DIRTY, GFP_NOFS);
476                 set_state_private(failure_tree, start,
477                                  (u64)(unsigned long)failrec);
478         } else {
479                 failrec = (struct io_failure_record *)(unsigned long)private;
480         }
481         num_copies = btrfs_num_copies(
482                               &BTRFS_I(inode)->root->fs_info->mapping_tree,
483                               failrec->logical, failrec->len);
484         failrec->last_mirror++;
485         if (!state) {
486                 spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
487                 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
488                                                     failrec->start,
489                                                     EXTENT_LOCKED);
490                 if (state && state->start != failrec->start)
491                         state = NULL;
492                 spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
493         }
494         if (!state || failrec->last_mirror > num_copies) {
495                 set_state_private(failure_tree, failrec->start, 0);
496                 clear_extent_bits(failure_tree, failrec->start,
497                                   failrec->start + failrec->len - 1,
498                                   EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
499                 kfree(failrec);
500                 return -EIO;
501         }
502         bio = bio_alloc(GFP_NOFS, 1);
503         bio->bi_private = state;
504         bio->bi_end_io = failed_bio->bi_end_io;
505         bio->bi_sector = failrec->logical >> 9;
506         bio->bi_bdev = failed_bio->bi_bdev;
507         bio->bi_size = 0;
508         bio_add_page(bio, page, failrec->len, start - page_offset(page));
509         if (failed_bio->bi_rw & (1 << BIO_RW))
510                 rw = WRITE;
511         else
512                 rw = READ;
513
514         BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
515                                                       failrec->last_mirror);
516         return 0;
517 }
518
519 int btrfs_clean_io_failures(struct inode *inode, u64 start)
520 {
521         u64 private;
522         u64 private_failure;
523         struct io_failure_record *failure;
524         int ret;
525
526         private = 0;
527         if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
528                              (u64)-1, 1, EXTENT_DIRTY)) {
529                 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
530                                         start, &private_failure);
531                 if (ret == 0) {
532                         failure = (struct io_failure_record *)(unsigned long)
533                                    private_failure;
534                         set_state_private(&BTRFS_I(inode)->io_failure_tree,
535                                           failure->start, 0);
536                         clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
537                                           failure->start,
538                                           failure->start + failure->len - 1,
539                                           EXTENT_DIRTY | EXTENT_LOCKED,
540                                           GFP_NOFS);
541                         kfree(failure);
542                 }
543         }
544         return 0;
545 }
546
547 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
548                                struct extent_state *state)
549 {
550         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
551         struct inode *inode = page->mapping->host;
552         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
553         char *kaddr;
554         u64 private = ~(u32)0;
555         int ret;
556         struct btrfs_root *root = BTRFS_I(inode)->root;
557         u32 csum = ~(u32)0;
558         unsigned long flags;
559
560         if (btrfs_test_opt(root, NODATASUM) ||
561             btrfs_test_flag(inode, NODATASUM))
562                 return 0;
563         if (state && state->start == start) {
564                 private = state->private;
565                 ret = 0;
566         } else {
567                 ret = get_state_private(io_tree, start, &private);
568         }
569         local_irq_save(flags);
570         kaddr = kmap_atomic(page, KM_IRQ0);
571         if (ret) {
572                 goto zeroit;
573         }
574         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
575         btrfs_csum_final(csum, (char *)&csum);
576         if (csum != private) {
577                 goto zeroit;
578         }
579         kunmap_atomic(kaddr, KM_IRQ0);
580         local_irq_restore(flags);
581
582         /* if the io failure tree for this inode is non-empty,
583          * check to see if we've recovered from a failed IO
584          */
585         btrfs_clean_io_failures(inode, start);
586         return 0;
587
588 zeroit:
589         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
590                page->mapping->host->i_ino, (unsigned long long)start, csum,
591                private);
592         memset(kaddr + offset, 1, end - start + 1);
593         flush_dcache_page(page);
594         kunmap_atomic(kaddr, KM_IRQ0);
595         local_irq_restore(flags);
596         if (private == 0)
597                 return 0;
598         return -EIO;
599 }
600
601 void btrfs_read_locked_inode(struct inode *inode)
602 {
603         struct btrfs_path *path;
604         struct extent_buffer *leaf;
605         struct btrfs_inode_item *inode_item;
606         struct btrfs_timespec *tspec;
607         struct btrfs_root *root = BTRFS_I(inode)->root;
608         struct btrfs_key location;
609         u64 alloc_group_block;
610         u32 rdev;
611         int ret;
612
613         path = btrfs_alloc_path();
614         BUG_ON(!path);
615         mutex_lock(&root->fs_info->fs_mutex);
616         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
617
618         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
619         if (ret)
620                 goto make_bad;
621
622         leaf = path->nodes[0];
623         inode_item = btrfs_item_ptr(leaf, path->slots[0],
624                                     struct btrfs_inode_item);
625
626         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
627         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
628         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
629         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
630         inode->i_size = btrfs_inode_size(leaf, inode_item);
631
632         tspec = btrfs_inode_atime(inode_item);
633         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
634         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
635
636         tspec = btrfs_inode_mtime(inode_item);
637         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
638         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
639
640         tspec = btrfs_inode_ctime(inode_item);
641         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
642         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
643
644         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
645         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
646         inode->i_rdev = 0;
647         rdev = btrfs_inode_rdev(leaf, inode_item);
648
649         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
650         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
651                                                        alloc_group_block);
652         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
653         if (!BTRFS_I(inode)->block_group) {
654                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
655                                                  NULL, 0,
656                                                  BTRFS_BLOCK_GROUP_METADATA, 0);
657         }
658         btrfs_free_path(path);
659         inode_item = NULL;
660
661         mutex_unlock(&root->fs_info->fs_mutex);
662
663         switch (inode->i_mode & S_IFMT) {
664         case S_IFREG:
665                 inode->i_mapping->a_ops = &btrfs_aops;
666                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
667                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
668                 inode->i_fop = &btrfs_file_operations;
669                 inode->i_op = &btrfs_file_inode_operations;
670                 break;
671         case S_IFDIR:
672                 inode->i_fop = &btrfs_dir_file_operations;
673                 if (root == root->fs_info->tree_root)
674                         inode->i_op = &btrfs_dir_ro_inode_operations;
675                 else
676                         inode->i_op = &btrfs_dir_inode_operations;
677                 break;
678         case S_IFLNK:
679                 inode->i_op = &btrfs_symlink_inode_operations;
680                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
681                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
682                 break;
683         default:
684                 init_special_inode(inode, inode->i_mode, rdev);
685                 break;
686         }
687         return;
688
689 make_bad:
690         btrfs_release_path(root, path);
691         btrfs_free_path(path);
692         mutex_unlock(&root->fs_info->fs_mutex);
693         make_bad_inode(inode);
694 }
695
696 static void fill_inode_item(struct extent_buffer *leaf,
697                             struct btrfs_inode_item *item,
698                             struct inode *inode)
699 {
700         btrfs_set_inode_uid(leaf, item, inode->i_uid);
701         btrfs_set_inode_gid(leaf, item, inode->i_gid);
702         btrfs_set_inode_size(leaf, item, inode->i_size);
703         btrfs_set_inode_mode(leaf, item, inode->i_mode);
704         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
705
706         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
707                                inode->i_atime.tv_sec);
708         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
709                                 inode->i_atime.tv_nsec);
710
711         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
712                                inode->i_mtime.tv_sec);
713         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
714                                 inode->i_mtime.tv_nsec);
715
716         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
717                                inode->i_ctime.tv_sec);
718         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
719                                 inode->i_ctime.tv_nsec);
720
721         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
722         btrfs_set_inode_generation(leaf, item, inode->i_generation);
723         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
724         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
725         btrfs_set_inode_block_group(leaf, item,
726                                     BTRFS_I(inode)->block_group->key.objectid);
727 }
728
729 int btrfs_update_inode(struct btrfs_trans_handle *trans,
730                               struct btrfs_root *root,
731                               struct inode *inode)
732 {
733         struct btrfs_inode_item *inode_item;
734         struct btrfs_path *path;
735         struct extent_buffer *leaf;
736         int ret;
737
738         path = btrfs_alloc_path();
739         BUG_ON(!path);
740         ret = btrfs_lookup_inode(trans, root, path,
741                                  &BTRFS_I(inode)->location, 1);
742         if (ret) {
743                 if (ret > 0)
744                         ret = -ENOENT;
745                 goto failed;
746         }
747
748         leaf = path->nodes[0];
749         inode_item = btrfs_item_ptr(leaf, path->slots[0],
750                                   struct btrfs_inode_item);
751
752         fill_inode_item(leaf, inode_item, inode);
753         btrfs_mark_buffer_dirty(leaf);
754         btrfs_set_inode_last_trans(trans, inode);
755         ret = 0;
756 failed:
757         btrfs_release_path(root, path);
758         btrfs_free_path(path);
759         return ret;
760 }
761
762
763 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
764                               struct btrfs_root *root,
765                               struct inode *dir,
766                               struct dentry *dentry)
767 {
768         struct btrfs_path *path;
769         const char *name = dentry->d_name.name;
770         int name_len = dentry->d_name.len;
771         int ret = 0;
772         struct extent_buffer *leaf;
773         struct btrfs_dir_item *di;
774         struct btrfs_key key;
775
776         path = btrfs_alloc_path();
777         if (!path) {
778                 ret = -ENOMEM;
779                 goto err;
780         }
781
782         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
783                                     name, name_len, -1);
784         if (IS_ERR(di)) {
785                 ret = PTR_ERR(di);
786                 goto err;
787         }
788         if (!di) {
789                 ret = -ENOENT;
790                 goto err;
791         }
792         leaf = path->nodes[0];
793         btrfs_dir_item_key_to_cpu(leaf, di, &key);
794         ret = btrfs_delete_one_dir_name(trans, root, path, di);
795         if (ret)
796                 goto err;
797         btrfs_release_path(root, path);
798
799         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
800                                          key.objectid, name, name_len, -1);
801         if (IS_ERR(di)) {
802                 ret = PTR_ERR(di);
803                 goto err;
804         }
805         if (!di) {
806                 ret = -ENOENT;
807                 goto err;
808         }
809         ret = btrfs_delete_one_dir_name(trans, root, path, di);
810
811         dentry->d_inode->i_ctime = dir->i_ctime;
812         ret = btrfs_del_inode_ref(trans, root, name, name_len,
813                                   dentry->d_inode->i_ino,
814                                   dentry->d_parent->d_inode->i_ino);
815         if (ret) {
816                 printk("failed to delete reference to %.*s, "
817                        "inode %lu parent %lu\n", name_len, name,
818                        dentry->d_inode->i_ino,
819                        dentry->d_parent->d_inode->i_ino);
820         }
821 err:
822         btrfs_free_path(path);
823         if (!ret) {
824                 dir->i_size -= name_len * 2;
825                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
826                 btrfs_update_inode(trans, root, dir);
827 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
828                 dentry->d_inode->i_nlink--;
829 #else
830                 drop_nlink(dentry->d_inode);
831 #endif
832                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
833                 dir->i_sb->s_dirt = 1;
834         }
835         return ret;
836 }
837
838 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
839 {
840         struct btrfs_root *root;
841         struct btrfs_trans_handle *trans;
842         struct inode *inode = dentry->d_inode;
843         int ret;
844         unsigned long nr = 0;
845
846         root = BTRFS_I(dir)->root;
847         mutex_lock(&root->fs_info->fs_mutex);
848
849         ret = btrfs_check_free_space(root, 1, 1);
850         if (ret)
851                 goto fail;
852
853         trans = btrfs_start_transaction(root, 1);
854
855         btrfs_set_trans_block_group(trans, dir);
856         ret = btrfs_unlink_trans(trans, root, dir, dentry);
857         nr = trans->blocks_used;
858
859         if (inode->i_nlink == 0) {
860                 int found;
861                 /* if the inode isn't linked anywhere,
862                  * we don't need to worry about
863                  * data=ordered
864                  */
865                 found = btrfs_del_ordered_inode(inode);
866                 if (found == 1) {
867                         atomic_dec(&inode->i_count);
868                 }
869         }
870
871         btrfs_end_transaction(trans, root);
872 fail:
873         mutex_unlock(&root->fs_info->fs_mutex);
874         btrfs_btree_balance_dirty(root, nr);
875         btrfs_throttle(root);
876         return ret;
877 }
878
879 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
880 {
881         struct inode *inode = dentry->d_inode;
882         int err = 0;
883         int ret;
884         struct btrfs_root *root = BTRFS_I(dir)->root;
885         struct btrfs_trans_handle *trans;
886         unsigned long nr = 0;
887
888         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
889                 return -ENOTEMPTY;
890
891         mutex_lock(&root->fs_info->fs_mutex);
892         ret = btrfs_check_free_space(root, 1, 1);
893         if (ret)
894                 goto fail;
895
896         trans = btrfs_start_transaction(root, 1);
897         btrfs_set_trans_block_group(trans, dir);
898
899         /* now the directory is empty */
900         err = btrfs_unlink_trans(trans, root, dir, dentry);
901         if (!err) {
902                 inode->i_size = 0;
903         }
904
905         nr = trans->blocks_used;
906         ret = btrfs_end_transaction(trans, root);
907 fail:
908         mutex_unlock(&root->fs_info->fs_mutex);
909         btrfs_btree_balance_dirty(root, nr);
910         btrfs_throttle(root);
911
912         if (ret && !err)
913                 err = ret;
914         return err;
915 }
916
917 /*
918  * this can truncate away extent items, csum items and directory items.
919  * It starts at a high offset and removes keys until it can't find
920  * any higher than i_size.
921  *
922  * csum items that cross the new i_size are truncated to the new size
923  * as well.
924  */
925 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
926                                    struct btrfs_root *root,
927                                    struct inode *inode,
928                                    u32 min_type)
929 {
930         int ret;
931         struct btrfs_path *path;
932         struct btrfs_key key;
933         struct btrfs_key found_key;
934         u32 found_type;
935         struct extent_buffer *leaf;
936         struct btrfs_file_extent_item *fi;
937         u64 extent_start = 0;
938         u64 extent_num_bytes = 0;
939         u64 item_end = 0;
940         u64 root_gen = 0;
941         u64 root_owner = 0;
942         int found_extent;
943         int del_item;
944         int pending_del_nr = 0;
945         int pending_del_slot = 0;
946         int extent_type = -1;
947         u64 mask = root->sectorsize - 1;
948
949         btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
950         path = btrfs_alloc_path();
951         path->reada = -1;
952         BUG_ON(!path);
953
954         /* FIXME, add redo link to tree so we don't leak on crash */
955         key.objectid = inode->i_ino;
956         key.offset = (u64)-1;
957         key.type = (u8)-1;
958
959         btrfs_init_path(path);
960 search_again:
961         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
962         if (ret < 0) {
963                 goto error;
964         }
965         if (ret > 0) {
966                 BUG_ON(path->slots[0] == 0);
967                 path->slots[0]--;
968         }
969
970         while(1) {
971                 fi = NULL;
972                 leaf = path->nodes[0];
973                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
974                 found_type = btrfs_key_type(&found_key);
975
976                 if (found_key.objectid != inode->i_ino)
977                         break;
978
979                 if (found_type < min_type)
980                         break;
981
982                 item_end = found_key.offset;
983                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
984                         fi = btrfs_item_ptr(leaf, path->slots[0],
985                                             struct btrfs_file_extent_item);
986                         extent_type = btrfs_file_extent_type(leaf, fi);
987                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
988                                 item_end +=
989                                     btrfs_file_extent_num_bytes(leaf, fi);
990                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
991                                 struct btrfs_item *item = btrfs_item_nr(leaf,
992                                                                 path->slots[0]);
993                                 item_end += btrfs_file_extent_inline_len(leaf,
994                                                                          item);
995                         }
996                         item_end--;
997                 }
998                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
999                         ret = btrfs_csum_truncate(trans, root, path,
1000                                                   inode->i_size);
1001                         BUG_ON(ret);
1002                 }
1003                 if (item_end < inode->i_size) {
1004                         if (found_type == BTRFS_DIR_ITEM_KEY) {
1005                                 found_type = BTRFS_INODE_ITEM_KEY;
1006                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
1007                                 found_type = BTRFS_CSUM_ITEM_KEY;
1008                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
1009                                 found_type = BTRFS_XATTR_ITEM_KEY;
1010                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
1011                                 found_type = BTRFS_INODE_REF_KEY;
1012                         } else if (found_type) {
1013                                 found_type--;
1014                         } else {
1015                                 break;
1016                         }
1017                         btrfs_set_key_type(&key, found_type);
1018                         goto next;
1019                 }
1020                 if (found_key.offset >= inode->i_size)
1021                         del_item = 1;
1022                 else
1023                         del_item = 0;
1024                 found_extent = 0;
1025
1026                 /* FIXME, shrink the extent if the ref count is only 1 */
1027                 if (found_type != BTRFS_EXTENT_DATA_KEY)
1028                         goto delete;
1029
1030                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
1031                         u64 num_dec;
1032                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
1033                         if (!del_item) {
1034                                 u64 orig_num_bytes =
1035                                         btrfs_file_extent_num_bytes(leaf, fi);
1036                                 extent_num_bytes = inode->i_size -
1037                                         found_key.offset + root->sectorsize - 1;
1038                                 extent_num_bytes = extent_num_bytes &
1039                                         ~((u64)root->sectorsize - 1);
1040                                 btrfs_set_file_extent_num_bytes(leaf, fi,
1041                                                          extent_num_bytes);
1042                                 num_dec = (orig_num_bytes -
1043                                            extent_num_bytes);
1044                                 if (extent_start != 0)
1045                                         dec_i_blocks(inode, num_dec);
1046                                 btrfs_mark_buffer_dirty(leaf);
1047                         } else {
1048                                 extent_num_bytes =
1049                                         btrfs_file_extent_disk_num_bytes(leaf,
1050                                                                          fi);
1051                                 /* FIXME blocksize != 4096 */
1052                                 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
1053                                 if (extent_start != 0) {
1054                                         found_extent = 1;
1055                                         dec_i_blocks(inode, num_dec);
1056                                 }
1057                                 root_gen = btrfs_header_generation(leaf);
1058                                 root_owner = btrfs_header_owner(leaf);
1059                         }
1060                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1061                         if (!del_item) {
1062                                 u32 newsize = inode->i_size - found_key.offset;
1063                                 dec_i_blocks(inode, item_end + 1 -
1064                                             found_key.offset - newsize);
1065                                 newsize =
1066                                     btrfs_file_extent_calc_inline_size(newsize);
1067                                 ret = btrfs_truncate_item(trans, root, path,
1068                                                           newsize, 1);
1069                                 BUG_ON(ret);
1070                         } else {
1071                                 dec_i_blocks(inode, item_end + 1 -
1072                                              found_key.offset);
1073                         }
1074                 }
1075 delete:
1076                 if (del_item) {
1077                         if (!pending_del_nr) {
1078                                 /* no pending yet, add ourselves */
1079                                 pending_del_slot = path->slots[0];
1080                                 pending_del_nr = 1;
1081                         } else if (pending_del_nr &&
1082                                    path->slots[0] + 1 == pending_del_slot) {
1083                                 /* hop on the pending chunk */
1084                                 pending_del_nr++;
1085                                 pending_del_slot = path->slots[0];
1086                         } else {
1087                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
1088                         }
1089                 } else {
1090                         break;
1091                 }
1092                 if (found_extent) {
1093                         ret = btrfs_free_extent(trans, root, extent_start,
1094                                                 extent_num_bytes,
1095                                                 root_owner,
1096                                                 root_gen, inode->i_ino,
1097                                                 found_key.offset, 0);
1098                         BUG_ON(ret);
1099                 }
1100 next:
1101                 if (path->slots[0] == 0) {
1102                         if (pending_del_nr)
1103                                 goto del_pending;
1104                         btrfs_release_path(root, path);
1105                         goto search_again;
1106                 }
1107
1108                 path->slots[0]--;
1109                 if (pending_del_nr &&
1110                     path->slots[0] + 1 != pending_del_slot) {
1111                         struct btrfs_key debug;
1112 del_pending:
1113                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
1114                                               pending_del_slot);
1115                         ret = btrfs_del_items(trans, root, path,
1116                                               pending_del_slot,
1117                                               pending_del_nr);
1118                         BUG_ON(ret);
1119                         pending_del_nr = 0;
1120                         btrfs_release_path(root, path);
1121                         goto search_again;
1122                 }
1123         }
1124         ret = 0;
1125 error:
1126         if (pending_del_nr) {
1127                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
1128                                       pending_del_nr);
1129         }
1130         btrfs_release_path(root, path);
1131         btrfs_free_path(path);
1132         inode->i_sb->s_dirt = 1;
1133         return ret;
1134 }
1135
1136 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
1137                               size_t zero_start)
1138 {
1139         char *kaddr;
1140         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1141         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1142         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1143         int ret = 0;
1144
1145         WARN_ON(!PageLocked(page));
1146         set_page_extent_mapped(page);
1147
1148         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1149         set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1150                             page_end, GFP_NOFS);
1151
1152         if (zero_start != PAGE_CACHE_SIZE) {
1153                 kaddr = kmap(page);
1154                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
1155                 flush_dcache_page(page);
1156                 kunmap(page);
1157         }
1158         set_page_dirty(page);
1159         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1160
1161         return ret;
1162 }
1163
1164 /*
1165  * taken from block_truncate_page, but does cow as it zeros out
1166  * any bytes left in the last page in the file.
1167  */
1168 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1169 {
1170         struct inode *inode = mapping->host;
1171         struct btrfs_root *root = BTRFS_I(inode)->root;
1172         u32 blocksize = root->sectorsize;
1173         pgoff_t index = from >> PAGE_CACHE_SHIFT;
1174         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1175         struct page *page;
1176         int ret = 0;
1177         u64 page_start;
1178
1179         if ((offset & (blocksize - 1)) == 0)
1180                 goto out;
1181
1182         ret = -ENOMEM;
1183 again:
1184         page = grab_cache_page(mapping, index);
1185         if (!page)
1186                 goto out;
1187         if (!PageUptodate(page)) {
1188                 ret = btrfs_readpage(NULL, page);
1189                 lock_page(page);
1190                 if (page->mapping != mapping) {
1191                         unlock_page(page);
1192                         page_cache_release(page);
1193                         goto again;
1194                 }
1195                 if (!PageUptodate(page)) {
1196                         ret = -EIO;
1197                         goto out;
1198                 }
1199         }
1200
1201         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1202         wait_on_page_writeback(page);
1203         ret = btrfs_cow_one_page(inode, page, offset);
1204
1205         unlock_page(page);
1206         page_cache_release(page);
1207 out:
1208         return ret;
1209 }
1210
1211 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1212 {
1213         struct inode *inode = dentry->d_inode;
1214         int err;
1215
1216         err = inode_change_ok(inode, attr);
1217         if (err)
1218                 return err;
1219
1220         if (S_ISREG(inode->i_mode) &&
1221             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1222                 struct btrfs_trans_handle *trans;
1223                 struct btrfs_root *root = BTRFS_I(inode)->root;
1224                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1225
1226                 u64 mask = root->sectorsize - 1;
1227                 u64 hole_start = (inode->i_size + mask) & ~mask;
1228                 u64 block_end = (attr->ia_size + mask) & ~mask;
1229                 u64 hole_size;
1230                 u64 alloc_hint = 0;
1231
1232                 if (attr->ia_size <= hole_start)
1233                         goto out;
1234
1235                 mutex_lock(&root->fs_info->fs_mutex);
1236                 err = btrfs_check_free_space(root, 1, 0);
1237                 mutex_unlock(&root->fs_info->fs_mutex);
1238                 if (err)
1239                         goto fail;
1240
1241                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1242
1243                 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1244                 hole_size = block_end - hole_start;
1245
1246                 mutex_lock(&root->fs_info->fs_mutex);
1247                 trans = btrfs_start_transaction(root, 1);
1248                 btrfs_set_trans_block_group(trans, inode);
1249                 err = btrfs_drop_extents(trans, root, inode,
1250                                          hole_start, block_end, hole_start,
1251                                          &alloc_hint);
1252
1253                 if (alloc_hint != EXTENT_MAP_INLINE) {
1254                         err = btrfs_insert_file_extent(trans, root,
1255                                                        inode->i_ino,
1256                                                        hole_start, 0, 0,
1257                                                        hole_size, 0);
1258                         btrfs_drop_extent_cache(inode, hole_start,
1259                                                 (u64)-1);
1260                         btrfs_check_file(root, inode);
1261                 }
1262                 btrfs_end_transaction(trans, root);
1263                 mutex_unlock(&root->fs_info->fs_mutex);
1264                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1265                 if (err)
1266                         return err;
1267         }
1268 out:
1269         err = inode_setattr(inode, attr);
1270 fail:
1271         return err;
1272 }
1273
1274 void btrfs_put_inode(struct inode *inode)
1275 {
1276         int ret;
1277
1278         if (!BTRFS_I(inode)->ordered_trans) {
1279                 return;
1280         }
1281
1282         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1283             mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1284                 return;
1285
1286         ret = btrfs_del_ordered_inode(inode);
1287         if (ret == 1) {
1288                 atomic_dec(&inode->i_count);
1289         }
1290 }
1291
1292 void btrfs_delete_inode(struct inode *inode)
1293 {
1294         struct btrfs_trans_handle *trans;
1295         struct btrfs_root *root = BTRFS_I(inode)->root;
1296         unsigned long nr;
1297         int ret;
1298
1299         truncate_inode_pages(&inode->i_data, 0);
1300         if (is_bad_inode(inode)) {
1301                 goto no_delete;
1302         }
1303
1304         inode->i_size = 0;
1305         mutex_lock(&root->fs_info->fs_mutex);
1306         trans = btrfs_start_transaction(root, 1);
1307
1308         btrfs_set_trans_block_group(trans, inode);
1309         ret = btrfs_truncate_in_trans(trans, root, inode, 0);
1310         if (ret)
1311                 goto no_delete_lock;
1312
1313         nr = trans->blocks_used;
1314         clear_inode(inode);
1315
1316         btrfs_end_transaction(trans, root);
1317         mutex_unlock(&root->fs_info->fs_mutex);
1318         btrfs_btree_balance_dirty(root, nr);
1319         btrfs_throttle(root);
1320         return;
1321
1322 no_delete_lock:
1323         nr = trans->blocks_used;
1324         btrfs_end_transaction(trans, root);
1325         mutex_unlock(&root->fs_info->fs_mutex);
1326         btrfs_btree_balance_dirty(root, nr);
1327         btrfs_throttle(root);
1328 no_delete:
1329         clear_inode(inode);
1330 }
1331
1332 /*
1333  * this returns the key found in the dir entry in the location pointer.
1334  * If no dir entries were found, location->objectid is 0.
1335  */
1336 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1337                                struct btrfs_key *location)
1338 {
1339         const char *name = dentry->d_name.name;
1340         int namelen = dentry->d_name.len;
1341         struct btrfs_dir_item *di;
1342         struct btrfs_path *path;
1343         struct btrfs_root *root = BTRFS_I(dir)->root;
1344         int ret = 0;
1345
1346         if (namelen == 1 && strcmp(name, ".") == 0) {
1347                 location->objectid = dir->i_ino;
1348                 location->type = BTRFS_INODE_ITEM_KEY;
1349                 location->offset = 0;
1350                 return 0;
1351         }
1352         path = btrfs_alloc_path();
1353         BUG_ON(!path);
1354
1355         if (namelen == 2 && strcmp(name, "..") == 0) {
1356                 struct btrfs_key key;
1357                 struct extent_buffer *leaf;
1358                 u32 nritems;
1359                 int slot;
1360
1361                 key.objectid = dir->i_ino;
1362                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1363                 key.offset = 0;
1364                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1365                 BUG_ON(ret == 0);
1366                 ret = 0;
1367
1368                 leaf = path->nodes[0];
1369                 slot = path->slots[0];
1370                 nritems = btrfs_header_nritems(leaf);
1371                 if (slot >= nritems)
1372                         goto out_err;
1373
1374                 btrfs_item_key_to_cpu(leaf, &key, slot);
1375                 if (key.objectid != dir->i_ino ||
1376                     key.type != BTRFS_INODE_REF_KEY) {
1377                         goto out_err;
1378                 }
1379                 location->objectid = key.offset;
1380                 location->type = BTRFS_INODE_ITEM_KEY;
1381                 location->offset = 0;
1382                 goto out;
1383         }
1384
1385         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1386                                     namelen, 0);
1387         if (IS_ERR(di))
1388                 ret = PTR_ERR(di);
1389         if (!di || IS_ERR(di)) {
1390                 goto out_err;
1391         }
1392         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1393 out:
1394         btrfs_free_path(path);
1395         return ret;
1396 out_err:
1397         location->objectid = 0;
1398         goto out;
1399 }
1400
1401 /*
1402  * when we hit a tree root in a directory, the btrfs part of the inode
1403  * needs to be changed to reflect the root directory of the tree root.  This
1404  * is kind of like crossing a mount point.
1405  */
1406 static int fixup_tree_root_location(struct btrfs_root *root,
1407                              struct btrfs_key *location,
1408                              struct btrfs_root **sub_root,
1409                              struct dentry *dentry)
1410 {
1411         struct btrfs_path *path;
1412         struct btrfs_root_item *ri;
1413
1414         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1415                 return 0;
1416         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1417                 return 0;
1418
1419         path = btrfs_alloc_path();
1420         BUG_ON(!path);
1421         mutex_lock(&root->fs_info->fs_mutex);
1422
1423         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1424                                         dentry->d_name.name,
1425                                         dentry->d_name.len);
1426         if (IS_ERR(*sub_root))
1427                 return PTR_ERR(*sub_root);
1428
1429         ri = &(*sub_root)->root_item;
1430         location->objectid = btrfs_root_dirid(ri);
1431         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1432         location->offset = 0;
1433
1434         btrfs_free_path(path);
1435         mutex_unlock(&root->fs_info->fs_mutex);
1436         return 0;
1437 }
1438
1439 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1440 {
1441         struct btrfs_iget_args *args = p;
1442         inode->i_ino = args->ino;
1443         BTRFS_I(inode)->root = args->root;
1444         BTRFS_I(inode)->delalloc_bytes = 0;
1445         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1446         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1447                              inode->i_mapping, GFP_NOFS);
1448         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1449                              inode->i_mapping, GFP_NOFS);
1450         atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1451         return 0;
1452 }
1453
1454 static int btrfs_find_actor(struct inode *inode, void *opaque)
1455 {
1456         struct btrfs_iget_args *args = opaque;
1457         return (args->ino == inode->i_ino &&
1458                 args->root == BTRFS_I(inode)->root);
1459 }
1460
1461 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1462                             u64 root_objectid)
1463 {
1464         struct btrfs_iget_args args;
1465         args.ino = objectid;
1466         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1467
1468         if (!args.root)
1469                 return NULL;
1470
1471         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1472 }
1473
1474 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1475                                 struct btrfs_root *root)
1476 {
1477         struct inode *inode;
1478         struct btrfs_iget_args args;
1479         args.ino = objectid;
1480         args.root = root;
1481
1482         inode = iget5_locked(s, objectid, btrfs_find_actor,
1483                              btrfs_init_locked_inode,
1484                              (void *)&args);
1485         return inode;
1486 }
1487
1488 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1489                                    struct nameidata *nd)
1490 {
1491         struct inode * inode;
1492         struct btrfs_inode *bi = BTRFS_I(dir);
1493         struct btrfs_root *root = bi->root;
1494         struct btrfs_root *sub_root = root;
1495         struct btrfs_key location;
1496         int ret;
1497
1498         if (dentry->d_name.len > BTRFS_NAME_LEN)
1499                 return ERR_PTR(-ENAMETOOLONG);
1500
1501         mutex_lock(&root->fs_info->fs_mutex);
1502         ret = btrfs_inode_by_name(dir, dentry, &location);
1503         mutex_unlock(&root->fs_info->fs_mutex);
1504
1505         if (ret < 0)
1506                 return ERR_PTR(ret);
1507
1508         inode = NULL;
1509         if (location.objectid) {
1510                 ret = fixup_tree_root_location(root, &location, &sub_root,
1511                                                 dentry);
1512                 if (ret < 0)
1513                         return ERR_PTR(ret);
1514                 if (ret > 0)
1515                         return ERR_PTR(-ENOENT);
1516                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1517                                           sub_root);
1518                 if (!inode)
1519                         return ERR_PTR(-EACCES);
1520                 if (inode->i_state & I_NEW) {
1521                         /* the inode and parent dir are two different roots */
1522                         if (sub_root != root) {
1523                                 igrab(inode);
1524                                 sub_root->inode = inode;
1525                         }
1526                         BTRFS_I(inode)->root = sub_root;
1527                         memcpy(&BTRFS_I(inode)->location, &location,
1528                                sizeof(location));
1529                         btrfs_read_locked_inode(inode);
1530                         unlock_new_inode(inode);
1531                 }
1532         }
1533         return d_splice_alias(inode, dentry);
1534 }
1535
1536 static unsigned char btrfs_filetype_table[] = {
1537         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1538 };
1539
1540 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1541 {
1542         struct inode *inode = filp->f_dentry->d_inode;
1543         struct btrfs_root *root = BTRFS_I(inode)->root;
1544         struct btrfs_item *item;
1545         struct btrfs_dir_item *di;
1546         struct btrfs_key key;
1547         struct btrfs_key found_key;
1548         struct btrfs_path *path;
1549         int ret;
1550         u32 nritems;
1551         struct extent_buffer *leaf;
1552         int slot;
1553         int advance;
1554         unsigned char d_type;
1555         int over = 0;
1556         u32 di_cur;
1557         u32 di_total;
1558         u32 di_len;
1559         int key_type = BTRFS_DIR_INDEX_KEY;
1560         char tmp_name[32];
1561         char *name_ptr;
1562         int name_len;
1563
1564         /* FIXME, use a real flag for deciding about the key type */
1565         if (root->fs_info->tree_root == root)
1566                 key_type = BTRFS_DIR_ITEM_KEY;
1567
1568         /* special case for "." */
1569         if (filp->f_pos == 0) {
1570                 over = filldir(dirent, ".", 1,
1571                                1, inode->i_ino,
1572                                DT_DIR);
1573                 if (over)
1574                         return 0;
1575                 filp->f_pos = 1;
1576         }
1577
1578         mutex_lock(&root->fs_info->fs_mutex);
1579         key.objectid = inode->i_ino;
1580         path = btrfs_alloc_path();
1581         path->reada = 2;
1582
1583         /* special case for .., just use the back ref */
1584         if (filp->f_pos == 1) {
1585                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1586                 key.offset = 0;
1587                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1588                 BUG_ON(ret == 0);
1589                 leaf = path->nodes[0];
1590                 slot = path->slots[0];
1591                 nritems = btrfs_header_nritems(leaf);
1592                 if (slot >= nritems) {
1593                         btrfs_release_path(root, path);
1594                         goto read_dir_items;
1595                 }
1596                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1597                 btrfs_release_path(root, path);
1598                 if (found_key.objectid != key.objectid ||
1599                     found_key.type != BTRFS_INODE_REF_KEY)
1600                         goto read_dir_items;
1601                 over = filldir(dirent, "..", 2,
1602                                2, found_key.offset, DT_DIR);
1603                 if (over)
1604                         goto nopos;
1605                 filp->f_pos = 2;
1606         }
1607
1608 read_dir_items:
1609         btrfs_set_key_type(&key, key_type);
1610         key.offset = filp->f_pos;
1611
1612         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1613         if (ret < 0)
1614                 goto err;
1615         advance = 0;
1616         while(1) {
1617                 leaf = path->nodes[0];
1618                 nritems = btrfs_header_nritems(leaf);
1619                 slot = path->slots[0];
1620                 if (advance || slot >= nritems) {
1621                         if (slot >= nritems -1) {
1622                                 ret = btrfs_next_leaf(root, path);
1623                                 if (ret)
1624                                         break;
1625                                 leaf = path->nodes[0];
1626                                 nritems = btrfs_header_nritems(leaf);
1627                                 slot = path->slots[0];
1628                         } else {
1629                                 slot++;
1630                                 path->slots[0]++;
1631                         }
1632                 }
1633                 advance = 1;
1634                 item = btrfs_item_nr(leaf, slot);
1635                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1636
1637                 if (found_key.objectid != key.objectid)
1638                         break;
1639                 if (btrfs_key_type(&found_key) != key_type)
1640                         break;
1641                 if (found_key.offset < filp->f_pos)
1642                         continue;
1643
1644                 filp->f_pos = found_key.offset;
1645                 advance = 1;
1646                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1647                 di_cur = 0;
1648                 di_total = btrfs_item_size(leaf, item);
1649                 while(di_cur < di_total) {
1650                         struct btrfs_key location;
1651
1652                         name_len = btrfs_dir_name_len(leaf, di);
1653                         if (name_len < 32) {
1654                                 name_ptr = tmp_name;
1655                         } else {
1656                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1657                                 BUG_ON(!name_ptr);
1658                         }
1659                         read_extent_buffer(leaf, name_ptr,
1660                                            (unsigned long)(di + 1), name_len);
1661
1662                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1663                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1664                         over = filldir(dirent, name_ptr, name_len,
1665                                        found_key.offset,
1666                                        location.objectid,
1667                                        d_type);
1668
1669                         if (name_ptr != tmp_name)
1670                                 kfree(name_ptr);
1671
1672                         if (over)
1673                                 goto nopos;
1674                         di_len = btrfs_dir_name_len(leaf, di) +
1675                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1676                         di_cur += di_len;
1677                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1678                 }
1679         }
1680         if (key_type == BTRFS_DIR_INDEX_KEY)
1681                 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
1682         else
1683                 filp->f_pos++;
1684 nopos:
1685         ret = 0;
1686 err:
1687         btrfs_release_path(root, path);
1688         btrfs_free_path(path);
1689         mutex_unlock(&root->fs_info->fs_mutex);
1690         return ret;
1691 }
1692
1693 int btrfs_write_inode(struct inode *inode, int wait)
1694 {
1695         struct btrfs_root *root = BTRFS_I(inode)->root;
1696         struct btrfs_trans_handle *trans;
1697         int ret = 0;
1698
1699         if (wait) {
1700                 mutex_lock(&root->fs_info->fs_mutex);
1701                 trans = btrfs_start_transaction(root, 1);
1702                 btrfs_set_trans_block_group(trans, inode);
1703                 ret = btrfs_commit_transaction(trans, root);
1704                 mutex_unlock(&root->fs_info->fs_mutex);
1705         }
1706         return ret;
1707 }
1708
1709 /*
1710  * This is somewhat expensive, updating the tree every time the
1711  * inode changes.  But, it is most likely to find the inode in cache.
1712  * FIXME, needs more benchmarking...there are no reasons other than performance
1713  * to keep or drop this code.
1714  */
1715 void btrfs_dirty_inode(struct inode *inode)
1716 {
1717         struct btrfs_root *root = BTRFS_I(inode)->root;
1718         struct btrfs_trans_handle *trans;
1719
1720         mutex_lock(&root->fs_info->fs_mutex);
1721         trans = btrfs_start_transaction(root, 1);
1722         btrfs_set_trans_block_group(trans, inode);
1723         btrfs_update_inode(trans, root, inode);
1724         btrfs_end_transaction(trans, root);
1725         mutex_unlock(&root->fs_info->fs_mutex);
1726 }
1727
1728 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1729                                      struct btrfs_root *root,
1730                                      const char *name, int name_len,
1731                                      u64 ref_objectid,
1732                                      u64 objectid,
1733                                      struct btrfs_block_group_cache *group,
1734                                      int mode)
1735 {
1736         struct inode *inode;
1737         struct btrfs_inode_item *inode_item;
1738         struct btrfs_block_group_cache *new_inode_group;
1739         struct btrfs_key *location;
1740         struct btrfs_path *path;
1741         struct btrfs_inode_ref *ref;
1742         struct btrfs_key key[2];
1743         u32 sizes[2];
1744         unsigned long ptr;
1745         int ret;
1746         int owner;
1747
1748         path = btrfs_alloc_path();
1749         BUG_ON(!path);
1750
1751         inode = new_inode(root->fs_info->sb);
1752         if (!inode)
1753                 return ERR_PTR(-ENOMEM);
1754
1755         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1756         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1757                              inode->i_mapping, GFP_NOFS);
1758         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1759                              inode->i_mapping, GFP_NOFS);
1760         atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1761         BTRFS_I(inode)->delalloc_bytes = 0;
1762         BTRFS_I(inode)->root = root;
1763
1764         if (mode & S_IFDIR)
1765                 owner = 0;
1766         else
1767                 owner = 1;
1768         new_inode_group = btrfs_find_block_group(root, group, 0,
1769                                        BTRFS_BLOCK_GROUP_METADATA, owner);
1770         if (!new_inode_group) {
1771                 printk("find_block group failed\n");
1772                 new_inode_group = group;
1773         }
1774         BTRFS_I(inode)->block_group = new_inode_group;
1775         BTRFS_I(inode)->flags = 0;
1776
1777         key[0].objectid = objectid;
1778         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
1779         key[0].offset = 0;
1780
1781         key[1].objectid = objectid;
1782         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
1783         key[1].offset = ref_objectid;
1784
1785         sizes[0] = sizeof(struct btrfs_inode_item);
1786         sizes[1] = name_len + sizeof(*ref);
1787
1788         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
1789         if (ret != 0)
1790                 goto fail;
1791
1792         if (objectid > root->highest_inode)
1793                 root->highest_inode = objectid;
1794
1795         inode->i_uid = current->fsuid;
1796         inode->i_gid = current->fsgid;
1797         inode->i_mode = mode;
1798         inode->i_ino = objectid;
1799         inode->i_blocks = 0;
1800         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1801         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1802                                   struct btrfs_inode_item);
1803         fill_inode_item(path->nodes[0], inode_item, inode);
1804
1805         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1806                              struct btrfs_inode_ref);
1807         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
1808         ptr = (unsigned long)(ref + 1);
1809         write_extent_buffer(path->nodes[0], name, ptr, name_len);
1810
1811         btrfs_mark_buffer_dirty(path->nodes[0]);
1812         btrfs_free_path(path);
1813
1814         location = &BTRFS_I(inode)->location;
1815         location->objectid = objectid;
1816         location->offset = 0;
1817         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1818
1819         insert_inode_hash(inode);
1820         return inode;
1821 fail:
1822         btrfs_free_path(path);
1823         return ERR_PTR(ret);
1824 }
1825
1826 static inline u8 btrfs_inode_type(struct inode *inode)
1827 {
1828         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1829 }
1830
1831 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1832                             struct dentry *dentry, struct inode *inode,
1833                             int add_backref)
1834 {
1835         int ret;
1836         struct btrfs_key key;
1837         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1838         struct inode *parent_inode;
1839
1840         key.objectid = inode->i_ino;
1841         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1842         key.offset = 0;
1843
1844         ret = btrfs_insert_dir_item(trans, root,
1845                                     dentry->d_name.name, dentry->d_name.len,
1846                                     dentry->d_parent->d_inode->i_ino,
1847                                     &key, btrfs_inode_type(inode));
1848         if (ret == 0) {
1849                 if (add_backref) {
1850                         ret = btrfs_insert_inode_ref(trans, root,
1851                                              dentry->d_name.name,
1852                                              dentry->d_name.len,
1853                                              inode->i_ino,
1854                                              dentry->d_parent->d_inode->i_ino);
1855                 }
1856                 parent_inode = dentry->d_parent->d_inode;
1857                 parent_inode->i_size += dentry->d_name.len * 2;
1858                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1859                 ret = btrfs_update_inode(trans, root,
1860                                          dentry->d_parent->d_inode);
1861         }
1862         return ret;
1863 }
1864
1865 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1866                             struct dentry *dentry, struct inode *inode,
1867                             int backref)
1868 {
1869         int err = btrfs_add_link(trans, dentry, inode, backref);
1870         if (!err) {
1871                 d_instantiate(dentry, inode);
1872                 return 0;
1873         }
1874         if (err > 0)
1875                 err = -EEXIST;
1876         return err;
1877 }
1878
1879 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1880                         int mode, dev_t rdev)
1881 {
1882         struct btrfs_trans_handle *trans;
1883         struct btrfs_root *root = BTRFS_I(dir)->root;
1884         struct inode *inode = NULL;
1885         int err;
1886         int drop_inode = 0;
1887         u64 objectid;
1888         unsigned long nr = 0;
1889
1890         if (!new_valid_dev(rdev))
1891                 return -EINVAL;
1892
1893         mutex_lock(&root->fs_info->fs_mutex);
1894         err = btrfs_check_free_space(root, 1, 0);
1895         if (err)
1896                 goto fail;
1897
1898         trans = btrfs_start_transaction(root, 1);
1899         btrfs_set_trans_block_group(trans, dir);
1900
1901         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1902         if (err) {
1903                 err = -ENOSPC;
1904                 goto out_unlock;
1905         }
1906
1907         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1908                                 dentry->d_name.len,
1909                                 dentry->d_parent->d_inode->i_ino, objectid,
1910                                 BTRFS_I(dir)->block_group, mode);
1911         err = PTR_ERR(inode);
1912         if (IS_ERR(inode))
1913                 goto out_unlock;
1914
1915         btrfs_set_trans_block_group(trans, inode);
1916         err = btrfs_add_nondir(trans, dentry, inode, 0);
1917         if (err)
1918                 drop_inode = 1;
1919         else {
1920                 inode->i_op = &btrfs_special_inode_operations;
1921                 init_special_inode(inode, inode->i_mode, rdev);
1922                 btrfs_update_inode(trans, root, inode);
1923         }
1924         dir->i_sb->s_dirt = 1;
1925         btrfs_update_inode_block_group(trans, inode);
1926         btrfs_update_inode_block_group(trans, dir);
1927 out_unlock:
1928         nr = trans->blocks_used;
1929         btrfs_end_transaction(trans, root);
1930 fail:
1931         mutex_unlock(&root->fs_info->fs_mutex);
1932
1933         if (drop_inode) {
1934                 inode_dec_link_count(inode);
1935                 iput(inode);
1936         }
1937         btrfs_btree_balance_dirty(root, nr);
1938         btrfs_throttle(root);
1939         return err;
1940 }
1941
1942 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1943                         int mode, struct nameidata *nd)
1944 {
1945         struct btrfs_trans_handle *trans;
1946         struct btrfs_root *root = BTRFS_I(dir)->root;
1947         struct inode *inode = NULL;
1948         int err;
1949         int drop_inode = 0;
1950         unsigned long nr = 0;
1951         u64 objectid;
1952
1953         mutex_lock(&root->fs_info->fs_mutex);
1954         err = btrfs_check_free_space(root, 1, 0);
1955         if (err)
1956                 goto fail;
1957         trans = btrfs_start_transaction(root, 1);
1958         btrfs_set_trans_block_group(trans, dir);
1959
1960         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1961         if (err) {
1962                 err = -ENOSPC;
1963                 goto out_unlock;
1964         }
1965
1966         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1967                                 dentry->d_name.len,
1968                                 dentry->d_parent->d_inode->i_ino,
1969                                 objectid, BTRFS_I(dir)->block_group, mode);
1970         err = PTR_ERR(inode);
1971         if (IS_ERR(inode))
1972                 goto out_unlock;
1973
1974         btrfs_set_trans_block_group(trans, inode);
1975         err = btrfs_add_nondir(trans, dentry, inode, 0);
1976         if (err)
1977                 drop_inode = 1;
1978         else {
1979                 inode->i_mapping->a_ops = &btrfs_aops;
1980                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1981                 inode->i_fop = &btrfs_file_operations;
1982                 inode->i_op = &btrfs_file_inode_operations;
1983                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1984                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1985                                      inode->i_mapping, GFP_NOFS);
1986                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1987                                      inode->i_mapping, GFP_NOFS);
1988                 BTRFS_I(inode)->delalloc_bytes = 0;
1989                 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1990                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1991         }
1992         dir->i_sb->s_dirt = 1;
1993         btrfs_update_inode_block_group(trans, inode);
1994         btrfs_update_inode_block_group(trans, dir);
1995 out_unlock:
1996         nr = trans->blocks_used;
1997         btrfs_end_transaction(trans, root);
1998 fail:
1999         mutex_unlock(&root->fs_info->fs_mutex);
2000
2001         if (drop_inode) {
2002                 inode_dec_link_count(inode);
2003                 iput(inode);
2004         }
2005         btrfs_btree_balance_dirty(root, nr);
2006         btrfs_throttle(root);
2007         return err;
2008 }
2009
2010 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
2011                       struct dentry *dentry)
2012 {
2013         struct btrfs_trans_handle *trans;
2014         struct btrfs_root *root = BTRFS_I(dir)->root;
2015         struct inode *inode = old_dentry->d_inode;
2016         unsigned long nr = 0;
2017         int err;
2018         int drop_inode = 0;
2019
2020         if (inode->i_nlink == 0)
2021                 return -ENOENT;
2022
2023 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2024         inode->i_nlink++;
2025 #else
2026         inc_nlink(inode);
2027 #endif
2028         mutex_lock(&root->fs_info->fs_mutex);
2029         err = btrfs_check_free_space(root, 1, 0);
2030         if (err)
2031                 goto fail;
2032         trans = btrfs_start_transaction(root, 1);
2033
2034         btrfs_set_trans_block_group(trans, dir);
2035         atomic_inc(&inode->i_count);
2036         err = btrfs_add_nondir(trans, dentry, inode, 1);
2037
2038         if (err)
2039                 drop_inode = 1;
2040
2041         dir->i_sb->s_dirt = 1;
2042         btrfs_update_inode_block_group(trans, dir);
2043         err = btrfs_update_inode(trans, root, inode);
2044
2045         if (err)
2046                 drop_inode = 1;
2047
2048         nr = trans->blocks_used;
2049         btrfs_end_transaction(trans, root);
2050 fail:
2051         mutex_unlock(&root->fs_info->fs_mutex);
2052
2053         if (drop_inode) {
2054                 inode_dec_link_count(inode);
2055                 iput(inode);
2056         }
2057         btrfs_btree_balance_dirty(root, nr);
2058         btrfs_throttle(root);
2059         return err;
2060 }
2061
2062 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2063 {
2064         struct inode *inode = NULL;
2065         struct btrfs_trans_handle *trans;
2066         struct btrfs_root *root = BTRFS_I(dir)->root;
2067         int err = 0;
2068         int drop_on_err = 0;
2069         u64 objectid = 0;
2070         unsigned long nr = 1;
2071
2072         mutex_lock(&root->fs_info->fs_mutex);
2073         err = btrfs_check_free_space(root, 1, 0);
2074         if (err)
2075                 goto out_unlock;
2076
2077         trans = btrfs_start_transaction(root, 1);
2078         btrfs_set_trans_block_group(trans, dir);
2079
2080         if (IS_ERR(trans)) {
2081                 err = PTR_ERR(trans);
2082                 goto out_unlock;
2083         }
2084
2085         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2086         if (err) {
2087                 err = -ENOSPC;
2088                 goto out_unlock;
2089         }
2090
2091         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
2092                                 dentry->d_name.len,
2093                                 dentry->d_parent->d_inode->i_ino, objectid,
2094                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
2095         if (IS_ERR(inode)) {
2096                 err = PTR_ERR(inode);
2097                 goto out_fail;
2098         }
2099
2100         drop_on_err = 1;
2101         inode->i_op = &btrfs_dir_inode_operations;
2102         inode->i_fop = &btrfs_dir_file_operations;
2103         btrfs_set_trans_block_group(trans, inode);
2104
2105         inode->i_size = 0;
2106         err = btrfs_update_inode(trans, root, inode);
2107         if (err)
2108                 goto out_fail;
2109
2110         err = btrfs_add_link(trans, dentry, inode, 0);
2111         if (err)
2112                 goto out_fail;
2113
2114         d_instantiate(dentry, inode);
2115         drop_on_err = 0;
2116         dir->i_sb->s_dirt = 1;
2117         btrfs_update_inode_block_group(trans, inode);
2118         btrfs_update_inode_block_group(trans, dir);
2119
2120 out_fail:
2121         nr = trans->blocks_used;
2122         btrfs_end_transaction(trans, root);
2123
2124 out_unlock:
2125         mutex_unlock(&root->fs_info->fs_mutex);
2126         if (drop_on_err)
2127                 iput(inode);
2128         btrfs_btree_balance_dirty(root, nr);
2129         btrfs_throttle(root);
2130         return err;
2131 }
2132
2133 static int merge_extent_mapping(struct extent_map_tree *em_tree,
2134                                 struct extent_map *existing,
2135                                 struct extent_map *em)
2136 {
2137         u64 start_diff;
2138         u64 new_end;
2139         int ret = 0;
2140         int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
2141
2142         if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
2143                 goto invalid;
2144
2145         if (!real_blocks && em->block_start != existing->block_start)
2146                 goto invalid;
2147
2148         new_end = max(existing->start + existing->len, em->start + em->len);
2149
2150         if (existing->start >= em->start) {
2151                 if (em->start + em->len < existing->start)
2152                         goto invalid;
2153
2154                 start_diff = existing->start - em->start;
2155                 if (real_blocks && em->block_start + start_diff !=
2156                     existing->block_start)
2157                         goto invalid;
2158
2159                 em->len = new_end - em->start;
2160
2161                 remove_extent_mapping(em_tree, existing);
2162                 /* free for the tree */
2163                 free_extent_map(existing);
2164                 ret = add_extent_mapping(em_tree, em);
2165
2166         } else if (em->start > existing->start) {
2167
2168                 if (existing->start + existing->len < em->start)
2169                         goto invalid;
2170
2171                 start_diff = em->start - existing->start;
2172                 if (real_blocks && existing->block_start + start_diff !=
2173                     em->block_start)
2174                         goto invalid;
2175
2176                 remove_extent_mapping(em_tree, existing);
2177                 em->block_start = existing->block_start;
2178                 em->start = existing->start;
2179                 em->len = new_end - existing->start;
2180                 free_extent_map(existing);
2181
2182                 ret = add_extent_mapping(em_tree, em);
2183         } else {
2184                 goto invalid;
2185         }
2186         return ret;
2187
2188 invalid:
2189         printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
2190                existing->start, existing->len, existing->block_start,
2191                em->start, em->len, em->block_start);
2192         return -EIO;
2193 }
2194
2195 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
2196                                     size_t pg_offset, u64 start, u64 len,
2197                                     int create)
2198 {
2199         int ret;
2200         int err = 0;
2201         u64 bytenr;
2202         u64 extent_start = 0;
2203         u64 extent_end = 0;
2204         u64 objectid = inode->i_ino;
2205         u32 found_type;
2206         struct btrfs_path *path;
2207         struct btrfs_root *root = BTRFS_I(inode)->root;
2208         struct btrfs_file_extent_item *item;
2209         struct extent_buffer *leaf;
2210         struct btrfs_key found_key;
2211         struct extent_map *em = NULL;
2212         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2213         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2214         struct btrfs_trans_handle *trans = NULL;
2215
2216         path = btrfs_alloc_path();
2217         BUG_ON(!path);
2218         mutex_lock(&root->fs_info->fs_mutex);
2219
2220 again:
2221         spin_lock(&em_tree->lock);
2222         em = lookup_extent_mapping(em_tree, start, len);
2223         if (em)
2224                 em->bdev = root->fs_info->fs_devices->latest_bdev;
2225         spin_unlock(&em_tree->lock);
2226
2227         if (em) {
2228                 if (em->start > start || em->start + em->len <= start)
2229                         free_extent_map(em);
2230                 else if (em->block_start == EXTENT_MAP_INLINE && page)
2231                         free_extent_map(em);
2232                 else
2233                         goto out;
2234         }
2235         em = alloc_extent_map(GFP_NOFS);
2236         if (!em) {
2237                 err = -ENOMEM;
2238                 goto out;
2239         }
2240
2241         em->start = EXTENT_MAP_HOLE;
2242         em->len = (u64)-1;
2243         em->bdev = root->fs_info->fs_devices->latest_bdev;
2244         ret = btrfs_lookup_file_extent(trans, root, path,
2245                                        objectid, start, trans != NULL);
2246         if (ret < 0) {
2247                 err = ret;
2248                 goto out;
2249         }
2250
2251         if (ret != 0) {
2252                 if (path->slots[0] == 0)
2253                         goto not_found;
2254                 path->slots[0]--;
2255         }
2256
2257         leaf = path->nodes[0];
2258         item = btrfs_item_ptr(leaf, path->slots[0],
2259                               struct btrfs_file_extent_item);
2260         /* are we inside the extent that was found? */
2261         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2262         found_type = btrfs_key_type(&found_key);
2263         if (found_key.objectid != objectid ||
2264             found_type != BTRFS_EXTENT_DATA_KEY) {
2265                 goto not_found;
2266         }
2267
2268         found_type = btrfs_file_extent_type(leaf, item);
2269         extent_start = found_key.offset;
2270         if (found_type == BTRFS_FILE_EXTENT_REG) {
2271                 extent_end = extent_start +
2272                        btrfs_file_extent_num_bytes(leaf, item);
2273                 err = 0;
2274                 if (start < extent_start || start >= extent_end) {
2275                         em->start = start;
2276                         if (start < extent_start) {
2277                                 if (start + len <= extent_start)
2278                                         goto not_found;
2279                                 em->len = extent_end - extent_start;
2280                         } else {
2281                                 em->len = len;
2282                         }
2283                         goto not_found_em;
2284                 }
2285                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
2286                 if (bytenr == 0) {
2287                         em->start = extent_start;
2288                         em->len = extent_end - extent_start;
2289                         em->block_start = EXTENT_MAP_HOLE;
2290                         goto insert;
2291                 }
2292                 bytenr += btrfs_file_extent_offset(leaf, item);
2293                 em->block_start = bytenr;
2294                 em->start = extent_start;
2295                 em->len = extent_end - extent_start;
2296                 goto insert;
2297         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
2298                 u64 page_start;
2299                 unsigned long ptr;
2300                 char *map;
2301                 size_t size;
2302                 size_t extent_offset;
2303                 size_t copy_size;
2304
2305                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
2306                                                     path->slots[0]));
2307                 extent_end = (extent_start + size + root->sectorsize - 1) &
2308                         ~((u64)root->sectorsize - 1);
2309                 if (start < extent_start || start >= extent_end) {
2310                         em->start = start;
2311                         if (start < extent_start) {
2312                                 if (start + len <= extent_start)
2313                                         goto not_found;
2314                                 em->len = extent_end - extent_start;
2315                         } else {
2316                                 em->len = len;
2317                         }
2318                         goto not_found_em;
2319                 }
2320                 em->block_start = EXTENT_MAP_INLINE;
2321
2322                 if (!page) {
2323                         em->start = extent_start;
2324                         em->len = size;
2325                         goto out;
2326                 }
2327
2328                 page_start = page_offset(page) + pg_offset;
2329                 extent_offset = page_start - extent_start;
2330                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
2331                                 size - extent_offset);
2332                 em->start = extent_start + extent_offset;
2333                 em->len = (copy_size + root->sectorsize - 1) &
2334                         ~((u64)root->sectorsize - 1);
2335                 map = kmap(page);
2336                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
2337                 if (create == 0 && !PageUptodate(page)) {
2338                         read_extent_buffer(leaf, map + pg_offset, ptr,
2339                                            copy_size);
2340                         flush_dcache_page(page);
2341                 } else if (create && PageUptodate(page)) {
2342                         if (!trans) {
2343                                 kunmap(page);
2344                                 free_extent_map(em);
2345                                 em = NULL;
2346                                 btrfs_release_path(root, path);
2347                                 trans = btrfs_start_transaction(root, 1);
2348                                 goto again;
2349                         }
2350                         write_extent_buffer(leaf, map + pg_offset, ptr,
2351                                             copy_size);
2352                         btrfs_mark_buffer_dirty(leaf);
2353                 }
2354                 kunmap(page);
2355                 set_extent_uptodate(io_tree, em->start,
2356                                     extent_map_end(em) - 1, GFP_NOFS);
2357                 goto insert;
2358         } else {
2359                 printk("unkknown found_type %d\n", found_type);
2360                 WARN_ON(1);
2361         }
2362 not_found:
2363         em->start = start;
2364         em->len = len;
2365 not_found_em:
2366         em->block_start = EXTENT_MAP_HOLE;
2367 insert:
2368         btrfs_release_path(root, path);
2369         if (em->start > start || extent_map_end(em) <= start) {
2370                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
2371                 err = -EIO;
2372                 goto out;
2373         }
2374
2375         err = 0;
2376         spin_lock(&em_tree->lock);
2377         ret = add_extent_mapping(em_tree, em);
2378         /* it is possible that someone inserted the extent into the tree
2379          * while we had the lock dropped.  It is also possible that
2380          * an overlapping map exists in the tree
2381          */
2382         if (ret == -EEXIST) {
2383                 struct extent_map *existing;
2384                 existing = lookup_extent_mapping(em_tree, start, len);
2385                 if (existing && (existing->start > start ||
2386                     existing->start + existing->len <= start)) {
2387                         free_extent_map(existing);
2388                         existing = NULL;
2389                 }
2390                 if (!existing) {
2391                         existing = lookup_extent_mapping(em_tree, em->start,
2392                                                          em->len);
2393                         if (existing) {
2394                                 err = merge_extent_mapping(em_tree, existing,
2395                                                            em);
2396                                 free_extent_map(existing);
2397                                 if (err) {
2398                                         free_extent_map(em);
2399                                         em = NULL;
2400                                 }
2401                         } else {
2402                                 err = -EIO;
2403                                 printk("failing to insert %Lu %Lu\n",
2404                                        start, len);
2405                                 free_extent_map(em);
2406                                 em = NULL;
2407                         }
2408                 } else {
2409                         free_extent_map(em);
2410                         em = existing;
2411                 }
2412         }
2413         spin_unlock(&em_tree->lock);
2414 out:
2415         btrfs_free_path(path);
2416         if (trans) {
2417                 ret = btrfs_end_transaction(trans, root);
2418                 if (!err)
2419                         err = ret;
2420         }
2421         mutex_unlock(&root->fs_info->fs_mutex);
2422         if (err) {
2423                 free_extent_map(em);
2424                 WARN_ON(1);
2425                 return ERR_PTR(err);
2426         }
2427         return em;
2428 }
2429
#if 0 /* waiting for O_DIRECT reads */
/*
 * get_block style callback: look up the extent covering block @iblock of
 * @inode and describe it in @bh_result.
 *
 * Holes and delalloc extents only get b_size filled in; inline extents are
 * rejected with -EINVAL.  For everything else the logical address is
 * translated with btrfs_map_block() and the first stripe is reported.
 *
 * Returns 0 on success (including the "nothing mapped" cases) or a negative
 * errno.
 */
static int btrfs_get_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	struct extent_map *em;
	u64 start = (u64)iblock << inode->i_blkbits;
	struct btrfs_multi_bio *multi = NULL;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 len;
	u64 logical;
	u64 map_length;
	int ret = 0;

	em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0);

	/*
	 * btrfs_get_extent() reports failures as ERR_PTR().  Such a value is
	 * not a real extent map and must not be handed to free_extent_map()
	 * at the out label, so return the error directly.
	 */
	if (IS_ERR(em))
		return PTR_ERR(em);
	if (!em)
		goto out;

	/* the returned mapping does not cover the requested offset */
	if (em->start > start || em->start + em->len <= start)
		goto out;

	if (em->block_start == EXTENT_MAP_INLINE) {
		ret = -EINVAL;
		goto out;
	}

	/* bytes from @start to the end of the extent, clamped to b_size's type */
	len = em->start + em->len - start;
	len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size)));

	if (em->block_start == EXTENT_MAP_HOLE ||
	    em->block_start == EXTENT_MAP_DELALLOC) {
		bh_result->b_size = len;
		goto out;
	}

	/* offset of @start inside the extent, relative to its first block */
	logical = start - em->start;
	logical = em->block_start + logical;

	map_length = len;
	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
			      logical, &map_length, &multi, 0);
	BUG_ON(ret);
	bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits;
	bh_result->b_size = min(map_length, len);

	bh_result->b_bdev = multi->stripes[0].dev->bdev;
	set_buffer_mapped(bh_result);
	kfree(multi);
out:
	free_extent_map(em);
	return ret;
}
#endif
2484
2485 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
2486                         const struct iovec *iov, loff_t offset,
2487                         unsigned long nr_segs)
2488 {
2489         return -EINVAL;
2490 #if 0
2491         struct file *file = iocb->ki_filp;
2492         struct inode *inode = file->f_mapping->host;
2493
2494         if (rw == WRITE)
2495                 return -EINVAL;
2496
2497         return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2498                                   offset, nr_segs, btrfs_get_block, NULL);
2499 #endif
2500 }
2501
2502 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2503 {
2504         return extent_bmap(mapping, iblock, btrfs_get_extent);
2505 }
2506
2507 int btrfs_readpage(struct file *file, struct page *page)
2508 {
2509         struct extent_io_tree *tree;
2510         tree = &BTRFS_I(page->mapping->host)->io_tree;
2511         return extent_read_full_page(tree, page, btrfs_get_extent);
2512 }
2513
2514 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2515 {
2516         struct extent_io_tree *tree;
2517
2518
2519         if (current->flags & PF_MEMALLOC) {
2520                 redirty_page_for_writepage(wbc, page);
2521                 unlock_page(page);
2522                 return 0;
2523         }
2524         tree = &BTRFS_I(page->mapping->host)->io_tree;
2525         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2526 }
2527
2528 static int btrfs_writepages(struct address_space *mapping,
2529                             struct writeback_control *wbc)
2530 {
2531         struct extent_io_tree *tree;
2532         tree = &BTRFS_I(mapping->host)->io_tree;
2533         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2534 }
2535
2536 static int
2537 btrfs_readpages(struct file *file, struct address_space *mapping,
2538                 struct list_head *pages, unsigned nr_pages)
2539 {
2540         struct extent_io_tree *tree;
2541         tree = &BTRFS_I(mapping->host)->io_tree;
2542         return extent_readpages(tree, mapping, pages, nr_pages,
2543                                 btrfs_get_extent);
2544 }
2545
2546 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2547 {
2548         struct extent_io_tree *tree;
2549         struct extent_map_tree *map;
2550         int ret;
2551
2552         tree = &BTRFS_I(page->mapping->host)->io_tree;
2553         map = &BTRFS_I(page->mapping->host)->extent_tree;
2554         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
2555         if (ret == 1) {
2556                 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
2557                 ClearPagePrivate(page);
2558                 set_page_private(page, 0);
2559                 page_cache_release(page);
2560         }
2561         return ret;
2562 }
2563
2564 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2565 {
2566         struct extent_io_tree *tree;
2567
2568         tree = &BTRFS_I(page->mapping->host)->io_tree;
2569         extent_invalidatepage(tree, page, offset);
2570         btrfs_releasepage(page, GFP_NOFS);
2571         if (PagePrivate(page)) {
2572                 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
2573                 ClearPagePrivate(page);
2574                 set_page_private(page, 0);
2575                 page_cache_release(page);
2576         }
2577 }
2578
2579 /*
2580  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2581  * called from a page fault handler when a page is first dirtied. Hence we must
2582  * be careful to check for EOF conditions here. We set the page up correctly
2583  * for a written page which means we get ENOSPC checking when writing into
2584  * holes and correct delalloc and unwritten extent mapping on filesystems that
2585  * support these features.
2586  *
2587  * We are not allowed to take the i_mutex here so we have to play games to
2588  * protect against truncate races as the page could now be beyond EOF.  Because
2589  * vmtruncate() writes the inode size before removing pages, once we have the
2590  * page lock we can determine safely if the page is beyond EOF. If it is not
2591  * beyond EOF, then the page is guaranteed safe against truncation until we
2592  * unlock the page.
2593  */
2594 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2595 {
2596         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2597         struct btrfs_root *root = BTRFS_I(inode)->root;
2598         unsigned long end;
2599         loff_t size;
2600         int ret;
2601         u64 page_start;
2602
2603         mutex_lock(&root->fs_info->fs_mutex);
2604         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2605         mutex_unlock(&root->fs_info->fs_mutex);
2606         if (ret)
2607                 goto out;
2608
2609         ret = -EINVAL;
2610
2611         lock_page(page);
2612         wait_on_page_writeback(page);
2613         size = i_size_read(inode);
2614         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2615
2616         if ((page->mapping != inode->i_mapping) ||
2617             (page_start > size)) {
2618                 /* page got truncated out from underneath us */
2619                 goto out_unlock;
2620         }
2621
2622         /* page is wholly or partially inside EOF */
2623         if (page_start + PAGE_CACHE_SIZE > size)
2624                 end = size & ~PAGE_CACHE_MASK;
2625         else
2626                 end = PAGE_CACHE_SIZE;
2627
2628         ret = btrfs_cow_one_page(inode, page, end);
2629
2630 out_unlock:
2631         unlock_page(page);
2632 out:
2633         return ret;
2634 }
2635
2636 static void btrfs_truncate(struct inode *inode)
2637 {
2638         struct btrfs_root *root = BTRFS_I(inode)->root;
2639         int ret;
2640         struct btrfs_trans_handle *trans;
2641         unsigned long nr;
2642
2643         if (!S_ISREG(inode->i_mode))
2644                 return;
2645         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2646                 return;
2647
2648         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2649
2650         mutex_lock(&root->fs_info->fs_mutex);
2651         trans = btrfs_start_transaction(root, 1);
2652         btrfs_set_trans_block_group(trans, inode);
2653
2654         /* FIXME, add redo link to tree so we don't leak on crash */
2655         ret = btrfs_truncate_in_trans(trans, root, inode,
2656                                       BTRFS_EXTENT_DATA_KEY);
2657         btrfs_update_inode(trans, root, inode);
2658         nr = trans->blocks_used;
2659
2660         ret = btrfs_end_transaction(trans, root);
2661         BUG_ON(ret);
2662         mutex_unlock(&root->fs_info->fs_mutex);
2663         btrfs_btree_balance_dirty(root, nr);
2664         btrfs_throttle(root);
2665 }
2666
2667 static int noinline create_subvol(struct btrfs_root *root, char *name,
2668                                   int namelen)
2669 {
2670         struct btrfs_trans_handle *trans;
2671         struct btrfs_key key;
2672         struct btrfs_root_item root_item;
2673         struct btrfs_inode_item *inode_item;
2674         struct extent_buffer *leaf;
2675         struct btrfs_root *new_root = root;
2676         struct inode *inode;
2677         struct inode *dir;
2678         int ret;
2679         int err;
2680         u64 objectid;
2681         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2682         unsigned long nr = 1;
2683
2684         mutex_lock(&root->fs_info->fs_mutex);
2685         ret = btrfs_check_free_space(root, 1, 0);
2686         if (ret)
2687                 goto fail_commit;
2688
2689         trans = btrfs_start_transaction(root, 1);
2690         BUG_ON(!trans);
2691
2692         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2693                                        0, &objectid);
2694         if (ret)
2695                 goto fail;
2696
2697         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2698                                         objectid, trans->transid, 0, 0,
2699                                         0, 0);
2700         if (IS_ERR(leaf))
2701                 return PTR_ERR(leaf);
2702
2703         btrfs_set_header_nritems(leaf, 0);
2704         btrfs_set_header_level(leaf, 0);
2705         btrfs_set_header_bytenr(leaf, leaf->start);
2706         btrfs_set_header_generation(leaf, trans->transid);
2707         btrfs_set_header_owner(leaf, objectid);
2708
2709         write_extent_buffer(leaf, root->fs_info->fsid,
2710                             (unsigned long)btrfs_header_fsid(leaf),
2711                             BTRFS_FSID_SIZE);
2712         btrfs_mark_buffer_dirty(leaf);
2713
2714         inode_item = &root_item.inode;
2715         memset(inode_item, 0, sizeof(*inode_item));
2716         inode_item->generation = cpu_to_le64(1);
2717         inode_item->size = cpu_to_le64(3);
2718         inode_item->nlink = cpu_to_le32(1);
2719         inode_item->nblocks = cpu_to_le64(1);
2720         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2721
2722         btrfs_set_root_bytenr(&root_item, leaf->start);
2723         btrfs_set_root_level(&root_item, 0);
2724         btrfs_set_root_refs(&root_item, 1);
2725         btrfs_set_root_used(&root_item, 0);
2726
2727         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2728         root_item.drop_level = 0;
2729
2730         free_extent_buffer(leaf);
2731         leaf = NULL;
2732
2733         btrfs_set_root_dirid(&root_item, new_dirid);
2734
2735         key.objectid = objectid;
2736         key.offset = 1;
2737         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2738         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2739                                 &root_item);
2740         if (ret)
2741                 goto fail;
2742
2743         /*
2744          * insert the directory item
2745          */
2746         key.offset = (u64)-1;
2747         dir = root->fs_info->sb->s_root->d_inode;
2748         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2749                                     name, namelen, dir->i_ino, &key,
2750                                     BTRFS_FT_DIR);
2751         if (ret)
2752                 goto fail;
2753
2754         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2755                              name, namelen, objectid,
2756                              root->fs_info->sb->s_root->d_inode->i_ino);
2757         if (ret)
2758                 goto fail;
2759
2760         ret = btrfs_commit_transaction(trans, root);
2761         if (ret)
2762                 goto fail_commit;
2763
2764         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2765         BUG_ON(!new_root);
2766
2767         trans = btrfs_start_transaction(new_root, 1);
2768         BUG_ON(!trans);
2769
2770         inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid,
2771                                 new_dirid,
2772                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2773         if (IS_ERR(inode))
2774                 goto fail;
2775         inode->i_op = &btrfs_dir_inode_operations;
2776         inode->i_fop = &btrfs_dir_file_operations;
2777         new_root->inode = inode;
2778
2779         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2780                                      new_dirid);
2781         inode->i_nlink = 1;
2782         inode->i_size = 0;
2783         ret = btrfs_update_inode(trans, new_root, inode);
2784         if (ret)
2785                 goto fail;
2786 fail:
2787         nr = trans->blocks_used;
2788         err = btrfs_commit_transaction(trans, new_root);
2789         if (err && !ret)
2790                 ret = err;
2791 fail_commit:
2792         mutex_unlock(&root->fs_info->fs_mutex);
2793         btrfs_btree_balance_dirty(root, nr);
2794         btrfs_throttle(root);
2795         return ret;
2796 }
2797
2798 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2799 {
2800         struct btrfs_pending_snapshot *pending_snapshot;
2801         struct btrfs_trans_handle *trans;
2802         int ret;
2803         int err;
2804         unsigned long nr = 0;
2805
2806         if (!root->ref_cows)
2807                 return -EINVAL;
2808
2809         mutex_lock(&root->fs_info->fs_mutex);
2810         ret = btrfs_check_free_space(root, 1, 0);
2811         if (ret)
2812                 goto fail_unlock;
2813
2814         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2815         if (!pending_snapshot) {
2816                 ret = -ENOMEM;
2817                 goto fail_unlock;
2818         }
2819         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2820         if (!pending_snapshot->name) {
2821                 ret = -ENOMEM;
2822                 kfree(pending_snapshot);
2823                 goto fail_unlock;
2824         }
2825         memcpy(pending_snapshot->name, name, namelen);
2826         pending_snapshot->name[namelen] = '\0';
2827         trans = btrfs_start_transaction(root, 1);
2828         BUG_ON(!trans);
2829         pending_snapshot->root = root;
2830         list_add(&pending_snapshot->list,
2831                  &trans->transaction->pending_snapshots);
2832         ret = btrfs_update_inode(trans, root, root->inode);
2833         err = btrfs_commit_transaction(trans, root);
2834
2835 fail_unlock:
2836         mutex_unlock(&root->fs_info->fs_mutex);
2837         btrfs_btree_balance_dirty(root, nr);
2838         btrfs_throttle(root);
2839         return ret;
2840 }
2841
2842 unsigned long btrfs_force_ra(struct address_space *mapping,
2843                               struct file_ra_state *ra, struct file *file,
2844                               pgoff_t offset, pgoff_t last_index)
2845 {
2846         pgoff_t req_size = last_index - offset + 1;
2847
2848 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2849         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2850         return offset;
2851 #else
2852         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2853         return offset + req_size;
2854 #endif
2855 }
2856
2857 int btrfs_defrag_file(struct file *file) {
2858         struct inode *inode = fdentry(file)->d_inode;
2859         struct btrfs_root *root = BTRFS_I(inode)->root;
2860         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2861         struct page *page;
2862         unsigned long last_index;
2863         unsigned long ra_pages = root->fs_info->bdi.ra_pages;
2864         unsigned long total_read = 0;
2865         u64 page_start;
2866         u64 page_end;
2867         unsigned long i;
2868         int ret;
2869
2870         mutex_lock(&root->fs_info->fs_mutex);
2871         ret = btrfs_check_free_space(root, inode->i_size, 0);
2872         mutex_unlock(&root->fs_info->fs_mutex);
2873         if (ret)
2874                 return -ENOSPC;
2875
2876         mutex_lock(&inode->i_mutex);
2877         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2878         for (i = 0; i <= last_index; i++) {
2879                 if (total_read % ra_pages == 0) {
2880                         btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
2881                                        min(last_index, i + ra_pages - 1));
2882                 }
2883                 total_read++;
2884                 page = grab_cache_page(inode->i_mapping, i);
2885                 if (!page)
2886                         goto out_unlock;
2887                 if (!PageUptodate(page)) {
2888                         btrfs_readpage(NULL, page);
2889                         lock_page(page);
2890                         if (!PageUptodate(page)) {
2891                                 unlock_page(page);
2892                                 page_cache_release(page);
2893                                 goto out_unlock;
2894                         }
2895                 }
2896
2897 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2898                 ClearPageDirty(page);
2899 #else
2900                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2901 #endif
2902                 wait_on_page_writeback(page);
2903                 set_page_extent_mapped(page);
2904
2905                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2906                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2907
2908                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2909                 set_extent_delalloc(io_tree, page_start,
2910                                     page_end, GFP_NOFS);
2911
2912                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2913                 set_page_dirty(page);
2914                 unlock_page(page);
2915                 page_cache_release(page);
2916                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2917         }
2918
2919 out_unlock:
2920         mutex_unlock(&inode->i_mutex);
2921         return 0;
2922 }
2923
2924 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2925 {
2926         u64 new_size;
2927         u64 old_size;
2928         u64 devid = 1;
2929         struct btrfs_ioctl_vol_args *vol_args;
2930         struct btrfs_trans_handle *trans;
2931         struct btrfs_device *device = NULL;
2932         char *sizestr;
2933         char *devstr = NULL;
2934         int ret = 0;
2935         int namelen;
2936         int mod = 0;
2937
2938         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2939
2940         if (!vol_args)
2941                 return -ENOMEM;
2942
2943         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2944                 ret = -EFAULT;
2945                 goto out;
2946         }
2947         namelen = strlen(vol_args->name);
2948         if (namelen > BTRFS_VOL_NAME_MAX) {
2949                 ret = -EINVAL;
2950                 goto out;
2951         }
2952
2953         mutex_lock(&root->fs_info->fs_mutex);
2954         sizestr = vol_args->name;
2955         devstr = strchr(sizestr, ':');
2956         if (devstr) {
2957                 char *end;
2958                 sizestr = devstr + 1;
2959                 *devstr = '\0';
2960                 devstr = vol_args->name;
2961                 devid = simple_strtoull(devstr, &end, 10);
2962 printk("resizing devid %Lu\n", devid);
2963         }
2964         device = btrfs_find_device(root, devid, NULL);
2965         if (!device) {
2966                 printk("resizer unable to find device %Lu\n", devid);
2967                 ret = -EINVAL;
2968                 goto out_unlock;
2969         }
2970         if (!strcmp(sizestr, "max"))
2971                 new_size = device->bdev->bd_inode->i_size;
2972         else {
2973                 if (sizestr[0] == '-') {
2974                         mod = -1;
2975                         sizestr++;
2976                 } else if (sizestr[0] == '+') {
2977                         mod = 1;
2978                         sizestr++;
2979                 }
2980                 new_size = btrfs_parse_size(sizestr);
2981                 if (new_size == 0) {
2982                         ret = -EINVAL;
2983                         goto out_unlock;
2984                 }
2985         }
2986
2987         old_size = device->total_bytes;
2988
2989         if (mod < 0) {
2990                 if (new_size > old_size) {
2991                         ret = -EINVAL;
2992                         goto out_unlock;
2993                 }
2994                 new_size = old_size - new_size;
2995         } else if (mod > 0) {
2996                 new_size = old_size + new_size;
2997         }
2998
2999         if (new_size < 256 * 1024 * 1024) {
3000                 ret = -EINVAL;
3001                 goto out_unlock;
3002         }
3003         if (new_size > device->bdev->bd_inode->i_size) {
3004                 ret = -EFBIG;
3005                 goto out_unlock;
3006         }
3007
3008         do_div(new_size, root->sectorsize);
3009         new_size *= root->sectorsize;
3010
3011 printk("new size for %s is %llu\n", device->name, (unsigned long long)new_size);
3012
3013         if (new_size > old_size) {
3014                 trans = btrfs_start_transaction(root, 1);
3015                 ret = btrfs_grow_device(trans, device, new_size);
3016                 btrfs_commit_transaction(trans, root);
3017         } else {
3018                 ret = btrfs_shrink_device(device, new_size);
3019         }
3020
3021 out_unlock:
3022         mutex_unlock(&root->fs_info->fs_mutex);
3023 out:
3024         kfree(vol_args);
3025         return ret;
3026 }
3027
3028 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
3029                                             void __user *arg)
3030 {
3031         struct btrfs_ioctl_vol_args *vol_args;
3032         struct btrfs_dir_item *di;
3033         struct btrfs_path *path;
3034         u64 root_dirid;
3035         int namelen;
3036         int ret;
3037
3038         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
3039
3040         if (!vol_args)
3041                 return -ENOMEM;
3042
3043         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
3044                 ret = -EFAULT;
3045                 goto out;
3046         }
3047
3048         namelen = strlen(vol_args->name);
3049         if (namelen > BTRFS_VOL_NAME_MAX) {
3050                 ret = -EINVAL;
3051                 goto out;
3052         }
3053         if (strchr(vol_args->name, '/')) {
3054                 ret = -EINVAL;
3055                 goto out;
3056         }
3057
3058         path = btrfs_alloc_path();
3059         if (!path) {
3060                 ret = -ENOMEM;
3061                 goto out;
3062         }
3063
3064         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
3065         mutex_lock(&root->fs_info->fs_mutex);
3066         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
3067                             path, root_dirid,
3068                             vol_args->name, namelen, 0);
3069         mutex_unlock(&root->fs_info->fs_mutex);
3070         btrfs_free_path(path);
3071
3072         if (di && !IS_ERR(di)) {
3073                 ret = -EEXIST;
3074                 goto out;
3075         }
3076
3077         if (IS_ERR(di)) {
3078                 ret = PTR_ERR(di);
3079                 goto out;
3080         }
3081
3082         if (root == root->fs_info->tree_root)
3083                 ret = create_subvol(root, vol_args->name, namelen);
3084         else
3085                 ret = create_snapshot(root, vol_args->name, namelen);
3086 out:
3087         kfree(vol_args);
3088         return ret;
3089 }
3090
3091 static int btrfs_ioctl_defrag(struct file *file)
3092 {
3093         struct inode *inode = fdentry(file)->d_inode;
3094         struct btrfs_root *root = BTRFS_I(inode)->root;
3095
3096         switch (inode->i_mode & S_IFMT) {
3097         case S_IFDIR:
3098                 mutex_lock(&root->fs_info->fs_mutex);
3099                 btrfs_defrag_root(root, 0);
3100                 btrfs_defrag_root(root->fs_info->extent_root, 0);
3101                 mutex_unlock(&root->fs_info->fs_mutex);
3102                 break;
3103         case S_IFREG:
3104                 btrfs_defrag_file(file);
3105                 break;
3106         }
3107
3108         return 0;
3109 }
3110
3111 long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
3112 {
3113         struct btrfs_ioctl_vol_args *vol_args;
3114         int ret;
3115
3116         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
3117
3118         if (!vol_args)
3119                 return -ENOMEM;
3120
3121         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
3122                 ret = -EFAULT;
3123                 goto out;
3124         }
3125         ret = btrfs_init_new_device(root, vol_args->name);
3126
3127 out:
3128         kfree(vol_args);
3129         return ret;
3130 }
3131
3132 long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
3133 {
3134         struct btrfs_ioctl_vol_args *vol_args;
3135         int ret;
3136
3137         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
3138
3139         if (!vol_args)
3140                 return -ENOMEM;
3141
3142         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
3143                 ret = -EFAULT;
3144                 goto out;
3145         }
3146         ret = btrfs_rm_device(root, vol_args->name);
3147
3148 out:
3149         kfree(vol_args);
3150         return ret;
3151 }
3152
3153 int dup_item_to_inode(struct btrfs_trans_handle *trans,
3154                        struct btrfs_root *root,
3155                        struct btrfs_path *path,
3156                        struct extent_buffer *leaf,
3157                        int slot,
3158                        struct btrfs_key *key,
3159                        u64 destino)
3160 {
3161         char *dup;
3162         int len = btrfs_item_size_nr(leaf, slot);
3163         struct btrfs_key ckey = *key;
3164         int ret = 0;
3165
3166         dup = kmalloc(len, GFP_NOFS);
3167         if (!dup)
3168                 return -ENOMEM;
3169
3170         read_extent_buffer(leaf, dup, btrfs_item_ptr_offset(leaf, slot), len);
3171         btrfs_release_path(root, path);
3172
3173         ckey.objectid = destino;
3174         ret = btrfs_insert_item(trans, root, &ckey, dup, len);
3175         kfree(dup);
3176         return ret;
3177 }
3178
3179 long btrfs_ioctl_clone(struct file *file, unsigned long src_fd)
3180 {
3181         struct inode *inode = fdentry(file)->d_inode;
3182         struct btrfs_root *root = BTRFS_I(inode)->root;
3183         struct file *src_file;
3184         struct inode *src;
3185         struct btrfs_trans_handle *trans;
3186         int ret;
3187         u64 pos;
3188         struct btrfs_path *path;
3189         struct btrfs_key key;
3190         struct extent_buffer *leaf;
3191         u32 nritems;
3192         int slot;
3193
3194         src_file = fget(src_fd);
3195         if (!src_file)
3196                 return -EBADF;
3197         src = src_file->f_dentry->d_inode;
3198
3199         ret = -EXDEV;
3200         if (src->i_sb != inode->i_sb)
3201                 goto out_fput;
3202
3203         if (inode < src) {
3204                 mutex_lock(&inode->i_mutex);
3205                 mutex_lock(&src->i_mutex);
3206         } else {
3207                 mutex_lock(&src->i_mutex);
3208                 mutex_lock(&inode->i_mutex);
3209         }
3210
3211         ret = -ENOTEMPTY;
3212         if (inode->i_size)
3213                 goto out_unlock;
3214
3215         /* do any pending delalloc/csum calc on src, one way or
3216            another, and lock file content */
3217         while (1) {
3218                 filemap_write_and_wait(src->i_mapping);
3219                 lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
3220                 if (BTRFS_I(src)->delalloc_bytes == 0)
3221                         break;
3222                 unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
3223         }
3224
3225         mutex_lock(&root->fs_info->fs_mutex);
3226         trans = btrfs_start_transaction(root, 0);
3227         path = btrfs_alloc_path();
3228         if (!path) {
3229                 ret = -ENOMEM;
3230                 goto out;
3231         }
3232         key.offset = 0;
3233         key.type = BTRFS_EXTENT_DATA_KEY;
3234         key.objectid = src->i_ino;
3235         pos = 0;
3236         path->reada = 2;
3237
3238         while (1) {
3239                 /*
3240                  * note the key will change type as we walk through the
3241                  * tree.
3242                  */
3243                 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
3244                 if (ret < 0)
3245                         goto out;
3246
3247                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3248                         ret = btrfs_next_leaf(root, path);
3249                         if (ret < 0)
3250                                 goto out;
3251                         if (ret > 0)
3252                                 break;
3253                 }
3254                 leaf = path->nodes[0];
3255                 slot = path->slots[0];
3256                 btrfs_item_key_to_cpu(leaf, &key, slot);
3257                 nritems = btrfs_header_nritems(leaf);
3258
3259                 if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
3260                     key.objectid != src->i_ino)
3261                         break;
3262
3263                 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
3264                         struct btrfs_file_extent_item *extent;
3265                         int found_type;
3266                         pos = key.offset;
3267                         extent = btrfs_item_ptr(leaf, slot,
3268                                                 struct btrfs_file_extent_item);
3269                         found_type = btrfs_file_extent_type(leaf, extent);
3270                         if (found_type == BTRFS_FILE_EXTENT_REG) {
3271                                 u64 len = btrfs_file_extent_num_bytes(leaf,
3272                                                                       extent);
3273                                 u64 ds = btrfs_file_extent_disk_bytenr(leaf,
3274                                                                        extent);
3275                                 u64 dl = btrfs_file_extent_disk_num_bytes(leaf,
3276                                                                  extent);
3277                                 u64 off = btrfs_file_extent_offset(leaf,
3278                                                                    extent);
3279                                 btrfs_insert_file_extent(trans, root,
3280                                                          inode->i_ino, pos,
3281                                                          ds, dl, len, off);
3282                                 /* ds == 0 means there's a hole */
3283                                 if (ds != 0) {
3284                                         btrfs_inc_extent_ref(trans, root,
3285                                                      ds, dl,
3286                                                      root->root_key.objectid,
3287                                                      trans->transid,
3288                                                      inode->i_ino, pos);
3289                                 }
3290                                 pos = key.offset + len;
3291                         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
3292                                 ret = dup_item_to_inode(trans, root, path,
3293                                                         leaf, slot, &key,
3294                                                         inode->i_ino);
3295                                 if (ret)
3296                                         goto out;
3297                                 pos = key.offset + btrfs_item_size_nr(leaf,
3298                                                                       slot);
3299                         }
3300                 } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
3301                         ret = dup_item_to_inode(trans, root, path, leaf,
3302                                                 slot, &key, inode->i_ino);
3303
3304                         if (ret)
3305                                 goto out;
3306                 }
3307                 key.offset++;
3308                 btrfs_release_path(root, path);
3309         }
3310
3311         ret = 0;
3312 out:
3313         btrfs_free_path(path);
3314
3315         inode->i_blocks = src->i_blocks;
3316         i_size_write(inode, src->i_size);
3317         btrfs_update_inode(trans, root, inode);
3318
3319         unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
3320
3321         btrfs_end_transaction(trans, root);
3322         mutex_unlock(&root->fs_info->fs_mutex);
3323
3324 out_unlock:
3325         mutex_unlock(&src->i_mutex);
3326         mutex_unlock(&inode->i_mutex);
3327 out_fput:
3328         fput(src_file);
3329         return ret;
3330 }
3331
3332 long btrfs_ioctl(struct file *file, unsigned int
3333                 cmd, unsigned long arg)
3334 {
3335         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3336
3337         switch (cmd) {
3338         case BTRFS_IOC_SNAP_CREATE:
3339                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
3340         case BTRFS_IOC_DEFRAG:
3341                 return btrfs_ioctl_defrag(file);
3342         case BTRFS_IOC_RESIZE:
3343                 return btrfs_ioctl_resize(root, (void __user *)arg);
3344         case BTRFS_IOC_ADD_DEV:
3345                 return btrfs_ioctl_add_dev(root, (void __user *)arg);
3346         case BTRFS_IOC_RM_DEV:
3347                 return btrfs_ioctl_rm_dev(root, (void __user *)arg);
3348         case BTRFS_IOC_BALANCE:
3349                 return btrfs_balance(root->fs_info->dev_root);
3350         case BTRFS_IOC_CLONE:
3351                 return btrfs_ioctl_clone(file, arg);
3352         }
3353
3354         return -ENOTTY;
3355 }
3356
3357 /*
3358  * Called inside transaction, so use GFP_NOFS
3359  */
3360 struct inode *btrfs_alloc_inode(struct super_block *sb)
3361 {
3362         struct btrfs_inode *ei;
3363
3364         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
3365         if (!ei)
3366                 return NULL;
3367         ei->last_trans = 0;
3368         ei->ordered_trans = 0;
3369         return &ei->vfs_inode;
3370 }
3371
3372 void btrfs_destroy_inode(struct inode *inode)
3373 {
3374         WARN_ON(!list_empty(&inode->i_dentry));
3375         WARN_ON(inode->i_data.nrpages);
3376
3377         btrfs_drop_extent_cache(inode, 0, (u64)-1);
3378         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
3379 }
3380
3381 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
3382 static void init_once(struct kmem_cache * cachep, void *foo)
3383 #else
3384 static void init_once(void * foo, struct kmem_cache * cachep,
3385                       unsigned long flags)
3386 #endif
3387 {
3388         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
3389
3390         inode_init_once(&ei->vfs_inode);
3391 }
3392
3393 void btrfs_destroy_cachep(void)
3394 {
3395         if (btrfs_inode_cachep)
3396                 kmem_cache_destroy(btrfs_inode_cachep);
3397         if (btrfs_trans_handle_cachep)
3398                 kmem_cache_destroy(btrfs_trans_handle_cachep);
3399         if (btrfs_transaction_cachep)
3400                 kmem_cache_destroy(btrfs_transaction_cachep);
3401         if (btrfs_bit_radix_cachep)
3402                 kmem_cache_destroy(btrfs_bit_radix_cachep);
3403         if (btrfs_path_cachep)
3404                 kmem_cache_destroy(btrfs_path_cachep);
3405 }
3406
3407 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
3408                                        unsigned long extra_flags,
3409 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
3410                                        void (*ctor)(struct kmem_cache *, void *)
3411 #else
3412                                        void (*ctor)(void *, struct kmem_cache *,
3413                                                     unsigned long)
3414 #endif
3415                                      )
3416 {
3417         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
3418                                  SLAB_MEM_SPREAD | extra_flags), ctor
3419 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
3420                                  ,NULL
3421 #endif
3422                                 );
3423 }
3424
3425 int btrfs_init_cachep(void)
3426 {
3427         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
3428                                           sizeof(struct btrfs_inode),
3429                                           0, init_once);
3430         if (!btrfs_inode_cachep)
3431                 goto fail;
3432         btrfs_trans_handle_cachep =
3433                         btrfs_cache_create("btrfs_trans_handle_cache",
3434                                            sizeof(struct btrfs_trans_handle),
3435                                            0, NULL);
3436         if (!btrfs_trans_handle_cachep)
3437                 goto fail;
3438         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
3439                                              sizeof(struct btrfs_transaction),
3440                                              0, NULL);
3441         if (!btrfs_transaction_cachep)
3442                 goto fail;
3443         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
3444                                          sizeof(struct btrfs_path),
3445                                          0, NULL);
3446         if (!btrfs_path_cachep)
3447                 goto fail;
3448         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
3449                                               SLAB_DESTROY_BY_RCU, NULL);
3450         if (!btrfs_bit_radix_cachep)
3451                 goto fail;
3452         return 0;
3453 fail:
3454         btrfs_destroy_cachep();
3455         return -ENOMEM;
3456 }
3457
3458 static int btrfs_getattr(struct vfsmount *mnt,
3459                          struct dentry *dentry, struct kstat *stat)
3460 {
3461         struct inode *inode = dentry->d_inode;
3462         generic_fillattr(inode, stat);
3463         stat->blksize = PAGE_CACHE_SIZE;
3464         stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9);
3465         return 0;
3466 }
3467
3468 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
3469                            struct inode * new_dir,struct dentry *new_dentry)
3470 {
3471         struct btrfs_trans_handle *trans;
3472         struct btrfs_root *root = BTRFS_I(old_dir)->root;
3473         struct inode *new_inode = new_dentry->d_inode;
3474         struct inode *old_inode = old_dentry->d_inode;
3475         struct timespec ctime = CURRENT_TIME;
3476         struct btrfs_path *path;
3477         int ret;
3478
3479         if (S_ISDIR(old_inode->i_mode) && new_inode &&
3480             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
3481                 return -ENOTEMPTY;
3482         }
3483
3484         mutex_lock(&root->fs_info->fs_mutex);
3485         ret = btrfs_check_free_space(root, 1, 0);
3486         if (ret)
3487                 goto out_unlock;
3488
3489         trans = btrfs_start_transaction(root, 1);
3490
3491         btrfs_set_trans_block_group(trans, new_dir);
3492         path = btrfs_alloc_path();
3493         if (!path) {
3494                 ret = -ENOMEM;
3495                 goto out_fail;
3496         }
3497
3498         old_dentry->d_inode->i_nlink++;
3499         old_dir->i_ctime = old_dir->i_mtime = ctime;
3500         new_dir->i_ctime = new_dir->i_mtime = ctime;
3501         old_inode->i_ctime = ctime;
3502
3503         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
3504         if (ret)
3505                 goto out_fail;
3506
3507         if (new_inode) {
3508                 new_inode->i_ctime = CURRENT_TIME;
3509                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
3510                 if (ret)
3511                         goto out_fail;
3512         }
3513         ret = btrfs_add_link(trans, new_dentry, old_inode, 1);
3514         if (ret)
3515                 goto out_fail;
3516
3517 out_fail:
3518         btrfs_free_path(path);
3519         btrfs_end_transaction(trans, root);
3520 out_unlock:
3521         mutex_unlock(&root->fs_info->fs_mutex);
3522         return ret;
3523 }
3524
3525 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
3526                          const char *symname)
3527 {
3528         struct btrfs_trans_handle *trans;
3529         struct btrfs_root *root = BTRFS_I(dir)->root;
3530         struct btrfs_path *path;
3531         struct btrfs_key key;
3532         struct inode *inode = NULL;
3533         int err;
3534         int drop_inode = 0;
3535         u64 objectid;
3536         int name_len;
3537         int datasize;
3538         unsigned long ptr;
3539         struct btrfs_file_extent_item *ei;
3540         struct extent_buffer *leaf;
3541         unsigned long nr = 0;
3542
3543         name_len = strlen(symname) + 1;
3544         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
3545                 return -ENAMETOOLONG;
3546
3547         mutex_lock(&root->fs_info->fs_mutex);
3548         err = btrfs_check_free_space(root, 1, 0);
3549         if (err)
3550                 goto out_fail;
3551
3552         trans = btrfs_start_transaction(root, 1);
3553         btrfs_set_trans_block_group(trans, dir);
3554
3555         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3556         if (err) {
3557                 err = -ENOSPC;
3558                 goto out_unlock;
3559         }
3560
3561         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
3562                                 dentry->d_name.len,
3563                                 dentry->d_parent->d_inode->i_ino, objectid,
3564                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
3565         err = PTR_ERR(inode);
3566         if (IS_ERR(inode))
3567                 goto out_unlock;
3568
3569         btrfs_set_trans_block_group(trans, inode);
3570         err = btrfs_add_nondir(trans, dentry, inode, 0);
3571         if (err)
3572                 drop_inode = 1;
3573         else {
3574                 inode->i_mapping->a_ops = &btrfs_aops;
3575                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3576                 inode->i_fop = &btrfs_file_operations;
3577                 inode->i_op = &btrfs_file_inode_operations;
3578                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3579                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3580                                      inode->i_mapping, GFP_NOFS);
3581                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3582                                      inode->i_mapping, GFP_NOFS);
3583                 BTRFS_I(inode)->delalloc_bytes = 0;
3584                 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
3585                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3586         }
3587         dir->i_sb->s_dirt = 1;
3588         btrfs_update_inode_block_group(trans, inode);
3589         btrfs_update_inode_block_group(trans, dir);
3590         if (drop_inode)
3591                 goto out_unlock;
3592
3593         path = btrfs_alloc_path();
3594         BUG_ON(!path);
3595         key.objectid = inode->i_ino;
3596         key.offset = 0;
3597         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3598         datasize = btrfs_file_extent_calc_inline_size(name_len);
3599         err = btrfs_insert_empty_item(trans, root, path, &key,
3600                                       datasize);
3601         if (err) {
3602                 drop_inode = 1;
3603                 goto out_unlock;
3604         }
3605         leaf = path->nodes[0];
3606         ei = btrfs_item_ptr(leaf, path->slots[0],
3607                             struct btrfs_file_extent_item);
3608         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
3609         btrfs_set_file_extent_type(leaf, ei,
3610                                    BTRFS_FILE_EXTENT_INLINE);
3611         ptr = btrfs_file_extent_inline_start(ei);
3612         write_extent_buffer(leaf, symname, ptr, name_len);
3613         btrfs_mark_buffer_dirty(leaf);
3614         btrfs_free_path(path);
3615
3616         inode->i_op = &btrfs_symlink_inode_operations;
3617         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3618         inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3619         inode->i_size = name_len - 1;
3620         err = btrfs_update_inode(trans, root, inode);
3621         if (err)
3622                 drop_inode = 1;
3623
3624 out_unlock:
3625         nr = trans->blocks_used;
3626         btrfs_end_transaction(trans, root);
3627 out_fail:
3628         mutex_unlock(&root->fs_info->fs_mutex);
3629         if (drop_inode) {
3630                 inode_dec_link_count(inode);
3631                 iput(inode);
3632         }
3633         btrfs_btree_balance_dirty(root, nr);
3634         btrfs_throttle(root);
3635         return err;
3636 }
3637
3638 static int btrfs_permission(struct inode *inode, int mask,
3639                             struct nameidata *nd)
3640 {
3641         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
3642                 return -EACCES;
3643         return generic_permission(inode, mask, NULL);
3644 }
3645
3646 static struct inode_operations btrfs_dir_inode_operations = {
3647         .lookup         = btrfs_lookup,
3648         .create         = btrfs_create,
3649         .unlink         = btrfs_unlink,
3650         .link           = btrfs_link,
3651         .mkdir          = btrfs_mkdir,
3652         .rmdir          = btrfs_rmdir,
3653         .rename         = btrfs_rename,
3654         .symlink        = btrfs_symlink,
3655         .setattr        = btrfs_setattr,
3656         .mknod          = btrfs_mknod,
3657         .setxattr       = generic_setxattr,
3658         .getxattr       = generic_getxattr,
3659         .listxattr      = btrfs_listxattr,
3660         .removexattr    = generic_removexattr,
3661         .permission     = btrfs_permission,
3662 };
3663 static struct inode_operations btrfs_dir_ro_inode_operations = {
3664         .lookup         = btrfs_lookup,
3665         .permission     = btrfs_permission,
3666 };
3667 static struct file_operations btrfs_dir_file_operations = {
3668         .llseek         = generic_file_llseek,
3669         .read           = generic_read_dir,
3670         .readdir        = btrfs_readdir,
3671         .unlocked_ioctl = btrfs_ioctl,
3672 #ifdef CONFIG_COMPAT
3673         .compat_ioctl   = btrfs_ioctl,
3674 #endif
3675 };
3676
3677 static struct extent_io_ops btrfs_extent_io_ops = {
3678         .fill_delalloc = run_delalloc_range,
3679         .submit_bio_hook = btrfs_submit_bio_hook,
3680         .merge_bio_hook = btrfs_merge_bio_hook,
3681         .readpage_io_hook = btrfs_readpage_io_hook,
3682         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3683         .readpage_io_failed_hook = btrfs_io_failed_hook,
3684         .set_bit_hook = btrfs_set_bit_hook,
3685         .clear_bit_hook = btrfs_clear_bit_hook,
3686 };
3687
3688 static struct address_space_operations btrfs_aops = {
3689         .readpage       = btrfs_readpage,
3690         .writepage      = btrfs_writepage,
3691         .writepages     = btrfs_writepages,
3692         .readpages      = btrfs_readpages,
3693         .sync_page      = block_sync_page,
3694         .bmap           = btrfs_bmap,
3695         .direct_IO      = btrfs_direct_IO,
3696         .invalidatepage = btrfs_invalidatepage,
3697         .releasepage    = btrfs_releasepage,
3698         .set_page_dirty = __set_page_dirty_nobuffers,
3699 };
3700
3701 static struct address_space_operations btrfs_symlink_aops = {
3702         .readpage       = btrfs_readpage,
3703         .writepage      = btrfs_writepage,
3704         .invalidatepage = btrfs_invalidatepage,
3705         .releasepage    = btrfs_releasepage,
3706 };
3707
3708 static struct inode_operations btrfs_file_inode_operations = {
3709         .truncate       = btrfs_truncate,
3710         .getattr        = btrfs_getattr,
3711         .setattr        = btrfs_setattr,
3712         .setxattr       = generic_setxattr,
3713         .getxattr       = generic_getxattr,
3714         .listxattr      = btrfs_listxattr,
3715         .removexattr    = generic_removexattr,
3716         .permission     = btrfs_permission,
3717 };
3718 static struct inode_operations btrfs_special_inode_operations = {
3719         .getattr        = btrfs_getattr,
3720         .setattr        = btrfs_setattr,
3721         .permission     = btrfs_permission,
3722 };
3723 static struct inode_operations btrfs_symlink_inode_operations = {
3724         .readlink       = generic_readlink,
3725         .follow_link    = page_follow_link_light,
3726         .put_link       = page_put_link,
3727         .permission     = btrfs_permission,
3728 };