]> pilppa.org Git - linux-2.6-omap-h63xx.git/blob - fs/btrfs/inode.c
Btrfs: Add the ability to find and remove dead roots after a crash.
[linux-2.6-omap-h63xx.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include <linux/buffer_head.h>
21 #include <linux/fs.h>
22 #include <linux/pagemap.h>
23 #include <linux/highmem.h>
24 #include <linux/time.h>
25 #include <linux/init.h>
26 #include <linux/string.h>
27 #include <linux/smp_lock.h>
28 #include <linux/backing-dev.h>
29 #include <linux/mpage.h>
30 #include <linux/swap.h>
31 #include <linux/writeback.h>
32 #include <linux/statfs.h>
33 #include <linux/compat.h>
34 #include <linux/bit_spinlock.h>
35 #include "ctree.h"
36 #include "disk-io.h"
37 #include "transaction.h"
38 #include "btrfs_inode.h"
39 #include "ioctl.h"
40 #include "print-tree.h"
41
42 struct btrfs_iget_args {
43         u64 ino;
44         struct btrfs_root *root;
45 };
46
47 static struct inode_operations btrfs_dir_inode_operations;
48 static struct inode_operations btrfs_symlink_inode_operations;
49 static struct inode_operations btrfs_dir_ro_inode_operations;
50 static struct inode_operations btrfs_file_inode_operations;
51 static struct address_space_operations btrfs_aops;
52 static struct address_space_operations btrfs_symlink_aops;
53 static struct file_operations btrfs_dir_file_operations;
54
55 static struct kmem_cache *btrfs_inode_cachep;
56 struct kmem_cache *btrfs_trans_handle_cachep;
57 struct kmem_cache *btrfs_transaction_cachep;
58 struct kmem_cache *btrfs_bit_radix_cachep;
59 struct kmem_cache *btrfs_path_cachep;
60
61 #define S_SHIFT 12
62 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
63         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
64         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
65         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
66         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
67         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
68         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
69         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
70 };
71
72 void btrfs_read_locked_inode(struct inode *inode)
73 {
74         struct btrfs_path *path;
75         struct btrfs_inode_item *inode_item;
76         struct btrfs_root *root = BTRFS_I(inode)->root;
77         struct btrfs_key location;
78         u64 alloc_group_block;
79         int ret;
80
81         path = btrfs_alloc_path();
82         BUG_ON(!path);
83         mutex_lock(&root->fs_info->fs_mutex);
84
85         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
86         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
87         if (ret) {
88                 btrfs_free_path(path);
89                 goto make_bad;
90         }
91         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
92                                   path->slots[0],
93                                   struct btrfs_inode_item);
94
95         inode->i_mode = btrfs_inode_mode(inode_item);
96         inode->i_nlink = btrfs_inode_nlink(inode_item);
97         inode->i_uid = btrfs_inode_uid(inode_item);
98         inode->i_gid = btrfs_inode_gid(inode_item);
99         inode->i_size = btrfs_inode_size(inode_item);
100         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
101         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
102         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
103         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
104         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
105         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
106         inode->i_blocks = btrfs_inode_nblocks(inode_item);
107         inode->i_generation = btrfs_inode_generation(inode_item);
108         alloc_group_block = btrfs_inode_block_group(inode_item);
109         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
110                                                        alloc_group_block);
111
112         btrfs_free_path(path);
113         inode_item = NULL;
114
115         mutex_unlock(&root->fs_info->fs_mutex);
116
117         switch (inode->i_mode & S_IFMT) {
118 #if 0
119         default:
120                 init_special_inode(inode, inode->i_mode,
121                                    btrfs_inode_rdev(inode_item));
122                 break;
123 #endif
124         case S_IFREG:
125                 inode->i_mapping->a_ops = &btrfs_aops;
126                 inode->i_fop = &btrfs_file_operations;
127                 inode->i_op = &btrfs_file_inode_operations;
128                 break;
129         case S_IFDIR:
130                 inode->i_fop = &btrfs_dir_file_operations;
131                 if (root == root->fs_info->tree_root)
132                         inode->i_op = &btrfs_dir_ro_inode_operations;
133                 else
134                         inode->i_op = &btrfs_dir_inode_operations;
135                 break;
136         case S_IFLNK:
137                 inode->i_op = &btrfs_symlink_inode_operations;
138                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
139                 break;
140         }
141         return;
142
143 make_bad:
144         btrfs_release_path(root, path);
145         btrfs_free_path(path);
146         mutex_unlock(&root->fs_info->fs_mutex);
147         make_bad_inode(inode);
148 }
149
150 static void fill_inode_item(struct btrfs_inode_item *item,
151                             struct inode *inode)
152 {
153         btrfs_set_inode_uid(item, inode->i_uid);
154         btrfs_set_inode_gid(item, inode->i_gid);
155         btrfs_set_inode_size(item, inode->i_size);
156         btrfs_set_inode_mode(item, inode->i_mode);
157         btrfs_set_inode_nlink(item, inode->i_nlink);
158         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
159         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
160         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
161         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
162         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
163         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
164         btrfs_set_inode_nblocks(item, inode->i_blocks);
165         btrfs_set_inode_generation(item, inode->i_generation);
166         btrfs_set_inode_block_group(item,
167                                     BTRFS_I(inode)->block_group->key.objectid);
168 }
169
170 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
171                               struct btrfs_root *root,
172                               struct inode *inode)
173 {
174         struct btrfs_inode_item *inode_item;
175         struct btrfs_path *path;
176         int ret;
177
178         path = btrfs_alloc_path();
179         BUG_ON(!path);
180         ret = btrfs_lookup_inode(trans, root, path,
181                                  &BTRFS_I(inode)->location, 1);
182         if (ret) {
183                 if (ret > 0)
184                         ret = -ENOENT;
185                 goto failed;
186         }
187
188         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
189                                   path->slots[0],
190                                   struct btrfs_inode_item);
191
192         fill_inode_item(inode_item, inode);
193         btrfs_mark_buffer_dirty(path->nodes[0]);
194         ret = 0;
195 failed:
196         btrfs_release_path(root, path);
197         btrfs_free_path(path);
198         return ret;
199 }
200
201
202 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
203                               struct btrfs_root *root,
204                               struct inode *dir,
205                               struct dentry *dentry)
206 {
207         struct btrfs_path *path;
208         const char *name = dentry->d_name.name;
209         int name_len = dentry->d_name.len;
210         int ret = 0;
211         u64 objectid;
212         struct btrfs_dir_item *di;
213
214         path = btrfs_alloc_path();
215         if (!path) {
216                 ret = -ENOMEM;
217                 goto err;
218         }
219
220         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
221                                     name, name_len, -1);
222         if (IS_ERR(di)) {
223                 ret = PTR_ERR(di);
224                 goto err;
225         }
226         if (!di) {
227                 ret = -ENOENT;
228                 goto err;
229         }
230         objectid = btrfs_disk_key_objectid(&di->location);
231         ret = btrfs_delete_one_dir_name(trans, root, path, di);
232         if (ret)
233                 goto err;
234         btrfs_release_path(root, path);
235
236         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
237                                          objectid, name, name_len, -1);
238         if (IS_ERR(di)) {
239                 ret = PTR_ERR(di);
240                 goto err;
241         }
242         if (!di) {
243                 ret = -ENOENT;
244                 goto err;
245         }
246         ret = btrfs_delete_one_dir_name(trans, root, path, di);
247
248         dentry->d_inode->i_ctime = dir->i_ctime;
249 err:
250         btrfs_free_path(path);
251         if (!ret) {
252                 dir->i_size -= name_len * 2;
253                 btrfs_update_inode(trans, root, dir);
254                 drop_nlink(dentry->d_inode);
255                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
256                 dir->i_sb->s_dirt = 1;
257         }
258         return ret;
259 }
260
261 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
262 {
263         struct btrfs_root *root;
264         struct btrfs_trans_handle *trans;
265         int ret;
266
267         root = BTRFS_I(dir)->root;
268         mutex_lock(&root->fs_info->fs_mutex);
269         trans = btrfs_start_transaction(root, 1);
270         btrfs_set_trans_block_group(trans, dir);
271         ret = btrfs_unlink_trans(trans, root, dir, dentry);
272         btrfs_end_transaction(trans, root);
273         mutex_unlock(&root->fs_info->fs_mutex);
274         btrfs_btree_balance_dirty(root);
275         return ret;
276 }
277
278 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
279 {
280         struct inode *inode = dentry->d_inode;
281         int err;
282         int ret;
283         struct btrfs_root *root = BTRFS_I(dir)->root;
284         struct btrfs_path *path;
285         struct btrfs_key key;
286         struct btrfs_trans_handle *trans;
287         struct btrfs_key found_key;
288         int found_type;
289         struct btrfs_leaf *leaf;
290         char *goodnames = "..";
291
292         path = btrfs_alloc_path();
293         BUG_ON(!path);
294         mutex_lock(&root->fs_info->fs_mutex);
295         trans = btrfs_start_transaction(root, 1);
296         btrfs_set_trans_block_group(trans, dir);
297         key.objectid = inode->i_ino;
298         key.offset = (u64)-1;
299         key.flags = (u32)-1;
300         while(1) {
301                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
302                 if (ret < 0) {
303                         err = ret;
304                         goto out;
305                 }
306                 BUG_ON(ret == 0);
307                 if (path->slots[0] == 0) {
308                         err = -ENOENT;
309                         goto out;
310                 }
311                 path->slots[0]--;
312                 leaf = btrfs_buffer_leaf(path->nodes[0]);
313                 btrfs_disk_key_to_cpu(&found_key,
314                                       &leaf->items[path->slots[0]].key);
315                 found_type = btrfs_key_type(&found_key);
316                 if (found_key.objectid != inode->i_ino) {
317                         err = -ENOENT;
318                         goto out;
319                 }
320                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
321                      found_type != BTRFS_DIR_INDEX_KEY) ||
322                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
323                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
324                         err = -ENOTEMPTY;
325                         goto out;
326                 }
327                 ret = btrfs_del_item(trans, root, path);
328                 BUG_ON(ret);
329
330                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
331                         break;
332                 btrfs_release_path(root, path);
333         }
334         ret = 0;
335         btrfs_release_path(root, path);
336
337         /* now the directory is empty */
338         err = btrfs_unlink_trans(trans, root, dir, dentry);
339         if (!err) {
340                 inode->i_size = 0;
341         }
342 out:
343         btrfs_release_path(root, path);
344         btrfs_free_path(path);
345         mutex_unlock(&root->fs_info->fs_mutex);
346         ret = btrfs_end_transaction(trans, root);
347         btrfs_btree_balance_dirty(root);
348         if (ret && !err)
349                 err = ret;
350         return err;
351 }
352
353 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
354                             struct btrfs_root *root,
355                             struct inode *inode)
356 {
357         struct btrfs_path *path;
358         int ret;
359
360         clear_inode(inode);
361
362         path = btrfs_alloc_path();
363         BUG_ON(!path);
364         ret = btrfs_lookup_inode(trans, root, path,
365                                  &BTRFS_I(inode)->location, -1);
366         if (ret > 0)
367                 ret = -ENOENT;
368         if (!ret)
369                 ret = btrfs_del_item(trans, root, path);
370         btrfs_free_path(path);
371         return ret;
372 }
373
374 /*
375  * truncates go from a high offset to a low offset.  So, walk
376  * from hi to lo in the node and issue readas.  Stop when you find
377  * keys from a different objectid
378  */
379 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
380                            u64 objectid)
381 {
382         struct btrfs_node *node;
383         int i;
384         int nritems;
385         u64 item_objectid;
386         u64 blocknr;
387         int slot;
388         int ret;
389
390         if (!path->nodes[1])
391                 return;
392         node = btrfs_buffer_node(path->nodes[1]);
393         slot = path->slots[1];
394         if (slot == 0)
395                 return;
396         nritems = btrfs_header_nritems(&node->header);
397         for (i = slot - 1; i >= 0; i--) {
398                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
399                 if (item_objectid != objectid)
400                         break;
401                 blocknr = btrfs_node_blockptr(node, i);
402                 ret = readahead_tree_block(root, blocknr);
403                 if (ret)
404                         break;
405         }
406 }
407
408 /*
409  * this can truncate away extent items, csum items and directory items.
410  * It starts at a high offset and removes keys until it can't find
411  * any higher than i_size.
412  *
413  * csum items that cross the new i_size are truncated to the new size
414  * as well.
415  */
416 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
417                                    struct btrfs_root *root,
418                                    struct inode *inode)
419 {
420         int ret;
421         struct btrfs_path *path;
422         struct btrfs_key key;
423         struct btrfs_disk_key *found_key;
424         u32 found_type;
425         struct btrfs_leaf *leaf;
426         struct btrfs_file_extent_item *fi;
427         u64 extent_start = 0;
428         u64 extent_num_blocks = 0;
429         u64 item_end = 0;
430         int found_extent;
431         int del_item;
432
433         path = btrfs_alloc_path();
434         BUG_ON(!path);
435         /* FIXME, add redo link to tree so we don't leak on crash */
436         key.objectid = inode->i_ino;
437         key.offset = (u64)-1;
438         key.flags = (u32)-1;
439         while(1) {
440                 btrfs_init_path(path);
441                 fi = NULL;
442                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
443                 if (ret < 0) {
444                         goto error;
445                 }
446                 if (ret > 0) {
447                         BUG_ON(path->slots[0] == 0);
448                         path->slots[0]--;
449                 }
450                 reada_truncate(root, path, inode->i_ino);
451                 leaf = btrfs_buffer_leaf(path->nodes[0]);
452                 found_key = &leaf->items[path->slots[0]].key;
453                 found_type = btrfs_disk_key_type(found_key);
454
455                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
456                         break;
457                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
458                     found_type != BTRFS_DIR_ITEM_KEY &&
459                     found_type != BTRFS_DIR_INDEX_KEY &&
460                     found_type != BTRFS_EXTENT_DATA_KEY)
461                         break;
462
463                 item_end = btrfs_disk_key_offset(found_key);
464                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
465                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
466                                             path->slots[0],
467                                             struct btrfs_file_extent_item);
468                         if (btrfs_file_extent_type(fi) !=
469                             BTRFS_FILE_EXTENT_INLINE) {
470                                 item_end += btrfs_file_extent_num_blocks(fi) <<
471                                                 inode->i_blkbits;
472                         }
473                 }
474                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
475                         ret = btrfs_csum_truncate(trans, root, path,
476                                                   inode->i_size);
477                         BUG_ON(ret);
478                 }
479                 if (item_end < inode->i_size) {
480                         if (found_type) {
481                                 btrfs_set_key_type(&key, found_type - 1);
482                                 continue;
483                         }
484                         break;
485                 }
486                 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
487                         del_item = 1;
488                 else
489                         del_item = 0;
490                 found_extent = 0;
491
492                 /* FIXME, shrink the extent if the ref count is only 1 */
493                 if (found_type == BTRFS_EXTENT_DATA_KEY &&
494                            btrfs_file_extent_type(fi) !=
495                            BTRFS_FILE_EXTENT_INLINE) {
496                         u64 num_dec;
497                         if (!del_item) {
498                                 u64 orig_num_blocks =
499                                         btrfs_file_extent_num_blocks(fi);
500                                 extent_num_blocks = inode->i_size -
501                                         btrfs_disk_key_offset(found_key) +
502                                         root->blocksize - 1;
503                                 extent_num_blocks >>= inode->i_blkbits;
504                                 btrfs_set_file_extent_num_blocks(fi,
505                                                          extent_num_blocks);
506                                 inode->i_blocks -= (orig_num_blocks -
507                                         extent_num_blocks) << 3;
508                                 mark_buffer_dirty(path->nodes[0]);
509                         } else {
510                                 extent_start =
511                                         btrfs_file_extent_disk_blocknr(fi);
512                                 extent_num_blocks =
513                                         btrfs_file_extent_disk_num_blocks(fi);
514                                 /* FIXME blocksize != 4096 */
515                                 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
516                                 if (extent_start != 0) {
517                                         found_extent = 1;
518                                         inode->i_blocks -= num_dec;
519                                 }
520                         }
521                 }
522                 if (del_item) {
523                         ret = btrfs_del_item(trans, root, path);
524                         if (ret)
525                                 goto error;
526                 } else {
527                         break;
528                 }
529                 btrfs_release_path(root, path);
530                 if (found_extent) {
531                         ret = btrfs_free_extent(trans, root, extent_start,
532                                                 extent_num_blocks, 0);
533                         BUG_ON(ret);
534                 }
535         }
536         ret = 0;
537 error:
538         btrfs_release_path(root, path);
539         btrfs_free_path(path);
540         inode->i_sb->s_dirt = 1;
541         return ret;
542 }
543
544 /*
545  * taken from block_truncate_page, but does cow as it zeros out
546  * any bytes left in the last page in the file.
547  */
548 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
549 {
550         struct inode *inode = mapping->host;
551         unsigned blocksize = 1 << inode->i_blkbits;
552         pgoff_t index = from >> PAGE_CACHE_SHIFT;
553         unsigned offset = from & (PAGE_CACHE_SIZE-1);
554         struct page *page;
555         char *kaddr;
556         int ret = 0;
557         struct btrfs_root *root = BTRFS_I(inode)->root;
558         u64 alloc_hint = 0;
559         struct btrfs_key ins;
560         struct btrfs_trans_handle *trans;
561
562         if ((offset & (blocksize - 1)) == 0)
563                 goto out;
564
565         ret = -ENOMEM;
566         page = grab_cache_page(mapping, index);
567         if (!page)
568                 goto out;
569
570         if (!PageUptodate(page)) {
571                 ret = btrfs_readpage(NULL, page);
572                 lock_page(page);
573                 if (!PageUptodate(page)) {
574                         ret = -EIO;
575                         goto out;
576                 }
577         }
578         mutex_lock(&root->fs_info->fs_mutex);
579         trans = btrfs_start_transaction(root, 1);
580         btrfs_set_trans_block_group(trans, inode);
581
582         ret = btrfs_drop_extents(trans, root, inode,
583                                  page->index << PAGE_CACHE_SHIFT,
584                                  (page->index + 1) << PAGE_CACHE_SHIFT,
585                                  &alloc_hint);
586         if (ret)
587                 goto out;
588         ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
589                                  alloc_hint, (u64)-1, &ins, 1);
590         if (ret)
591                 goto out;
592         ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
593                                        page->index << PAGE_CACHE_SHIFT,
594                                        ins.objectid, 1, 1);
595         if (ret)
596                 goto out;
597         SetPageChecked(page);
598         kaddr = kmap(page);
599         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
600         flush_dcache_page(page);
601         ret = btrfs_csum_file_block(trans, root, inode->i_ino,
602                               page->index << PAGE_CACHE_SHIFT,
603                               kaddr, PAGE_CACHE_SIZE);
604         kunmap(page);
605         btrfs_end_transaction(trans, root);
606         mutex_unlock(&root->fs_info->fs_mutex);
607
608         set_page_dirty(page);
609         unlock_page(page);
610         page_cache_release(page);
611 out:
612         return ret;
613 }
614
615 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
616 {
617         struct inode *inode = dentry->d_inode;
618         int err;
619
620         err = inode_change_ok(inode, attr);
621         if (err)
622                 return err;
623
624         if (S_ISREG(inode->i_mode) &&
625             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
626                 struct btrfs_trans_handle *trans;
627                 struct btrfs_root *root = BTRFS_I(inode)->root;
628                 u64 mask = root->blocksize - 1;
629                 u64 pos = (inode->i_size + mask) & ~mask;
630                 u64 hole_size;
631
632                 if (attr->ia_size <= pos)
633                         goto out;
634
635                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
636
637                 hole_size = (attr->ia_size - pos + mask) & ~mask;
638                 hole_size >>= inode->i_blkbits;
639
640                 mutex_lock(&root->fs_info->fs_mutex);
641                 trans = btrfs_start_transaction(root, 1);
642                 btrfs_set_trans_block_group(trans, inode);
643                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
644                                                pos, 0, 0, hole_size);
645                 btrfs_end_transaction(trans, root);
646                 mutex_unlock(&root->fs_info->fs_mutex);
647                 if (err)
648                         return err;
649         }
650 out:
651         err = inode_setattr(inode, attr);
652
653         return err;
654 }
655 void btrfs_delete_inode(struct inode *inode)
656 {
657         struct btrfs_trans_handle *trans;
658         struct btrfs_root *root = BTRFS_I(inode)->root;
659         int ret;
660
661         truncate_inode_pages(&inode->i_data, 0);
662         if (is_bad_inode(inode)) {
663                 goto no_delete;
664         }
665         inode->i_size = 0;
666         mutex_lock(&root->fs_info->fs_mutex);
667         trans = btrfs_start_transaction(root, 1);
668         btrfs_set_trans_block_group(trans, inode);
669         ret = btrfs_truncate_in_trans(trans, root, inode);
670         if (ret)
671                 goto no_delete_lock;
672         ret = btrfs_free_inode(trans, root, inode);
673         if (ret)
674                 goto no_delete_lock;
675         btrfs_end_transaction(trans, root);
676         mutex_unlock(&root->fs_info->fs_mutex);
677         btrfs_btree_balance_dirty(root);
678         return;
679
680 no_delete_lock:
681         btrfs_end_transaction(trans, root);
682         mutex_unlock(&root->fs_info->fs_mutex);
683         btrfs_btree_balance_dirty(root);
684 no_delete:
685         clear_inode(inode);
686 }
687
688 /*
689  * this returns the key found in the dir entry in the location pointer.
690  * If no dir entries were found, location->objectid is 0.
691  */
692 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
693                                struct btrfs_key *location)
694 {
695         const char *name = dentry->d_name.name;
696         int namelen = dentry->d_name.len;
697         struct btrfs_dir_item *di;
698         struct btrfs_path *path;
699         struct btrfs_root *root = BTRFS_I(dir)->root;
700         int ret;
701
702         path = btrfs_alloc_path();
703         BUG_ON(!path);
704         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
705                                     namelen, 0);
706         if (!di || IS_ERR(di)) {
707                 location->objectid = 0;
708                 ret = 0;
709                 goto out;
710         }
711         btrfs_disk_key_to_cpu(location, &di->location);
712 out:
713         btrfs_release_path(root, path);
714         btrfs_free_path(path);
715         return ret;
716 }
717
718 /*
719  * when we hit a tree root in a directory, the btrfs part of the inode
720  * needs to be changed to reflect the root directory of the tree root.  This
721  * is kind of like crossing a mount point.
722  */
723 static int fixup_tree_root_location(struct btrfs_root *root,
724                              struct btrfs_key *location,
725                              struct btrfs_root **sub_root)
726 {
727         struct btrfs_path *path;
728         struct btrfs_root_item *ri;
729
730         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
731                 return 0;
732         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
733                 return 0;
734
735         path = btrfs_alloc_path();
736         BUG_ON(!path);
737         mutex_lock(&root->fs_info->fs_mutex);
738
739         *sub_root = btrfs_read_fs_root(root->fs_info, location);
740         if (IS_ERR(*sub_root))
741                 return PTR_ERR(*sub_root);
742
743         ri = &(*sub_root)->root_item;
744         location->objectid = btrfs_root_dirid(ri);
745         location->flags = 0;
746         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
747         location->offset = 0;
748
749         btrfs_free_path(path);
750         mutex_unlock(&root->fs_info->fs_mutex);
751         return 0;
752 }
753
754 static int btrfs_init_locked_inode(struct inode *inode, void *p)
755 {
756         struct btrfs_iget_args *args = p;
757         inode->i_ino = args->ino;
758         BTRFS_I(inode)->root = args->root;
759         return 0;
760 }
761
762 static int btrfs_find_actor(struct inode *inode, void *opaque)
763 {
764         struct btrfs_iget_args *args = opaque;
765         return (args->ino == inode->i_ino &&
766                 args->root == BTRFS_I(inode)->root);
767 }
768
769 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
770                                 struct btrfs_root *root)
771 {
772         struct inode *inode;
773         struct btrfs_iget_args args;
774         args.ino = objectid;
775         args.root = root;
776
777         inode = iget5_locked(s, objectid, btrfs_find_actor,
778                              btrfs_init_locked_inode,
779                              (void *)&args);
780         return inode;
781 }
782
783 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
784                                    struct nameidata *nd)
785 {
786         struct inode * inode;
787         struct btrfs_inode *bi = BTRFS_I(dir);
788         struct btrfs_root *root = bi->root;
789         struct btrfs_root *sub_root = root;
790         struct btrfs_key location;
791         int ret;
792
793         if (dentry->d_name.len > BTRFS_NAME_LEN)
794                 return ERR_PTR(-ENAMETOOLONG);
795         mutex_lock(&root->fs_info->fs_mutex);
796         ret = btrfs_inode_by_name(dir, dentry, &location);
797         mutex_unlock(&root->fs_info->fs_mutex);
798         if (ret < 0)
799                 return ERR_PTR(ret);
800         inode = NULL;
801         if (location.objectid) {
802                 ret = fixup_tree_root_location(root, &location, &sub_root);
803                 if (ret < 0)
804                         return ERR_PTR(ret);
805                 if (ret > 0)
806                         return ERR_PTR(-ENOENT);
807                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
808                                           sub_root);
809                 if (!inode)
810                         return ERR_PTR(-EACCES);
811                 if (inode->i_state & I_NEW) {
812                         /* the inode and parent dir are two different roots */
813                         if (sub_root != root) {
814                                 igrab(inode);
815                                 sub_root->inode = inode;
816                         }
817                         BTRFS_I(inode)->root = sub_root;
818                         memcpy(&BTRFS_I(inode)->location, &location,
819                                sizeof(location));
820                         btrfs_read_locked_inode(inode);
821                         unlock_new_inode(inode);
822                 }
823         }
824         return d_splice_alias(inode, dentry);
825 }
826
827 /*
828  * readahead one full node of leaves as long as their keys include
829  * the objectid supplied
830  */
831 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
832                          u64 objectid)
833 {
834         struct btrfs_node *node;
835         int i;
836         u32 nritems;
837         u64 item_objectid;
838         u64 blocknr;
839         int slot;
840         int ret;
841
842         if (!path->nodes[1])
843                 return;
844         node = btrfs_buffer_node(path->nodes[1]);
845         slot = path->slots[1];
846         nritems = btrfs_header_nritems(&node->header);
847         for (i = slot + 1; i < nritems; i++) {
848                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
849                 if (item_objectid != objectid)
850                         break;
851                 blocknr = btrfs_node_blockptr(node, i);
852                 ret = readahead_tree_block(root, blocknr);
853                 if (ret)
854                         break;
855         }
856 }
857 static unsigned char btrfs_filetype_table[] = {
858         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
859 };
860
861 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
862 {
863         struct inode *inode = filp->f_path.dentry->d_inode;
864         struct btrfs_root *root = BTRFS_I(inode)->root;
865         struct btrfs_item *item;
866         struct btrfs_dir_item *di;
867         struct btrfs_key key;
868         struct btrfs_path *path;
869         int ret;
870         u32 nritems;
871         struct btrfs_leaf *leaf;
872         int slot;
873         int advance;
874         unsigned char d_type;
875         int over = 0;
876         u32 di_cur;
877         u32 di_total;
878         u32 di_len;
879         int key_type = BTRFS_DIR_INDEX_KEY;
880
881         /* FIXME, use a real flag for deciding about the key type */
882         if (root->fs_info->tree_root == root)
883                 key_type = BTRFS_DIR_ITEM_KEY;
884         mutex_lock(&root->fs_info->fs_mutex);
885         key.objectid = inode->i_ino;
886         key.flags = 0;
887         btrfs_set_key_type(&key, key_type);
888         key.offset = filp->f_pos;
889         path = btrfs_alloc_path();
890         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
891         if (ret < 0)
892                 goto err;
893         advance = 0;
894         reada_leaves(root, path, inode->i_ino);
895         while(1) {
896                 leaf = btrfs_buffer_leaf(path->nodes[0]);
897                 nritems = btrfs_header_nritems(&leaf->header);
898                 slot = path->slots[0];
899                 if (advance || slot >= nritems) {
900                         if (slot >= nritems -1) {
901                                 reada_leaves(root, path, inode->i_ino);
902                                 ret = btrfs_next_leaf(root, path);
903                                 if (ret)
904                                         break;
905                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
906                                 nritems = btrfs_header_nritems(&leaf->header);
907                                 slot = path->slots[0];
908                         } else {
909                                 slot++;
910                                 path->slots[0]++;
911                         }
912                 }
913                 advance = 1;
914                 item = leaf->items + slot;
915                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
916                         break;
917                 if (btrfs_disk_key_type(&item->key) != key_type)
918                         break;
919                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
920                         continue;
921                 filp->f_pos = btrfs_disk_key_offset(&item->key);
922                 advance = 1;
923                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
924                 di_cur = 0;
925                 di_total = btrfs_item_size(leaf->items + slot);
926                 while(di_cur < di_total) {
927                         d_type = btrfs_filetype_table[btrfs_dir_type(di)];
928                         over = filldir(dirent, (const char *)(di + 1),
929                                        btrfs_dir_name_len(di),
930                                        btrfs_disk_key_offset(&item->key),
931                                        btrfs_disk_key_objectid(&di->location),
932                                        d_type);
933                         if (over)
934                                 goto nopos;
935                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
936                         di_cur += di_len;
937                         di = (struct btrfs_dir_item *)((char *)di + di_len);
938                 }
939         }
940         filp->f_pos++;
941 nopos:
942         ret = 0;
943 err:
944         btrfs_release_path(root, path);
945         btrfs_free_path(path);
946         mutex_unlock(&root->fs_info->fs_mutex);
947         return ret;
948 }
949
950 int btrfs_write_inode(struct inode *inode, int wait)
951 {
952         struct btrfs_root *root = BTRFS_I(inode)->root;
953         struct btrfs_trans_handle *trans;
954         int ret = 0;
955
956         if (wait) {
957                 mutex_lock(&root->fs_info->fs_mutex);
958                 trans = btrfs_start_transaction(root, 1);
959                 btrfs_set_trans_block_group(trans, inode);
960                 ret = btrfs_commit_transaction(trans, root);
961                 mutex_unlock(&root->fs_info->fs_mutex);
962         }
963         return ret;
964 }
965
966 /*
967  * This is somewhat expensive, updating the tree every time the
968  * inode changes.  But, it is most likely to find the inode in cache.
969  * FIXME, needs more benchmarking...there are no reasons other than performance
970  * to keep or drop this code.
971  */
972 void btrfs_dirty_inode(struct inode *inode)
973 {
974         struct btrfs_root *root = BTRFS_I(inode)->root;
975         struct btrfs_trans_handle *trans;
976
977         mutex_lock(&root->fs_info->fs_mutex);
978         trans = btrfs_start_transaction(root, 1);
979         btrfs_set_trans_block_group(trans, inode);
980         btrfs_update_inode(trans, root, inode);
981         btrfs_end_transaction(trans, root);
982         mutex_unlock(&root->fs_info->fs_mutex);
983 }
984
985 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
986                                      struct btrfs_root *root,
987                                      u64 objectid,
988                                      struct btrfs_block_group_cache *group,
989                                      int mode)
990 {
991         struct inode *inode;
992         struct btrfs_inode_item inode_item;
993         struct btrfs_key *location;
994         int ret;
995         int owner;
996
997         inode = new_inode(root->fs_info->sb);
998         if (!inode)
999                 return ERR_PTR(-ENOMEM);
1000
1001         BTRFS_I(inode)->root = root;
1002         if (mode & S_IFDIR)
1003                 owner = 0;
1004         else
1005                 owner = 1;
1006         group = btrfs_find_block_group(root, group, 0, 0, owner);
1007         BTRFS_I(inode)->block_group = group;
1008
1009         inode->i_uid = current->fsuid;
1010         inode->i_gid = current->fsgid;
1011         inode->i_mode = mode;
1012         inode->i_ino = objectid;
1013         inode->i_blocks = 0;
1014         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1015         fill_inode_item(&inode_item, inode);
1016         location = &BTRFS_I(inode)->location;
1017         location->objectid = objectid;
1018         location->flags = 0;
1019         location->offset = 0;
1020         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1021
1022         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
1023         if (ret)
1024                 return ERR_PTR(ret);
1025         insert_inode_hash(inode);
1026         return inode;
1027 }
1028
1029 static inline u8 btrfs_inode_type(struct inode *inode)
1030 {
1031         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1032 }
1033
1034 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1035                             struct dentry *dentry, struct inode *inode)
1036 {
1037         int ret;
1038         struct btrfs_key key;
1039         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1040         key.objectid = inode->i_ino;
1041         key.flags = 0;
1042         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1043         key.offset = 0;
1044
1045         ret = btrfs_insert_dir_item(trans, root,
1046                                     dentry->d_name.name, dentry->d_name.len,
1047                                     dentry->d_parent->d_inode->i_ino,
1048                                     &key, btrfs_inode_type(inode));
1049         if (ret == 0) {
1050                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1051                 ret = btrfs_update_inode(trans, root,
1052                                          dentry->d_parent->d_inode);
1053         }
1054         return ret;
1055 }
1056
1057 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1058                             struct dentry *dentry, struct inode *inode)
1059 {
1060         int err = btrfs_add_link(trans, dentry, inode);
1061         if (!err) {
1062                 d_instantiate(dentry, inode);
1063                 return 0;
1064         }
1065         if (err > 0)
1066                 err = -EEXIST;
1067         return err;
1068 }
1069
1070 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1071                         int mode, struct nameidata *nd)
1072 {
1073         struct btrfs_trans_handle *trans;
1074         struct btrfs_root *root = BTRFS_I(dir)->root;
1075         struct inode *inode;
1076         int err;
1077         int drop_inode = 0;
1078         u64 objectid;
1079
1080         mutex_lock(&root->fs_info->fs_mutex);
1081         trans = btrfs_start_transaction(root, 1);
1082         btrfs_set_trans_block_group(trans, dir);
1083
1084         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1085         if (err) {
1086                 err = -ENOSPC;
1087                 goto out_unlock;
1088         }
1089
1090         inode = btrfs_new_inode(trans, root, objectid,
1091                                 BTRFS_I(dir)->block_group, mode);
1092         err = PTR_ERR(inode);
1093         if (IS_ERR(inode))
1094                 goto out_unlock;
1095
1096         btrfs_set_trans_block_group(trans, inode);
1097         err = btrfs_add_nondir(trans, dentry, inode);
1098         if (err)
1099                 drop_inode = 1;
1100         else {
1101                 inode->i_mapping->a_ops = &btrfs_aops;
1102                 inode->i_fop = &btrfs_file_operations;
1103                 inode->i_op = &btrfs_file_inode_operations;
1104         }
1105         dir->i_sb->s_dirt = 1;
1106         btrfs_update_inode_block_group(trans, inode);
1107         btrfs_update_inode_block_group(trans, dir);
1108 out_unlock:
1109         btrfs_end_transaction(trans, root);
1110         mutex_unlock(&root->fs_info->fs_mutex);
1111
1112         if (drop_inode) {
1113                 inode_dec_link_count(inode);
1114                 iput(inode);
1115         }
1116         btrfs_btree_balance_dirty(root);
1117         return err;
1118 }
1119
1120 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1121                       struct dentry *dentry)
1122 {
1123         struct btrfs_trans_handle *trans;
1124         struct btrfs_root *root = BTRFS_I(dir)->root;
1125         struct inode *inode = old_dentry->d_inode;
1126         int err;
1127         int drop_inode = 0;
1128
1129         if (inode->i_nlink == 0)
1130                 return -ENOENT;
1131
1132         inc_nlink(inode);
1133         mutex_lock(&root->fs_info->fs_mutex);
1134         trans = btrfs_start_transaction(root, 1);
1135         btrfs_set_trans_block_group(trans, dir);
1136         atomic_inc(&inode->i_count);
1137         err = btrfs_add_nondir(trans, dentry, inode);
1138         if (err)
1139                 drop_inode = 1;
1140         dir->i_sb->s_dirt = 1;
1141         btrfs_update_inode_block_group(trans, dir);
1142         err = btrfs_update_inode(trans, root, inode);
1143         if (err)
1144                 drop_inode = 1;
1145
1146         btrfs_end_transaction(trans, root);
1147         mutex_unlock(&root->fs_info->fs_mutex);
1148
1149         if (drop_inode) {
1150                 inode_dec_link_count(inode);
1151                 iput(inode);
1152         }
1153         btrfs_btree_balance_dirty(root);
1154         return err;
1155 }
1156
1157 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1158                                 struct btrfs_root *root,
1159                                 u64 objectid, u64 dirid)
1160 {
1161         int ret;
1162         char buf[2];
1163         struct btrfs_key key;
1164
1165         buf[0] = '.';
1166         buf[1] = '.';
1167
1168         key.objectid = objectid;
1169         key.offset = 0;
1170         key.flags = 0;
1171         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1172
1173         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1174                                     &key, BTRFS_FT_DIR);
1175         if (ret)
1176                 goto error;
1177         key.objectid = dirid;
1178         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1179                                     &key, BTRFS_FT_DIR);
1180         if (ret)
1181                 goto error;
1182 error:
1183         return ret;
1184 }
1185
1186 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1187 {
1188         struct inode *inode;
1189         struct btrfs_trans_handle *trans;
1190         struct btrfs_root *root = BTRFS_I(dir)->root;
1191         int err = 0;
1192         int drop_on_err = 0;
1193         u64 objectid;
1194
1195         mutex_lock(&root->fs_info->fs_mutex);
1196         trans = btrfs_start_transaction(root, 1);
1197         btrfs_set_trans_block_group(trans, dir);
1198         if (IS_ERR(trans)) {
1199                 err = PTR_ERR(trans);
1200                 goto out_unlock;
1201         }
1202
1203         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1204         if (err) {
1205                 err = -ENOSPC;
1206                 goto out_unlock;
1207         }
1208
1209         inode = btrfs_new_inode(trans, root, objectid,
1210                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1211         if (IS_ERR(inode)) {
1212                 err = PTR_ERR(inode);
1213                 goto out_fail;
1214         }
1215         drop_on_err = 1;
1216         inode->i_op = &btrfs_dir_inode_operations;
1217         inode->i_fop = &btrfs_dir_file_operations;
1218         btrfs_set_trans_block_group(trans, inode);
1219
1220         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1221         if (err)
1222                 goto out_fail;
1223
1224         inode->i_size = 6;
1225         err = btrfs_update_inode(trans, root, inode);
1226         if (err)
1227                 goto out_fail;
1228         err = btrfs_add_link(trans, dentry, inode);
1229         if (err)
1230                 goto out_fail;
1231         d_instantiate(dentry, inode);
1232         drop_on_err = 0;
1233         dir->i_sb->s_dirt = 1;
1234         btrfs_update_inode_block_group(trans, inode);
1235         btrfs_update_inode_block_group(trans, dir);
1236
1237 out_fail:
1238         btrfs_end_transaction(trans, root);
1239 out_unlock:
1240         mutex_unlock(&root->fs_info->fs_mutex);
1241         if (drop_on_err)
1242                 iput(inode);
1243         btrfs_btree_balance_dirty(root);
1244         return err;
1245 }
1246
/*
 * Flag bits for the "create" argument of the btrfs get_block helpers.
 *
 * FIBMAP and others want to pass in a fake buffer head.  They need to
 * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy
 * any packed file data into the fake bh.
 */
#define BTRFS_GET_BLOCK_NO_CREATE       0
#define BTRFS_GET_BLOCK_CREATE          (1 << 0)
#define BTRFS_GET_BLOCK_NO_DIRECT       (1 << 1)
1255
1256 /*
1257  * FIXME create==1 doe not work.
1258  */
1259 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1260                                 struct buffer_head *result, int create)
1261 {
1262         int ret;
1263         int err = 0;
1264         u64 blocknr;
1265         u64 extent_start = 0;
1266         u64 extent_end = 0;
1267         u64 objectid = inode->i_ino;
1268         u32 found_type;
1269         u64 alloc_hint = 0;
1270         struct btrfs_path *path;
1271         struct btrfs_root *root = BTRFS_I(inode)->root;
1272         struct btrfs_file_extent_item *item;
1273         struct btrfs_leaf *leaf;
1274         struct btrfs_disk_key *found_key;
1275         struct btrfs_trans_handle *trans = NULL;
1276
1277         path = btrfs_alloc_path();
1278         BUG_ON(!path);
1279         if (create & BTRFS_GET_BLOCK_CREATE) {
1280                 /*
1281                  * danger!, this only works if the page is properly up
1282                  * to date somehow
1283                  */
1284                 trans = btrfs_start_transaction(root, 1);
1285                 if (!trans) {
1286                         err = -ENOMEM;
1287                         goto out;
1288                 }
1289                 ret = btrfs_drop_extents(trans, root, inode,
1290                                          iblock << inode->i_blkbits,
1291                                          (iblock + 1) << inode->i_blkbits,
1292                                          &alloc_hint);
1293                 BUG_ON(ret);
1294         }
1295
1296         ret = btrfs_lookup_file_extent(NULL, root, path,
1297                                        objectid,
1298                                        iblock << inode->i_blkbits, 0);
1299         if (ret < 0) {
1300                 err = ret;
1301                 goto out;
1302         }
1303
1304         if (ret != 0) {
1305                 if (path->slots[0] == 0) {
1306                         btrfs_release_path(root, path);
1307                         goto not_found;
1308                 }
1309                 path->slots[0]--;
1310         }
1311
1312         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1313                               struct btrfs_file_extent_item);
1314         leaf = btrfs_buffer_leaf(path->nodes[0]);
1315         blocknr = btrfs_file_extent_disk_blocknr(item);
1316         blocknr += btrfs_file_extent_offset(item);
1317
1318         /* are we inside the extent that was found? */
1319         found_key = &leaf->items[path->slots[0]].key;
1320         found_type = btrfs_disk_key_type(found_key);
1321         if (btrfs_disk_key_objectid(found_key) != objectid ||
1322             found_type != BTRFS_EXTENT_DATA_KEY) {
1323                 extent_end = 0;
1324                 extent_start = 0;
1325                 goto not_found;
1326         }
1327         found_type = btrfs_file_extent_type(item);
1328         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1329         if (found_type == BTRFS_FILE_EXTENT_REG) {
1330                 extent_start = extent_start >> inode->i_blkbits;
1331                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1332                 err = 0;
1333                 if (btrfs_file_extent_disk_blocknr(item) == 0)
1334                         goto out;
1335                 if (iblock >= extent_start && iblock < extent_end) {
1336                         btrfs_map_bh_to_logical(root, result, blocknr +
1337                                                 iblock - extent_start);
1338                         goto out;
1339                 }
1340         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1341                 char *ptr;
1342                 char *map;
1343                 u32 size;
1344
1345                 if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
1346                         err = -EINVAL;
1347                         goto out;
1348                 }
1349                 size = btrfs_file_extent_inline_len(leaf->items +
1350                                                     path->slots[0]);
1351                 extent_end = (extent_start + size) >> inode->i_blkbits;
1352                 extent_start >>= inode->i_blkbits;
1353                 if (iblock < extent_start || iblock > extent_end) {
1354                         goto not_found;
1355                 }
1356                 ptr = btrfs_file_extent_inline_start(item);
1357                 map = kmap(result->b_page);
1358                 memcpy(map, ptr, size);
1359                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1360                 flush_dcache_page(result->b_page);
1361                 kunmap(result->b_page);
1362                 set_buffer_uptodate(result);
1363                 SetPageChecked(result->b_page);
1364                 btrfs_map_bh_to_logical(root, result, 0);
1365         }
1366 not_found:
1367         if (create & BTRFS_GET_BLOCK_CREATE) {
1368                 struct btrfs_key ins;
1369                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1370                                          1, alloc_hint, (u64)-1,
1371                                          &ins, 1);
1372                 if (ret) {
1373                         err = ret;
1374                         goto out;
1375                 }
1376                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1377                                                iblock << inode->i_blkbits,
1378                                                ins.objectid, ins.offset,
1379                                                ins.offset);
1380                 if (ret) {
1381                         err = ret;
1382                         goto out;
1383                 }
1384                 btrfs_map_bh_to_logical(root, result, ins.objectid);
1385         }
1386 out:
1387         if (trans) {
1388                 ret = btrfs_end_transaction(trans, root);
1389                 if (!err)
1390                         err = ret;
1391         }
1392         btrfs_free_path(path);
1393         return err;
1394 }
1395
1396 int btrfs_get_block(struct inode *inode, sector_t iblock,
1397                     struct buffer_head *result, int create)
1398 {
1399         int err;
1400         struct btrfs_root *root = BTRFS_I(inode)->root;
1401         mutex_lock(&root->fs_info->fs_mutex);
1402         err = btrfs_get_block_lock(inode, iblock, result, create);
1403         mutex_unlock(&root->fs_info->fs_mutex);
1404         return err;
1405 }
1406
1407 static int btrfs_get_block_csum(struct inode *inode, sector_t iblock,
1408                                 struct buffer_head *result, int create)
1409 {
1410         int ret;
1411         struct btrfs_root *root = BTRFS_I(inode)->root;
1412         struct page *page = result->b_page;
1413         u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result);
1414         struct btrfs_csum_item *item;
1415         struct btrfs_path *path = NULL;
1416
1417         mutex_lock(&root->fs_info->fs_mutex);
1418         ret = btrfs_get_block_lock(inode, iblock, result, create);
1419         if (ret)
1420                 goto out;
1421
1422         path = btrfs_alloc_path();
1423         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0);
1424         if (IS_ERR(item)) {
1425                 ret = PTR_ERR(item);
1426                 /* a csum that isn't present is a preallocated region. */
1427                 if (ret == -ENOENT || ret == -EFBIG)
1428                         ret = 0;
1429                 result->b_private = NULL;
1430                 goto out;
1431         }
1432         memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE);
1433 out:
1434         if (path)
1435                 btrfs_free_path(path);
1436         mutex_unlock(&root->fs_info->fs_mutex);
1437         return ret;
1438 }
1439
1440 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1441                            struct buffer_head *result, int create)
1442 {
1443         struct btrfs_root *root = BTRFS_I(inode)->root;
1444         mutex_lock(&root->fs_info->fs_mutex);
1445         btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
1446         mutex_unlock(&root->fs_info->fs_mutex);
1447         return 0;
1448 }
1449
1450 static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1451 {
1452         return generic_block_bmap(as, block, btrfs_get_block_bmap);
1453 }
1454
1455 static int btrfs_prepare_write(struct file *file, struct page *page,
1456                                unsigned from, unsigned to)
1457 {
1458         return block_prepare_write(page, from, to, btrfs_get_block);
1459 }
1460
1461 static void buffer_io_error(struct buffer_head *bh)
1462 {
1463         char b[BDEVNAME_SIZE];
1464
1465         printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
1466                         bdevname(bh->b_bdev, b),
1467                         (unsigned long long)bh->b_blocknr);
1468 }
1469
1470 /*
1471  * I/O completion handler for block_read_full_page() - pages
1472  * which come unlocked at the end of I/O.
1473  */
1474 static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
1475 {
1476         unsigned long flags;
1477         struct buffer_head *first;
1478         struct buffer_head *tmp;
1479         struct page *page;
1480         int page_uptodate = 1;
1481         struct inode *inode;
1482         int ret;
1483
1484         BUG_ON(!buffer_async_read(bh));
1485
1486         page = bh->b_page;
1487         inode = page->mapping->host;
1488         if (uptodate) {
1489                 void *kaddr;
1490                 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
1491                 if (bh->b_private) {
1492                         char csum[BTRFS_CRC32_SIZE];
1493                         kaddr = kmap_atomic(page, KM_IRQ0);
1494                         ret = btrfs_csum_data(root, kaddr + bh_offset(bh),
1495                                               bh->b_size, csum);
1496                         BUG_ON(ret);
1497                         if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) {
1498                                 u64 offset;
1499                                 offset = (page->index << PAGE_CACHE_SHIFT) +
1500                                         bh_offset(bh);
1501                                 printk("btrfs csum failed ino %lu off %llu\n",
1502                                        page->mapping->host->i_ino,
1503                                        (unsigned long long)offset);
1504                                 memset(kaddr + bh_offset(bh), 1, bh->b_size);
1505                                 flush_dcache_page(page);
1506                         }
1507                         kunmap_atomic(kaddr, KM_IRQ0);
1508                 }
1509                 set_buffer_uptodate(bh);
1510         } else {
1511                 clear_buffer_uptodate(bh);
1512                 if (printk_ratelimit())
1513                         buffer_io_error(bh);
1514                 SetPageError(page);
1515         }
1516
1517         /*
1518          * Be _very_ careful from here on. Bad things can happen if
1519          * two buffer heads end IO at almost the same time and both
1520          * decide that the page is now completely done.
1521          */
1522         first = page_buffers(page);
1523         local_irq_save(flags);
1524         bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
1525         clear_buffer_async_read(bh);
1526         unlock_buffer(bh);
1527         tmp = bh;
1528         do {
1529                 if (!buffer_uptodate(tmp))
1530                         page_uptodate = 0;
1531                 if (buffer_async_read(tmp)) {
1532                         BUG_ON(!buffer_locked(tmp));
1533                         goto still_busy;
1534                 }
1535                 tmp = tmp->b_this_page;
1536         } while (tmp != bh);
1537         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
1538         local_irq_restore(flags);
1539
1540         /*
1541          * If none of the buffers had errors and they are all
1542          * uptodate then we can set the page uptodate.
1543          */
1544         if (page_uptodate && !PageError(page))
1545                 SetPageUptodate(page);
1546         unlock_page(page);
1547         return;
1548
1549 still_busy:
1550         bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
1551         local_irq_restore(flags);
1552         return;
1553 }
1554
1555 /*
1556  * Generic "read page" function for block devices that have the normal
1557  * get_block functionality. This is most of the block device filesystems.
1558  * Reads the page asynchronously --- the unlock_buffer() and
1559  * set/clear_buffer_uptodate() functions propagate buffer state into the
1560  * page struct once IO has completed.
1561  */
1562 int btrfs_readpage(struct file *file, struct page *page)
1563 {
1564         struct inode *inode = page->mapping->host;
1565         sector_t iblock, lblock;
1566         struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
1567         unsigned int blocksize;
1568         int nr, i;
1569         int fully_mapped = 1;
1570
1571         BUG_ON(!PageLocked(page));
1572         blocksize = 1 << inode->i_blkbits;
1573         if (!page_has_buffers(page))
1574                 create_empty_buffers(page, blocksize, 0);
1575         head = page_buffers(page);
1576
1577         iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1578         lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
1579         bh = head;
1580         nr = 0;
1581         i = 0;
1582
1583         do {
1584                 if (buffer_uptodate(bh))
1585                         continue;
1586
1587                 if (!buffer_mapped(bh)) {
1588                         int err = 0;
1589
1590                         fully_mapped = 0;
1591                         if (iblock < lblock) {
1592                                 WARN_ON(bh->b_size != blocksize);
1593                                 err = btrfs_get_block_csum(inode, iblock,
1594                                                            bh, 0);
1595                                 if (err)
1596                                         SetPageError(page);
1597                         }
1598                         if (!buffer_mapped(bh)) {
1599                                 void *kaddr = kmap_atomic(page, KM_USER0);
1600                                 memset(kaddr + i * blocksize, 0, blocksize);
1601                                 flush_dcache_page(page);
1602                                 kunmap_atomic(kaddr, KM_USER0);
1603                                 if (!err)
1604                                         set_buffer_uptodate(bh);
1605                                 continue;
1606                         }
1607                         /*
1608                          * get_block() might have updated the buffer
1609                          * synchronously
1610                          */
1611                         if (buffer_uptodate(bh))
1612                                 continue;
1613                 }
1614                 arr[nr++] = bh;
1615         } while (i++, iblock++, (bh = bh->b_this_page) != head);
1616
1617         if (fully_mapped)
1618                 SetPageMappedToDisk(page);
1619
1620         if (!nr) {
1621                 /*
1622                  * All buffers are uptodate - we can set the page uptodate
1623                  * as well. But not if get_block() returned an error.
1624                  */
1625                 if (!PageError(page))
1626                         SetPageUptodate(page);
1627                 unlock_page(page);
1628                 return 0;
1629         }
1630
1631         /* Stage two: lock the buffers */
1632         for (i = 0; i < nr; i++) {
1633                 bh = arr[i];
1634                 lock_buffer(bh);
1635                 bh->b_end_io = btrfs_end_buffer_async_read;
1636                 set_buffer_async_read(bh);
1637         }
1638
1639         /*
1640          * Stage 3: start the IO.  Check for uptodateness
1641          * inside the buffer lock in case another process reading
1642          * the underlying blockdev brought it uptodate (the sct fix).
1643          */
1644         for (i = 0; i < nr; i++) {
1645                 bh = arr[i];
1646                 if (buffer_uptodate(bh))
1647                         btrfs_end_buffer_async_read(bh, 1);
1648                 else
1649                         submit_bh(READ, bh);
1650         }
1651         return 0;
1652 }
1653
1654 /*
1655  * Aside from a tiny bit of packed file data handling, this is the
1656  * same as the generic code.
1657  *
1658  * While block_write_full_page is writing back the dirty buffers under
1659  * the page lock, whoever dirtied the buffers may decide to clean them
1660  * again at any time.  We handle that by only looking at the buffer
1661  * state inside lock_buffer().
1662  *
1663  * If block_write_full_page() is called for regular writeback
1664  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1665  * locked buffer.   This only can happen if someone has written the buffer
1666  * directly, with submit_bh().  At the address_space level PageWriteback
1667  * prevents this contention from occurring.
1668  */
1669 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1670                                    struct writeback_control *wbc)
1671 {
1672         int err;
1673         sector_t block;
1674         sector_t last_block;
1675         struct buffer_head *bh, *head;
1676         const unsigned blocksize = 1 << inode->i_blkbits;
1677         int nr_underway = 0;
1678         struct btrfs_root *root = BTRFS_I(inode)->root;
1679
1680         BUG_ON(!PageLocked(page));
1681
1682         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1683
1684         /* no csumming allowed when from PF_MEMALLOC */
1685         if (current->flags & PF_MEMALLOC) {
1686                 redirty_page_for_writepage(wbc, page);
1687                 unlock_page(page);
1688                 return 0;
1689         }
1690
1691         if (!page_has_buffers(page)) {
1692                 create_empty_buffers(page, blocksize,
1693                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1694         }
1695
1696         /*
1697          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1698          * here, and the (potentially unmapped) buffers may become dirty at
1699          * any time.  If a buffer becomes dirty here after we've inspected it
1700          * then we just miss that fact, and the page stays dirty.
1701          *
1702          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1703          * handle that here by just cleaning them.
1704          */
1705
1706         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1707         head = page_buffers(page);
1708         bh = head;
1709
1710         /*
1711          * Get all the dirty buffers mapped to disk addresses and
1712          * handle any aliases from the underlying blockdev's mapping.
1713          */
1714         do {
1715                 if (block > last_block) {
1716                         /*
1717                          * mapped buffers outside i_size will occur, because
1718                          * this page can be outside i_size when there is a
1719                          * truncate in progress.
1720                          */
1721                         /*
1722                          * The buffer was zeroed by block_write_full_page()
1723                          */
1724                         clear_buffer_dirty(bh);
1725                         set_buffer_uptodate(bh);
1726                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1727                         WARN_ON(bh->b_size != blocksize);
1728                         err = btrfs_get_block(inode, block, bh, 0);
1729                         if (err) {
1730                                 goto recover;
1731                         }
1732                         if (buffer_new(bh)) {
1733                                 /* blockdev mappings never come here */
1734                                 clear_buffer_new(bh);
1735                         }
1736                 }
1737                 bh = bh->b_this_page;
1738                 block++;
1739         } while (bh != head);
1740
1741         do {
1742                 if (!buffer_mapped(bh))
1743                         continue;
1744                 /*
1745                  * If it's a fully non-blocking write attempt and we cannot
1746                  * lock the buffer then redirty the page.  Note that this can
1747                  * potentially cause a busy-wait loop from pdflush and kswapd
1748                  * activity, but those code paths have their own higher-level
1749                  * throttling.
1750                  */
1751                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1752                         lock_buffer(bh);
1753                 } else if (test_set_buffer_locked(bh)) {
1754                         redirty_page_for_writepage(wbc, page);
1755                         continue;
1756                 }
1757                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1758                         struct btrfs_trans_handle *trans;
1759                         int ret;
1760                         u64 off = page->index << PAGE_CACHE_SHIFT;
1761                         char *kaddr;
1762
1763                         off += bh_offset(bh);
1764                         mutex_lock(&root->fs_info->fs_mutex);
1765                         trans = btrfs_start_transaction(root, 1);
1766                         btrfs_set_trans_block_group(trans, inode);
1767                         kaddr = kmap(page);
1768                         btrfs_csum_file_block(trans, root, inode->i_ino,
1769                                                     off, kaddr + bh_offset(bh),
1770                                                     bh->b_size);
1771                         kunmap(page);
1772                         ret = btrfs_end_transaction(trans, root);
1773                         BUG_ON(ret);
1774                         mutex_unlock(&root->fs_info->fs_mutex);
1775                         mark_buffer_async_write(bh);
1776                 } else {
1777                         unlock_buffer(bh);
1778                 }
1779         } while ((bh = bh->b_this_page) != head);
1780
1781         /*
1782          * The page and its buffers are protected by PageWriteback(), so we can
1783          * drop the bh refcounts early.
1784          */
1785         BUG_ON(PageWriteback(page));
1786         set_page_writeback(page);
1787
1788         do {
1789                 struct buffer_head *next = bh->b_this_page;
1790                 if (buffer_async_write(bh)) {
1791                         submit_bh(WRITE, bh);
1792                         nr_underway++;
1793                 }
1794                 bh = next;
1795         } while (bh != head);
1796         unlock_page(page);
1797
1798         err = 0;
1799 done:
1800         if (nr_underway == 0) {
1801                 /*
1802                  * The page was marked dirty, but the buffers were
1803                  * clean.  Someone wrote them back by hand with
1804                  * ll_rw_block/submit_bh.  A rare case.
1805                  */
1806                 int uptodate = 1;
1807                 do {
1808                         if (!buffer_uptodate(bh)) {
1809                                 uptodate = 0;
1810                                 break;
1811                         }
1812                         bh = bh->b_this_page;
1813                 } while (bh != head);
1814                 if (uptodate)
1815                         SetPageUptodate(page);
1816                 end_page_writeback(page);
1817         }
1818         return err;
1819
1820 recover:
1821         /*
1822          * ENOSPC, or some other error.  We may already have added some
1823          * blocks to the file, so we need to write these out to avoid
1824          * exposing stale data.
1825          * The page is currently locked and not marked for writeback
1826          */
1827         bh = head;
1828         /* Recovery: lock and submit the mapped buffers */
1829         do {
1830                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1831                         lock_buffer(bh);
1832                         mark_buffer_async_write(bh);
1833                 } else {
1834                         /*
1835                          * The buffer may have been set dirty during
1836                          * attachment to a dirty page.
1837                          */
1838                         clear_buffer_dirty(bh);
1839                 }
1840         } while ((bh = bh->b_this_page) != head);
1841         SetPageError(page);
1842         BUG_ON(PageWriteback(page));
1843         set_page_writeback(page);
1844         do {
1845                 struct buffer_head *next = bh->b_this_page;
1846                 if (buffer_async_write(bh)) {
1847                         clear_buffer_dirty(bh);
1848                         submit_bh(WRITE, bh);
1849                         nr_underway++;
1850                 }
1851                 bh = next;
1852         } while (bh != head);
1853         unlock_page(page);
1854         goto done;
1855 }
1856
1857 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1858 {
1859         struct inode * const inode = page->mapping->host;
1860         loff_t i_size = i_size_read(inode);
1861         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1862         unsigned offset;
1863         void *kaddr;
1864
1865         /* Is the page fully inside i_size? */
1866         if (page->index < end_index)
1867                 return __btrfs_write_full_page(inode, page, wbc);
1868
1869         /* Is the page fully outside i_size? (truncate in progress) */
1870         offset = i_size & (PAGE_CACHE_SIZE-1);
1871         if (page->index >= end_index+1 || !offset) {
1872                 /*
1873                  * The page may have dirty, unmapped buffers.  For example,
1874                  * they may have been added in ext3_writepage().  Make them
1875                  * freeable here, so the page does not leak.
1876                  */
1877                 block_invalidatepage(page, 0);
1878                 unlock_page(page);
1879                 return 0; /* don't care */
1880         }
1881
1882         /*
1883          * The page straddles i_size.  It must be zeroed out on each and every
1884          * writepage invokation because it may be mmapped.  "A file is mapped
1885          * in multiples of the page size.  For a file that is not a multiple of
1886          * the  page size, the remaining memory is zeroed when mapped, and
1887          * writes to that region are not written out to the file."
1888          */
1889         kaddr = kmap_atomic(page, KM_USER0);
1890         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1891         flush_dcache_page(page);
1892         kunmap_atomic(kaddr, KM_USER0);
1893         return __btrfs_write_full_page(inode, page, wbc);
1894 }
1895
1896 /*
1897  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
1898  * called from a page fault handler when a page is first dirtied. Hence we must
1899  * be careful to check for EOF conditions here. We set the page up correctly
1900  * for a written page which means we get ENOSPC checking when writing into
1901  * holes and correct delalloc and unwritten extent mapping on filesystems that
1902  * support these features.
1903  *
1904  * We are not allowed to take the i_mutex here so we have to play games to
1905  * protect against truncate races as the page could now be beyond EOF.  Because
1906  * vmtruncate() writes the inode size before removing pages, once we have the
1907  * page lock we can determine safely if the page is beyond EOF. If it is not
1908  * beyond EOF, then the page is guaranteed safe against truncation until we
1909  * unlock the page.
1910  */
1911 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1912 {
1913         struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1914         unsigned long end;
1915         loff_t size;
1916         int ret = -EINVAL;
1917
1918         lock_page(page);
1919         wait_on_page_writeback(page);
1920         size = i_size_read(inode);
1921         if ((page->mapping != inode->i_mapping) ||
1922             ((page->index << PAGE_CACHE_SHIFT) > size)) {
1923                 /* page got truncated out from underneath us */
1924                 goto out_unlock;
1925         }
1926
1927         /* page is wholly or partially inside EOF */
1928         if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
1929                 end = size & ~PAGE_CACHE_MASK;
1930         else
1931                 end = PAGE_CACHE_SIZE;
1932
1933         ret = btrfs_prepare_write(NULL, page, 0, end);
1934         if (!ret)
1935                 ret = btrfs_commit_write(NULL, page, 0, end);
1936
1937 out_unlock:
1938         unlock_page(page);
1939         return ret;
1940 }
1941
1942 static void btrfs_truncate(struct inode *inode)
1943 {
1944         struct btrfs_root *root = BTRFS_I(inode)->root;
1945         int ret;
1946         struct btrfs_trans_handle *trans;
1947
1948         if (!S_ISREG(inode->i_mode))
1949                 return;
1950         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1951                 return;
1952
1953         btrfs_truncate_page(inode->i_mapping, inode->i_size);
1954
1955         mutex_lock(&root->fs_info->fs_mutex);
1956         trans = btrfs_start_transaction(root, 1);
1957         btrfs_set_trans_block_group(trans, inode);
1958
1959         /* FIXME, add redo link to tree so we don't leak on crash */
1960         ret = btrfs_truncate_in_trans(trans, root, inode);
1961         btrfs_update_inode(trans, root, inode);
1962         ret = btrfs_end_transaction(trans, root);
1963         BUG_ON(ret);
1964         mutex_unlock(&root->fs_info->fs_mutex);
1965         btrfs_btree_balance_dirty(root);
1966 }
1967
1968 int btrfs_commit_write(struct file *file, struct page *page,
1969                        unsigned from, unsigned to)
1970 {
1971         struct inode *inode = page->mapping->host;
1972         struct buffer_head *bh;
1973         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1974
1975         SetPageUptodate(page);
1976         bh = page_buffers(page);
1977         set_buffer_uptodate(bh);
1978         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1979                 set_page_dirty(page);
1980         }
1981         if (pos > inode->i_size) {
1982                 i_size_write(inode, pos);
1983                 mark_inode_dirty(inode);
1984         }
1985         return 0;
1986 }
1987
1988 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1989 {
1990         struct btrfs_trans_handle *trans;
1991         struct btrfs_key key;
1992         struct btrfs_root_item root_item;
1993         struct btrfs_inode_item *inode_item;
1994         struct buffer_head *subvol;
1995         struct btrfs_leaf *leaf;
1996         struct btrfs_root *new_root;
1997         struct inode *inode;
1998         struct inode *dir;
1999         int ret;
2000         int err;
2001         u64 objectid;
2002         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2003
2004         mutex_lock(&root->fs_info->fs_mutex);
2005         trans = btrfs_start_transaction(root, 1);
2006         BUG_ON(!trans);
2007
2008         subvol = btrfs_alloc_free_block(trans, root, 0);
2009         if (IS_ERR(subvol))
2010                 return PTR_ERR(subvol);
2011         leaf = btrfs_buffer_leaf(subvol);
2012         btrfs_set_header_nritems(&leaf->header, 0);
2013         btrfs_set_header_level(&leaf->header, 0);
2014         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2015         btrfs_set_header_generation(&leaf->header, trans->transid);
2016         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2017         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2018                sizeof(leaf->header.fsid));
2019         mark_buffer_dirty(subvol);
2020
2021         inode_item = &root_item.inode;
2022         memset(inode_item, 0, sizeof(*inode_item));
2023         btrfs_set_inode_generation(inode_item, 1);
2024         btrfs_set_inode_size(inode_item, 3);
2025         btrfs_set_inode_nlink(inode_item, 1);
2026         btrfs_set_inode_nblocks(inode_item, 1);
2027         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2028
2029         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2030         btrfs_set_root_refs(&root_item, 1);
2031         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2032         root_item.drop_level = 0;
2033         brelse(subvol);
2034         subvol = NULL;
2035
2036         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2037                                        0, &objectid);
2038         if (ret)
2039                 goto fail;
2040
2041         btrfs_set_root_dirid(&root_item, new_dirid);
2042
2043         key.objectid = objectid;
2044         key.offset = 1;
2045         key.flags = 0;
2046         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2047         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2048                                 &root_item);
2049         if (ret)
2050                 goto fail;
2051
2052         /*
2053          * insert the directory item
2054          */
2055         key.offset = (u64)-1;
2056         dir = root->fs_info->sb->s_root->d_inode;
2057         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2058                                     name, namelen, dir->i_ino, &key,
2059                                     BTRFS_FT_DIR);
2060         if (ret)
2061                 goto fail;
2062
2063         ret = btrfs_commit_transaction(trans, root);
2064         if (ret)
2065                 goto fail_commit;
2066
2067         new_root = btrfs_read_fs_root(root->fs_info, &key);
2068         BUG_ON(!new_root);
2069
2070         trans = btrfs_start_transaction(new_root, 1);
2071         BUG_ON(!trans);
2072
2073         inode = btrfs_new_inode(trans, new_root, new_dirid,
2074                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2075         if (IS_ERR(inode))
2076                 goto fail;
2077         inode->i_op = &btrfs_dir_inode_operations;
2078         inode->i_fop = &btrfs_dir_file_operations;
2079         new_root->inode = inode;
2080
2081         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2082         if (ret)
2083                 goto fail;
2084
2085         inode->i_nlink = 1;
2086         inode->i_size = 6;
2087         ret = btrfs_update_inode(trans, new_root, inode);
2088         if (ret)
2089                 goto fail;
2090 fail:
2091         err = btrfs_commit_transaction(trans, root);
2092         if (err && !ret)
2093                 ret = err;
2094 fail_commit:
2095         mutex_unlock(&root->fs_info->fs_mutex);
2096         btrfs_btree_balance_dirty(root);
2097         return ret;
2098 }
2099
2100 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2101 {
2102         struct btrfs_trans_handle *trans;
2103         struct btrfs_key key;
2104         struct btrfs_root_item new_root_item;
2105         int ret;
2106         int err;
2107         u64 objectid;
2108
2109         if (!root->ref_cows)
2110                 return -EINVAL;
2111
2112         mutex_lock(&root->fs_info->fs_mutex);
2113         trans = btrfs_start_transaction(root, 1);
2114         BUG_ON(!trans);
2115
2116         ret = btrfs_update_inode(trans, root, root->inode);
2117         if (ret)
2118                 goto fail;
2119
2120         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2121                                        0, &objectid);
2122         if (ret)
2123                 goto fail;
2124
2125         memcpy(&new_root_item, &root->root_item,
2126                sizeof(new_root_item));
2127
2128         key.objectid = objectid;
2129         key.offset = 1;
2130         key.flags = 0;
2131         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2132         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2133
2134         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2135                                 &new_root_item);
2136         if (ret)
2137                 goto fail;
2138
2139         /*
2140          * insert the directory item
2141          */
2142         key.offset = (u64)-1;
2143         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2144                                     name, namelen,
2145                                     root->fs_info->sb->s_root->d_inode->i_ino,
2146                                     &key, BTRFS_FT_DIR);
2147
2148         if (ret)
2149                 goto fail;
2150
2151         ret = btrfs_inc_root_ref(trans, root);
2152         if (ret)
2153                 goto fail;
2154
2155 fail:
2156         err = btrfs_commit_transaction(trans, root);
2157         if (err && !ret)
2158                 ret = err;
2159         mutex_unlock(&root->fs_info->fs_mutex);
2160         btrfs_btree_balance_dirty(root);
2161         return ret;
2162 }
2163
2164 int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2165                 cmd, unsigned long arg)
2166 {
2167         struct btrfs_root *root = BTRFS_I(inode)->root;
2168         struct btrfs_ioctl_vol_args vol_args;
2169         int ret = 0;
2170         struct btrfs_dir_item *di;
2171         int namelen;
2172         struct btrfs_path *path;
2173         u64 root_dirid;
2174
2175         switch (cmd) {
2176         case BTRFS_IOC_SNAP_CREATE:
2177                 if (copy_from_user(&vol_args,
2178                                    (struct btrfs_ioctl_vol_args __user *)arg,
2179                                    sizeof(vol_args)))
2180                         return -EFAULT;
2181                 namelen = strlen(vol_args.name);
2182                 if (namelen > BTRFS_VOL_NAME_MAX)
2183                         return -EINVAL;
2184                 if (strchr(vol_args.name, '/'))
2185                         return -EINVAL;
2186                 path = btrfs_alloc_path();
2187                 if (!path)
2188                         return -ENOMEM;
2189                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2190                 mutex_lock(&root->fs_info->fs_mutex);
2191                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2192                                     path, root_dirid,
2193                                     vol_args.name, namelen, 0);
2194                 mutex_unlock(&root->fs_info->fs_mutex);
2195                 btrfs_free_path(path);
2196                 if (di && !IS_ERR(di))
2197                         return -EEXIST;
2198                 if (IS_ERR(di))
2199                         return PTR_ERR(di);
2200
2201                 if (root == root->fs_info->tree_root)
2202                         ret = create_subvol(root, vol_args.name, namelen);
2203                 else
2204                         ret = create_snapshot(root, vol_args.name, namelen);
2205                 break;
2206         default:
2207                 return -ENOTTY;
2208         }
2209         return ret;
2210 }
2211
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: convert the user pointer with compat_ptr() and
 * forward to btrfs_ioctl() with the big kernel lock held.
 */
long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	struct inode *host = file->f_path.dentry->d_inode;
	unsigned long native_arg;
	int ret;

	lock_kernel();
	native_arg = (unsigned long) compat_ptr(arg);
	ret = btrfs_ioctl(host, file, cmd, native_arg);
	unlock_kernel();

	return ret;
}
#endif
2225
2226 /*
2227  * Called inside transaction, so use GFP_NOFS
2228  */
2229 struct inode *btrfs_alloc_inode(struct super_block *sb)
2230 {
2231         struct btrfs_inode *ei;
2232
2233         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2234         if (!ei)
2235                 return NULL;
2236         return &ei->vfs_inode;
2237 }
2238
2239 void btrfs_destroy_inode(struct inode *inode)
2240 {
2241         WARN_ON(!list_empty(&inode->i_dentry));
2242         WARN_ON(inode->i_data.nrpages);
2243
2244         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2245 }
2246
2247 static void init_once(void * foo, struct kmem_cache * cachep,
2248                       unsigned long flags)
2249 {
2250         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2251
2252         inode_init_once(&ei->vfs_inode);
2253 }
2254
2255 void btrfs_destroy_cachep(void)
2256 {
2257         if (btrfs_inode_cachep)
2258                 kmem_cache_destroy(btrfs_inode_cachep);
2259         if (btrfs_trans_handle_cachep)
2260                 kmem_cache_destroy(btrfs_trans_handle_cachep);
2261         if (btrfs_transaction_cachep)
2262                 kmem_cache_destroy(btrfs_transaction_cachep);
2263         if (btrfs_bit_radix_cachep)
2264                 kmem_cache_destroy(btrfs_bit_radix_cachep);
2265         if (btrfs_path_cachep)
2266                 kmem_cache_destroy(btrfs_path_cachep);
2267 }
2268
2269 int btrfs_init_cachep(void)
2270 {
2271         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2272                                              sizeof(struct btrfs_inode),
2273                                              0, (SLAB_RECLAIM_ACCOUNT|
2274                                                 SLAB_MEM_SPREAD),
2275                                              init_once, NULL);
2276         if (!btrfs_inode_cachep)
2277                 goto fail;
2278         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2279                                              sizeof(struct btrfs_trans_handle),
2280                                              0, (SLAB_RECLAIM_ACCOUNT|
2281                                                 SLAB_MEM_SPREAD),
2282                                              NULL, NULL);
2283         if (!btrfs_trans_handle_cachep)
2284                 goto fail;
2285         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2286                                              sizeof(struct btrfs_transaction),
2287                                              0, (SLAB_RECLAIM_ACCOUNT|
2288                                                 SLAB_MEM_SPREAD),
2289                                              NULL, NULL);
2290         if (!btrfs_transaction_cachep)
2291                 goto fail;
2292         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2293                                              sizeof(struct btrfs_transaction),
2294                                              0, (SLAB_RECLAIM_ACCOUNT|
2295                                                 SLAB_MEM_SPREAD),
2296                                              NULL, NULL);
2297         if (!btrfs_path_cachep)
2298                 goto fail;
2299         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2300                                              256,
2301                                              0, (SLAB_RECLAIM_ACCOUNT|
2302                                                 SLAB_MEM_SPREAD |
2303                                                 SLAB_DESTROY_BY_RCU),
2304                                              NULL, NULL);
2305         if (!btrfs_bit_radix_cachep)
2306                 goto fail;
2307         return 0;
2308 fail:
2309         btrfs_destroy_cachep();
2310         return -ENOMEM;
2311 }
2312
2313 static int btrfs_getattr(struct vfsmount *mnt,
2314                          struct dentry *dentry, struct kstat *stat)
2315 {
2316         struct inode *inode = dentry->d_inode;
2317         generic_fillattr(inode, stat);
2318         stat->blksize = 256 * 1024;
2319         return 0;
2320 }
2321
2322 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2323                            struct inode * new_dir,struct dentry *new_dentry)
2324 {
2325         struct btrfs_trans_handle *trans;
2326         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2327         struct inode *new_inode = new_dentry->d_inode;
2328         struct inode *old_inode = old_dentry->d_inode;
2329         struct timespec ctime = CURRENT_TIME;
2330         struct btrfs_path *path;
2331         struct btrfs_dir_item *di;
2332         int ret;
2333
2334         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2335             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2336                 return -ENOTEMPTY;
2337         }
2338         mutex_lock(&root->fs_info->fs_mutex);
2339         trans = btrfs_start_transaction(root, 1);
2340         btrfs_set_trans_block_group(trans, new_dir);
2341         path = btrfs_alloc_path();
2342         if (!path) {
2343                 ret = -ENOMEM;
2344                 goto out_fail;
2345         }
2346
2347         old_dentry->d_inode->i_nlink++;
2348         old_dir->i_ctime = old_dir->i_mtime = ctime;
2349         new_dir->i_ctime = new_dir->i_mtime = ctime;
2350         old_inode->i_ctime = ctime;
2351         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2352                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2353                 u64 old_parent_oid;
2354                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2355                                            "..", 2, -1);
2356                 if (IS_ERR(di)) {
2357                         ret = PTR_ERR(di);
2358                         goto out_fail;
2359                 }
2360                 if (!di) {
2361                         ret = -ENOENT;
2362                         goto out_fail;
2363                 }
2364                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2365                 ret = btrfs_del_item(trans, root, path);
2366                 if (ret) {
2367                         goto out_fail;
2368                 }
2369                 btrfs_release_path(root, path);
2370
2371                 di = btrfs_lookup_dir_index_item(trans, root, path,
2372                                                  old_inode->i_ino,
2373                                                  old_parent_oid,
2374                                                  "..", 2, -1);
2375                 if (IS_ERR(di)) {
2376                         ret = PTR_ERR(di);
2377                         goto out_fail;
2378                 }
2379                 if (!di) {
2380                         ret = -ENOENT;
2381                         goto out_fail;
2382                 }
2383                 ret = btrfs_del_item(trans, root, path);
2384                 if (ret) {
2385                         goto out_fail;
2386                 }
2387                 btrfs_release_path(root, path);
2388
2389                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2390                                             old_inode->i_ino, location,
2391                                             BTRFS_FT_DIR);
2392                 if (ret)
2393                         goto out_fail;
2394         }
2395
2396
2397         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2398         if (ret)
2399                 goto out_fail;
2400
2401         if (new_inode) {
2402                 new_inode->i_ctime = CURRENT_TIME;
2403                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2404                 if (ret)
2405                         goto out_fail;
2406                 if (S_ISDIR(new_inode->i_mode))
2407                         clear_nlink(new_inode);
2408                 else
2409                         drop_nlink(new_inode);
2410                 ret = btrfs_update_inode(trans, root, new_inode);
2411                 if (ret)
2412                         goto out_fail;
2413         }
2414         ret = btrfs_add_link(trans, new_dentry, old_inode);
2415         if (ret)
2416                 goto out_fail;
2417
2418 out_fail:
2419         btrfs_free_path(path);
2420         btrfs_end_transaction(trans, root);
2421         mutex_unlock(&root->fs_info->fs_mutex);
2422         return ret;
2423 }
2424
2425 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2426                          const char *symname)
2427 {
2428         struct btrfs_trans_handle *trans;
2429         struct btrfs_root *root = BTRFS_I(dir)->root;
2430         struct btrfs_path *path;
2431         struct btrfs_key key;
2432         struct inode *inode;
2433         int err;
2434         int drop_inode = 0;
2435         u64 objectid;
2436         int name_len;
2437         int datasize;
2438         char *ptr;
2439         struct btrfs_file_extent_item *ei;
2440
2441         name_len = strlen(symname) + 1;
2442         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2443                 return -ENAMETOOLONG;
2444         mutex_lock(&root->fs_info->fs_mutex);
2445         trans = btrfs_start_transaction(root, 1);
2446         btrfs_set_trans_block_group(trans, dir);
2447
2448         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2449         if (err) {
2450                 err = -ENOSPC;
2451                 goto out_unlock;
2452         }
2453
2454         inode = btrfs_new_inode(trans, root, objectid,
2455                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2456         err = PTR_ERR(inode);
2457         if (IS_ERR(inode))
2458                 goto out_unlock;
2459
2460         btrfs_set_trans_block_group(trans, inode);
2461         err = btrfs_add_nondir(trans, dentry, inode);
2462         if (err)
2463                 drop_inode = 1;
2464         else {
2465                 inode->i_mapping->a_ops = &btrfs_aops;
2466                 inode->i_fop = &btrfs_file_operations;
2467                 inode->i_op = &btrfs_file_inode_operations;
2468         }
2469         dir->i_sb->s_dirt = 1;
2470         btrfs_update_inode_block_group(trans, inode);
2471         btrfs_update_inode_block_group(trans, dir);
2472         if (drop_inode)
2473                 goto out_unlock;
2474
2475         path = btrfs_alloc_path();
2476         BUG_ON(!path);
2477         key.objectid = inode->i_ino;
2478         key.offset = 0;
2479         key.flags = 0;
2480         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2481         datasize = btrfs_file_extent_calc_inline_size(name_len);
2482         err = btrfs_insert_empty_item(trans, root, path, &key,
2483                                       datasize);
2484         if (err) {
2485                 drop_inode = 1;
2486                 goto out_unlock;
2487         }
2488         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2489                path->slots[0], struct btrfs_file_extent_item);
2490         btrfs_set_file_extent_generation(ei, trans->transid);
2491         btrfs_set_file_extent_type(ei,
2492                                    BTRFS_FILE_EXTENT_INLINE);
2493         ptr = btrfs_file_extent_inline_start(ei);
2494         btrfs_memcpy(root, path->nodes[0]->b_data,
2495                      ptr, symname, name_len);
2496         mark_buffer_dirty(path->nodes[0]);
2497         btrfs_free_path(path);
2498         inode->i_op = &btrfs_symlink_inode_operations;
2499         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2500         inode->i_size = name_len - 1;
2501         err = btrfs_update_inode(trans, root, inode);
2502         if (err)
2503                 drop_inode = 1;
2504
2505 out_unlock:
2506         btrfs_end_transaction(trans, root);
2507         mutex_unlock(&root->fs_info->fs_mutex);
2508         if (drop_inode) {
2509                 inode_dec_link_count(inode);
2510                 iput(inode);
2511         }
2512         btrfs_btree_balance_dirty(root);
2513         return err;
2514 }
2515
2516 static struct inode_operations btrfs_dir_inode_operations = {
2517         .lookup         = btrfs_lookup,
2518         .create         = btrfs_create,
2519         .unlink         = btrfs_unlink,
2520         .link           = btrfs_link,
2521         .mkdir          = btrfs_mkdir,
2522         .rmdir          = btrfs_rmdir,
2523         .rename         = btrfs_rename,
2524         .symlink        = btrfs_symlink,
2525         .setattr        = btrfs_setattr,
2526 };
2527
2528 static struct inode_operations btrfs_dir_ro_inode_operations = {
2529         .lookup         = btrfs_lookup,
2530 };
2531
2532 static struct file_operations btrfs_dir_file_operations = {
2533         .llseek         = generic_file_llseek,
2534         .read           = generic_read_dir,
2535         .readdir        = btrfs_readdir,
2536         .ioctl          = btrfs_ioctl,
2537 #ifdef CONFIG_COMPAT
2538         .compat_ioctl   = btrfs_compat_ioctl,
2539 #endif
2540 };
2541
2542 static struct address_space_operations btrfs_aops = {
2543         .readpage       = btrfs_readpage,
2544         .writepage      = btrfs_writepage,
2545         .sync_page      = block_sync_page,
2546         .prepare_write  = btrfs_prepare_write,
2547         .commit_write   = btrfs_commit_write,
2548         .bmap           = btrfs_bmap,
2549 };
2550
2551 static struct address_space_operations btrfs_symlink_aops = {
2552         .readpage       = btrfs_readpage,
2553         .writepage      = btrfs_writepage,
2554 };
2555
2556 static struct inode_operations btrfs_file_inode_operations = {
2557         .truncate       = btrfs_truncate,
2558         .getattr        = btrfs_getattr,
2559         .setattr        = btrfs_setattr,
2560 };
2561
2562 static struct inode_operations btrfs_symlink_inode_operations = {
2563         .readlink       = generic_readlink,
2564         .follow_link    = page_follow_link_light,
2565         .put_link       = page_put_link,
2566 };