]> pilppa.org Git - linux-2.6-omap-h63xx.git/blob - fs/btrfs/disk-io.c
Btrfs: groundwork for subvolume and snapshot roots
[linux-2.6-omap-h63xx.git] / fs / btrfs / disk-io.c
1 #include <linux/module.h>
2 #include <linux/fs.h>
3 #include <linux/blkdev.h>
4 #include <linux/crypto.h>
5 #include <linux/scatterlist.h>
6 #include <linux/swap.h>
7 #include <linux/radix-tree.h>
8 #include "ctree.h"
9 #include "disk-io.h"
10 #include "transaction.h"
11 #include "btrfs_inode.h"
12
13 static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf)
14 {
15         struct btrfs_node *node = btrfs_buffer_node(buf);
16         if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) {
17                 BUG();
18         }
19         if (root->node && btrfs_header_parentid(&node->header) !=
20             btrfs_header_parentid(btrfs_buffer_header(root->node))) {
21                 BUG();
22         }
23         return 0;
24 }
25
26 struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr)
27 {
28         struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
29         int blockbits = root->fs_info->sb->s_blocksize_bits;
30         unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
31         struct page *page;
32         struct buffer_head *bh;
33         struct buffer_head *head;
34         struct buffer_head *ret = NULL;
35
36
37         page = find_lock_page(mapping, index);
38         if (!page)
39                 return NULL;
40
41         if (!page_has_buffers(page))
42                 goto out_unlock;
43
44         head = page_buffers(page);
45         bh = head;
46         do {
47                 if (buffer_mapped(bh) && bh->b_blocknr == blocknr) {
48                         ret = bh;
49                         get_bh(bh);
50                         goto out_unlock;
51                 }
52                 bh = bh->b_this_page;
53         } while (bh != head);
54 out_unlock:
55         unlock_page(page);
56         if (ret) {
57                 touch_buffer(ret);
58         }
59         page_cache_release(page);
60         return ret;
61 }
62
63 struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root,
64                                                  u64 blocknr)
65 {
66         struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
67         int blockbits = root->fs_info->sb->s_blocksize_bits;
68         unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits);
69         struct page *page;
70         struct buffer_head *bh;
71         struct buffer_head *head;
72         struct buffer_head *ret = NULL;
73         u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits);
74
75         page = grab_cache_page(mapping, index);
76         if (!page)
77                 return NULL;
78
79         if (!page_has_buffers(page))
80                 create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0);
81         head = page_buffers(page);
82         bh = head;
83         do {
84                 if (!buffer_mapped(bh)) {
85                         bh->b_bdev = root->fs_info->sb->s_bdev;
86                         bh->b_blocknr = first_block;
87                         set_buffer_mapped(bh);
88                 }
89                 if (bh->b_blocknr == blocknr) {
90                         ret = bh;
91                         get_bh(bh);
92                         goto out_unlock;
93                 }
94                 bh = bh->b_this_page;
95                 first_block++;
96         } while (bh != head);
97 out_unlock:
98         unlock_page(page);
99         if (ret)
100                 touch_buffer(ret);
101         page_cache_release(page);
102         return ret;
103 }
104
105 static sector_t max_block(struct block_device *bdev)
106 {
107         sector_t retval = ~((sector_t)0);
108         loff_t sz = i_size_read(bdev->bd_inode);
109
110         if (sz) {
111                 unsigned int size = block_size(bdev);
112                 unsigned int sizebits = blksize_bits(size);
113                 retval = (sz >> sizebits);
114         }
115         return retval;
116 }
117
118 static int btree_get_block(struct inode *inode, sector_t iblock,
119                            struct buffer_head *bh, int create)
120 {
121         if (iblock >= max_block(inode->i_sb->s_bdev)) {
122                 if (create)
123                         return -EIO;
124
125                 /*
126                  * for reads, we're just trying to fill a partial page.
127                  * return a hole, they will have to call get_block again
128                  * before they can fill it, and they will get -EIO at that
129                  * time
130                  */
131                 return 0;
132         }
133         bh->b_bdev = inode->i_sb->s_bdev;
134         bh->b_blocknr = iblock;
135         set_buffer_mapped(bh);
136         return 0;
137 }
138
139 int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len,
140                     char *result)
141 {
142         struct scatterlist sg;
143         struct crypto_hash *tfm = root->fs_info->hash_tfm;
144         struct hash_desc desc;
145         int ret;
146
147         desc.tfm = tfm;
148         desc.flags = 0;
149         sg_init_one(&sg, data, len);
150         spin_lock(&root->fs_info->hash_lock);
151         ret = crypto_hash_digest(&desc, &sg, 1, result);
152         spin_unlock(&root->fs_info->hash_lock);
153         if (ret) {
154                 printk("sha256 digest failed\n");
155         }
156         return ret;
157 }
158 static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh,
159                            int verify)
160 {
161         char result[BTRFS_CSUM_SIZE];
162         int ret;
163         struct btrfs_node *node;
164
165         ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE,
166                               bh->b_size - BTRFS_CSUM_SIZE, result);
167         if (ret)
168                 return ret;
169         if (verify) {
170                 if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) {
171                         printk("checksum verify failed on %lu\n",
172                                bh->b_blocknr);
173                         return 1;
174                 }
175         } else {
176                 node = btrfs_buffer_node(bh);
177                 memcpy(node->header.csum, result, BTRFS_CSUM_SIZE);
178         }
179         return 0;
180 }
181
182 static int btree_writepage(struct page *page, struct writeback_control *wbc)
183 {
184         struct buffer_head *bh;
185         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
186         struct buffer_head *head;
187         if (!page_has_buffers(page)) {
188                 create_empty_buffers(page, root->fs_info->sb->s_blocksize,
189                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
190         }
191         head = page_buffers(page);
192         bh = head;
193         do {
194                 if (buffer_dirty(bh))
195                         csum_tree_block(root, bh, 0);
196                 bh = bh->b_this_page;
197         } while (bh != head);
198         return block_write_full_page(page, btree_get_block, wbc);
199 }
200
201 static int btree_readpage(struct file * file, struct page * page)
202 {
203         return block_read_full_page(page, btree_get_block);
204 }
205
206 static struct address_space_operations btree_aops = {
207         .readpage       = btree_readpage,
208         .writepage      = btree_writepage,
209         .sync_page      = block_sync_page,
210 };
211
212 struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr)
213 {
214         struct buffer_head *bh = NULL;
215
216         bh = btrfs_find_create_tree_block(root, blocknr);
217         if (!bh)
218                 return bh;
219         if (buffer_uptodate(bh))
220                 goto uptodate;
221         lock_buffer(bh);
222         if (!buffer_uptodate(bh)) {
223                 get_bh(bh);
224                 bh->b_end_io = end_buffer_read_sync;
225                 submit_bh(READ, bh);
226                 wait_on_buffer(bh);
227                 if (!buffer_uptodate(bh))
228                         goto fail;
229                 csum_tree_block(root, bh, 1);
230         } else {
231                 unlock_buffer(bh);
232         }
233 uptodate:
234         if (check_tree_block(root, bh))
235                 BUG();
236         return bh;
237 fail:
238         brelse(bh);
239         return NULL;
240 }
241
242 int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
243                      struct buffer_head *buf)
244 {
245         WARN_ON(atomic_read(&buf->b_count) == 0);
246         mark_buffer_dirty(buf);
247         return 0;
248 }
249
250 int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
251                      struct buffer_head *buf)
252 {
253         WARN_ON(atomic_read(&buf->b_count) == 0);
254         clear_buffer_dirty(buf);
255         return 0;
256 }
257
258 static int __setup_root(int blocksize,
259                         struct btrfs_root *root,
260                         struct btrfs_fs_info *fs_info,
261                         u64 objectid)
262 {
263         root->node = NULL;
264         root->inode = NULL;
265         root->commit_root = NULL;
266         root->blocksize = blocksize;
267         root->ref_cows = 0;
268         root->fs_info = fs_info;
269         root->objectid = objectid;
270         root->last_trans = 0;
271         memset(&root->root_key, 0, sizeof(root->root_key));
272         memset(&root->root_item, 0, sizeof(root->root_item));
273         return 0;
274 }
275
276 static int find_and_setup_root(int blocksize,
277                                struct btrfs_root *tree_root,
278                                struct btrfs_fs_info *fs_info,
279                                u64 objectid,
280                                struct btrfs_root *root)
281 {
282         int ret;
283
284         __setup_root(blocksize, root, fs_info, objectid);
285         ret = btrfs_find_last_root(tree_root, objectid,
286                                    &root->root_item, &root->root_key);
287         BUG_ON(ret);
288
289         root->node = read_tree_block(root,
290                                      btrfs_root_blocknr(&root->root_item));
291         BUG_ON(!root->node);
292         return 0;
293 }
294
295 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
296                                       struct btrfs_key *location)
297 {
298         struct btrfs_root *root;
299         struct btrfs_root *tree_root = fs_info->tree_root;
300         struct btrfs_path *path;
301         struct btrfs_leaf *l;
302         int ret = 0;
303
304 printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags);
305         root = kmalloc(sizeof(*root), GFP_NOFS);
306         if (!root) {
307 printk("failed1\n");
308                 return ERR_PTR(-ENOMEM);
309         }
310         if (location->offset == (u64)-1) {
311                 ret = find_and_setup_root(fs_info->sb->s_blocksize,
312                                           fs_info->tree_root, fs_info,
313                                           location->objectid, root);
314                 if (ret) {
315 printk("failed2\n");
316                         kfree(root);
317                         return ERR_PTR(ret);
318                 }
319                 goto insert;
320         }
321
322         __setup_root(fs_info->sb->s_blocksize, root, fs_info,
323                      location->objectid);
324
325         path = btrfs_alloc_path();
326         BUG_ON(!path);
327         ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
328         if (ret != 0) {
329 printk("internal search_slot gives us %d\n", ret);
330                 if (ret > 0)
331                         ret = -ENOENT;
332                 goto out;
333         }
334         l = btrfs_buffer_leaf(path->nodes[0]);
335         memcpy(&root->root_item,
336                btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item),
337                sizeof(root->root_item));
338         memcpy(&root->root_key, location, sizeof(*location));
339         ret = 0;
340 out:
341         btrfs_release_path(root, path);
342         btrfs_free_path(path);
343         if (ret) {
344                 kfree(root);
345                 return ERR_PTR(ret);
346         }
347         root->node = read_tree_block(root,
348                                      btrfs_root_blocknr(&root->root_item));
349         BUG_ON(!root->node);
350 insert:
351 printk("inserting %p\n", root);
352         root->ref_cows = 1;
353         ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root,
354                                 root);
355         if (ret) {
356 printk("radix_tree_insert gives us %d\n", ret);
357                 brelse(root->node);
358                 kfree(root);
359                 return ERR_PTR(ret);
360         }
361 printk("all worked\n");
362         return root;
363 }
364
365 struct btrfs_root *open_ctree(struct super_block *sb)
366 {
367         struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root),
368                                                  GFP_NOFS);
369         struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root),
370                                                GFP_NOFS);
371         struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root),
372                                                 GFP_NOFS);
373         struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info),
374                                                 GFP_NOFS);
375         int ret;
376         struct btrfs_super_block *disk_super;
377
378         init_bit_radix(&fs_info->pinned_radix);
379         init_bit_radix(&fs_info->pending_del_radix);
380         INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
381         sb_set_blocksize(sb, 4096);
382         fs_info->running_transaction = NULL;
383         fs_info->tree_root = tree_root;
384         fs_info->extent_root = extent_root;
385         fs_info->inode_root = inode_root;
386         fs_info->last_inode_alloc = 0;
387         fs_info->highest_inode = 0;
388         fs_info->sb = sb;
389         fs_info->btree_inode = new_inode(sb);
390         fs_info->btree_inode->i_ino = 1;
391         fs_info->btree_inode->i_nlink = 1;
392         fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size;
393         fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
394         BTRFS_I(fs_info->btree_inode)->root = tree_root;
395         memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
396                sizeof(struct btrfs_key));
397         insert_inode_hash(fs_info->btree_inode);
398         mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
399         fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
400         spin_lock_init(&fs_info->hash_lock);
401         if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) {
402                 printk("failed to allocate sha256 hash\n");
403                 return NULL;
404         }
405         mutex_init(&fs_info->trans_mutex);
406         mutex_init(&fs_info->fs_mutex);
407         memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert));
408         memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert));
409
410         __setup_root(sb->s_blocksize, tree_root,
411                      fs_info, BTRFS_ROOT_TREE_OBJECTID);
412         fs_info->sb_buffer = read_tree_block(tree_root,
413                                              BTRFS_SUPER_INFO_OFFSET /
414                                              sb->s_blocksize);
415
416         if (!fs_info->sb_buffer)
417                 return NULL;
418         disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data;
419         if (!btrfs_super_root(disk_super))
420                 return NULL;
421
422         fs_info->disk_super = disk_super;
423         tree_root->node = read_tree_block(tree_root,
424                                           btrfs_super_root(disk_super));
425         BUG_ON(!tree_root->node);
426
427         mutex_lock(&fs_info->fs_mutex);
428         ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
429                                   BTRFS_EXTENT_TREE_OBJECTID, extent_root);
430         BUG_ON(ret);
431
432         ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info,
433                                   BTRFS_INODE_MAP_OBJECTID, inode_root);
434         BUG_ON(ret);
435
436         fs_info->generation = btrfs_super_generation(disk_super) + 1;
437         ret = btrfs_find_highest_inode(tree_root, &fs_info->last_inode_alloc);
438         if (ret == 0)
439                 fs_info->highest_inode = fs_info->last_inode_alloc;
440         memset(&fs_info->kobj, 0, sizeof(fs_info->kobj));
441         kobj_set_kset_s(fs_info, btrfs_subsys);
442         kobject_set_name(&fs_info->kobj, "%s", sb->s_id);
443         kobject_register(&fs_info->kobj);
444         mutex_unlock(&fs_info->fs_mutex);
445         return tree_root;
446 }
447
448 int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
449                       *root)
450 {
451         struct buffer_head *bh = root->fs_info->sb_buffer;
452
453         btrfs_set_super_root(root->fs_info->disk_super,
454                              root->fs_info->tree_root->node->b_blocknr);
455         lock_buffer(bh);
456         WARN_ON(atomic_read(&bh->b_count) < 1);
457         clear_buffer_dirty(bh);
458         csum_tree_block(root, bh, 0);
459         bh->b_end_io = end_buffer_write_sync;
460         get_bh(bh);
461         submit_bh(WRITE, bh);
462         wait_on_buffer(bh);
463         if (!buffer_uptodate(bh)) {
464                 WARN_ON(1);
465                 return -EIO;
466         }
467         return 0;
468 }
469
470 int del_fs_roots(struct btrfs_fs_info *fs_info)
471 {
472         int ret;
473         struct btrfs_root *gang[8];
474         int i;
475
476         while(1) {
477                 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
478                                              (void **)gang, 0,
479                                              ARRAY_SIZE(gang));
480                 if (!ret)
481                         break;
482                 for (i = 0; i < ret; i++) {
483                         radix_tree_delete(&fs_info->fs_roots_radix,
484                                           (unsigned long)gang[i]);
485                         if (gang[i]->inode)
486                                 iput(gang[i]->inode);
487                         else
488                                 printk("no inode for root %p\n", gang[i]);
489                         if (gang[i]->node)
490                                 brelse(gang[i]->node);
491                         if (gang[i]->commit_root)
492                                 brelse(gang[i]->commit_root);
493                         kfree(gang[i]);
494                 }
495         }
496         return 0;
497 }
498
499 int close_ctree(struct btrfs_root *root)
500 {
501         int ret;
502         struct btrfs_trans_handle *trans;
503         struct btrfs_fs_info *fs_info = root->fs_info;
504
505         mutex_lock(&fs_info->fs_mutex);
506         trans = btrfs_start_transaction(root, 1);
507         btrfs_commit_transaction(trans, root);
508         /* run commit again to  drop the original snapshot */
509         trans = btrfs_start_transaction(root, 1);
510         btrfs_commit_transaction(trans, root);
511         ret = btrfs_write_and_wait_transaction(NULL, root);
512         BUG_ON(ret);
513         write_ctree_super(NULL, root);
514         mutex_unlock(&fs_info->fs_mutex);
515
516         if (fs_info->extent_root->node)
517                 btrfs_block_release(fs_info->extent_root,
518                                     fs_info->extent_root->node);
519         if (fs_info->inode_root->node)
520                 btrfs_block_release(fs_info->inode_root,
521                                     fs_info->inode_root->node);
522         if (fs_info->tree_root->node)
523                 btrfs_block_release(fs_info->tree_root,
524                                     fs_info->tree_root->node);
525         btrfs_block_release(root, fs_info->sb_buffer);
526         crypto_free_hash(fs_info->hash_tfm);
527         truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
528         iput(fs_info->btree_inode);
529         del_fs_roots(fs_info);
530         kfree(fs_info->extent_root);
531         kfree(fs_info->inode_root);
532         kfree(fs_info->tree_root);
533         kobject_unregister(&fs_info->kobj);
534         return 0;
535 }
536
/*
 * Drop one reference on a tree block buffer via brelse().
 * 'root' is unused.
 */
void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf)
{
	brelse(buf);
	return;
}
541