X-Git-Url: http://pilppa.org/gitweb/?a=blobdiff_plain;f=fs%2Fbtrfs%2Finode.c;h=5c05ecbc5726e79e2716faddd8aa35d31534f6b2;hb=e9d0b13b5bbb58c9b840e407a8d181442f799966;hp=e639cc9e089f01d02595be343f582b99463936c7;hpb=f1ace244c8c1e16eaa5c8b3b5339849651e31ede;p=linux-2.6-omap-h63xx.git diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e639cc9e089..5c05ecbc572 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include @@ -31,6 +30,8 @@ #include #include #include +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -46,6 +47,7 @@ struct btrfs_iget_args { static struct inode_operations btrfs_dir_inode_operations; static struct inode_operations btrfs_symlink_inode_operations; static struct inode_operations btrfs_dir_ro_inode_operations; +static struct inode_operations btrfs_special_inode_operations; static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; @@ -75,6 +77,7 @@ void btrfs_read_locked_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; u64 alloc_group_block; + u32 rdev; int ret; path = btrfs_alloc_path(); @@ -104,6 +107,8 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); + inode->i_rdev = 0; + rdev = btrfs_inode_rdev(inode_item); alloc_group_block = btrfs_inode_block_group(inode_item); BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); @@ -114,12 +119,6 @@ void btrfs_read_locked_inode(struct inode *inode) mutex_unlock(&root->fs_info->fs_mutex); switch (inode->i_mode & S_IFMT) { -#if 0 - default: - init_special_inode(inode, inode->i_mode, - btrfs_inode_rdev(inode_item)); - break; -#endif case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; @@ -136,6 +135,9 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; break; + default: + init_special_inode(inode, inode->i_mode, rdev); + break; } return; @@ -162,6 +164,7 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + btrfs_set_inode_rdev(item, inode->i_rdev); btrfs_set_inode_block_group(item, BTRFS_I(inode)->block_group->key.objectid); } @@ -211,7 +214,11 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto err; + } + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (IS_ERR(di)) { @@ -224,7 +231,8 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, } objectid = btrfs_disk_key_objectid(&di->location); ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); + if (ret) + goto err; btrfs_release_path(root, path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, @@ -238,16 +246,16 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, goto err; } ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: btrfs_free_path(path); if (!ret) { dir->i_size -= name_len * 2; + dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); drop_nlink(dentry->d_inode); - btrfs_update_inode(trans, root, dentry->d_inode); + ret = btrfs_update_inode(trans, root, dentry->d_inode); dir->i_sb->s_dirt = 1; } return ret; @@ -358,47 +366,14 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, BUG_ON(!path); ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, -1); - BUG_ON(ret); - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (ret > 0) + ret = -ENOENT; + if (!ret) + ret = btrfs_del_item(trans, root, path); btrfs_free_path(path); return ret; } -/* - * truncates go from a high offset to a low offset. So, walk - * from hi to lo in the node and issue readas. Stop when you find - * keys from a different objectid - */ -static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - int nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - if (slot == 0) - return; - nritems = btrfs_header_nritems(&node->header); - for (i = slot - 1; i >= 0; i--) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} - /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find @@ -425,6 +400,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int del_item; path = btrfs_alloc_path(); + path->reada = -1; BUG_ON(!path); /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; @@ -441,7 +417,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } - reada_truncate(root, path, inode->i_ino); leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; found_type = btrfs_disk_key_type(found_key); @@ -499,7 +474,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, extent_num_blocks); inode->i_blocks -= (orig_num_blocks - extent_num_blocks) << 3; - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } else { extent_start = btrfs_file_extent_disk_blocknr(fi); @@ -515,7 +490,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } if (del_item) { ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (ret) + goto error; } else { break; } @@ -561,7 +537,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) goto out; if (!PageUptodate(page)) { - ret = mpage_readpage(page, btrfs_get_block); + ret = btrfs_readpage(NULL, page); lock_page(page); if (!PageUptodate(page)) { ret = -EIO; @@ -576,19 +552,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) page->index << PAGE_CACHE_SHIFT, (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint); - BUG_ON(ret); - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + if (ret) + goto out; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, alloc_hint, (u64)-1, &ins, 1); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_insert_file_extent(trans, root, inode->i_ino, page->index << PAGE_CACHE_SHIFT, ins.objectid, 1, 1); - BUG_ON(ret); + if (ret) + goto out; SetPageChecked(page); kaddr = kmap(page); memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); flush_dcache_page(page); - btrfs_csum_file_block(trans, root, inode->i_ino, + ret = btrfs_csum_file_block(trans, root, inode->i_ino, page->index << PAGE_CACHE_SHIFT, kaddr, PAGE_CACHE_SIZE); kunmap(page); @@ -632,9 +611,10 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_set_trans_block_group(trans, inode); err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); - BUG_ON(err); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + if (err) + return err; } out: err = inode_setattr(inode, attr); @@ -656,12 +636,20 @@ void btrfs_delete_inode(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); - btrfs_free_inode(trans, root, inode); + if (ret) + goto no_delete_lock; + ret = btrfs_free_inode(trans, root, inode); + if (ret) + goto no_delete_lock; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); return; + +no_delete_lock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); no_delete: clear_inode(inode); } @@ -805,36 +793,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } -/* - * readahead one full node of leaves as long as their keys include - * the objectid supplied - */ -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - u32 nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - nritems = btrfs_header_nritems(&node->header); - for (i = slot + 1; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} static unsigned char btrfs_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; @@ -868,18 +826,17 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; path = btrfs_alloc_path(); + path->reada = 1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; advance = 0; - reada_leaves(root, path, inode->i_ino); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { - reada_leaves(root, path, inode->i_ino); ret = btrfs_next_leaf(root, path); if (ret) break; @@ -945,7 +902,7 @@ int btrfs_write_inode(struct inode *inode, int wait) } /* - * This is somewhat expense, updating the tree every time the + * This is somewhat expensive, updating the tree every time the * inode changes. But, it is most likely to find the inode in cache. * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. @@ -961,7 +918,6 @@ void btrfs_dirty_inode(struct inode *inode) btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -1002,8 +958,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - BUG_ON(ret); - + if (ret) + return ERR_PTR(ret); insert_inode_hash(inode); return inode; } @@ -1019,6 +975,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, int ret; struct btrfs_key key; struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; + struct inode *parent_inode; key.objectid = inode->i_ino; key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); @@ -1029,7 +986,9 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino, &key, btrfs_inode_type(inode)); if (ret == 0) { - dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; + parent_inode = dentry->d_parent->d_inode; + parent_inode->i_size += dentry->d_name.len * 2; + parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; ret = btrfs_update_inode(trans, root, dentry->d_parent->d_inode); } @@ -1049,6 +1008,58 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, return err; } +static int btrfs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode; + int err; + int drop_inode = 0; + u64 objectid; + + if (!new_valid_dev(rdev)) + return -EINVAL; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_unlock; + + btrfs_set_trans_block_group(trans, inode); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + else { + inode->i_op = &btrfs_special_inode_operations; + init_special_inode(inode, inode->i_mode, rdev); + } + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); +out_unlock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + static int btrfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { @@ -1121,7 +1132,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); - btrfs_update_inode(trans, root, inode); + err = btrfs_update_inode(trans, root, inode); + if (err) + drop_inode = 1; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1257,8 +1270,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, path = btrfs_alloc_path(); BUG_ON(!path); if (create & BTRFS_GET_BLOCK_CREATE) { - WARN_ON(1); - /* this almost but not quite works */ + /* + * danger!, this only works if the page is properly up + * to date somehow + */ trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; @@ -1345,20 +1360,28 @@ not_found: if (create & BTRFS_GET_BLOCK_CREATE) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, alloc_hint, (u64)-1, + 1, 0, alloc_hint, (u64)-1, &ins, 1); - BUG_ON(ret); + if (ret) { + err = ret; + goto out; + } ret = btrfs_insert_file_extent(trans, root, inode->i_ino, iblock << inode->i_blkbits, ins.objectid, ins.offset, ins.offset); - BUG_ON(ret); - SetPageChecked(result->b_page); + if (ret) { + err = ret; + goto out; + } btrfs_map_bh_to_logical(root, result, ins.objectid); } out: - if (trans) - err = btrfs_end_transaction(trans, root); + if (trans) { + ret = btrfs_end_transaction(trans, root); + if (!err) + err = ret; + } btrfs_free_path(path); return err; } @@ -1374,6 +1397,39 @@ int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } +static int btrfs_get_block_csum(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int ret; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct page *page = result->b_page; + u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result); + struct btrfs_csum_item *item; + struct btrfs_path *path = NULL; + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_get_block_lock(inode, iblock, result, create); + if (ret) + goto out; + + path = btrfs_alloc_path(); + item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0); + if (IS_ERR(item)) { + ret = PTR_ERR(item); + /* a csum that isn't present is a preallocated region. */ + if (ret == -ENOENT || ret == -EFBIG) + ret = 0; + result->b_private = NULL; + goto out; + } + memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); +out: + if (path) + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -1395,9 +1451,197 @@ static int btrfs_prepare_write(struct file *file, struct page *page, return block_prepare_write(page, from, to, btrfs_get_block); } -static int btrfs_readpage(struct file *file, struct page *page) +static void buffer_io_error(struct buffer_head *bh) { - return mpage_readpage(page, btrfs_get_block); + char b[BDEVNAME_SIZE]; + + printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", + bdevname(bh->b_bdev, b), + (unsigned long long)bh->b_blocknr); +} + +/* + * I/O completion handler for block_read_full_page() - pages + * which come unlocked at the end of I/O. + */ +static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) +{ + unsigned long flags; + struct buffer_head *first; + struct buffer_head *tmp; + struct page *page; + int page_uptodate = 1; + struct inode *inode; + int ret; + + BUG_ON(!buffer_async_read(bh)); + + page = bh->b_page; + inode = page->mapping->host; + if (uptodate) { + void *kaddr; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + if (bh->b_private) { + char csum[BTRFS_CRC32_SIZE]; + kaddr = kmap_atomic(page, KM_IRQ0); + ret = btrfs_csum_data(root, kaddr + bh_offset(bh), + bh->b_size, csum); + BUG_ON(ret); + if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) { + u64 offset; + offset = (page->index << PAGE_CACHE_SHIFT) + + bh_offset(bh); + printk("btrfs csum failed ino %lu off %llu\n", + page->mapping->host->i_ino, + (unsigned long long)offset); + memset(kaddr + bh_offset(bh), 1, bh->b_size); + flush_dcache_page(page); + } + kunmap_atomic(kaddr, KM_IRQ0); + } + set_buffer_uptodate(bh); + } else { + clear_buffer_uptodate(bh); + if (printk_ratelimit()) + buffer_io_error(bh); + SetPageError(page); + } + + /* + * Be _very_ careful from here on. Bad things can happen if + * two buffer heads end IO at almost the same time and both + * decide that the page is now completely done. + */ + first = page_buffers(page); + local_irq_save(flags); + bit_spin_lock(BH_Uptodate_Lock, &first->b_state); + clear_buffer_async_read(bh); + unlock_buffer(bh); + tmp = bh; + do { + if (!buffer_uptodate(tmp)) + page_uptodate = 0; + if (buffer_async_read(tmp)) { + BUG_ON(!buffer_locked(tmp)); + goto still_busy; + } + tmp = tmp->b_this_page; + } while (tmp != bh); + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); + + /* + * If none of the buffers had errors and they are all + * uptodate then we can set the page uptodate. + */ + if (page_uptodate && !PageError(page)) + SetPageUptodate(page); + unlock_page(page); + return; + +still_busy: + bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); + local_irq_restore(flags); + return; +} + +/* + * Generic "read page" function for block devices that have the normal + * get_block functionality. This is most of the block device filesystems. + * Reads the page asynchronously --- the unlock_buffer() and + * set/clear_buffer_uptodate() functions propagate buffer state into the + * page struct once IO has completed. + */ +int btrfs_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + sector_t iblock, lblock; + struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; + unsigned int blocksize; + int nr, i; + int fully_mapped = 1; + + BUG_ON(!PageLocked(page)); + blocksize = 1 << inode->i_blkbits; + if (!page_has_buffers(page)) + create_empty_buffers(page, blocksize, 0); + head = page_buffers(page); + + iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; + bh = head; + nr = 0; + i = 0; + + do { + if (buffer_uptodate(bh)) + continue; + + if (!buffer_mapped(bh)) { + int err = 0; + + fully_mapped = 0; + if (iblock < lblock) { + WARN_ON(bh->b_size != blocksize); + err = btrfs_get_block_csum(inode, iblock, + bh, 0); + if (err) + SetPageError(page); + } + if (!buffer_mapped(bh)) { + void *kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + i * blocksize, 0, blocksize); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + if (!err) + set_buffer_uptodate(bh); + continue; + } + /* + * get_block() might have updated the buffer + * synchronously + */ + if (buffer_uptodate(bh)) + continue; + } + arr[nr++] = bh; + } while (i++, iblock++, (bh = bh->b_this_page) != head); + + if (fully_mapped) + SetPageMappedToDisk(page); + + if (!nr) { + /* + * All buffers are uptodate - we can set the page uptodate + * as well. But not if get_block() returned an error. + */ + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + return 0; + } + + /* Stage two: lock the buffers */ + for (i = 0; i < nr; i++) { + bh = arr[i]; + lock_buffer(bh); + bh->b_end_io = btrfs_end_buffer_async_read; + set_buffer_async_read(bh); + } + + /* + * Stage 3: start the IO. Check for uptodateness + * inside the buffer lock in case another process reading + * the underlying blockdev brought it uptodate (the sct fix). + */ + for (i = 0; i < nr; i++) { + bh = arr[i]; + if (buffer_uptodate(bh)) + btrfs_end_buffer_async_read(bh, 1); + else + submit_bh(READ, bh); + } + return 0; } /* @@ -1424,11 +1668,19 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; const unsigned blocksize = 1 << inode->i_blkbits; int nr_underway = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; BUG_ON(!PageLocked(page)); last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; + /* no csumming allowed when from PF_MEMALLOC */ + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + if (!page_has_buffers(page)) { create_empty_buffers(page, blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -1496,6 +1748,23 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, continue; } if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { + struct btrfs_trans_handle *trans; + int ret; + u64 off = page->index << PAGE_CACHE_SHIFT; + char *kaddr; + + off += bh_offset(bh); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + kaddr = kmap(page); + btrfs_csum_file_block(trans, root, inode->i_ino, + off, kaddr + bh_offset(bh), + bh->b_size); + kunmap(page); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); mark_buffer_async_write(bh); } else { unlock_buffer(bh); @@ -1617,6 +1886,52 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return __btrfs_write_full_page(inode, page, wbc); } +/* + * btrfs_page_mkwrite() is not allowed to change the file size as it gets + * called from a page fault handler when a page is first dirtied. Hence we must + * be careful to check for EOF conditions here. We set the page up correctly + * for a written page which means we get ENOSPC checking when writing into + * holes and correct delalloc and unwritten extent mapping on filesystems that + * support these features. + * + * We are not allowed to take the i_mutex here so we have to play games to + * protect against truncate races as the page could now be beyond EOF. Because + * vmtruncate() writes the inode size before removing pages, once we have the + * page lock we can determine safely if the page is beyond EOF. If it is not + * beyond EOF, then the page is guaranteed safe against truncation until we + * unlock the page. + */ +int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + unsigned long end; + loff_t size; + int ret = -EINVAL; + + lock_page(page); + wait_on_page_writeback(page); + size = i_size_read(inode); + if ((page->mapping != inode->i_mapping) || + ((page->index << PAGE_CACHE_SHIFT) > size)) { + /* page got truncated out from underneath us */ + goto out_unlock; + } + + /* page is wholly or partially inside EOF */ + if (((page->index + 1) << PAGE_CACHE_SHIFT) > size) + end = size & ~PAGE_CACHE_MASK; + else + end = PAGE_CACHE_SIZE; + + ret = btrfs_prepare_write(NULL, page, 0, end); + if (!ret) + ret = btrfs_commit_write(NULL, page, 0, end); + +out_unlock: + unlock_page(page); + return ret; +} + static void btrfs_truncate(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1636,7 +1951,6 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); btrfs_update_inode(trans, root, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); @@ -1676,6 +1990,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct inode *inode; struct inode *dir; int ret; + int err; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; @@ -1683,9 +1998,9 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0); - if (subvol == NULL) - return -ENOSPC; + subvol = btrfs_alloc_free_block(trans, root, 0, 0); + if (IS_ERR(subvol)) + return PTR_ERR(subvol); leaf = btrfs_buffer_leaf(subvol); btrfs_set_header_nritems(&leaf->header, 0); btrfs_set_header_level(&leaf->header, 0); @@ -1694,7 +2009,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_header_owner(&leaf->header, root->root_key.objectid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); - mark_buffer_dirty(subvol); + btrfs_mark_buffer_dirty(subvol); inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); @@ -1706,12 +2021,15 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); + root_item.drop_level = 0; brelse(subvol); subvol = NULL; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); - BUG_ON(ret); + if (ret) + goto fail; btrfs_set_root_dirid(&root_item, new_dirid); @@ -1721,7 +2039,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); - BUG_ON(ret); + if (ret) + goto fail; /* * insert the directory item @@ -1731,10 +2050,12 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, name, namelen, dir->i_ino, &key, BTRFS_FT_DIR); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); + if (ret) + goto fail_commit; new_root = btrfs_read_fs_root(root->fs_info, &key); BUG_ON(!new_root); @@ -1744,24 +2065,29 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) inode = btrfs_new_inode(trans, new_root, new_dirid, BTRFS_I(dir)->block_group, S_IFDIR | 0700); + if (IS_ERR(inode)) + goto fail; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; new_root->inode = inode; ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); - BUG_ON(ret); + if (ret) + goto fail; inode->i_nlink = 1; inode->i_size = 6; ret = btrfs_update_inode(trans, new_root, inode); - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, new_root); - BUG_ON(ret); - + if (ret) + goto fail; +fail: + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; +fail_commit: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); - return 0; + return ret; } static int create_snapshot(struct btrfs_root *root, char *name, int namelen) @@ -1770,6 +2096,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct btrfs_key key; struct btrfs_root_item new_root_item; int ret; + int err; u64 objectid; if (!root->ref_cows) @@ -1780,11 +2107,13 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) BUG_ON(!trans); ret = btrfs_update_inode(trans, root, root->inode); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); - BUG_ON(ret); + if (ret) + goto fail; memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); @@ -1797,7 +2126,8 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); - BUG_ON(ret); + if (ret) + goto fail; /* * insert the directory item @@ -1808,16 +2138,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) root->fs_info->sb->s_root->d_inode->i_ino, &key, BTRFS_FT_DIR); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_inc_root_ref(trans, root); - BUG_ON(ret); + if (ret) + goto fail; - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); +fail: + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); - return 0; + return ret; } int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int @@ -1854,12 +2188,21 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int btrfs_free_path(path); if (di && !IS_ERR(di)) return -EEXIST; + if (IS_ERR(di)) + return PTR_ERR(di); if (root == root->fs_info->tree_root) ret = create_subvol(root, vol_args.name, namelen); else ret = create_snapshot(root, vol_args.name, namelen); - WARN_ON(ret); + break; + + case BTRFS_IOC_DEFRAG: + mutex_lock(&root->fs_info->fs_mutex); + btrfs_defrag_root(root, 0); + btrfs_defrag_root(root->fs_info->extent_root, 0); + mutex_unlock(&root->fs_info->fs_mutex); + ret = 0; break; default: return -ENOTTY; @@ -1924,42 +2267,43 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_path_cachep); } +static struct kmem_cache *cache_create(const char *name, size_t size, + unsigned long extra_flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)) +{ + return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | + SLAB_MEM_SPREAD | extra_flags), ctor +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + ,NULL +#endif + ); +} + int btrfs_init_cachep(void) { - btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", - sizeof(struct btrfs_inode), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once, NULL); + btrfs_inode_cachep = cache_create("btrfs_inode_cache", + sizeof(struct btrfs_inode), + 0, init_once); if (!btrfs_inode_cachep) goto fail; - btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", + btrfs_trans_handle_cachep = cache_create("btrfs_trans_handle_cache", sizeof(struct btrfs_trans_handle), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); + 0, NULL); if (!btrfs_trans_handle_cachep) goto fail; - btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", + btrfs_transaction_cachep = cache_create("btrfs_transaction_cache", sizeof(struct btrfs_transaction), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); + 0, NULL); if (!btrfs_transaction_cachep) goto fail; - btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", - sizeof(struct btrfs_transaction), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); + btrfs_path_cachep = cache_create("btrfs_path_cache", + sizeof(struct btrfs_transaction), + 0, NULL); if (!btrfs_path_cachep) goto fail; - btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", - 256, - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD | - SLAB_DESTROY_BY_RCU), - NULL, NULL); + btrfs_bit_radix_cachep = cache_create("btrfs_radix", 256, + SLAB_DESTROY_BY_RCU, NULL); if (!btrfs_bit_radix_cachep) goto fail; return 0; @@ -2022,7 +2366,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, old_parent_oid = btrfs_disk_key_objectid(&di->location); ret = btrfs_del_item(trans, root, path); if (ret) { - ret = -EIO; goto out_fail; } btrfs_release_path(root, path); @@ -2041,7 +2384,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } ret = btrfs_del_item(trans, root, path); if (ret) { - ret = -EIO; goto out_fail; } btrfs_release_path(root, path); @@ -2067,7 +2409,9 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, clear_nlink(new_inode); else drop_nlink(new_inode); - btrfs_update_inode(trans, root, new_inode); + ret = btrfs_update_inode(trans, root, new_inode); + if (ret) + goto out_fail; } ret = btrfs_add_link(trans, new_dentry, old_inode); if (ret) @@ -2139,7 +2483,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, datasize); - BUG_ON(err); + if (err) { + drop_inode = 1; + goto out_unlock; + } ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_generation(ei, trans->transid); @@ -2148,18 +2495,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, ptr = btrfs_file_extent_inline_start(ei); btrfs_memcpy(root, path->nodes[0]->b_data, ptr, symname, name_len); - mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_size = name_len - 1; - btrfs_update_inode(trans, root, inode); - err = 0; + err = btrfs_update_inode(trans, root, inode); + if (err) + drop_inode = 1; out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -2178,6 +2525,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .rename = btrfs_rename, .symlink = btrfs_symlink, .setattr = btrfs_setattr, + .mknod = btrfs_mknod, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -2214,6 +2562,11 @@ static struct inode_operations btrfs_file_inode_operations = { .setattr = btrfs_setattr, }; +static struct inode_operations btrfs_special_inode_operations = { + .getattr = btrfs_getattr, + .setattr = btrfs_setattr, +}; + static struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light,