X-Git-Url: http://pilppa.org/gitweb/?a=blobdiff_plain;f=fs%2Fbtrfs%2Finode.c;h=5c05ecbc5726e79e2716faddd8aa35d31534f6b2;hb=e9d0b13b5bbb58c9b840e407a8d181442f799966;hp=e639cc9e089f01d02595be343f582b99463936c7;hpb=f1ace244c8c1e16eaa5c8b3b5339849651e31ede;p=linux-2.6-omap-h63xx.git

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e639cc9e089..5c05ecbc572 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -16,7 +16,6 @@
  * Boston, MA 021110-1307, USA.
  */
 
-#include <linux/module.h>
 #include <linux/buffer_head.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
@@ -31,6 +30,8 @@
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
+#include <linux/bit_spinlock.h>
+#include <linux/version.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
@@ -46,6 +47,7 @@ struct btrfs_iget_args {
 static struct inode_operations btrfs_dir_inode_operations;
 static struct inode_operations btrfs_symlink_inode_operations;
 static struct inode_operations btrfs_dir_ro_inode_operations;
+static struct inode_operations btrfs_special_inode_operations;
 static struct inode_operations btrfs_file_inode_operations;
 static struct address_space_operations btrfs_aops;
 static struct address_space_operations btrfs_symlink_aops;
@@ -75,6 +77,7 @@ void btrfs_read_locked_inode(struct inode *inode)
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_key location;
 	u64 alloc_group_block;
+	u32 rdev;
 	int ret;
 
 	path = btrfs_alloc_path();
@@ -104,6 +107,8 @@ void btrfs_read_locked_inode(struct inode *inode)
 	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
 	inode->i_blocks = btrfs_inode_nblocks(inode_item);
 	inode->i_generation = btrfs_inode_generation(inode_item);
+	inode->i_rdev = 0;
+	rdev = btrfs_inode_rdev(inode_item);
 	alloc_group_block = btrfs_inode_block_group(inode_item);
 	BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
 						       alloc_group_block);
@@ -114,12 +119,6 @@ void btrfs_read_locked_inode(struct inode *inode)
 	mutex_unlock(&root->fs_info->fs_mutex);
 
 	switch (inode->i_mode & S_IFMT) {
-#if 0
-	default:
-		init_special_inode(inode, inode->i_mode,
-				   btrfs_inode_rdev(inode_item));
-		break;
-#endif
 	case S_IFREG:
 		inode->i_mapping->a_ops = &btrfs_aops;
 		inode->i_fop = &btrfs_file_operations;
@@ -136,6 +135,9 @@ void btrfs_read_locked_inode(struct inode *inode)
 		inode->i_op = &btrfs_symlink_inode_operations;
 		inode->i_mapping->a_ops = &btrfs_symlink_aops;
 		break;
+	default:
+		init_special_inode(inode, inode->i_mode, rdev);
+		break;
 	}
 	return;
 
@@ -162,6 +164,7 @@ static void fill_inode_item(struct btrfs_inode_item *item,
 	btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
 	btrfs_set_inode_nblocks(item, inode->i_blocks);
 	btrfs_set_inode_generation(item, inode->i_generation);
+	btrfs_set_inode_rdev(item, inode->i_rdev);
 	btrfs_set_inode_block_group(item,
 				    BTRFS_I(inode)->block_group->key.objectid);
 }
@@ -211,7 +214,11 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
 	struct btrfs_dir_item *di;
 
 	path = btrfs_alloc_path();
-	BUG_ON(!path);
+	if (!path) {
+		/* no path to free and nothing to update yet */
+		return -ENOMEM;
+	}
+
 	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
 				    name, name_len, -1);
 	if (IS_ERR(di)) {
@@ -224,7 +231,8 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
 	}
 	objectid = btrfs_disk_key_objectid(&di->location);
 	ret = btrfs_delete_one_dir_name(trans, root, path, di);
-	BUG_ON(ret);
+	if (ret)
+		goto err;
 	btrfs_release_path(root, path);
 
 	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
@@ -238,16 +246,16 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
 		goto err;
 	}
 	ret = btrfs_delete_one_dir_name(trans, root, path, di);
-	BUG_ON(ret);
 
 	dentry->d_inode->i_ctime = dir->i_ctime;
 err:
 	btrfs_free_path(path);
 	if (!ret) {
 		dir->i_size -= name_len * 2;
+		dir->i_mtime = dir->i_ctime = CURRENT_TIME;
 		btrfs_update_inode(trans, root, dir);
 		drop_nlink(dentry->d_inode);
-		btrfs_update_inode(trans, root, dentry->d_inode);
+		ret = btrfs_update_inode(trans, root, dentry->d_inode);
 		dir->i_sb->s_dirt = 1;
 	}
 	return ret;
@@ -358,47 +366,14 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans,
 	BUG_ON(!path);
 	ret = btrfs_lookup_inode(trans, root, path,
 				 &BTRFS_I(inode)->location, -1);
-	BUG_ON(ret);
-	ret = btrfs_del_item(trans, root, path);
-	BUG_ON(ret);
+	if (ret > 0)
+		ret = -ENOENT;
+	if (!ret)
+		ret = btrfs_del_item(trans, root, path);
 	btrfs_free_path(path);
 	return ret;
 }
 
-/*
- * truncates go from a high offset to a low offset.  So, walk
- * from hi to lo in the node and issue readas.  Stop when you find
- * keys from a different objectid
- */
-static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
-			   u64 objectid)
-{
-	struct btrfs_node *node;
-	int i;
-	int nritems;
-	u64 item_objectid;
-	u64 blocknr;
-	int slot;
-	int ret;
-
-	if (!path->nodes[1])
-		return;
-	node = btrfs_buffer_node(path->nodes[1]);
-	slot = path->slots[1];
-	if (slot == 0)
-		return;
-	nritems = btrfs_header_nritems(&node->header);
-	for (i = slot - 1; i >= 0; i--) {
-		item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
-		if (item_objectid != objectid)
-			break;
-		blocknr = btrfs_node_blockptr(node, i);
-		ret = readahead_tree_block(root, blocknr);
-		if (ret)
-			break;
-	}
-}
-
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
@@ -425,6 +400,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
 	int del_item;
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
+	path->reada = -1;	/* only touch path once it is known non-NULL */
 	/* FIXME, add redo link to tree so we don't leak on crash */
 	key.objectid = inode->i_ino;
@@ -441,7 +417,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
 			BUG_ON(path->slots[0] == 0);
 			path->slots[0]--;
 		}
-		reada_truncate(root, path, inode->i_ino);
 		leaf = btrfs_buffer_leaf(path->nodes[0]);
 		found_key = &leaf->items[path->slots[0]].key;
 		found_type = btrfs_disk_key_type(found_key);
@@ -499,7 +474,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
 							 extent_num_blocks);
 				inode->i_blocks -= (orig_num_blocks -
 					extent_num_blocks) << 3;
-				mark_buffer_dirty(path->nodes[0]);
+				btrfs_mark_buffer_dirty(path->nodes[0]);
 			} else {
 				extent_start =
 					btrfs_file_extent_disk_blocknr(fi);
@@ -515,7 +490,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
 		}
 		if (del_item) {
 			ret = btrfs_del_item(trans, root, path);
-			BUG_ON(ret);
+			if (ret)
+				goto error;
 		} else {
 			break;
 		}
@@ -561,7 +537,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 		goto out;
 
 	if (!PageUptodate(page)) {
-		ret = mpage_readpage(page, btrfs_get_block);
+		ret = btrfs_readpage(NULL, page);
 		lock_page(page);
 		if (!PageUptodate(page)) {
 			ret = -EIO;
@@ -576,19 +552,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 				 page->index << PAGE_CACHE_SHIFT,
 				 (page->index + 1) << PAGE_CACHE_SHIFT,
 				 &alloc_hint);
-	BUG_ON(ret);
-	ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
+	if (ret)
+		goto out;
+	ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0,
 				 alloc_hint, (u64)-1, &ins, 1);
-	BUG_ON(ret);
+	if (ret)
+		goto out;
 	ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
 				       page->index << PAGE_CACHE_SHIFT,
 				       ins.objectid, 1, 1);
-	BUG_ON(ret);
+	if (ret)
+		goto out;
 	SetPageChecked(page);
 	kaddr = kmap(page);
 	memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
 	flush_dcache_page(page);
-	btrfs_csum_file_block(trans, root, inode->i_ino,
+	ret = btrfs_csum_file_block(trans, root, inode->i_ino,
 			      page->index << PAGE_CACHE_SHIFT,
 			      kaddr, PAGE_CACHE_SIZE);
 	kunmap(page);
@@ -632,9 +611,10 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 		btrfs_set_trans_block_group(trans, inode);
 		err = btrfs_insert_file_extent(trans, root, inode->i_ino,
 					       pos, 0, 0, hole_size);
-		BUG_ON(err);
 		btrfs_end_transaction(trans, root);
 		mutex_unlock(&root->fs_info->fs_mutex);
+		if (err)
+			return err;
 	}
 out:
 	err = inode_setattr(inode, attr);
@@ -656,12 +636,20 @@ void btrfs_delete_inode(struct inode *inode)
 	trans = btrfs_start_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, inode);
 	ret = btrfs_truncate_in_trans(trans, root, inode);
-	BUG_ON(ret);
-	btrfs_free_inode(trans, root, inode);
+	if (ret)
+		goto no_delete_lock;
+	ret = btrfs_free_inode(trans, root, inode);
+	if (ret)
+		goto no_delete_lock;
 	btrfs_end_transaction(trans, root);
 	mutex_unlock(&root->fs_info->fs_mutex);
 	btrfs_btree_balance_dirty(root);
 	return;
+
+no_delete_lock:
+	btrfs_end_transaction(trans, root);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	btrfs_btree_balance_dirty(root);
 no_delete:
 	clear_inode(inode);
 }
@@ -805,36 +793,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 	return d_splice_alias(inode, dentry);
 }
 
-/*
- * readahead one full node of leaves as long as their keys include
- * the objectid supplied
- */
-static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
-			 u64 objectid)
-{
-	struct btrfs_node *node;
-	int i;
-	u32 nritems;
-	u64 item_objectid;
-	u64 blocknr;
-	int slot;
-	int ret;
-
-	if (!path->nodes[1])
-		return;
-	node = btrfs_buffer_node(path->nodes[1]);
-	slot = path->slots[1];
-	nritems = btrfs_header_nritems(&node->header);
-	for (i = slot + 1; i < nritems; i++) {
-		item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
-		if (item_objectid != objectid)
-			break;
-		blocknr = btrfs_node_blockptr(node, i);
-		ret = readahead_tree_block(root, blocknr);
-		if (ret)
-			break;
-	}
-}
 static unsigned char btrfs_filetype_table[] = {
 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 };
@@ -868,18 +826,17 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	btrfs_set_key_type(&key, key_type);
 	key.offset = filp->f_pos;
 	path = btrfs_alloc_path();
+	path->reada = 1;
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		goto err;
 	advance = 0;
-	reada_leaves(root, path, inode->i_ino);
 	while(1) {
 		leaf = btrfs_buffer_leaf(path->nodes[0]);
 		nritems = btrfs_header_nritems(&leaf->header);
 		slot = path->slots[0];
 		if (advance || slot >= nritems) {
 			if (slot >= nritems -1) {
-				reada_leaves(root, path, inode->i_ino);
 				ret = btrfs_next_leaf(root, path);
 				if (ret)
 					break;
@@ -945,7 +902,7 @@ int btrfs_write_inode(struct inode *inode, int wait)
 }
 
 /*
- * This is somewhat expense, updating the tree every time the
+ * This is somewhat expensive, updating the tree every time the
  * inode changes.  But, it is most likely to find the inode in cache.
  * FIXME, needs more benchmarking...there are no reasons other than performance
  * to keep or drop this code.
@@ -961,7 +918,6 @@ void btrfs_dirty_inode(struct inode *inode)
 	btrfs_update_inode(trans, root, inode);
 	btrfs_end_transaction(trans, root);
 	mutex_unlock(&root->fs_info->fs_mutex);
-	btrfs_btree_balance_dirty(root);
 }
 
 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
@@ -1002,8 +958,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
 
 	ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
-	BUG_ON(ret);
-
+	if (ret)
+		return ERR_PTR(ret);
 	insert_inode_hash(inode);
 	return inode;
 }
@@ -1019,6 +975,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans,
 	int ret;
 	struct btrfs_key key;
 	struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
+	struct inode *parent_inode;
 	key.objectid = inode->i_ino;
 	key.flags = 0;
 	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
@@ -1029,7 +986,9 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans,
 				    dentry->d_parent->d_inode->i_ino,
 				    &key, btrfs_inode_type(inode));
 	if (ret == 0) {
-		dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
+		parent_inode = dentry->d_parent->d_inode;
+		parent_inode->i_size += dentry->d_name.len * 2;
+		parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
 		ret = btrfs_update_inode(trans, root,
 					 dentry->d_parent->d_inode);
 	}
@@ -1049,6 +1008,69 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
 	return err;
 }
 
+/*
+ * create the special inode for dentry in dir and link it into the
+ * directory, all inside one transaction.  rdev is stored in the new
+ * in-memory inode by init_special_inode() and then written to the
+ * inode item.  Returns 0 on success or a negative errno.
+ */
+static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
+			int mode, dev_t rdev)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct inode *inode;
+	int err;
+	int drop_inode = 0;
+	u64 objectid;
+
+	if (!new_valid_dev(rdev))
+		return -EINVAL;
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, dir);
+
+	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
+	if (err) {
+		err = -ENOSPC;
+		goto out_unlock;
+	}
+
+	inode = btrfs_new_inode(trans, root, objectid,
+				BTRFS_I(dir)->block_group, mode);
+	err = PTR_ERR(inode);
+	if (IS_ERR(inode))
+		goto out_unlock;
+
+	btrfs_set_trans_block_group(trans, inode);
+	err = btrfs_add_nondir(trans, dentry, inode);
+	if (err)
+		drop_inode = 1;
+	else {
+		inode->i_op = &btrfs_special_inode_operations;
+		init_special_inode(inode, inode->i_mode, rdev);
+		/*
+		 * btrfs_new_inode() inserted the inode item before
+		 * i_rdev was known, write the item again to record it
+		 */
+		err = btrfs_update_inode(trans, root, inode);
+	}
+	dir->i_sb->s_dirt = 1;
+	btrfs_update_inode_block_group(trans, inode);
+	btrfs_update_inode_block_group(trans, dir);
+out_unlock:
+	btrfs_end_transaction(trans, root);
+	mutex_unlock(&root->fs_info->fs_mutex);
+
+	if (drop_inode) {
+		inode_dec_link_count(inode);
+		iput(inode);
+	}
+	btrfs_btree_balance_dirty(root);
+	return err;
+}
+
 static int btrfs_create(struct inode *dir, struct dentry *dentry,
 			int mode, struct nameidata *nd)
 {
@@ -1121,7 +1132,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 		drop_inode = 1;
 	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, dir);
-	btrfs_update_inode(trans, root, inode);
+	err = btrfs_update_inode(trans, root, inode);
+	if (err)
+		drop_inode = 1;
 
 	btrfs_end_transaction(trans, root);
 	mutex_unlock(&root->fs_info->fs_mutex);
@@ -1257,8 +1270,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 	if (create & BTRFS_GET_BLOCK_CREATE) {
-		WARN_ON(1);
-		/* this almost but not quite works */
+		/*
+		 * danger!, this only works if the page is properly up
+		 * to date somehow
+		 */
 		trans = btrfs_start_transaction(root, 1);
 		if (!trans) {
 			err = -ENOMEM;
@@ -1345,20 +1360,28 @@ not_found:
 	if (create & BTRFS_GET_BLOCK_CREATE) {
 		struct btrfs_key ins;
 		ret = btrfs_alloc_extent(trans, root, inode->i_ino,
-					 1, alloc_hint, (u64)-1,
+					 1, 0, alloc_hint, (u64)-1,
 					 &ins, 1);
-		BUG_ON(ret);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
 		ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
 					       iblock << inode->i_blkbits,
 					       ins.objectid, ins.offset,
 					       ins.offset);
-		BUG_ON(ret);
-		SetPageChecked(result->b_page);
+		if (ret) {
+			err = ret;
+			goto out;
+		}
 		btrfs_map_bh_to_logical(root, result, ins.objectid);
 	}
 out:
-	if (trans)
-		err = btrfs_end_transaction(trans, root);
+	if (trans) {
+		ret = btrfs_end_transaction(trans, root);
+		if (!err)
+			err = ret;
+	}
 	btrfs_free_path(path);
 	return err;
 }
@@ -1374,6 +1397,50 @@ int btrfs_get_block(struct inode *inode, sector_t iblock,
 	return err;
 }
 
+/*
+ * get_block variant used for reads: map the buffer with
+ * btrfs_get_block_lock() and then look up the csum item for the block.
+ * The csum bytes are copied into result->b_private, which is set to
+ * NULL instead when the csum lookup fails.  Returns 0 or a negative errno.
+ */
+static int btrfs_get_block_csum(struct inode *inode, sector_t iblock,
+				struct buffer_head *result, int create)
+{
+	int ret;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct page *page = result->b_page;
+	/* widen before shifting, page->index may be narrower than 64 bits */
+	u64 offset = ((u64)page->index << PAGE_CACHE_SHIFT) + bh_offset(result);
+	struct btrfs_csum_item *item;
+	struct btrfs_path *path = NULL;
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	ret = btrfs_get_block_lock(inode, iblock, result, create);
+	if (ret)
+		goto out;
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0);
+	if (IS_ERR(item)) {
+		ret = PTR_ERR(item);
+		/* a csum that isn't present is a preallocated region. */
+		if (ret == -ENOENT || ret == -EFBIG)
+			ret = 0;
+		result->b_private = NULL;
+		goto out;
+	}
+	memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE);
+out:
+	if (path)
+		btrfs_free_path(path);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	return ret;
+}
+
 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
 			   struct buffer_head *result, int create)
 {
@@ -1395,9 +1451,197 @@ static int btrfs_prepare_write(struct file *file, struct page *page,
 	return block_prepare_write(page, from, to, btrfs_get_block);
 }
 
-static int btrfs_readpage(struct file *file, struct page *page)
+static void buffer_io_error(struct buffer_head *bh)
 {
-	return mpage_readpage(page, btrfs_get_block);
+	char b[BDEVNAME_SIZE];	/* scratch space for bdevname() below */
+
+	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
+			bdevname(bh->b_bdev, b),
+			(unsigned long long)bh->b_blocknr);
+}
+
+/*
+ * I/O completion handler for btrfs_readpage() - checks the data against
+ * the csum kept in bh->b_private and unlocks the page after the last bh.
+ */
+static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
+{
+	unsigned long flags;
+	struct buffer_head *first;
+	struct buffer_head *tmp;
+	struct page *page;
+	int page_uptodate = 1;
+	struct inode *inode;
+	int ret;
+
+	BUG_ON(!buffer_async_read(bh));
+
+	page = bh->b_page;
+	inode = page->mapping->host;
+	if (uptodate) {
+		void *kaddr;
+		struct btrfs_root *root = BTRFS_I(inode)->root;
+		if (bh->b_private) {
+			char csum[BTRFS_CRC32_SIZE];
+			kaddr = kmap_atomic(page, KM_IRQ0);
+			ret = btrfs_csum_data(root, kaddr + bh_offset(bh),
+					      bh->b_size, csum);
+			BUG_ON(ret);
+			if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) {
+				u64 offset;
+				offset = ((u64)page->index << PAGE_CACHE_SHIFT) +
+					bh_offset(bh);
+				printk("btrfs csum failed ino %lu off %llu\n",
+				       inode->i_ino,
+				       (unsigned long long)offset);
+				memset(kaddr + bh_offset(bh), 1, bh->b_size);
+				flush_dcache_page(page);
+			}
+			kunmap_atomic(kaddr, KM_IRQ0);
+		}
+		set_buffer_uptodate(bh);
+	} else {
+		clear_buffer_uptodate(bh);
+		if (printk_ratelimit())
+			buffer_io_error(bh);
+		SetPageError(page);
+	}
+
+	/*
+	 * Be _very_ careful from here on. Bad things can happen if
+	 * two buffer heads end IO at almost the same time and both
+	 * decide that the page is now completely done.
+	 */
+	first = page_buffers(page);
+	local_irq_save(flags);
+	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+	clear_buffer_async_read(bh);
+	unlock_buffer(bh);
+	tmp = bh;
+	do {
+		if (!buffer_uptodate(tmp))
+			page_uptodate = 0;
+		if (buffer_async_read(tmp)) {
+			BUG_ON(!buffer_locked(tmp));
+			goto still_busy;
+		}
+		tmp = tmp->b_this_page;
+	} while (tmp != bh);
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
+
+	/*
+	 * If none of the buffers had errors and they are all
+	 * uptodate then we can set the page uptodate.
+	 */
+	if (page_uptodate && !PageError(page))
+		SetPageUptodate(page);
+	unlock_page(page);
+	return;
+
+still_busy:
+	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+	local_irq_restore(flags);
+	return;
+}
+
+/*
+ * read a page buffer by buffer.  Unmapped buffers inside i_size are
+ * mapped with btrfs_get_block_csum(); buffers that stay unmapped are
+ * zero filled.  The rest are submitted for async read and finished by
+ * btrfs_end_buffer_async_read(), which unlocks the page after the last
+ * buffer completes.  Always returns 0, errors are flagged on the page.
+ */
+int btrfs_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	sector_t iblock, lblock;
+	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+	unsigned int blocksize;
+	int nr, i;
+	int fully_mapped = 1;
+
+	BUG_ON(!PageLocked(page));
+	blocksize = 1 << inode->i_blkbits;
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, blocksize, 0);
+	head = page_buffers(page);
+
+	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+	bh = head;
+	nr = 0;
+	i = 0;
+
+	do {
+		if (buffer_uptodate(bh))
+			continue;
+
+		if (!buffer_mapped(bh)) {
+			int err = 0;
+
+			fully_mapped = 0;
+			if (iblock < lblock) {
+				WARN_ON(bh->b_size != blocksize);
+				err = btrfs_get_block_csum(inode, iblock,
+							   bh, 0);
+				if (err)
+					SetPageError(page);
+			}
+			if (!buffer_mapped(bh)) {
+				void *kaddr = kmap_atomic(page, KM_USER0);
+				memset(kaddr + i * blocksize, 0, blocksize);
+				flush_dcache_page(page);
+				kunmap_atomic(kaddr, KM_USER0);
+				if (!err)
+					set_buffer_uptodate(bh);
+				continue;
+			}
+			/*
+			 * get_block() might have updated the buffer
+			 * synchronously
+			 */
+			if (buffer_uptodate(bh))
+				continue;
+		}
+		arr[nr++] = bh;
+	} while (i++, iblock++, (bh = bh->b_this_page) != head);
+
+	if (fully_mapped)
+		SetPageMappedToDisk(page);
+
+	if (!nr) {
+		/*
+		 * All buffers are uptodate - we can set the page uptodate
+		 * as well. But not if get_block() returned an error.
+		 */
+		if (!PageError(page))
+			SetPageUptodate(page);
+		unlock_page(page);
+		return 0;
+	}
+
+	/* Stage two: lock the buffers */
+	for (i = 0; i < nr; i++) {
+		bh = arr[i];
+		lock_buffer(bh);
+		bh->b_end_io = btrfs_end_buffer_async_read;
+		set_buffer_async_read(bh);
+	}
+
+	/*
+	 * Stage 3: start the IO.  Check for uptodateness
+	 * inside the buffer lock in case another process reading
+	 * the underlying blockdev brought it uptodate (the sct fix).
+	 */
+	for (i = 0; i < nr; i++) {
+		bh = arr[i];
+		if (buffer_uptodate(bh))
+			btrfs_end_buffer_async_read(bh, 1);
+		else
+			submit_bh(READ, bh);
+	}
+	return 0;
 }
 
 /*
@@ -1424,11 +1668,19 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,
 	struct buffer_head *bh, *head;
 	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 
 	BUG_ON(!PageLocked(page));
 
 	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
 
+	/* no csumming allowed when from PF_MEMALLOC */
+	if (current->flags & PF_MEMALLOC) {
+		redirty_page_for_writepage(wbc, page);
+		unlock_page(page);
+		return 0;
+	}
+
 	if (!page_has_buffers(page)) {
 		create_empty_buffers(page, blocksize,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1496,6 +1748,23 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page,
 			continue;
 		}
 		if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
+			struct btrfs_trans_handle *trans;
+			int ret;
+			u64 off = (u64)page->index << PAGE_CACHE_SHIFT;
+			char *kaddr;
+
+			off += bh_offset(bh);	/* file offset of this buffer */
+			mutex_lock(&root->fs_info->fs_mutex);
+			trans = btrfs_start_transaction(root, 1);
+			btrfs_set_trans_block_group(trans, inode);
+			kaddr = kmap(page);
+			btrfs_csum_file_block(trans, root, inode->i_ino,
+						    off, kaddr + bh_offset(bh),
+						    bh->b_size);
+			kunmap(page);
+			ret = btrfs_end_transaction(trans, root);
+			BUG_ON(ret);
+			mutex_unlock(&root->fs_info->fs_mutex);
 			mark_buffer_async_write(bh);
 		} else {
 			unlock_buffer(bh);
@@ -1617,6 +1886,52 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 	return __btrfs_write_full_page(inode, page, wbc);
 }
 
+/*
+ * btrfs_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. The part of the page that lies
+ * inside i_size is passed through btrfs_prepare_write() and then
+ * btrfs_commit_write().  Returns 0 on success, -EINVAL when the page is no
+ * longer part of this file, or the error from the write helpers.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF.  Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	unsigned long end;
+	loff_t size;
+	int ret = -EINVAL;
+
+	lock_page(page);
+	wait_on_page_writeback(page);
+	size = i_size_read(inode);
+	if ((page->mapping != inode->i_mapping) ||
+	    (((loff_t)page->index << PAGE_CACHE_SHIFT) > size)) {
+		/* page got truncated out from underneath us */
+		goto out_unlock;
+	}
+
+	/* page is wholly or partially inside EOF */
+	if ((((loff_t)page->index + 1) << PAGE_CACHE_SHIFT) > size)
+		end = size & ~PAGE_CACHE_MASK;
+	else
+		end = PAGE_CACHE_SIZE;
+
+	ret = btrfs_prepare_write(NULL, page, 0, end);
+	if (!ret)
+		ret = btrfs_commit_write(NULL, page, 0, end);
+
+out_unlock:
+	unlock_page(page);
+	return ret;
+}
+
 static void btrfs_truncate(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -1636,7 +1951,6 @@ static void btrfs_truncate(struct inode *inode)
 
 	/* FIXME, add redo link to tree so we don't leak on crash */
 	ret = btrfs_truncate_in_trans(trans, root, inode);
-	BUG_ON(ret);
 	btrfs_update_inode(trans, root, inode);
 	ret = btrfs_end_transaction(trans, root);
 	BUG_ON(ret);
@@ -1676,6 +1990,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
 	struct inode *inode;
 	struct inode *dir;
 	int ret;
+	int err;
 	u64 objectid;
 	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
 
@@ -1683,9 +1998,9 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
 	trans = btrfs_start_transaction(root, 1);
 	BUG_ON(!trans);
 
-	subvol = btrfs_alloc_free_block(trans, root, 0);
-	if (subvol == NULL)
-		return -ENOSPC;
+	subvol = btrfs_alloc_free_block(trans, root, 0, 0);
+	if (IS_ERR(subvol))
+		return PTR_ERR(subvol);
 	leaf = btrfs_buffer_leaf(subvol);
 	btrfs_set_header_nritems(&leaf->header, 0);
 	btrfs_set_header_level(&leaf->header, 0);
@@ -1694,7 +2009,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
 	btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
 	memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
 	       sizeof(leaf->header.fsid));
-	mark_buffer_dirty(subvol);
+	btrfs_mark_buffer_dirty(subvol);
 
 	inode_item = &root_item.inode;
 	memset(inode_item, 0, sizeof(*inode_item));
@@ -1706,12 +2021,15 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
 
 	btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
 	btrfs_set_root_refs(&root_item, 1);
+	memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
+	root_item.drop_level = 0;
 	brelse(subvol);
 	subvol = NULL;
 
 	ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
 				       0, &objectid);
-	BUG_ON(ret);
+	if (ret)
+		goto fail;
 
 	btrfs_set_root_dirid(&root_item, new_dirid);
 
@@ -1721,7 +2039,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
 	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
 				&root_item);
-	BUG_ON(ret);
+	if (ret)
+		goto fail;
 
 	/*
 	 * insert the directory item
@@ -1731,10 +2050,12 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
 	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
 				    name, namelen, dir->i_ino, &key,
 				    BTRFS_FT_DIR);
-	BUG_ON(ret);
+	if (ret)
+		goto fail;
 
 	ret = btrfs_commit_transaction(trans, root);
-	BUG_ON(ret);
+	if (ret)
+		goto fail_commit;
 
 	new_root = btrfs_read_fs_root(root->fs_info, &key);
 	BUG_ON(!new_root);
@@ -1744,24 +2065,29 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
 
 	inode = btrfs_new_inode(trans, new_root, new_dirid,
 				BTRFS_I(dir)->block_group, S_IFDIR | 0700);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);	/* don't report success to the caller */
+		goto fail;
+	}
 	inode->i_op = &btrfs_dir_inode_operations;
 	inode->i_fop = &btrfs_dir_file_operations;
 	new_root->inode = inode;
 
 	ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
-	BUG_ON(ret);
+	if (ret)
+		goto fail;
 
 	inode->i_nlink = 1;
 	inode->i_size = 6;
 	ret = btrfs_update_inode(trans, new_root, inode);
-	BUG_ON(ret);
-
-	ret = btrfs_commit_transaction(trans, new_root);
-	BUG_ON(ret);
-
+fail:
+	err = btrfs_commit_transaction(trans, root);
+	if (err && !ret)
+		ret = err;
+fail_commit:
 	mutex_unlock(&root->fs_info->fs_mutex);
 	btrfs_btree_balance_dirty(root);
-	return 0;
+	return ret;
 }
 
 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
@@ -1770,6 +2096,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
 	struct btrfs_key key;
 	struct btrfs_root_item new_root_item;
 	int ret;
+	int err;
 	u64 objectid;
 
 	if (!root->ref_cows)
@@ -1780,11 +2107,13 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
 	BUG_ON(!trans);
 
 	ret = btrfs_update_inode(trans, root, root->inode);
-	BUG_ON(ret);
+	if (ret)
+		goto fail;
 
 	ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
 				       0, &objectid);
-	BUG_ON(ret);
+	if (ret)
+		goto fail;
 
 	memcpy(&new_root_item, &root->root_item,
 	       sizeof(new_root_item));
@@ -1797,7 +2126,8 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
 
 	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
 				&new_root_item);
-	BUG_ON(ret);
+	if (ret)
+		goto fail;
 
 	/*
 	 * insert the directory item
@@ -1808,16 +2138,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
 				    root->fs_info->sb->s_root->d_inode->i_ino,
 				    &key, BTRFS_FT_DIR);
 
-	BUG_ON(ret);
+	if (ret)
+		goto fail;
 
 	ret = btrfs_inc_root_ref(trans, root);
-	BUG_ON(ret);
+	if (ret)
+		goto fail;
 
-	ret = btrfs_commit_transaction(trans, root);
-	BUG_ON(ret);
+fail:
+	err = btrfs_commit_transaction(trans, root);
+	if (err && !ret)
+		ret = err;
 	mutex_unlock(&root->fs_info->fs_mutex);
 	btrfs_btree_balance_dirty(root);
-	return 0;
+	return ret;
 }
 
 int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
@@ -1854,12 +2188,21 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
 		btrfs_free_path(path);
 		if (di && !IS_ERR(di))
 			return -EEXIST;
+		if (IS_ERR(di))
+			return PTR_ERR(di);
 
 		if (root == root->fs_info->tree_root)
 			ret = create_subvol(root, vol_args.name, namelen);
 		else
 			ret = create_snapshot(root, vol_args.name, namelen);
-		WARN_ON(ret);
+		break;
+
+	case BTRFS_IOC_DEFRAG:
+		mutex_lock(&root->fs_info->fs_mutex);
+		btrfs_defrag_root(root, 0);
+		btrfs_defrag_root(root->fs_info->extent_root, 0);
+		mutex_unlock(&root->fs_info->fs_mutex);
+		ret = 0;
 		break;
 	default:
 		return -ENOTTY;
@@ -1924,42 +2267,49 @@ void btrfs_destroy_cachep(void)
 		kmem_cache_destroy(btrfs_path_cachep);
 }
 
+/*
+ * Thin wrapper around kmem_cache_create() shared by every btrfs slab cache.
+ * SLAB_RECLAIM_ACCOUNT and SLAB_MEM_SPREAD are always set; extra_flags is
+ * OR'd in on top.  Kernels older than 2.6.23 expect one more trailing
+ * argument to kmem_cache_create(), which is passed as NULL.
+ */
+static struct kmem_cache *cache_create(const char *name, size_t size,
+				       unsigned long extra_flags,
+				       void (*ctor)(void *, struct kmem_cache *,
+						    unsigned long))
+{
+	return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
+				 SLAB_MEM_SPREAD | extra_flags), ctor
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+				 ,NULL
+#endif
+				);
+}
+
 int btrfs_init_cachep(void)
 {
-	btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
-					     sizeof(struct btrfs_inode),
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
-					     init_once, NULL);
+	btrfs_inode_cachep = cache_create("btrfs_inode_cache",
+					  sizeof(struct btrfs_inode),
+					  0, init_once);
 	if (!btrfs_inode_cachep)
 		goto fail;
-	btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
+	btrfs_trans_handle_cachep = cache_create("btrfs_trans_handle_cache",
 					     sizeof(struct btrfs_trans_handle),
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
-					     NULL, NULL);
+					     0, NULL);
 	if (!btrfs_trans_handle_cachep)
 		goto fail;
-	btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
+	btrfs_transaction_cachep = cache_create("btrfs_transaction_cache",
 					     sizeof(struct btrfs_transaction),
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
-					     NULL, NULL);
+					     0, NULL);
 	if (!btrfs_transaction_cachep)
 		goto fail;
-	btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
-					     sizeof(struct btrfs_transaction),
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
-					     NULL, NULL);
+	btrfs_path_cachep = cache_create("btrfs_path_cache",
+					 sizeof(struct btrfs_path),
+					 0, NULL);
 	if (!btrfs_path_cachep)
 		goto fail;
-	btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
-					     256,
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD |
-						SLAB_DESTROY_BY_RCU),
-					     NULL, NULL);
+	btrfs_bit_radix_cachep = cache_create("btrfs_radix", 256,
+					      SLAB_DESTROY_BY_RCU, NULL);
 	if (!btrfs_bit_radix_cachep)
 		goto fail;
 	return 0;
@@ -2022,7 +2366,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
 		old_parent_oid = btrfs_disk_key_objectid(&di->location);
 		ret = btrfs_del_item(trans, root, path);
 		if (ret) {
-			ret = -EIO;
 			goto out_fail;
 		}
 		btrfs_release_path(root, path);
@@ -2041,7 +2384,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
 		}
 		ret = btrfs_del_item(trans, root, path);
 		if (ret) {
-			ret = -EIO;
 			goto out_fail;
 		}
 		btrfs_release_path(root, path);
@@ -2067,7 +2409,9 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
 			clear_nlink(new_inode);
 		else
 			drop_nlink(new_inode);
-		btrfs_update_inode(trans, root, new_inode);
+		ret = btrfs_update_inode(trans, root, new_inode);
+		if (ret)
+			goto out_fail;
 	}
 	ret = btrfs_add_link(trans, new_dentry, old_inode);
 	if (ret)
@@ -2139,7 +2483,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	datasize = btrfs_file_extent_calc_inline_size(name_len);
 	err = btrfs_insert_empty_item(trans, root, path, &key,
 				      datasize);
-	BUG_ON(err);
+	if (err) {
+		/* out_unlock does not release the path, so drop it here */
+		btrfs_free_path(path);
+		drop_inode = 1;
+		goto out_unlock;
+	}
 	ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
 	       path->slots[0], struct btrfs_file_extent_item);
 	btrfs_set_file_extent_generation(ei, trans->transid);
@@ -2148,18 +2495,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	ptr = btrfs_file_extent_inline_start(ei);
 	btrfs_memcpy(root, path->nodes[0]->b_data,
 		     ptr, symname, name_len);
-	mark_buffer_dirty(path->nodes[0]);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
 	btrfs_free_path(path);
 	inode->i_op = &btrfs_symlink_inode_operations;
 	inode->i_mapping->a_ops = &btrfs_symlink_aops;
 	inode->i_size = name_len - 1;
-	btrfs_update_inode(trans, root, inode);
-	err = 0;
+	err = btrfs_update_inode(trans, root, inode);
+	if (err)
+		drop_inode = 1;
 
 out_unlock:
 	btrfs_end_transaction(trans, root);
 	mutex_unlock(&root->fs_info->fs_mutex);
-
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
@@ -2178,6 +2525,7 @@ static struct inode_operations btrfs_dir_inode_operations = {
 	.rename		= btrfs_rename,
 	.symlink	= btrfs_symlink,
 	.setattr	= btrfs_setattr,
+	.mknod		= btrfs_mknod,
 };
 
 static struct inode_operations btrfs_dir_ro_inode_operations = {
@@ -2214,6 +2562,11 @@ static struct inode_operations btrfs_file_inode_operations = {
 	.setattr	= btrfs_setattr,
 };
 
+static struct inode_operations btrfs_special_inode_operations = {
+	.getattr	= btrfs_getattr,
+	.setattr	= btrfs_setattr,
+};
+
 static struct inode_operations btrfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,