ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
inode, dir->i_ino);
BUG_ON(ret != 0 && ret != -ENOENT);
- if (ret != -ENOENT)
- BTRFS_I(dir)->log_dirty_trans = trans->transid;
ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
dir, index);
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, dir);
+
+ btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
+
ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
dentry->d_name.name, dentry->d_name.len);
if (err)
return err;
- if (S_ISREG(inode->i_mode) &&
- attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
- err = btrfs_cont_expand(inode, attr->ia_size);
- if (err)
- return err;
+ if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
+ if (attr->ia_size > inode->i_size) {
+ err = btrfs_cont_expand(inode, attr->ia_size);
+ if (err)
+ return err;
+ } else if (inode->i_size > 0 &&
+ attr->ia_size == 0) {
+
+ /* we're truncating a file that used to have good
+ * data down to zero. Make sure it gets into
+ * the ordered flush list so that any new writes
+ * get down to disk quickly.
+ */
+ BTRFS_I(inode)->ordered_data_close = 1;
+ }
}
err = inode_setattr(inode, attr);
bi->disk_i_size = 0;
bi->flags = 0;
bi->index_cnt = (u64)-1;
- bi->log_dirty_trans = 0;
+ bi->last_unlink_trans = 0;
extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
extent_io_tree_init(&BTRFS_I(inode)->io_tree,
inode->i_mapping, GFP_NOFS);
extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
inode->i_mapping, GFP_NOFS);
INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
+ INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
mutex_init(&BTRFS_I(inode)->extent_mutex);
mutex_init(&BTRFS_I(inode)->log_mutex);
if (dir) {
ret = btrfs_set_inode_index(dir, index);
- if (ret)
+ if (ret) {
+ iput(inode);
return ERR_PTR(ret);
+ }
}
/*
* index_cnt is ignored for everything but a dir,
if (dir)
BTRFS_I(dir)->index_cnt--;
btrfs_free_path(path);
+ iput(inode);
return ERR_PTR(ret);
}
drop_inode = 1;
nr = trans->blocks_used;
+
+ btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
btrfs_end_transaction_throttle(trans, root);
fail:
if (drop_inode) {
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
*/
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
+ struct page *page = vmf->page;
struct inode *inode = fdentry(vma->vm_file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 page_end;
ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
- if (ret)
+ if (ret) {
+ if (ret == -ENOMEM)
+ ret = VM_FAULT_OOM;
+ else /* -ENOSPC, -EIO, etc */
+ ret = VM_FAULT_SIGBUS;
goto out;
+ }
- ret = -EINVAL;
+ ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
again:
lock_page(page);
size = i_size_read(inode);
}
ClearPageChecked(page);
set_page_dirty(page);
+
+ BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock:
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
trans = btrfs_start_transaction(root, 1);
+
+ /*
+ * setattr is responsible for setting the ordered_data_close flag,
+ * but that is only tested during the last file release. That
+ * could happen well after the next commit, leaving a great big
+ * window where new writes may get lost if someone chooses to write
+ * to this file after truncating to zero.
+ *
+ * The inode doesn't have any dirty data here, and so if we commit
+ * this is a noop. If someone immediately starts writing to the inode
+ * it is very likely we'll catch some of their writes in this
+ * transaction, and the commit will find this file on the ordered
+ * data list with good things to send down.
+ *
+ * This is a best-effort solution; there is still a window where
+ * using truncate to replace the contents of the file will
+ * end up with a zero length file after a crash.
+ */
+ if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
+ btrfs_add_ordered_operation(trans, root, inode);
+
btrfs_set_trans_block_group(trans, inode);
btrfs_i_size_write(inode, inode->i_size);
ei->i_acl = BTRFS_ACL_NOT_CACHED;
ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
INIT_LIST_HEAD(&ei->i_orphan);
+ INIT_LIST_HEAD(&ei->ordered_operations);
return &ei->vfs_inode;
}
void btrfs_destroy_inode(struct inode *inode)
{
struct btrfs_ordered_extent *ordered;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
posix_acl_release(BTRFS_I(inode)->i_default_acl);
- spin_lock(&BTRFS_I(inode)->root->list_lock);
+ /*
+ * Make sure we're properly removed from the ordered operation
+ * lists.
+ */
+ smp_mb();
+ if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
+ spin_lock(&root->fs_info->ordered_extent_lock);
+ list_del_init(&BTRFS_I(inode)->ordered_operations);
+ spin_unlock(&root->fs_info->ordered_extent_lock);
+ }
+
+ spin_lock(&root->list_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
" list\n", inode->i_ino);
dump_stack();
}
- spin_unlock(&BTRFS_I(inode)->root->list_lock);
+ spin_unlock(&root->list_lock);
while (1) {
ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
if (ret)
goto out_unlock;
+ /*
+ * we're using rename to replace one file with another,
+ * and the replacement file is large. Start IO on it now so
+ * we don't add too much work to the end of the transaction
+ */
+ if (new_inode && old_inode && S_ISREG(old_inode->i_mode) &&
+ new_inode->i_size &&
+ old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+ filemap_flush(old_inode->i_mapping);
+
trans = btrfs_start_transaction(root, 1);
+ /*
+ * make sure the inode gets flushed if it is replacing
+ * something.
+ */
+ if (new_inode && new_inode->i_size &&
+ old_inode && S_ISREG(old_inode->i_mode)) {
+ btrfs_add_ordered_operation(trans, root, old_inode);
+ }
+
+ /*
+ * this is an ugly little race, but the rename is required to make
+ * sure that if we crash, the inode is either at the old name
+ * or the new one. Pinning the log transaction lets us make sure
+ * we don't allow a log commit to come in after we unlink the
+ * name but before we add the new name back in.
+ */
+ btrfs_pin_log_trans(root);
+
btrfs_set_trans_block_group(trans, new_dir);
btrfs_inc_nlink(old_dentry->d_inode);
new_dir->i_ctime = new_dir->i_mtime = ctime;
old_inode->i_ctime = ctime;
+ if (old_dentry->d_parent != new_dentry->d_parent)
+ btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
+
ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
old_dentry->d_name.name,
old_dentry->d_name.len);
if (ret)
goto out_fail;
+ btrfs_log_new_name(trans, old_inode, old_dir,
+ new_dentry->d_parent);
out_fail:
+
+ /* this btrfs_end_log_trans just allows the current
+ * log-sub transaction to complete
+ */
+ btrfs_end_log_trans(root);
btrfs_end_transaction_throttle(trans, root);
out_unlock:
return ret;