Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs...

author Chris Mason <chris.mason@oracle.com>
Thu, 29 Jan 2009 01:29:43 +0000 (20:29 -0500)
committer Chris Mason <chris.mason@oracle.com>
Thu, 29 Jan 2009 01:29:43 +0000 (20:29 -0500)
diff --git a/MAINTAINERS b/MAINTAINERS

index d992d407197b726033a88797af6963082bc9b1e3..8a7b0b36e8b456d1fbf2d147221300d1bb5fd6f8 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1021,6 +1021,14 @@ M:       mb@bu3sch.de
  W:     http://bu3sch.de/btgpio.php
  S:     Maintained
  
+BTRFS FILE SYSTEM
+P:     Chris Mason
+M:     chris.mason@oracle.com
+L:     linux-btrfs@vger.kernel.org
+W:     http://btrfs.wiki.kernel.org/
+T:     git kernel.org:/pub/scm/linux/kernel/git/mason/btrfs-unstable.git
+S:     Maintained
+
  BTTV VIDEO4LINUX DRIVER
  P:     Mauro Carvalho Chehab
  M:     mchehab@infradead.org
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c

index 8e2fec05dbe0c084df02625a6c0342ee2fee1176..d5f4e94f2ca238b2afe5ea925363dda47ca5b585 100644 (file)
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -16,7 +16,6 @@
   * Boston, MA 021110-1307, USA.
   */
  
-#include <linux/version.h>
  #include <linux/kthread.h>
  #include <linux/list.h>
  #include <linux/spinlock.h>
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c

index ee848d8585d9b97be41e1d37c8363b0d31408b49..ab07627084f13c4f84c39c39d802b258dbc181ab 100644 (file)
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -32,7 +32,6 @@
  #include <linux/swap.h>
  #include <linux/writeback.h>
  #include <linux/bit_spinlock.h>
-#include <linux/version.h>
  #include <linux/pagevec.h>
  #include "compat.h"
  #include "ctree.h"
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c

index 9e46c07768167f53f9ae0c9b6ffdb5ce8641f56e..2603ee539b7ae100f0677ffbe9bd6ebc392652e4 100644 (file)
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1210,8 +1210,7 @@ static noinline void reada_for_search(struct btrfs_root *root,
         struct btrfs_disk_key disk_key;
         u32 nritems;
         u64 search;
-       u64 lowest_read;
-       u64 highest_read;
+       u64 target;
         u64 nread = 0;
         int direction = path->reada;
         struct extent_buffer *eb;
@@ -1235,8 +1234,7 @@ static noinline void reada_for_search(struct btrfs_root *root,
                 return;
         }
  
-       highest_read = search;
-       lowest_read = search;
+       target = search;
  
         nritems = btrfs_header_nritems(node);
         nr = slot;
@@ -1256,24 +1254,15 @@ static noinline void reada_for_search(struct btrfs_root *root,
                                 break;
                 }
                 search = btrfs_node_blockptr(node, nr);
-               if ((search >= lowest_read && search <= highest_read) ||
-                   (search < lowest_read && lowest_read - search <= 16384) ||
-                   (search > highest_read && search - highest_read <= 16384)) {
+               if ((search <= target && target - search <= 65536) ||
+                   (search > target && search - target <= 65536)) {
                         readahead_tree_block(root, search, blocksize,
                                      btrfs_node_ptr_generation(node, nr));
                         nread += blocksize;
                 }
                 nscan++;
-               if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32))
+               if ((nread > 65536 || nscan > 32))
                         break;
-
-               if (nread > (256 * 1024) || nscan > 128)
-                       break;
-
-               if (search < lowest_read)
-                       lowest_read = search;
-               if (search > highest_read)
-                       highest_read = search;
         }
  }
  
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index eee060f88113089c5243e2dc9c38e77258d58da6..de103a8a815ef5a897862e154d661b12f2f3b76b 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -454,17 +454,11 @@ struct btrfs_timespec {
         __le32 nsec;
  } __attribute__ ((__packed__));
  
-typedef enum {
+enum btrfs_compression_type {
         BTRFS_COMPRESS_NONE = 0,
         BTRFS_COMPRESS_ZLIB = 1,
         BTRFS_COMPRESS_LAST = 2,
-} btrfs_compression_type;
-
-/* we don't understand any encryption methods right now */
-typedef enum {
-       BTRFS_ENCRYPTION_NONE = 0,
-       BTRFS_ENCRYPTION_LAST = 1,
-} btrfs_encryption_type;
+};
  
  struct btrfs_inode_item {
         /* nfs style generation number */
@@ -701,9 +695,7 @@ struct btrfs_fs_info {
         struct btrfs_transaction *running_transaction;
         wait_queue_head_t transaction_throttle;
         wait_queue_head_t transaction_wait;
-
         wait_queue_head_t async_submit_wait;
-       wait_queue_head_t tree_log_wait;
  
         struct btrfs_super_block super_copy;
         struct btrfs_super_block super_for_commit;
@@ -730,10 +722,6 @@ struct btrfs_fs_info {
         atomic_t async_submit_draining;
         atomic_t nr_async_bios;
         atomic_t async_delalloc_pages;
-       atomic_t tree_log_writers;
-       atomic_t tree_log_commit;
-       unsigned long tree_log_batch;
-       u64 tree_log_transid;
  
         /*
          * this is used by the balancing code to wait for all the pending
@@ -833,7 +821,14 @@ struct btrfs_root {
         struct kobject root_kobj;
         struct completion kobj_unregister;
         struct mutex objectid_mutex;
+
         struct mutex log_mutex;
+       wait_queue_head_t log_writer_wait;
+       wait_queue_head_t log_commit_wait[2];
+       atomic_t log_writers;
+       atomic_t log_commit[2];
+       unsigned long log_transid;
+       unsigned long log_batch;
  
         u64 objectid;
         u64 last_trans;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 81a313874ae577aa51370459a0e98c8e26c53c46..7feac5a475e970bac2fe10c5d313dbefcfb4ae69 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -16,7 +16,6 @@
   * Boston, MA 021110-1307, USA.
   */
  
-#include <linux/version.h>
  #include <linux/fs.h>
  #include <linux/blkdev.h>
  #include <linux/scatterlist.h>
@@ -850,6 +849,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
         spin_lock_init(&root->list_lock);
         mutex_init(&root->objectid_mutex);
         mutex_init(&root->log_mutex);
+       init_waitqueue_head(&root->log_writer_wait);
+       init_waitqueue_head(&root->log_commit_wait[0]);
+       init_waitqueue_head(&root->log_commit_wait[1]);
+       atomic_set(&root->log_commit[0], 0);
+       atomic_set(&root->log_commit[1], 0);
+       atomic_set(&root->log_writers, 0);
+       root->log_batch = 0;
+       root->log_transid = 0;
         extent_io_tree_init(&root->dirty_log_pages,
                              fs_info->btree_inode->i_mapping, GFP_NOFS);
  
@@ -934,15 +941,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
         return 0;
  }
  
-int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
-                            struct btrfs_fs_info *fs_info)
+static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
+                                        struct btrfs_fs_info *fs_info)
  {
         struct btrfs_root *root;
         struct btrfs_root *tree_root = fs_info->tree_root;
+       struct extent_buffer *leaf;
  
         root = kzalloc(sizeof(*root), GFP_NOFS);
         if (!root)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
  
         __setup_root(tree_root->nodesize, tree_root->leafsize,
                      tree_root->sectorsize, tree_root->stripesize,
@@ -951,12 +959,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
         root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
         root->root_key.type = BTRFS_ROOT_ITEM_KEY;
         root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
+       /*
+        * log trees do not get reference counted because they go away
+        * before a real commit is actually done.  They do store pointers
+        * to file data extents, and those reference counts still get
+        * updated (along with back refs to the log tree).
+        */
         root->ref_cows = 0;
  
-       root->node = btrfs_alloc_free_block(trans, root, root->leafsize,
-                                           0, BTRFS_TREE_LOG_OBJECTID,
-                                           trans->transid, 0, 0, 0);
+       leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
+                                     0, BTRFS_TREE_LOG_OBJECTID,
+                                     trans->transid, 0, 0, 0);
+       if (IS_ERR(leaf)) {
+               kfree(root);
+               return ERR_CAST(leaf);
+       }
  
+       root->node = leaf;
         btrfs_set_header_nritems(root->node, 0);
         btrfs_set_header_level(root->node, 0);
         btrfs_set_header_bytenr(root->node, root->node->start);
@@ -968,7 +987,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
                             BTRFS_FSID_SIZE);
         btrfs_mark_buffer_dirty(root->node);
         btrfs_tree_unlock(root->node);
-       fs_info->log_root_tree = root;
+       return root;
+}
+
+int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
+                            struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_root *log_root;
+
+       log_root = alloc_log_tree(trans, fs_info);
+       if (IS_ERR(log_root))
+               return PTR_ERR(log_root);
+       WARN_ON(fs_info->log_root_tree);
+       fs_info->log_root_tree = log_root;
+       return 0;
+}
+
+int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root)
+{
+       struct btrfs_root *log_root;
+       struct btrfs_inode_item *inode_item;
+
+       log_root = alloc_log_tree(trans, root->fs_info);
+       if (IS_ERR(log_root))
+               return PTR_ERR(log_root);
+
+       log_root->last_trans = trans->transid;
+       log_root->root_key.offset = root->root_key.objectid;
+
+       inode_item = &log_root->root_item.inode;
+       inode_item->generation = cpu_to_le64(1);
+       inode_item->size = cpu_to_le64(3);
+       inode_item->nlink = cpu_to_le32(1);
+       inode_item->nbytes = cpu_to_le64(root->leafsize);
+       inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+
+       btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
+       btrfs_set_root_generation(&log_root->root_item, trans->transid);
+
+       WARN_ON(root->log_root);
+       root->log_root = log_root;
+       root->log_transid = 0;
         return 0;
  }
  
@@ -1136,7 +1196,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
  {
         struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;
         int ret = 0;
-       struct list_head *cur;
         struct btrfs_device *device;
         struct backing_dev_info *bdi;
  #if 0
@@ -1144,8 +1203,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
             btrfs_congested_async(info, 0))
                 return 1;
  #endif
-       list_for_each(cur, &info->fs_devices->devices) {
-               device = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
                 if (!device->bdev)
                         continue;
                 bdi = blk_get_backing_dev_info(device->bdev);
@@ -1163,13 +1221,11 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
   */
  static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
  {
-       struct list_head *cur;
         struct btrfs_device *device;
         struct btrfs_fs_info *info;
  
         info = (struct btrfs_fs_info *)bdi->unplug_io_data;
-       list_for_each(cur, &info->fs_devices->devices) {
-               device = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
                 if (!device->bdev)
                         continue;
  
@@ -1535,10 +1591,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         init_waitqueue_head(&fs_info->transaction_throttle);
         init_waitqueue_head(&fs_info->transaction_wait);
         init_waitqueue_head(&fs_info->async_submit_wait);
-       init_waitqueue_head(&fs_info->tree_log_wait);
-       atomic_set(&fs_info->tree_log_commit, 0);
-       atomic_set(&fs_info->tree_log_writers, 0);
-       fs_info->tree_log_transid = 0;
  
         __setup_root(4096, 4096, 4096, 4096, tree_root,
                      fs_info, BTRFS_ROOT_TREE_OBJECTID);
@@ -1740,13 +1792,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
         fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
         fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                                "btrfs-cleaner");
-       if (!fs_info->cleaner_kthread)
+       if (IS_ERR(fs_info->cleaner_kthread))
                 goto fail_csum_root;
  
         fs_info->transaction_kthread = kthread_run(transaction_kthread,
                                                    tree_root,
                                                    "btrfs-transaction");
-       if (!fs_info->transaction_kthread)
+       if (IS_ERR(fs_info->transaction_kthread))
                 goto fail_cleaner;
  
         if (btrfs_super_log_root(disk_super) != 0) {
@@ -1828,13 +1880,14 @@ fail_sb_buffer:
  fail_iput:
         invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
         iput(fs_info->btree_inode);
-fail:
+
         btrfs_close_devices(fs_info->fs_devices);
         btrfs_mapping_tree_free(&fs_info->mapping_tree);
+       bdi_destroy(&fs_info->bdi);
  
+fail:
         kfree(extent_root);
         kfree(tree_root);
-       bdi_destroy(&fs_info->bdi);
         kfree(fs_info);
         kfree(chunk_root);
         kfree(dev_root);
@@ -1995,7 +2048,6 @@ static int write_dev_supers(struct btrfs_device *device,
  
  int write_all_supers(struct btrfs_root *root, int max_mirrors)
  {
-       struct list_head *cur;
         struct list_head *head = &root->fs_info->fs_devices->devices;
         struct btrfs_device *dev;
         struct btrfs_super_block *sb;
@@ -2011,8 +2063,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
  
         sb = &root->fs_info->super_for_commit;
         dev_item = &sb->dev_item;
-       list_for_each(cur, head) {
-               dev = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry(dev, head, dev_list) {
                 if (!dev->bdev) {
                         total_errors++;
                         continue;
@@ -2045,8 +2096,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
         }
  
         total_errors = 0;
-       list_for_each(cur, head) {
-               dev = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry(dev, head, dev_list) {
                 if (!dev->bdev)
                         continue;
                 if (!dev->in_fs_metadata || !dev->writeable)
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h

index c0ff404c31b71745b4a8b8b9addb2f9f64f02018..494a56eb298614d6f443b37ff375e94f06f44b92 100644 (file)
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -98,5 +98,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info);
  int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info);
+int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root);
  int btree_lock_page_hook(struct page *page);
  #endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 293da650873f5193c726c4883bc6ecc5114b0a1a..3b26f09809460b0574406137e6d07f6010ea94d1 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -19,7 +19,6 @@
  #include <linux/pagemap.h>
  #include <linux/writeback.h>
  #include <linux/blkdev.h>
-#include <linux/version.h>
  #include "compat.h"
  #include "hash.h"
  #include "crc32c.h"
@@ -30,7 +29,6 @@
  #include "volumes.h"
  #include "locking.h"
  #include "ref-cache.h"
-#include "compat.h"
  
  #define PENDING_EXTENT_INSERT 0
  #define PENDING_EXTENT_DELETE 1
@@ -326,10 +324,8 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
                                                   u64 flags)
  {
         struct list_head *head = &info->space_info;
-       struct list_head *cur;
         struct btrfs_space_info *found;
-       list_for_each(cur, head) {
-               found = list_entry(cur, struct btrfs_space_info, list);
+       list_for_each_entry(found, head, list) {
                 if (found->flags == flags)
                         return found;
         }
@@ -2159,7 +2155,8 @@ again:
                 ret = find_first_extent_bit(&info->extent_ins, search, &start,
                                             &end, EXTENT_WRITEBACK);
                 if (ret) {
-                       if (skipped && all && !num_inserts) {
+                       if (skipped && all && !num_inserts &&
+                           list_empty(&update_list)) {
                                 skipped = 0;
                                 search = 0;
                                 continue;
@@ -2547,6 +2544,7 @@ again:
                 if (ret) {
                         if (all && skipped && !nr) {
                                 search = 0;
+                               skipped = 0;
                                 continue;
                         }
                         mutex_unlock(&info->extent_ins_mutex);
@@ -2700,13 +2698,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
         /* if metadata always pin */
         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
                 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
-                       struct btrfs_block_group_cache *cache;
-
-                       /* btrfs_free_reserved_extent */
-                       cache = btrfs_lookup_block_group(root->fs_info, bytenr);
-                       BUG_ON(!cache);
-                       btrfs_add_free_space(cache, bytenr, num_bytes);
-                       put_block_group(cache);
+                       mutex_lock(&root->fs_info->pinned_mutex);
+                       btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+                       mutex_unlock(&root->fs_info->pinned_mutex);
                         update_reserved_extents(root, bytenr, num_bytes, 0);
                         return 0;
                 }
@@ -3014,7 +3008,6 @@ loop_check:
  static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
  {
         struct btrfs_block_group_cache *cache;
-       struct list_head *l;
  
         printk(KERN_INFO "space_info has %llu free, is %sfull\n",
                (unsigned long long)(info->total_bytes - info->bytes_used -
@@ -3022,8 +3015,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
                (info->full) ? "" : "not ");
  
         down_read(&info->groups_sem);
-       list_for_each(l, &info->block_groups) {
-               cache = list_entry(l, struct btrfs_block_group_cache, list);
+       list_for_each_entry(cache, &info->block_groups, list) {
                 spin_lock(&cache->lock);
                 printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
                        "%llu pinned %llu reserved\n",
@@ -4444,7 +4436,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
         u64 lock_end = 0;
         u64 num_bytes;
         u64 ext_offset;
-       u64 first_pos;
+       u64 search_end = (u64)-1;
         u32 nritems;
         int nr_scaned = 0;
         int extent_locked = 0;
@@ -4452,7 +4444,6 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
         int ret;
  
         memcpy(&key, leaf_key, sizeof(key));
-       first_pos = INT_LIMIT(loff_t) - extent_key->offset;
         if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
                 if (key.objectid < ref_path->owner_objectid ||
                     (key.objectid == ref_path->owner_objectid &&
@@ -4501,7 +4492,7 @@ next:
                         if ((key.objectid > ref_path->owner_objectid) ||
                             (key.objectid == ref_path->owner_objectid &&
                              key.type > BTRFS_EXTENT_DATA_KEY) ||
-                           (key.offset >= first_pos + extent_key->offset))
+                           key.offset >= search_end)
                                 break;
                 }
  
@@ -4534,8 +4525,10 @@ next:
                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
                 ext_offset = btrfs_file_extent_offset(leaf, fi);
  
-               if (first_pos > key.offset - ext_offset)
-                       first_pos = key.offset - ext_offset;
+               if (search_end == (u64)-1) {
+                       search_end = key.offset - ext_offset +
+                               btrfs_file_extent_ram_bytes(leaf, fi);
+               }
  
                 if (!extent_locked) {
                         lock_start = key.offset;
@@ -4724,7 +4717,7 @@ next:
                 }
  skip:
                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
-                   key.offset >= first_pos + extent_key->offset)
+                   key.offset >= search_end)
                         break;
  
                 cond_resched();
@@ -5957,9 +5950,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         path = btrfs_alloc_path();
         BUG_ON(!path);
  
-       btrfs_remove_free_space_cache(block_group);
+       spin_lock(&root->fs_info->block_group_cache_lock);
         rb_erase(&block_group->cache_node,
                  &root->fs_info->block_group_cache_tree);
+       spin_unlock(&root->fs_info->block_group_cache_lock);
+       btrfs_remove_free_space_cache(block_group);
         down_write(&block_group->space_info->groups_sem);
         list_del(&block_group->list);
         up_write(&block_group->space_info->groups_sem);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index e086d407f1fa8b3ad34b2c6770721e8592ca9cc1..a3b0676403f76bcff0a06b56fa6d0f3c53ef21dc 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -9,7 +9,6 @@
  #include <linux/spinlock.h>
  #include <linux/blkdev.h>
  #include <linux/swap.h>
-#include <linux/version.h>
  #include <linux/writeback.h>
  #include <linux/pagevec.h>
  #include "extent_io.h"
@@ -2855,6 +2854,98 @@ out:
         return sector;
  }
  
+int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               __u64 start, __u64 len, get_extent_t *get_extent)
+{
+       int ret;
+       u64 off = start;
+       u64 max = start + len;
+       u32 flags = 0;
+       u64 disko = 0;
+       struct extent_map *em = NULL;
+       int end = 0;
+       u64 em_start = 0, em_len = 0;
+       unsigned long emflags;
+       ret = 0;
+
+       if (len == 0)
+               return -EINVAL;
+
+       lock_extent(&BTRFS_I(inode)->io_tree, start, start + len,
+               GFP_NOFS);
+       em = get_extent(inode, NULL, 0, off, max - off, 0);
+       if (!em)
+               goto out;
+       if (IS_ERR(em)) {
+               ret = PTR_ERR(em);
+               goto out;
+       }
+       while (!end) {
+               off = em->start + em->len;
+               if (off >= max)
+                       end = 1;
+
+               em_start = em->start;
+               em_len = em->len;
+
+               disko = 0;
+               flags = 0;
+
+               switch (em->block_start) {
+               case EXTENT_MAP_LAST_BYTE:
+                       end = 1;
+                       flags |= FIEMAP_EXTENT_LAST;
+                       break;
+               case EXTENT_MAP_HOLE:
+                       flags |= FIEMAP_EXTENT_UNWRITTEN;
+                       break;
+               case EXTENT_MAP_INLINE:
+                       flags |= (FIEMAP_EXTENT_DATA_INLINE |
+                                 FIEMAP_EXTENT_NOT_ALIGNED);
+                       break;
+               case EXTENT_MAP_DELALLOC:
+                       flags |= (FIEMAP_EXTENT_DELALLOC |
+                                 FIEMAP_EXTENT_UNKNOWN);
+                       break;
+               default:
+                       disko = em->block_start;
+                       break;
+               }
+               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+                       flags |= FIEMAP_EXTENT_ENCODED;
+
+               emflags = em->flags;
+               free_extent_map(em);
+               em = NULL;
+
+               if (!end) {
+                       em = get_extent(inode, NULL, 0, off, max - off, 0);
+                       if (!em)
+                               goto out;
+                       if (IS_ERR(em)) {
+                               ret = PTR_ERR(em);
+                               goto out;
+                       }
+                       emflags = em->flags;
+               }
+               if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+                       flags |= FIEMAP_EXTENT_LAST;
+                       end = 1;
+               }
+
+               ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+                                       em_len, flags);
+               if (ret)
+                       goto out_free;
+       }
+out_free:
+       free_extent_map(em);
+out:
+       unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len,
+                       GFP_NOFS);
+       return ret;
+}
+
  static inline struct page *extent_buffer_page(struct extent_buffer *eb,
                                               unsigned long i)
  {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h

index c5b483a791372c6b9a849b36c41e8e3dcf6437ac..e80c6d96b318d7d6445f9f3fde47a56e83005b81 100644 (file)
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -193,6 +193,8 @@ int extent_commit_write(struct extent_io_tree *tree,
                         unsigned from, unsigned to);
  sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
                 get_extent_t *get_extent);
+int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               __u64 start, __u64 len, get_extent_t *get_extent);
  int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end);
  int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
  int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c

index 4a83e33ada32548c1970adceab67590b1747c60d..50da69da20cec141fca63ffba2a4e92b8f93f875 100644 (file)
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -3,7 +3,6 @@
  #include <linux/slab.h>
  #include <linux/module.h>
  #include <linux/spinlock.h>
-#include <linux/version.h>
  #include <linux/hardirq.h>
  #include "extent_map.h"
  
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c

index 90268334145e30e4abe0390333b98678bf47db29..3e8023efaff7581568f8d9a4f131a01c37f76d3b 100644 (file)
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -29,7 +29,6 @@
  #include <linux/writeback.h>
  #include <linux/statfs.h>
  #include <linux/compat.h>
-#include <linux/version.h>
  #include "ctree.h"
  #include "disk-io.h"
  #include "transaction.h"
@@ -1215,10 +1214,10 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
         }
         mutex_unlock(&root->fs_info->trans_mutex);
  
-       root->fs_info->tree_log_batch++;
+       root->log_batch++;
         filemap_fdatawrite(inode->i_mapping);
         btrfs_wait_ordered_range(inode, 0, (u64)-1);
-       root->fs_info->tree_log_batch++;
+       root->log_batch++;
  
         /*
          * ok we haven't committed the transaction yet, lets do a commit
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 8adfe059ab4153f911da295a9673009965039bc2..288c2cdc7543ca7487c1831caf38b1191d9fbf4d 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -34,7 +34,6 @@
  #include <linux/statfs.h>
  #include <linux/compat.h>
  #include <linux/bit_spinlock.h>
-#include <linux/version.h>
  #include <linux/xattr.h>
  #include <linux/posix_acl.h>
  #include <linux/falloc.h>
@@ -1324,12 +1323,11 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
                              struct inode *inode, u64 file_offset,
                              struct list_head *list)
  {
-       struct list_head *cur;
         struct btrfs_ordered_sum *sum;
  
         btrfs_set_trans_block_group(trans, inode);
-       list_for_each(cur, list) {
-               sum = list_entry(cur, struct btrfs_ordered_sum, list);
+
+       list_for_each_entry(sum, list, list) {
                 btrfs_csum_file_blocks(trans,
                        BTRFS_I(inode)->root->fs_info->csum_root, sum);
         }
@@ -4158,9 +4156,10 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
         return -EINVAL;
  }
  
-static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
+static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+               __u64 start, __u64 len)
  {
-       return extent_bmap(mapping, iblock, btrfs_get_extent);
+       return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
  }
  
  int btrfs_readpage(struct file *file, struct page *page)
@@ -4987,13 +4986,24 @@ static struct extent_io_ops btrfs_extent_io_ops = {
         .clear_bit_hook = btrfs_clear_bit_hook,
  };
  
+/*
+ * btrfs doesn't support the bmap operation because swapfiles
+ * use bmap to make a mapping of extents in the file.  They assume
+ * these extents won't change over the life of the file and they
+ * use the bmap result to do IO directly to the drive.
+ *
+ * the btrfs bmap call would return logical addresses that aren't
+ * suitable for IO and they also will change frequently as COW
+ * operations happen.  So, swapfile + btrfs == corruption.
+ *
+ * For now we're avoiding this by dropping bmap.
+ */
  static struct address_space_operations btrfs_aops = {
         .readpage       = btrfs_readpage,
         .writepage      = btrfs_writepage,
         .writepages     = btrfs_writepages,
         .readpages      = btrfs_readpages,
         .sync_page      = block_sync_page,
-       .bmap           = btrfs_bmap,
         .direct_IO      = btrfs_direct_IO,
         .invalidatepage = btrfs_invalidatepage,
         .releasepage    = btrfs_releasepage,
@@ -5017,6 +5027,7 @@ static struct inode_operations btrfs_file_inode_operations = {
         .removexattr    = btrfs_removexattr,
         .permission     = btrfs_permission,
         .fallocate      = btrfs_fallocate,
+       .fiemap         = btrfs_fiemap,
  };
  static struct inode_operations btrfs_special_inode_operations = {
         .getattr        = btrfs_getattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index c2aa33e3feb5b40f251fcdedae4aed93b3745fc2..988fdc8b49ebb92b7ad7d5d9163f3fc23e866d16 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -38,7 +38,6 @@
  #include <linux/compat.h>
  #include <linux/bit_spinlock.h>
  #include <linux/security.h>
-#include <linux/version.h>
  #include <linux/xattr.h>
  #include <linux/vmalloc.h>
  #include "compat.h"
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c

index a2094017027489a16473e03db2cddaad41c048cc..77c2411a5f0f0c59b6ab97e78b2828d94a1c7401 100644 (file)
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -613,7 +613,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
         struct btrfs_sector_sum *sector_sums;
         struct btrfs_ordered_extent *ordered;
         struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
-       struct list_head *cur;
         unsigned long num_sectors;
         unsigned long i;
         u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
@@ -624,8 +623,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
                 return 1;
  
         mutex_lock(&tree->mutex);
-       list_for_each_prev(cur, &ordered->list) {
-               ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
+       list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
                 if (disk_bytenr >= ordered_sum->bytenr) {
                         num_sectors = ordered_sum->len / sectorsize;
                         sector_sums = ordered_sum->sums;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c

index db9fb3bc1e333c5255a92537a9891d4f8716f899..f3fd7e2cbc383aaefd42be70a02f80c759b6979b 100644 (file)
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -37,7 +37,6 @@
  #include <linux/ctype.h>
  #include <linux/namei.h>
  #include <linux/miscdevice.h>
-#include <linux/version.h>
  #include <linux/magic.h>
  #include "compat.h"
  #include "ctree.h"
@@ -583,17 +582,18 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
         struct btrfs_ioctl_vol_args *vol;
         struct btrfs_fs_devices *fs_devices;
         int ret = -ENOTTY;
-       int len;
  
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
  
         vol = kmalloc(sizeof(*vol), GFP_KERNEL);
+       if (!vol)
+               return -ENOMEM;
+
         if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
                 ret = -EFAULT;
                 goto out;
         }
-       len = strnlen(vol->name, BTRFS_PATH_NAME_MAX);
  
         switch (cmd) {
         case BTRFS_IOC_SCAN_DEV:
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 8a08f94433407e6c8ca7ce2e15285ef75115bb26..919172de5c9aa9ce64630f593f74a7942e8292fd 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -852,11 +852,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
  {
         struct btrfs_pending_snapshot *pending;
         struct list_head *head = &trans->transaction->pending_snapshots;
-       struct list_head *cur;
         int ret;
  
-       list_for_each(cur, head) {
-               pending = list_entry(cur, struct btrfs_pending_snapshot, list);
+       list_for_each_entry(pending, head, list) {
                 ret = create_pending_snapshot(trans, fs_info, pending);
                 BUG_ON(ret);
         }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index d81cda2e077c9b3c4685d26a55057826d0c06b2d..4f26f3ed0c87e3fa8e53c224c2ad20c9d12ce5c0 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -77,104 +77,6 @@ static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
   * and once to do all the other items.
   */
  
-/*
- * btrfs_add_log_tree adds a new per-subvolume log tree into the
- * tree of log tree roots.  This must be called with a tree log transaction
- * running (see start_log_trans).
- */
-static int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
-                     struct btrfs_root *root)
-{
-       struct btrfs_key key;
-       struct btrfs_root_item root_item;
-       struct btrfs_inode_item *inode_item;
-       struct extent_buffer *leaf;
-       struct btrfs_root *new_root = root;
-       int ret;
-       u64 objectid = root->root_key.objectid;
-
-       leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
-                                     BTRFS_TREE_LOG_OBJECTID,
-                                     trans->transid, 0, 0, 0);
-       if (IS_ERR(leaf)) {
-               ret = PTR_ERR(leaf);
-               return ret;
-       }
-
-       btrfs_set_header_nritems(leaf, 0);
-       btrfs_set_header_level(leaf, 0);
-       btrfs_set_header_bytenr(leaf, leaf->start);
-       btrfs_set_header_generation(leaf, trans->transid);
-       btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID);
-
-       write_extent_buffer(leaf, root->fs_info->fsid,
-                           (unsigned long)btrfs_header_fsid(leaf),
-                           BTRFS_FSID_SIZE);
-       btrfs_mark_buffer_dirty(leaf);
-
-       inode_item = &root_item.inode;
-       memset(inode_item, 0, sizeof(*inode_item));
-       inode_item->generation = cpu_to_le64(1);
-       inode_item->size = cpu_to_le64(3);
-       inode_item->nlink = cpu_to_le32(1);
-       inode_item->nbytes = cpu_to_le64(root->leafsize);
-       inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
-
-       btrfs_set_root_bytenr(&root_item, leaf->start);
-       btrfs_set_root_generation(&root_item, trans->transid);
-       btrfs_set_root_level(&root_item, 0);
-       btrfs_set_root_refs(&root_item, 0);
-       btrfs_set_root_used(&root_item, 0);
-
-       memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
-       root_item.drop_level = 0;
-
-       btrfs_tree_unlock(leaf);
-       free_extent_buffer(leaf);
-       leaf = NULL;
-
-       btrfs_set_root_dirid(&root_item, 0);
-
-       key.objectid = BTRFS_TREE_LOG_OBJECTID;
-       key.offset = objectid;
-       btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
-       ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key,
-                               &root_item);
-       if (ret)
-               goto fail;
-
-       new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree,
-                                              &key);
-       BUG_ON(!new_root);
-
-       WARN_ON(root->log_root);
-       root->log_root = new_root;
-
-       /*
-        * log trees do not get reference counted because they go away
-        * before a real commit is actually done.  They do store pointers
-        * to file data extents, and those reference counts still get
-        * updated (along with back refs to the log tree).
-        */
-       new_root->ref_cows = 0;
-       new_root->last_trans = trans->transid;
-
-       /*
-        * we need to make sure the root block for this new tree
-        * is marked as dirty in the dirty_log_pages tree.  This
-        * is how it gets flushed down to disk at tree log commit time.
-        *
-        * the tree logging mutex keeps others from coming in and changing
-        * the new_root->node, so we can safely access it here
-        */
-       set_extent_dirty(&new_root->dirty_log_pages, new_root->node->start,
-                        new_root->node->start + new_root->node->len - 1,
-                        GFP_NOFS);
-
-fail:
-       return ret;
-}
-
  /*
   * start a sub transaction and setup the log tree
   * this increments the log tree writer count to make the people
@@ -184,6 +86,14 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root)
  {
         int ret;
+
+       mutex_lock(&root->log_mutex);
+       if (root->log_root) {
+               root->log_batch++;
+               atomic_inc(&root->log_writers);
+               mutex_unlock(&root->log_mutex);
+               return 0;
+       }
         mutex_lock(&root->fs_info->tree_log_mutex);
         if (!root->fs_info->log_root_tree) {
                 ret = btrfs_init_log_root_tree(trans, root->fs_info);
@@ -193,9 +103,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
                 ret = btrfs_add_log_tree(trans, root);
                 BUG_ON(ret);
         }
-       atomic_inc(&root->fs_info->tree_log_writers);
-       root->fs_info->tree_log_batch++;
         mutex_unlock(&root->fs_info->tree_log_mutex);
+       root->log_batch++;
+       atomic_inc(&root->log_writers);
+       mutex_unlock(&root->log_mutex);
         return 0;
  }
  
@@ -212,13 +123,12 @@ static int join_running_log_trans(struct btrfs_root *root)
         if (!root->log_root)
                 return -ENOENT;
  
-       mutex_lock(&root->fs_info->tree_log_mutex);
+       mutex_lock(&root->log_mutex);
         if (root->log_root) {
                 ret = 0;
-               atomic_inc(&root->fs_info->tree_log_writers);
-               root->fs_info->tree_log_batch++;
+               atomic_inc(&root->log_writers);
         }
-       mutex_unlock(&root->fs_info->tree_log_mutex);
+       mutex_unlock(&root->log_mutex);
         return ret;
  }
  
@@ -228,10 +138,11 @@ static int join_running_log_trans(struct btrfs_root *root)
   */
  static int end_log_trans(struct btrfs_root *root)
  {
-       atomic_dec(&root->fs_info->tree_log_writers);
-       smp_mb();
-       if (waitqueue_active(&root->fs_info->tree_log_wait))
-               wake_up(&root->fs_info->tree_log_wait);
+       if (atomic_dec_and_test(&root->log_writers)) {
+               smp_mb();
+               if (waitqueue_active(&root->log_writer_wait))
+                       wake_up(&root->log_writer_wait);
+       }
         return 0;
  }
  
@@ -1902,26 +1813,65 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                 }
         }
         btrfs_free_path(path);
-       if (wc->free)
-               free_extent_buffer(log->node);
         return ret;
  }
  
-static int wait_log_commit(struct btrfs_root *log)
+/*
+ * helper function to update the item for a given subvolume's log root
+ * in the tree of log roots
+ */
+static int update_log_root(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *log)
+{
+       int ret;
+
+       if (log->log_transid == 1) {
+               /* insert root item on the first sync */
+               ret = btrfs_insert_root(trans, log->fs_info->log_root_tree,
+                               &log->root_key, &log->root_item);
+       } else {
+               ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
+                               &log->root_key, &log->root_item);
+       }
+       return ret;
+}
+
+static int wait_log_commit(struct btrfs_root *root, unsigned long transid)
  {
         DEFINE_WAIT(wait);
-       u64 transid = log->fs_info->tree_log_transid;
+       int index = transid % 2;
  
+       /*
+        * we only allow two pending log transactions at a time,
+        * so we know that if ours is more than 2 older than the
+        * current transaction, we're done
+        */
         do {
-               prepare_to_wait(&log->fs_info->tree_log_wait, &wait,
-                               TASK_UNINTERRUPTIBLE);
-               mutex_unlock(&log->fs_info->tree_log_mutex);
-               if (atomic_read(&log->fs_info->tree_log_commit))
+               prepare_to_wait(&root->log_commit_wait[index],
+                               &wait, TASK_UNINTERRUPTIBLE);
+               mutex_unlock(&root->log_mutex);
+               if (root->log_transid < transid + 2 &&
+                   atomic_read(&root->log_commit[index]))
                         schedule();
-               finish_wait(&log->fs_info->tree_log_wait, &wait);
-               mutex_lock(&log->fs_info->tree_log_mutex);
-       } while (transid == log->fs_info->tree_log_transid &&
-               atomic_read(&log->fs_info->tree_log_commit));
+               finish_wait(&root->log_commit_wait[index], &wait);
+               mutex_lock(&root->log_mutex);
+       } while (root->log_transid < transid + 2 &&
+                atomic_read(&root->log_commit[index]));
+       return 0;
+}
+
+static int wait_for_writer(struct btrfs_root *root)
+{
+       DEFINE_WAIT(wait);
+       while (atomic_read(&root->log_writers)) {
+               prepare_to_wait(&root->log_writer_wait,
+                               &wait, TASK_UNINTERRUPTIBLE);
+               mutex_unlock(&root->log_mutex);
+               if (atomic_read(&root->log_writers))
+                       schedule();
+               mutex_lock(&root->log_mutex);
+               finish_wait(&root->log_writer_wait, &wait);
+       }
         return 0;
  }
  
@@ -1933,57 +1883,114 @@ static int wait_log_commit(struct btrfs_root *log)
  int btrfs_sync_log(struct btrfs_trans_handle *trans,
                    struct btrfs_root *root)
  {
+       int index1;
+       int index2;
         int ret;
-       unsigned long batch;
         struct btrfs_root *log = root->log_root;
+       struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
  
-       mutex_lock(&log->fs_info->tree_log_mutex);
-       if (atomic_read(&log->fs_info->tree_log_commit)) {
-               wait_log_commit(log);
-               goto out;
+       mutex_lock(&root->log_mutex);
+       index1 = root->log_transid % 2;
+       if (atomic_read(&root->log_commit[index1])) {
+               wait_log_commit(root, root->log_transid);
+               mutex_unlock(&root->log_mutex);
+               return 0;
         }
-       atomic_set(&log->fs_info->tree_log_commit, 1);
+       atomic_set(&root->log_commit[index1], 1);
+
+       /* wait for previous tree log sync to complete */
+       if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
+               wait_log_commit(root, root->log_transid - 1);
  
         while (1) {
-               batch = log->fs_info->tree_log_batch;
-               mutex_unlock(&log->fs_info->tree_log_mutex);
+               unsigned long batch = root->log_batch;
+               mutex_unlock(&root->log_mutex);
                 schedule_timeout_uninterruptible(1);
-               mutex_lock(&log->fs_info->tree_log_mutex);
-
-               while (atomic_read(&log->fs_info->tree_log_writers)) {
-                       DEFINE_WAIT(wait);
-                       prepare_to_wait(&log->fs_info->tree_log_wait, &wait,
-                                       TASK_UNINTERRUPTIBLE);
-                       mutex_unlock(&log->fs_info->tree_log_mutex);
-                       if (atomic_read(&log->fs_info->tree_log_writers))
-                               schedule();
-                       mutex_lock(&log->fs_info->tree_log_mutex);
-                       finish_wait(&log->fs_info->tree_log_wait, &wait);
-               }
-               if (batch == log->fs_info->tree_log_batch)
+               mutex_lock(&root->log_mutex);
+               wait_for_writer(root);
+               if (batch == root->log_batch)
                         break;
         }
  
         ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages);
         BUG_ON(ret);
-       ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree,
-                              &root->fs_info->log_root_tree->dirty_log_pages);
+
+       btrfs_set_root_bytenr(&log->root_item, log->node->start);
+       btrfs_set_root_generation(&log->root_item, trans->transid);
+       btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
+
+       root->log_batch = 0;
+       root->log_transid++;
+       log->log_transid = root->log_transid;
+       smp_mb();
+       /*
+        * log tree has been flushed to disk, new modifications of
+        * the log will be written to new positions. so it's safe to
+        * allow log writers to go in.
+        */
+       mutex_unlock(&root->log_mutex);
+
+       mutex_lock(&log_root_tree->log_mutex);
+       log_root_tree->log_batch++;
+       atomic_inc(&log_root_tree->log_writers);
+       mutex_unlock(&log_root_tree->log_mutex);
+
+       ret = update_log_root(trans, log);
+       BUG_ON(ret);
+
+       mutex_lock(&log_root_tree->log_mutex);
+       if (atomic_dec_and_test(&log_root_tree->log_writers)) {
+               smp_mb();
+               if (waitqueue_active(&log_root_tree->log_writer_wait))
+                       wake_up(&log_root_tree->log_writer_wait);
+       }
+
+       index2 = log_root_tree->log_transid % 2;
+       if (atomic_read(&log_root_tree->log_commit[index2])) {
+               wait_log_commit(log_root_tree, log_root_tree->log_transid);
+               mutex_unlock(&log_root_tree->log_mutex);
+               goto out;
+       }
+       atomic_set(&log_root_tree->log_commit[index2], 1);
+
+       if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2]))
+               wait_log_commit(log_root_tree, log_root_tree->log_transid - 1);
+
+       wait_for_writer(log_root_tree);
+
+       ret = btrfs_write_and_wait_marked_extents(log_root_tree,
+                               &log_root_tree->dirty_log_pages);
         BUG_ON(ret);
  
         btrfs_set_super_log_root(&root->fs_info->super_for_commit,
-                                log->fs_info->log_root_tree->node->start);
+                               log_root_tree->node->start);
         btrfs_set_super_log_root_level(&root->fs_info->super_for_commit,
-                      btrfs_header_level(log->fs_info->log_root_tree->node));
+                               btrfs_header_level(log_root_tree->node));
+
+       log_root_tree->log_batch = 0;
+       log_root_tree->log_transid++;
+       smp_mb();
+
+       mutex_unlock(&log_root_tree->log_mutex);
+
+       /*
+        * nobody else is going to jump in and write the ctree
+        * super here because the log_commit atomic below is protecting
+        * us.  We must be called with a transaction handle pinning
+        * the running transaction open, so a full commit can't hop
+        * in and cause problems either.
+        */
+       write_ctree_super(trans, root->fs_info->tree_root, 2);
  
-       write_ctree_super(trans, log->fs_info->tree_root, 2);
-       log->fs_info->tree_log_transid++;
-       log->fs_info->tree_log_batch = 0;
-       atomic_set(&log->fs_info->tree_log_commit, 0);
+       atomic_set(&log_root_tree->log_commit[index2], 0);
         smp_mb();
-       if (waitqueue_active(&log->fs_info->tree_log_wait))
-               wake_up(&log->fs_info->tree_log_wait);
+       if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
+               wake_up(&log_root_tree->log_commit_wait[index2]);
  out:
-       mutex_unlock(&log->fs_info->tree_log_mutex);
+       atomic_set(&root->log_commit[index1], 0);
+       smp_mb();
+       if (waitqueue_active(&root->log_commit_wait[index1]))
+               wake_up(&root->log_commit_wait[index1]);
         return 0;
  }
  
@@ -2019,37 +2026,17 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
                                    start, end, GFP_NOFS);
         }
  
-       log = root->log_root;
-       ret = btrfs_del_root(trans, root->fs_info->log_root_tree,
-                            &log->root_key);
-       BUG_ON(ret);
+       if (log->log_transid > 0) {
+               ret = btrfs_del_root(trans, root->fs_info->log_root_tree,
+                                    &log->root_key);
+               BUG_ON(ret);
+       }
         root->log_root = NULL;
-       kfree(root->log_root);
+       free_extent_buffer(log->node);
+       kfree(log);
         return 0;
  }
  
-/*
- * helper function to update the item for a given subvolumes log root
- * in the tree of log roots
- */
-static int update_log_root(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *log)
-{
-       u64 bytenr = btrfs_root_bytenr(&log->root_item);
-       int ret;
-
-       if (log->node->start == bytenr)
-               return 0;
-
-       btrfs_set_root_bytenr(&log->root_item, log->node->start);
-       btrfs_set_root_generation(&log->root_item, trans->transid);
-       btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node));
-       ret = btrfs_update_root(trans, log->fs_info->log_root_tree,
-                               &log->root_key, &log->root_item);
-       BUG_ON(ret);
-       return ret;
-}
-
  /*
   * If both a file and directory are logged, and unlinks or renames are
   * mixed in, we have a few interesting corners:
@@ -2711,11 +2698,6 @@ next_slot:
  
         btrfs_free_path(path);
         btrfs_free_path(dst_path);
-
-       mutex_lock(&root->fs_info->tree_log_mutex);
-       ret = update_log_root(trans, log);
-       BUG_ON(ret);
-       mutex_unlock(&root->fs_info->tree_log_mutex);
  out:
         return 0;
  }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

index 3451e1cca2b5c77ccdfdc3f5cab713f4a02721d2..fd0bedb07a6405d78f66ef7de63965b17d5617d3 100644 (file)
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -20,7 +20,6 @@
  #include <linux/buffer_head.h>
  #include <linux/blkdev.h>
  #include <linux/random.h>
-#include <linux/version.h>
  #include <asm/div64.h>
  #include "compat.h"
  #include "ctree.h"
@@ -104,10 +103,8 @@ static noinline struct btrfs_device *__find_device(struct list_head *head,
                                                    u64 devid, u8 *uuid)
  {
         struct btrfs_device *dev;
-       struct list_head *cur;
  
-       list_for_each(cur, head) {
-               dev = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry(dev, head, dev_list) {
                 if (dev->devid == devid &&
                     (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
                         return dev;
@@ -118,11 +115,9 @@ static noinline struct btrfs_device *__find_device(struct list_head *head,
  
  static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
  {
-       struct list_head *cur;
         struct btrfs_fs_devices *fs_devices;
  
-       list_for_each(cur, &fs_uuids) {
-               fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
+       list_for_each_entry(fs_devices, &fs_uuids, list) {
                 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
                         return fs_devices;
         }
@@ -345,14 +340,11 @@ error:
  
  int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
  {
-       struct list_head *tmp;
-       struct list_head *cur;
-       struct btrfs_device *device;
+       struct btrfs_device *device, *next;
  
         mutex_lock(&uuid_mutex);
  again:
-       list_for_each_safe(cur, tmp, &fs_devices->devices) {
-               device = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
                 if (device->in_fs_metadata)
                         continue;
  
@@ -383,14 +375,12 @@ again:
  
  static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
  {
-       struct list_head *cur;
         struct btrfs_device *device;
  
         if (--fs_devices->opened > 0)
                 return 0;
  
-       list_for_each(cur, &fs_devices->devices) {
-               device = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry(device, &fs_devices->devices, dev_list) {
                 if (device->bdev) {
                         close_bdev_exclusive(device->bdev, device->mode);
                         fs_devices->open_devices--;
@@ -439,7 +429,6 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
  {
         struct block_device *bdev;
         struct list_head *head = &fs_devices->devices;
-       struct list_head *cur;
         struct btrfs_device *device;
         struct block_device *latest_bdev = NULL;
         struct buffer_head *bh;
@@ -450,8 +439,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
         int seeding = 1;
         int ret = 0;
  
-       list_for_each(cur, head) {
-               device = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry(device, head, dev_list) {
                 if (device->bdev)
                         continue;
                 if (!device->name)
@@ -578,7 +566,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
                        *(unsigned long long *)disk_super->fsid,
                        *(unsigned long long *)(disk_super->fsid + 8));
         }
-       printk(KERN_INFO "devid %llu transid %llu %s\n",
+       printk(KERN_CONT "devid %llu transid %llu %s\n",
                (unsigned long long)devid, (unsigned long long)transid, path);
         ret = device_list_add(path, disk_super, devid, fs_devices_ret);
  
@@ -1017,14 +1005,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
         }
  
         if (strcmp(device_path, "missing") == 0) {
-               struct list_head *cur;
                 struct list_head *devices;
                 struct btrfs_device *tmp;
  
                 device = NULL;
                 devices = &root->fs_info->fs_devices->devices;
-               list_for_each(cur, devices) {
-                       tmp = list_entry(cur, struct btrfs_device, dev_list);
+               list_for_each_entry(tmp, devices, dev_list) {
                         if (tmp->in_fs_metadata && !tmp->bdev) {
                                 device = tmp;
                                 break;
@@ -1280,7 +1266,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
         struct btrfs_trans_handle *trans;
         struct btrfs_device *device;
         struct block_device *bdev;
-       struct list_head *cur;
         struct list_head *devices;
         struct super_block *sb = root->fs_info->sb;
         u64 total_bytes;
@@ -1304,8 +1289,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
         mutex_lock(&root->fs_info->volume_mutex);
  
         devices = &root->fs_info->fs_devices->devices;
-       list_for_each(cur, devices) {
-               device = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry(device, devices, dev_list) {
                 if (device->bdev == bdev) {
                         ret = -EEXIST;
                         goto error;
@@ -1704,7 +1688,6 @@ static u64 div_factor(u64 num, int factor)
  int btrfs_balance(struct btrfs_root *dev_root)
  {
         int ret;
-       struct list_head *cur;
         struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
         struct btrfs_device *device;
         u64 old_size;
@@ -1723,8 +1706,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
         dev_root = dev_root->fs_info->dev_root;
  
         /* step one make some room on all the devices */
-       list_for_each(cur, devices) {
-               device = list_entry(cur, struct btrfs_device, dev_list);
+       list_for_each_entry(device, devices, dev_list) {
                 old_size = device->total_bytes;
                 size_to_free = div_factor(old_size, 1);
                 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c

index 7f332e2708942f95968f07d49279fc08f43e59b2..b4fa5f4b6ad1261a5a09013ebec3a1af0c2e3e4f 100644 (file)
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -45,9 +45,12 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
         /* lookup the xattr by name */
         di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name,
                                 strlen(name), 0);
-       if (!di || IS_ERR(di)) {
+       if (!di) {
                 ret = -ENODATA;
                 goto out;
+       } else if (IS_ERR(di)) {
+               ret = PTR_ERR(di);
+               goto out;
         }
  
         leaf = path->nodes[0];
@@ -62,6 +65,14 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
                 ret = -ERANGE;
                 goto out;
         }
+
+       /*
+        * The way things are packed into the leaf is like this
+        * |struct btrfs_dir_item|name|data|
+        * where name is the xattr name, so security.foo, and data is the
+        * content of the xattr.  data_ptr points to the location in memory
+        * where the data starts in the in memory leaf
+        */
         data_ptr = (unsigned long)((char *)(di + 1) +
                                    btrfs_dir_name_len(leaf, di));
         read_extent_buffer(leaf, buffer, data_ptr,
@@ -176,7 +187,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
         if (ret < 0)
                 goto err;
-       ret = 0;
         advance = 0;
         while (1) {
                 leaf = path->nodes[0];
author	Chris Mason <chris.mason@oracle.com>
	Thu, 29 Jan 2009 01:29:43 +0000 (20:29 -0500)
committer	Chris Mason <chris.mason@oracle.com>
	Thu, 29 Jan 2009 01:29:43 +0000 (20:29 -0500)
MAINTAINERS		patch \| blob \| history
fs/btrfs/async-thread.c		patch \| blob \| history
fs/btrfs/compression.c		patch \| blob \| history
fs/btrfs/ctree.c		patch \| blob \| history
fs/btrfs/ctree.h		patch \| blob \| history
fs/btrfs/disk-io.c		patch \| blob \| history
fs/btrfs/disk-io.h		patch \| blob \| history
fs/btrfs/extent-tree.c		patch \| blob \| history
fs/btrfs/extent_io.c		patch \| blob \| history
fs/btrfs/extent_io.h		patch \| blob \| history
fs/btrfs/extent_map.c		patch \| blob \| history
fs/btrfs/file.c		patch \| blob \| history
fs/btrfs/inode.c		patch \| blob \| history
fs/btrfs/ioctl.c		patch \| blob \| history
fs/btrfs/ordered-data.c		patch \| blob \| history
fs/btrfs/super.c		patch \| blob \| history
fs/btrfs/transaction.c		patch \| blob \| history
fs/btrfs/tree-log.c		patch \| blob \| history
fs/btrfs/volumes.c		patch \| blob \| history
fs/btrfs/xattr.c		patch \| blob \| history