return 0;
 }
 
-static unsigned long async_submit_limit(struct btrfs_fs_info *info)
+unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
 {
        unsigned long limit = min_t(unsigned long,
                                    info->workers.max_workers,
 
 int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
 {
-       return atomic_read(&info->nr_async_bios) > async_submit_limit(info);
+       return atomic_read(&info->nr_async_bios) > /* nonzero when the counter exceeds the limit */
+               btrfs_async_submit_limit(info); /* the iodone argument is not used here */
 }
 
 static void run_one_async_submit(struct btrfs_work *work)
        async = container_of(work, struct  async_submit_bio, work);
        fs_info = BTRFS_I(async->inode)->root->fs_info;
 
-       limit = async_submit_limit(fs_info);
+       limit = btrfs_async_submit_limit(fs_info);
        limit = limit * 2 / 3;
 
        atomic_dec(&fs_info->nr_async_submits);
 
-       if (atomic_read(&fs_info->nr_async_submits) < limit)
+       if (atomic_read(&fs_info->nr_async_submits) < limit &&
+           waitqueue_active(&fs_info->async_submit_wait))
                wake_up(&fs_info->async_submit_wait);
 
        async->submit_bio_hook(async->inode, async->rw, async->bio,
                        extent_submit_bio_hook_t *submit_bio_hook)
 {
        struct async_submit_bio *async;
-       int limit = async_submit_limit(fs_info);
+       int limit = btrfs_async_submit_limit(fs_info);
 
        async = kmalloc(sizeof(*async), GFP_NOFS);
        if (!async)
        struct extent_io_tree *tree;
        u64 num_dirty;
        u64 start = 0;
-       unsigned long thresh = 12 * 1024 * 1024;
+       unsigned long thresh = 96 * 1024 * 1024;
        tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
 
-       if (current_is_pdflush())
+       if (current_is_pdflush() || current->flags & PF_MEMALLOC)
                return;
 
        num_dirty = count_range_bits(tree, &start, (u64)-1,
 
 {
        struct bio *pending;
        struct backing_dev_info *bdi;
+       struct btrfs_fs_info *fs_info;
        struct bio *tail;
        struct bio *cur;
        int again = 0;
        unsigned long num_run = 0;
+       unsigned long limit;
 
        bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
+       fs_info = device->dev_root->fs_info;
+       limit = btrfs_async_submit_limit(fs_info);
+       limit = limit * 2 / 3;
+
 loop:
        spin_lock(&device->io_lock);
 
                cur = pending;
                pending = pending->bi_next;
                cur->bi_next = NULL;
-               atomic_dec(&device->dev_root->fs_info->nr_async_bios);
+               atomic_dec(&fs_info->nr_async_bios);
+
+               if (atomic_read(&fs_info->nr_async_bios) < limit &&
+                   waitqueue_active(&fs_info->async_submit_wait))
+                       wake_up(&fs_info->async_submit_wait);
 
                BUG_ON(atomic_read(&cur->bi_cnt) == 0);
                bio_get(cur);
                 int rw, struct bio *bio)
 {
        int should_queue = 1;
+       unsigned long limit;
 
        /* don't bother with additional async steps for reads, right now */
        if (!(rw & (1 << BIO_RW))) {
        if (should_queue)
                btrfs_queue_worker(&root->fs_info->submit_workers,
                                   &device->work);
+
+       limit = btrfs_async_submit_limit(root->fs_info);
+       wait_event_timeout(root->fs_info->async_submit_wait,
+                          (atomic_read(&root->fs_info->nr_async_bios) < limit),
+                          HZ/10);
        return 0;
 }