pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - fs/ext4/mballoc.c
Merge branch 'core-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-omap-h63xx.git] / fs / ext4 / mballoc.c
index d559a03f3eb2834f937c941741d34c0925135aee..4415beeb0b620c46611348c7092fa75c42cf2337 100644 (file)
@@ -794,22 +794,42 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                if (bh[i] == NULL)
                        goto out;
 
-               if (buffer_uptodate(bh[i]) &&
-                   !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
+               if (bitmap_uptodate(bh[i]))
                        continue;
 
                lock_buffer(bh[i]);
+               if (bitmap_uptodate(bh[i])) {
+                       unlock_buffer(bh[i]);
+                       continue;
+               }
                spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
                if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                        ext4_init_block_bitmap(sb, bh[i],
                                                first_group + i, desc);
+                       set_bitmap_uptodate(bh[i]);
                        set_buffer_uptodate(bh[i]);
-                       unlock_buffer(bh[i]);
                        spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+                       unlock_buffer(bh[i]);
                        continue;
                }
                spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
+               if (buffer_uptodate(bh[i])) {
+                       /*
+                        * The group is not uninit, so if bh is
+                        * uptodate the bitmap is also uptodate.
+                        */
+                       set_bitmap_uptodate(bh[i]);
+                       unlock_buffer(bh[i]);
+                       continue;
+               }
                get_bh(bh[i]);
+               /*
+                * submit the buffer_head for read. We can
+                * safely mark the bitmap as uptodate now.
+                * We do it here so the bitmap uptodate bit
+                * gets set with the buffer lock held.
+                */
+               set_bitmap_uptodate(bh[i]);
                bh[i]->b_end_io = end_buffer_read_sync;
                submit_bh(READ, bh[i]);
                mb_debug("read bitmap for group %u\n", first_group + i);
@@ -826,6 +846,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 
        err = 0;
        first_block = page->index * blocks_per_page;
+       /* init the page  */
+       memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
        for (i = 0; i < blocks_per_page; i++) {
                int group;
                struct ext4_group_info *grinfo;
@@ -852,7 +874,6 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        BUG_ON(incore == NULL);
                        mb_debug("put buddy for group %u in page %lu/%x\n",
                                group, page->index, i * blocksize);
-                       memset(data, 0xff, blocksize);
                        grinfo = ext4_get_group_info(sb, group);
                        grinfo->bb_fragments = 0;
                        memset(grinfo->bb_counters, 0,
@@ -1032,7 +1053,8 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b)
        if (e4b->bd_buddy_page)
                page_cache_release(e4b->bd_buddy_page);
        /* Done with the buddy cache */
-       up_read(e4b->alloc_semp);
+       if (e4b->alloc_semp)
+               up_read(e4b->alloc_semp);
 }
 
 
@@ -1351,7 +1373,9 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
        get_page(ac->ac_bitmap_page);
        ac->ac_buddy_page = e4b->bd_buddy_page;
        get_page(ac->ac_buddy_page);
-
+       /* on allocation we use ac to track the held semaphore */
+       ac->alloc_semp =  e4b->alloc_semp;
+       e4b->alloc_semp = NULL;
        /* store last allocated for subsequent stream allocation */
        if ((ac->ac_flags & EXT4_MB_HINT_DATA)) {
                spin_lock(&sbi->s_md_lock);
@@ -2515,7 +2539,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
                        ext4_free_blocks_after_init(sb, group, desc);
        } else {
                meta_group_info[i]->bb_free =
-                       le16_to_cpu(desc->bg_free_blocks_count);
+                       ext4_free_blks_count(sb, desc);
        }
 
        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
@@ -2854,8 +2878,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
                        + entry->start_blk
                        + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-               trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
-                          (unsigned long long) discard_block, entry->count);
+               trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u",
+                          sb->s_id, (unsigned long long) discard_block,
+                          entry->count);
                sb_issue_discard(sb, discard_block, entry->count);
 
                kmem_cache_free(ext4_free_ext_cachep, entry);
@@ -3000,7 +3025,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                goto out_err;
 
        ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
-                       gdp->bg_free_blocks_count);
+                       ext4_free_blks_count(sb, gdp));
 
        err = ext4_journal_get_write_access(handle, gdp_bh);
        if (err)
@@ -3018,8 +3043,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
            in_range(block + len - 1, ext4_inode_table(sb, gdp),
                     EXT4_SB(sb)->s_itb_per_group)) {
                ext4_error(sb, __func__,
-                          "Allocating block in system zone - block = %llu",
-                          block);
+                          "Allocating block %llu in system zone of %d group\n",
+                          block, ac->ac_b_ex.fe_group);
                /* File system mounted not to panic on error
                 * Fix the bitmap and repeat the block allocation
                 * We leak some of the blocks here.
@@ -3046,12 +3071,12 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                                ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
        if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
-               gdp->bg_free_blocks_count =
-                       cpu_to_le16(ext4_free_blocks_after_init(sb,
-                                               ac->ac_b_ex.fe_group,
-                                               gdp));
+               ext4_free_blks_set(sb, gdp,
+                                       ext4_free_blocks_after_init(sb,
+                                       ac->ac_b_ex.fe_group, gdp));
        }
-       le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
+       len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
+       ext4_free_blks_set(sb, gdp, len);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
        percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3668,11 +3693,17 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
        pa->pa_free = pa->pa_len;
        atomic_set(&pa->pa_count, 1);
        spin_lock_init(&pa->pa_lock);
+       INIT_LIST_HEAD(&pa->pa_inode_list);
+       INIT_LIST_HEAD(&pa->pa_group_list);
        pa->pa_deleted = 0;
        pa->pa_linear = 0;
 
        mb_debug("new inode pa %p: %llu/%u for %u\n", pa,
                        pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+       trace_mark(ext4_mb_new_inode_pa,
+                  "dev %s ino %lu pstart %llu len %u lstart %u",
+                  sb->s_id, ac->ac_inode->i_ino,
+                  pa->pa_pstart, pa->pa_len, pa->pa_lstart);
 
        ext4_mb_use_inode_pa(ac, pa);
        atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3726,11 +3757,14 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
        atomic_set(&pa->pa_count, 1);
        spin_lock_init(&pa->pa_lock);
        INIT_LIST_HEAD(&pa->pa_inode_list);
+       INIT_LIST_HEAD(&pa->pa_group_list);
        pa->pa_deleted = 0;
        pa->pa_linear = 1;
 
        mb_debug("new group pa %p: %llu/%u for %u\n", pa,
-                       pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+                pa->pa_pstart, pa->pa_len, pa->pa_lstart);
+       trace_mark(ext4_mb_new_group_pa, "dev %s pstart %llu len %u lstart %u",
+                  sb->s_id, pa->pa_pstart, pa->pa_len, pa->pa_lstart);
 
        ext4_mb_use_group_pa(ac, pa);
        atomic_add(pa->pa_free, &EXT4_SB(sb)->s_mb_preallocated);
@@ -3783,12 +3817,14 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
        unsigned int next;
        ext4_group_t group;
        ext4_grpblk_t bit;
+       unsigned long long grp_blk_start;
        sector_t start;
        int err = 0;
        int free = 0;
 
        BUG_ON(pa->pa_deleted == 0);
        ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
+       grp_blk_start = pa->pa_pstart - bit;
        BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
        end = bit + pa->pa_len;
 
@@ -3818,6 +3854,10 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                        ext4_mb_store_history(ac);
                }
 
+               trace_mark(ext4_mb_release_inode_pa,
+                          "dev %s ino %lu block %llu count %u",
+                          sb->s_id, pa->pa_inode->i_ino, grp_blk_start + bit,
+                          next - bit);
                mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
                bit = next + 1;
        }
@@ -3851,6 +3891,8 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
        if (ac)
                ac->ac_op = EXT4_MB_HISTORY_DISCARD;
 
+       trace_mark(ext4_mb_release_group_pa, "dev %s pstart %llu len %d",
+                  sb->s_id, pa->pa_pstart, pa->pa_len);
        BUG_ON(pa->pa_deleted == 0);
        ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
        BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -4016,6 +4058,8 @@ void ext4_discard_preallocations(struct inode *inode)
        }
 
        mb_debug("discard preallocation for inode %lu\n", inode->i_ino);
+       trace_mark(ext4_discard_preallocations, "dev %s ino %lu", sb->s_id,
+                  inode->i_ino);
 
        INIT_LIST_HEAD(&list);
 
@@ -4269,6 +4313,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
        ac->ac_pa = NULL;
        ac->ac_bitmap_page = NULL;
        ac->ac_buddy_page = NULL;
+       ac->alloc_semp = NULL;
        ac->ac_lg = NULL;
 
        /* we have to define context: we'll we work with a file or
@@ -4434,18 +4479,23 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
                        pa->pa_free -= ac->ac_b_ex.fe_len;
                        pa->pa_len -= ac->ac_b_ex.fe_len;
                        spin_unlock(&pa->pa_lock);
-                       /*
-                        * We want to add the pa to the right bucket.
-                        * Remove it from the list and while adding
-                        * make sure the list to which we are adding
-                        * doesn't grow big.
-                        */
-                       if (likely(pa->pa_free)) {
-                               spin_lock(pa->pa_obj_lock);
-                               list_del_rcu(&pa->pa_inode_list);
-                               spin_unlock(pa->pa_obj_lock);
-                               ext4_mb_add_n_trim(ac);
-                       }
+               }
+       }
+       if (ac->alloc_semp)
+               up_read(ac->alloc_semp);
+       if (pa) {
+               /*
+                * We want to add the pa to the right bucket.
+                * Remove it from the list and while adding
+                * make sure the list to which we are adding
+                * doesn't grow big.  We need to release
+                * alloc_semp before calling ext4_mb_add_n_trim()
+                */
+               if (pa->pa_linear && likely(pa->pa_free)) {
+                       spin_lock(pa->pa_obj_lock);
+                       list_del_rcu(&pa->pa_inode_list);
+                       spin_unlock(pa->pa_obj_lock);
+                       ext4_mb_add_n_trim(ac);
                }
                ext4_mb_put_pa(ac, ac->ac_sb, pa);
        }
@@ -4465,6 +4515,8 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
        int ret;
        int freed = 0;
 
+       trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
+                  sb->s_id, needed);
        for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
                ret = ext4_mb_discard_group_preallocations(sb, i, needed);
                freed += ret;
@@ -4493,6 +4545,18 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        sb = ar->inode->i_sb;
        sbi = EXT4_SB(sb);
 
+       trace_mark(ext4_request_blocks, "dev %s flags %u len %u ino %lu "
+                  "lblk %llu goal %llu lleft %llu lright %llu "
+                  "pleft %llu pright %llu ",
+                  sb->s_id, ar->flags, ar->len,
+                  ar->inode ? ar->inode->i_ino : 0,
+                  (unsigned long long) ar->logical,
+                  (unsigned long long) ar->goal,
+                  (unsigned long long) ar->lleft,
+                  (unsigned long long) ar->lright,
+                  (unsigned long long) ar->pleft,
+                  (unsigned long long) ar->pright);
+
        if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
                /*
                 * With delalloc we already reserved the blocks
@@ -4514,7 +4578,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
        }
        if (ar->len == 0) {
                *errp = -EDQUOT;
-               return 0;
+               goto out3;
        }
        inquota = ar->len;
 
@@ -4549,10 +4613,14 @@ repeat:
                                ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len)
                        ext4_mb_new_preallocation(ac);
        }
-
        if (likely(ac->ac_status == AC_STATUS_FOUND)) {
                *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
                if (*errp ==  -EAGAIN) {
+                       /*
+                        * drop the reference that we took
+                        * in ext4_mb_use_best_found
+                        */
+                       ext4_mb_release_context(ac);
                        ac->ac_b_ex.fe_group = 0;
                        ac->ac_b_ex.fe_start = 0;
                        ac->ac_b_ex.fe_len = 0;
@@ -4583,6 +4651,26 @@ out2:
 out1:
        if (ar->len < inquota)
                DQUOT_FREE_BLOCK(ar->inode, inquota - ar->len);
+out3:
+       if (!ar->len) {
+               if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+                       /* release all the reserved blocks if non delalloc */
+                       percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                               reserv_blks);
+       }
+
+       trace_mark(ext4_allocate_blocks,
+                  "dev %s block %llu flags %u len %u ino %lu "
+                  "logical %llu goal %llu lleft %llu lright %llu "
+                  "pleft %llu pright %llu ",
+                  sb->s_id, (unsigned long long) block,
+                  ar->flags, ar->len, ar->inode ? ar->inode->i_ino : 0,
+                  (unsigned long long) ar->logical,
+                  (unsigned long long) ar->goal,
+                  (unsigned long long) ar->lleft,
+                  (unsigned long long) ar->lright,
+                  (unsigned long long) ar->pleft,
+                  (unsigned long long) ar->pright);
 
        return block;
 }
@@ -4717,6 +4805,10 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
        }
 
        ext4_debug("freeing block %lu\n", block);
+       trace_mark(ext4_free_blocks,
+                  "dev %s block %llu count %lu metadata %d ino %lu",
+                  sb->s_id, (unsigned long long) block, count, metadata,
+                  inode ? inode->i_ino : 0);
 
        ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
        if (ac) {
@@ -4823,7 +4915,8 @@ do_more:
        }
 
        spin_lock(sb_bgl_lock(sbi, block_group));
-       le16_add_cpu(&gdp->bg_free_blocks_count, count);
+       ret = ext4_free_blks_count(sb, gdp) + count;
+       ext4_free_blks_set(sb, gdp, ret);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);