ext4: fix #11321: create /proc/ext4/*/stats more carefully
[linux-2.6-omap-h63xx.git] blobdiff: fs/ext4/mballoc.c
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 865e9ddb44d406d298da48b0137a4f426f8d45dd..9122271e3d65de0e79cfa0e2fe233afde857f79d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
                b2 = (unsigned char *) bitmap;
                for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
                        if (b1[i] != b2[i]) {
-                               printk("corruption in group %lu at byte %u(%u):"
-                                      " %x in copy != %x on disk/prealloc\n",
-                                       e4b->bd_group, i, i * 8, b1[i], b2[i]);
+                               printk(KERN_ERR "corruption in group %lu "
+                                      "at byte %u(%u): %x in copy != %x "
+                                      "on disk/prealloc\n",
+                                      e4b->bd_group, i, i * 8, b1[i], b2[i]);
                                BUG();
                        }
                }
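
The change above only adds an explicit loglevel prefix: without one, printk() falls back to the default console level, so an error report like this corruption message can end up filtered. As a rough illustration (not part of this patch), a minimal out-of-tree module built against a kernel tree would tag its messages the same way:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>

/* Tag every message with an explicit loglevel, as the hunk above now
 * does for the corruption report. */
static int __init loglevel_demo_init(void)
{
        printk(KERN_INFO "loglevel_demo: loaded\n");
        printk(KERN_ERR "loglevel_demo: example error-level message\n");
        return 0;
}

static void __exit loglevel_demo_exit(void)
{
        printk(KERN_INFO "loglevel_demo: unloaded\n");
}

module_init(loglevel_demo_init);
module_exit(loglevel_demo_exit);
MODULE_LICENSE("GPL");
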
@@ -2560,7 +2561,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        ext4_mb_init_per_dev_proc(sb);
        ext4_mb_history_init(sb);
 
-       printk("EXT4-fs: mballoc enabled\n");
+       printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
        return 0;
 }
 
@@ -2785,14 +2786,20 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
        mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct proc_dir_entry *proc;
-       char devname[64];
+       char devname[BDEVNAME_SIZE], *p;
 
        if (proc_root_ext4 == NULL) {
                sbi->s_mb_proc = NULL;
                return -EINVAL;
        }
        bdevname(sb->s_bdev, devname);
+       p = devname;
+       while ((p = strchr(p, '/')))
+               *p = '!';
+
        sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
+       if (!sbi->s_mb_proc)
+               goto err_create_dir;
 
        MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
        MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan);
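
This is the heart of the #11321 fix: bdevname() can return device names that contain a '/' (e.g. "cciss/c0d0p1"), which cannot be used directly as a single procfs entry name, so the name is flattened by rewriting '/' to '!' and the proc_mkdir() result is now checked before the MB_PROC_HANDLER calls. A small userspace sketch of the same rewrite (the sanitize_devname() helper name is made up for the example):

#include <stdio.h>
#include <string.h>

/* Hypothetical helper: mirror the diff's '/' -> '!' rewrite so a
 * device name like "cciss/c0d0p1" becomes one flat procfs component
 * ("cciss!c0d0p1") instead of a nested path. */
static void sanitize_devname(char *name)
{
        char *p = name;

        while ((p = strchr(p, '/')))
                *p = '!';
}

int main(void)
{
        char devname[32] = "cciss/c0d0p1";

        sanitize_devname(devname);
        printf("%s\n", devname);        /* prints "cciss!c0d0p1" */
        return 0;
}
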
@@ -2804,7 +2811,6 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
        return 0;
 
 err_out:
-       printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
        remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
        remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
        remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
@@ -2813,6 +2819,8 @@ err_out:
        remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc);
        remove_proc_entry(devname, proc_root_ext4);
        sbi->s_mb_proc = NULL;
+err_create_dir:
+       printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname);
 
        return -ENOMEM;
 }
@@ -2820,12 +2828,15 @@ err_out:
 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       char devname[64];
+       char devname[BDEVNAME_SIZE], *p;
 
        if (sbi->s_mb_proc == NULL)
                return -EINVAL;
 
        bdevname(sb->s_bdev, devname);
+       p = devname;
+       while ((p = strchr(p, '/')))
+               *p = '!';
        remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
        remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
        remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
@@ -2879,7 +2890,7 @@ void exit_ext4_mballoc(void)
  */
 static noinline_for_stack int
 ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
-                               handle_t *handle)
+                               handle_t *handle, unsigned long reserv_blks)
 {
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_super_block *es;
@@ -2968,15 +2979,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-
+       percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
        /*
-        * free blocks account has already be reduced/reserved
-        * at write_begin() time for delayed allocation
-        * do not double accounting
+        * Now reduce the dirty block count also. Should not go negative
         */
        if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
-               percpu_counter_sub(&sbi->s_freeblocks_counter,
-                                       ac->ac_b_ex.fe_len);
+               /* release all the reserved blocks if non delalloc */
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
+       else
+               percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+                                               ac->ac_b_ex.fe_len);
 
        if (sbi->s_log_groups_per_flex) {
                ext4_group_t flex_group = ext4_flex_group(sbi,
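
Taken together with the ext4_mb_new_blocks() change further down, this hunk makes the free-block counter drop unconditionally at allocation time, while the new dirty-block counter is decremented either by the whole up-front reservation (non-delalloc) or by the blocks actually allocated (delalloc, which already reserved them at write_begin time). A toy userspace model of that bookkeeping, with plain longs standing in for the percpu counters and all names illustrative:

#include <stdio.h>

struct toy_counters {
        long free_blocks;
        long dirty_blocks;
};

/* "free" always drops by the blocks actually allocated; "dirty" drops
 * by the whole temporary reservation for non-delalloc, or by the
 * allocated length for delalloc. */
static void mark_used(struct toy_counters *c, long allocated,
                      long reserved, int delalloc)
{
        c->free_blocks -= allocated;
        c->dirty_blocks -= delalloc ? allocated : reserved;
}

int main(void)
{
        struct toy_counters c = { .free_blocks = 1000, .dirty_blocks = 100 };

        mark_used(&c, 8, 16, 0);        /* non-delalloc: release full reservation */
        printf("free=%ld dirty=%ld\n", c.free_blocks, c.dirty_blocks);
        return 0;
}
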
@@ -3281,6 +3293,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
        mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
 }
 
+/*
+ * Return the prealloc space that has the minimal distance
+ * from the goal block. @cpa is the prealloc
+ * space with the currently known minimal distance
+ * from the goal block.
+ */
+static struct ext4_prealloc_space *
+ext4_mb_check_group_pa(ext4_fsblk_t goal_block,
+                       struct ext4_prealloc_space *pa,
+                       struct ext4_prealloc_space *cpa)
+{
+       ext4_fsblk_t cur_distance, new_distance;
+
+       if (cpa == NULL) {
+               atomic_inc(&pa->pa_count);
+               return pa;
+       }
+       cur_distance = abs(goal_block - cpa->pa_pstart);
+       new_distance = abs(goal_block - pa->pa_pstart);
+
+       if (cur_distance < new_distance)
+               return cpa;
+
+       /* drop the previous reference */
+       atomic_dec(&cpa->pa_count);
+       atomic_inc(&pa->pa_count);
+       return pa;
+}
+
 /*
  * search goal blocks in preallocated space
  */
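
The new helper transfers a reference from the previously best candidate to whichever preallocation space lies closer to the goal block. The same keep-the-closer-candidate pattern, reduced to a self-contained userspace sketch with made-up types:

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for a preallocation descriptor: only the fields the
 * distance check needs. */
struct toy_pa {
        long long pstart;       /* first physical block of the space */
        int refcount;           /* stands in for pa->pa_count */
};

/* Keep whichever candidate lies closer to the goal block, moving the
 * reference from the old best to the new one. */
static struct toy_pa *closer_pa(long long goal, struct toy_pa *pa,
                                struct toy_pa *best)
{
        if (!best) {
                pa->refcount++;
                return pa;
        }
        if (llabs(goal - best->pstart) < llabs(goal - pa->pstart))
                return best;
        best->refcount--;
        pa->refcount++;
        return pa;
}

int main(void)
{
        struct toy_pa a = { .pstart = 100 }, b = { .pstart = 130 };
        struct toy_pa *best = NULL;

        best = closer_pa(120, &a, best);
        best = closer_pa(120, &b, best);
        printf("best starts at %lld\n", best->pstart);  /* 130 */
        return 0;
}
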
@@ -3290,7 +3331,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
        int order, i;
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
        struct ext4_locality_group *lg;
-       struct ext4_prealloc_space *pa;
+       struct ext4_prealloc_space *pa, *cpa = NULL;
+       ext4_fsblk_t goal_block;
 
        /* only data can be preallocated */
        if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3333,6 +3375,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                /* The max size of hash table is PREALLOC_TB_SIZE */
                order = PREALLOC_TB_SIZE - 1;
 
+       goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) +
+                    ac->ac_g_ex.fe_start +
+                    le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
+       /*
+        * search for the prealloc space that has the
+        * minimal distance from the goal block.
+        */
        for (i = order; i < PREALLOC_TB_SIZE; i++) {
                rcu_read_lock();
                list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
@@ -3340,17 +3389,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                        spin_lock(&pa->pa_lock);
                        if (pa->pa_deleted == 0 &&
                                        pa->pa_free >= ac->ac_o_ex.fe_len) {
-                               atomic_inc(&pa->pa_count);
-                               ext4_mb_use_group_pa(ac, pa);
-                               spin_unlock(&pa->pa_lock);
-                               ac->ac_criteria = 20;
-                               rcu_read_unlock();
-                               return 1;
+
+                               cpa = ext4_mb_check_group_pa(goal_block,
+                                                               pa, cpa);
                        }
                        spin_unlock(&pa->pa_lock);
                }
                rcu_read_unlock();
        }
+       if (cpa) {
+               ext4_mb_use_group_pa(ac, cpa);
+               ac->ac_criteria = 20;
+               return 1;
+       }
        return 0;
 }
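
goal_block above is just the allocation hint converted into an absolute block number: group number times blocks-per-group, plus the offset within the group, plus s_first_data_block. A worked example with illustrative numbers (32768 blocks per group and first data block 0, as with 4 KiB blocks):

#include <stdio.h>

int main(void)
{
        unsigned long blocks_per_group = 32768;
        unsigned long first_data_block = 0;
        unsigned long group = 5, start_in_group = 1200;

        unsigned long long goal_block =
                (unsigned long long)group * blocks_per_group +
                start_in_group + first_data_block;

        printf("goal block = %llu\n", goal_block);      /* 165040 */
        return 0;
}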
 
@@ -4330,12 +4381,13 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                                 struct ext4_allocation_request *ar, int *errp)
 {
+       int freed;
        struct ext4_allocation_context *ac = NULL;
        struct ext4_sb_info *sbi;
        struct super_block *sb;
        ext4_fsblk_t block = 0;
-       int freed;
-       int inquota;
+       unsigned long inquota;
+       unsigned long reserv_blks = 0;
 
        sb = ar->inode->i_sb;
        sbi = EXT4_SB(sb);
@@ -4349,14 +4401,17 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                /*
                 * With delalloc we already reserved the blocks
                 */
-               ar->len = ext4_has_free_blocks(sbi, ar->len);
-       }
-
-       if (ar->len == 0) {
-               *errp = -ENOSPC;
-               return 0;
+               while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) {
+                       /* let others free the space */
+                       yield();
+                       ar->len = ar->len >> 1;
+               }
+               if (!ar->len) {
+                       *errp = -ENOSPC;
+                       return 0;
+               }
+               reserv_blks = ar->len;
        }
-
        while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
                ar->flags |= EXT4_MB_HINT_NOPREALLOC;
                ar->len--;
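
For the non-delalloc path the request is now claimed against the free space up front via ext4_claim_free_blocks(); on failure the task yields and retries with half the length until something fits, or the request shrinks to zero and -ENOSPC is returned. The same halve-and-retry back-off, sketched in userspace with a hypothetical try_claim() standing in for the kernel call:

#include <stdio.h>
#include <sched.h>

/* Hypothetical stand-in for ext4_claim_free_blocks(): succeed once the
 * request fits under a fixed budget, non-zero otherwise. */
static int try_claim(unsigned long budget, unsigned long want)
{
        return want > budget;           /* 0 on success, like the kernel call */
}

int main(void)
{
        unsigned long budget = 100;     /* pretend this many blocks are free */
        unsigned long len = 1000;       /* requested allocation length */

        /* Mirror the diff: halve the request (yielding in between) until
         * the claim succeeds or the length reaches zero. */
        while (len && try_claim(budget, len)) {
                sched_yield();          /* userspace analogue of yield() */
                len >>= 1;
        }
        if (!len) {
                fprintf(stderr, "ENOSPC: nothing could be claimed\n");
                return 1;
        }
        printf("claimed %lu blocks\n", len);    /* 62 with these numbers */
        return 0;
}
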
@@ -4402,7 +4457,7 @@ repeat:
        }
 
        if (likely(ac->ac_status == AC_STATUS_FOUND)) {
-               *errp = ext4_mb_mark_diskspace_used(ac, handle);
+               *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
                if (*errp ==  -EAGAIN) {
                        ac->ac_b_ex.fe_group = 0;
                        ac->ac_b_ex.fe_start = 0;