]> pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - fs/ocfs2/suballoc.c
Fix __pfn_to_page(pfn) for CONFIG_DISCONTIGMEM=y
[linux-2.6-omap-h63xx.git] / fs / ocfs2 / suballoc.c
index d2d278fb9819ee3da92577a367b79f0bdc18681f..c5ff18b46b57524884d9c4b7639f8089e7d3c532 100644 (file)
@@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle_t *handle,
                                  struct ocfs2_chain_list *cl);
 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
                                   struct inode *alloc_inode,
-                                  struct buffer_head *bh);
+                                  struct buffer_head *bh,
+                                  u64 max_block);
 
 static int ocfs2_cluster_group_search(struct inode *inode,
                                      struct buffer_head *group_bh,
                                      u32 bits_wanted, u32 min_bits,
+                                     u64 max_block,
                                      u16 *bit_off, u16 *bits_found);
 static int ocfs2_block_group_search(struct inode *inode,
                                    struct buffer_head *group_bh,
                                    u32 bits_wanted, u32 min_bits,
+                                   u64 max_block,
                                    u16 *bit_off, u16 *bits_found);
 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                                     struct ocfs2_alloc_context *ac,
@@ -110,8 +113,11 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
                                                u64 data_blkno,
                                                u64 *bg_blkno,
                                                u16 *bg_bit_off);
+static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
+                                            u32 bits_wanted, u64 max_block,
+                                            struct ocfs2_alloc_context **ac);
 
-static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
+void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
 {
        struct inode *inode = ac->ac_inode;
 
@@ -124,10 +130,8 @@ static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
                iput(inode);
                ac->ac_inode = NULL;
        }
-       if (ac->ac_bh) {
-               brelse(ac->ac_bh);
-               ac->ac_bh = NULL;
-       }
+       brelse(ac->ac_bh);
+       ac->ac_bh = NULL;
 }
 
 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -276,7 +280,8 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
  */
 static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
                                   struct inode *alloc_inode,
-                                  struct buffer_head *bh)
+                                  struct buffer_head *bh,
+                                  u64 max_block)
 {
        int status, credits;
        struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
@@ -294,9 +299,9 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
        mlog_entry_void();
 
        cl = &fe->id2.i_chain;
-       status = ocfs2_reserve_clusters(osb,
-                                       le16_to_cpu(cl->cl_cpg),
-                                       &ac);
+       status = ocfs2_reserve_clusters_with_limit(osb,
+                                                  le16_to_cpu(cl->cl_cpg),
+                                                  max_block, &ac);
        if (status < 0) {
                if (status != -ENOSPC)
                        mlog_errno(status);
@@ -394,8 +399,7 @@ bail:
        if (ac)
                ocfs2_free_alloc_context(ac);
 
-       if (bg_bh)
-               brelse(bg_bh);
+       brelse(bg_bh);
 
        mlog_exit(status);
        return status;
@@ -469,7 +473,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
                        goto bail;
                }
 
-               status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
+               status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
+                                                ac->ac_max_block);
                if (status < 0) {
                        if (status != -ENOSPC)
                                mlog_errno(status);
@@ -486,16 +491,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
        get_bh(bh);
        ac->ac_bh = bh;
 bail:
-       if (bh)
-               brelse(bh);
+       brelse(bh);
 
        mlog_exit(status);
        return status;
 }
 
-int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
-                              struct ocfs2_dinode *fe,
-                              struct ocfs2_alloc_context **ac)
+int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
+                                     int blocks,
+                                     struct ocfs2_alloc_context **ac)
 {
        int status;
        u32 slot;
@@ -507,7 +511,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
                goto bail;
        }
 
-       (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
+       (*ac)->ac_bits_wanted = blocks;
        (*ac)->ac_which = OCFS2_AC_USE_META;
        slot = osb->slot_num;
        (*ac)->ac_group_search = ocfs2_block_group_search;
@@ -532,6 +536,15 @@ bail:
        return status;
 }
 
+int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
+                              struct ocfs2_extent_list *root_el,
+                              struct ocfs2_alloc_context **ac)
+{
+       return ocfs2_reserve_new_metadata_blocks(osb,
+                                       ocfs2_extend_meta_needed(root_el),
+                                       ac);
+}
+
 static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
                                              struct ocfs2_alloc_context *ac)
 {
@@ -581,6 +594,14 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
 
        (*ac)->ac_group_search = ocfs2_block_group_search;
 
+       /*
+        * stat(2) can't handle i_ino > 32bits, so we tell the
+        * lower levels not to allocate us a block group past that
+        * limit.  The 'inode64' mount option avoids this behavior.
+        */
+       if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
+               (*ac)->ac_max_block = (u32)~0U;
+
        /*
         * slot is set when we successfully steal inode from other nodes.
         * It is reset in 3 places:
@@ -661,9 +682,9 @@ bail:
 /* Callers don't need to care which bitmap (local alloc or main) to
  * use so we figure it out for them, but unfortunately this clutters
  * things a bit. */
-int ocfs2_reserve_clusters(struct ocfs2_super *osb,
-                          u32 bits_wanted,
-                          struct ocfs2_alloc_context **ac)
+static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
+                                            u32 bits_wanted, u64 max_block,
+                                            struct ocfs2_alloc_context **ac)
 {
        int status;
 
@@ -677,24 +698,20 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
        }
 
        (*ac)->ac_bits_wanted = bits_wanted;
+       (*ac)->ac_max_block = max_block;
 
        status = -ENOSPC;
        if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
                status = ocfs2_reserve_local_alloc_bits(osb,
                                                        bits_wanted,
                                                        *ac);
-               if ((status < 0) && (status != -ENOSPC)) {
+               if (status == -EFBIG) {
+                       /* The local alloc window is outside ac_max_block.
+                        * use the main bitmap. */
+                       status = -ENOSPC;
+               } else if ((status < 0) && (status != -ENOSPC)) {
                        mlog_errno(status);
                        goto bail;
-               } else if (status == -ENOSPC) {
-                       /* reserve_local_bits will return enospc with
-                        * the local alloc inode still locked, so we
-                        * can change this safely here. */
-                       mlog(0, "Disabling local alloc\n");
-                       /* We set to OCFS2_LA_DISABLED so that umount
-                        * can clean up what's left of the local
-                        * allocation */
-                       osb->local_alloc_state = OCFS2_LA_DISABLED;
                }
        }
 
@@ -718,6 +735,13 @@ bail:
        return status;
 }
 
+int ocfs2_reserve_clusters(struct ocfs2_super *osb,
+                          u32 bits_wanted,
+                          struct ocfs2_alloc_context **ac)
+{
+       return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
+}
+
 /*
  * More or less lifted from ext3. I'll leave their description below:
  *
@@ -1000,11 +1024,14 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg
 static int ocfs2_cluster_group_search(struct inode *inode,
                                      struct buffer_head *group_bh,
                                      u32 bits_wanted, u32 min_bits,
+                                     u64 max_block,
                                      u16 *bit_off, u16 *bits_found)
 {
        int search = -ENOSPC;
        int ret;
+       u64 blkoff;
        struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        u16 tmp_off, tmp_found;
        unsigned int max_bits, gd_cluster_off;
 
@@ -1037,6 +1064,17 @@ static int ocfs2_cluster_group_search(struct inode *inode,
                if (ret)
                        return ret;
 
+               if (max_block) {
+                       blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
+                                                         gd_cluster_off +
+                                                         tmp_off + tmp_found);
+                       mlog(0, "Checking %llu against %llu\n",
+                            (unsigned long long)blkoff,
+                            (unsigned long long)max_block);
+                       if (blkoff > max_block)
+                               return -ENOSPC;
+               }
+
                /* ocfs2_block_group_find_clear_bits() might
                 * return success, but we still want to return
                 * -ENOSPC unless it found the minimum number
@@ -1045,6 +1083,12 @@ static int ocfs2_cluster_group_search(struct inode *inode,
                        *bit_off = tmp_off;
                        *bits_found = tmp_found;
                        search = 0; /* success */
+               } else if (tmp_found) {
+                       /*
+                        * Don't show bits which we'll be returning
+                        * for allocation to the local alloc bitmap.
+                        */
+                       ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
                }
        }
 
@@ -1054,19 +1098,31 @@ static int ocfs2_cluster_group_search(struct inode *inode,
 static int ocfs2_block_group_search(struct inode *inode,
                                    struct buffer_head *group_bh,
                                    u32 bits_wanted, u32 min_bits,
+                                   u64 max_block,
                                    u16 *bit_off, u16 *bits_found)
 {
        int ret = -ENOSPC;
+       u64 blkoff;
        struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
 
        BUG_ON(min_bits != 1);
        BUG_ON(ocfs2_is_cluster_bitmap(inode));
 
-       if (bg->bg_free_bits_count)
+       if (bg->bg_free_bits_count) {
                ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
                                                        group_bh, bits_wanted,
                                                        le16_to_cpu(bg->bg_bits),
                                                        bit_off, bits_found);
+               if (!ret && max_block) {
+                       blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
+                               *bits_found;
+                       mlog(0, "Checking %llu against %llu\n",
+                            (unsigned long long)blkoff,
+                            (unsigned long long)max_block);
+                       if (blkoff > max_block)
+                               ret = -ENOSPC;
+               }
+       }
 
        return ret;
 }
@@ -1116,8 +1172,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
        struct ocfs2_group_desc *gd;
        struct inode *alloc_inode = ac->ac_inode;
 
-       ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
-                              &group_bh, OCFS2_BH_CACHED, alloc_inode);
+       ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
        if (ret < 0) {
                mlog_errno(ret);
                return ret;
@@ -1131,7 +1186,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
        }
 
        ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
-                                 bit_off, &found);
+                                 ac->ac_max_block, bit_off, &found);
        if (ret < 0) {
                if (ret != -ENOSPC)
                        mlog_errno(ret);
@@ -1186,9 +1241,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
             bits_wanted, chain,
             (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
 
-       status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
+       status = ocfs2_read_block(alloc_inode,
                                  le64_to_cpu(cl->cl_recs[chain].c_blkno),
-                                 &group_bh, OCFS2_BH_CACHED, alloc_inode);
+                                 &group_bh);
        if (status < 0) {
                mlog_errno(status);
                goto bail;
@@ -1204,21 +1259,20 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
        /* for now, the chain search is a bit simplistic. We just use
         * the 1st group with any empty bits. */
        while ((status = ac->ac_group_search(alloc_inode, group_bh,
-                                            bits_wanted, min_bits, bit_off,
+                                            bits_wanted, min_bits,
+                                            ac->ac_max_block, bit_off,
                                             &tmp_bits)) == -ENOSPC) {
                if (!bg->bg_next_group)
                        break;
 
-               if (prev_group_bh) {
-                       brelse(prev_group_bh);
-                       prev_group_bh = NULL;
-               }
+               brelse(prev_group_bh);
+               prev_group_bh = NULL;
+
                next_group = le64_to_cpu(bg->bg_next_group);
                prev_group_bh = group_bh;
                group_bh = NULL;
-               status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
-                                         next_group, &group_bh,
-                                         OCFS2_BH_CACHED, alloc_inode);
+               status = ocfs2_read_block(alloc_inode,
+                                         next_group, &group_bh);
                if (status < 0) {
                        mlog_errno(status);
                        goto bail;
@@ -1307,10 +1361,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
        *bg_blkno = le64_to_cpu(bg->bg_blkno);
        *bits_left = le16_to_cpu(bg->bg_free_bits_count);
 bail:
-       if (group_bh)
-               brelse(group_bh);
-       if (prev_group_bh)
-               brelse(prev_group_bh);
+       brelse(group_bh);
+       brelse(prev_group_bh);
 
        mlog_exit(status);
        return status;
@@ -1723,7 +1775,6 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
 {
        int status = 0;
        u32 tmp_used;
-       struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
        struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
        struct ocfs2_chain_list *cl = &fe->id2.i_chain;
        struct buffer_head *group_bh = NULL;
@@ -1742,8 +1793,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
             (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
             (unsigned long long)bg_blkno, start_bit);
 
-       status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
-                                 alloc_inode);
+       status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh);
        if (status < 0) {
                mlog_errno(status);
                goto bail;
@@ -1784,8 +1834,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
        }
 
 bail:
-       if (group_bh)
-               brelse(group_bh);
+       brelse(group_bh);
 
        mlog_exit(status);
        return status;
@@ -1838,9 +1887,15 @@ int ocfs2_free_clusters(handle_t *handle,
        status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
                                          bg_start_bit, bg_blkno,
                                          num_clusters);
-       if (status < 0)
+       if (status < 0) {
                mlog_errno(status);
+               goto out;
+       }
 
+       ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
+                                        num_clusters);
+
+out:
        mlog_exit(status);
        return status;
 }
@@ -1891,3 +1946,84 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
                       (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
        }
 }
+
+/*
+ * For a given allocation, determine which allocators will need to be
+ * accessed, and lock them, reserving the appropriate number of bits.
+ *
+ * Sparse file systems call this from ocfs2_write_begin_nolock()
+ * and ocfs2_allocate_unwritten_extents().
+ *
+ * File systems which don't support holes call this from
+ * ocfs2_extend_allocation().
+ */
+int ocfs2_lock_allocators(struct inode *inode,
+                         struct ocfs2_extent_tree *et,
+                         u32 clusters_to_add, u32 extents_to_split,
+                         struct ocfs2_alloc_context **data_ac,
+                         struct ocfs2_alloc_context **meta_ac)
+{
+       int ret = 0, num_free_extents;
+       unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+       *meta_ac = NULL;
+       if (data_ac)
+               *data_ac = NULL;
+
+       BUG_ON(clusters_to_add != 0 && data_ac == NULL);
+
+       num_free_extents = ocfs2_num_free_extents(osb, inode, et);
+       if (num_free_extents < 0) {
+               ret = num_free_extents;
+               mlog_errno(ret);
+               goto out;
+       }
+
+       /*
+        * Sparse allocation file systems need to be more conservative
+        * with reserving room for expansion - the actual allocation
+        * happens while we've got a journal handle open so re-taking
+        * a cluster lock (because we ran out of room for another
+        * extent) will violate ordering rules.
+        *
+        * Most of the time we'll only be seeing this 1 cluster at a time
+        * anyway.
+        *
+        * Always lock for any unwritten extents - we might want to
+        * add blocks during a split.
+        */
+       if (!num_free_extents ||
+           (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
+               ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
+               if (ret < 0) {
+                       if (ret != -ENOSPC)
+                               mlog_errno(ret);
+                       goto out;
+               }
+       }
+
+       if (clusters_to_add == 0)
+               goto out;
+
+       ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
+       if (ret < 0) {
+               if (ret != -ENOSPC)
+                       mlog_errno(ret);
+               goto out;
+       }
+
+out:
+       if (ret) {
+               if (*meta_ac) {
+                       ocfs2_free_alloc_context(*meta_ac);
+                       *meta_ac = NULL;
+               }
+
+               /*
+                * We cannot have an error and a non null *data_ac.
+                */
+       }
+
+       return ret;
+}