#include "buffer_head_io.h"
+#define NOT_ALLOC_NEW_GROUP 0
+#define ALLOC_NEW_GROUP 1
+
+#define OCFS2_MAX_INODES_TO_STEAL 1024
+
static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
-static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
+static int ocfs2_block_group_fill(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *bg_bh,
u64 group_blkno,
struct inode *alloc_inode,
struct buffer_head *bh);
-static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
- struct ocfs2_alloc_context *ac);
-
static int ocfs2_cluster_group_search(struct inode *inode,
struct buffer_head *group_bh,
u32 bits_wanted, u32 min_bits,
struct buffer_head *group_bh,
u32 bits_wanted, u32 min_bits,
u16 *bit_off, u16 *bits_found);
-static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
- u32 bits_wanted,
- u32 min_bits,
- u16 *bit_off,
- unsigned int *num_bits,
- u64 *bg_blkno);
static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
struct ocfs2_alloc_context *ac,
+ handle_t *handle,
u32 bits_wanted,
u32 min_bits,
u16 *bit_off,
u64 *bg_blkno);
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
int nr);
-static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
- struct buffer_head *bg_bh,
- unsigned int bits_wanted,
- u16 *bit_off,
- u16 *bits_found);
-static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_set_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
unsigned int bit_off,
unsigned int num_bits);
-static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_clear_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
unsigned int bit_off,
unsigned int num_bits);
-static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
+static int ocfs2_relink_block_group(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *fe_bh,
struct buffer_head *bg_bh,
u16 chain);
static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
u32 wanted);
-static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
- struct inode *alloc_inode,
- struct buffer_head *alloc_bh,
- unsigned int start_bit,
- u64 bg_blkno,
- unsigned int count);
-static inline u64 ocfs2_which_suballoc_group(u64 block,
- unsigned int bit);
static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
u64 bg_blkno,
u16 bg_bit_off);
-static inline u64 ocfs2_which_cluster_group(struct inode *inode,
- u32 cluster);
static inline void ocfs2_block_to_cluster_group(struct inode *inode,
u64 data_blkno,
u64 *bg_blkno,
u16 *bg_bit_off);
-void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
+static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
{
- if (ac->ac_inode)
- iput(ac->ac_inode);
- if (ac->ac_bh)
+ struct inode *inode = ac->ac_inode;
+
+ if (inode) {
+ if (ac->ac_which != OCFS2_AC_USE_LOCAL)
+ ocfs2_inode_unlock(inode, 1);
+
+ mutex_unlock(&inode->i_mutex);
+
+ iput(inode);
+ ac->ac_inode = NULL;
+ }
+ if (ac->ac_bh) {
brelse(ac->ac_bh);
+ ac->ac_bh = NULL;
+ }
+}
+
+void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
+{
+ ocfs2_free_ac_resource(ac);
kfree(ac);
}
return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
}
-static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
+/* somewhat more expensive than our other checks, so use sparingly. */
+int ocfs2_check_group_descriptor(struct super_block *sb,
+ struct ocfs2_dinode *di,
+ struct ocfs2_group_desc *gd)
+{
+ unsigned int max_bits;
+
+ if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+ OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
+ return -EIO;
+ }
+
+ if (di->i_blkno != gd->bg_parent_dinode) {
+ ocfs2_error(sb, "Group descriptor # %llu has bad parent "
+ "pointer (%llu, expected %llu)",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ return -EIO;
+ }
+
+ max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
+ if (le16_to_cpu(gd->bg_bits) > max_bits) {
+ ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_bits));
+ return -EIO;
+ }
+
+ if (le16_to_cpu(gd->bg_chain) >=
+ le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
+ ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_chain));
+ return -EIO;
+ }
+
+ if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+ ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+ "claims that %u are free",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_bits),
+ le16_to_cpu(gd->bg_free_bits_count));
+ return -EIO;
+ }
+
+ if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+ ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+ "max bitmap bits of %u",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_bits),
+ 8 * le16_to_cpu(gd->bg_size));
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int ocfs2_block_group_fill(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *bg_bh,
u64 group_blkno,
mlog_entry_void();
if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
- ocfs2_error(alloc_inode->i_sb, "group block (%"MLFu64") "
- "!= b_blocknr (%llu)", group_blkno,
+ ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
+ "b_blocknr (%llu)",
+ (unsigned long long)group_blkno,
(unsigned long long) bg_bh->b_blocknr);
status = -EIO;
goto bail;
struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
struct ocfs2_chain_list *cl;
struct ocfs2_alloc_context *ac = NULL;
- struct ocfs2_journal_handle *handle = NULL;
+ handle_t *handle = NULL;
u32 bit_off, num_bits;
u16 alloc_rec;
u64 bg_blkno;
mlog_entry_void();
- handle = ocfs2_alloc_handle(osb);
- if (!handle) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail;
- }
-
cl = &fe->id2.i_chain;
status = ocfs2_reserve_clusters(osb,
- handle,
le16_to_cpu(cl->cl_cpg),
&ac);
if (status < 0) {
credits = ocfs2_calc_group_alloc_credits(osb->sb,
le16_to_cpu(cl->cl_cpg));
- handle = ocfs2_start_trans(osb, handle, credits);
+ handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
/* setup the group */
bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
- mlog(0, "new descriptor, record %u, at block %"MLFu64"\n",
- alloc_rec, bg_blkno);
+ mlog(0, "new descriptor, record %u, at block %llu\n",
+ alloc_rec, (unsigned long long)bg_blkno);
bg_bh = sb_getblk(osb->sb, bg_blkno);
if (!bg_bh) {
le32_to_cpu(fe->i_clusters)));
spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
- alloc_inode->i_blocks =
- ocfs2_align_bytes_to_sectors(i_size_read(alloc_inode));
+ alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
status = 0;
bail:
if (handle)
- ocfs2_commit_trans(handle);
+ ocfs2_commit_trans(osb, handle);
if (ac)
ocfs2_free_alloc_context(ac);
}
static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
- struct ocfs2_alloc_context *ac)
+ struct ocfs2_alloc_context *ac,
+ int type,
+ u32 slot,
+ int alloc_new_group)
{
int status;
u32 bits_wanted = ac->ac_bits_wanted;
- struct inode *alloc_inode = ac->ac_inode;
+ struct inode *alloc_inode;
struct buffer_head *bh = NULL;
- struct ocfs2_journal_handle *handle = ac->ac_handle;
struct ocfs2_dinode *fe;
u32 free_bits;
mlog_entry_void();
- BUG_ON(handle->flags & OCFS2_HANDLE_STARTED);
+ alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
+ if (!alloc_inode) {
+ mlog_errno(-EINVAL);
+ return -EINVAL;
+ }
+
+ mutex_lock(&alloc_inode->i_mutex);
- ocfs2_handle_add_inode(handle, alloc_inode);
- status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1);
+ status = ocfs2_inode_lock(alloc_inode, &bh, 1);
if (status < 0) {
+ mutex_unlock(&alloc_inode->i_mutex);
+ iput(alloc_inode);
+
mlog_errno(status);
- goto bail;
+ return status;
}
+ ac->ac_inode = alloc_inode;
+ ac->ac_alloc_slot = slot;
+
fe = (struct ocfs2_dinode *) bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
goto bail;
}
if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
- ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator "
- "# %"MLFu64, le64_to_cpu(fe->i_blkno));
+ ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
+ (unsigned long long)le64_to_cpu(fe->i_blkno));
status = -EIO;
goto bail;
}
goto bail;
}
+ if (alloc_new_group != ALLOC_NEW_GROUP) {
+ mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
+ "and we don't alloc a new group for it.\n",
+ slot, bits_wanted, free_bits);
+ status = -ENOSPC;
+ goto bail;
+ }
+
status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
if (status < 0) {
if (status != -ENOSPC)
}
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
- struct ocfs2_journal_handle *handle,
struct ocfs2_dinode *fe,
struct ocfs2_alloc_context **ac)
{
int status;
- struct inode *alloc_inode = NULL;
+ u32 slot;
- *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+ *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
if (!(*ac)) {
status = -ENOMEM;
mlog_errno(status);
}
(*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
- (*ac)->ac_handle = handle;
(*ac)->ac_which = OCFS2_AC_USE_META;
-
-#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
- alloc_inode = ocfs2_get_system_file_inode(osb,
- EXTENT_ALLOC_SYSTEM_INODE,
- 0);
-#else
- alloc_inode = ocfs2_get_system_file_inode(osb,
- EXTENT_ALLOC_SYSTEM_INODE,
- osb->slot_num);
-#endif
- if (!alloc_inode) {
- status = -ENOMEM;
- mlog_errno(status);
- goto bail;
- }
-
- (*ac)->ac_inode = igrab(alloc_inode);
+ slot = osb->slot_num;
(*ac)->ac_group_search = ocfs2_block_group_search;
- status = ocfs2_reserve_suballoc_bits(osb, (*ac));
+ status = ocfs2_reserve_suballoc_bits(osb, (*ac),
+ EXTENT_ALLOC_SYSTEM_INODE,
+ slot, ALLOC_NEW_GROUP);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
*ac = NULL;
}
- if (alloc_inode)
- iput(alloc_inode);
-
mlog_exit(status);
return status;
}
+static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
+ struct ocfs2_alloc_context *ac)
+{
+ int i, status = -ENOSPC;
+ s16 slot = ocfs2_get_inode_steal_slot(osb);
+
+ /* Start to steal inodes from the first slot after ours. */
+ if (slot == OCFS2_INVALID_SLOT)
+ slot = osb->slot_num + 1;
+
+ for (i = 0; i < osb->max_slots; i++, slot++) {
+ if (slot == osb->max_slots)
+ slot = 0;
+
+ if (slot == osb->slot_num)
+ continue;
+
+ status = ocfs2_reserve_suballoc_bits(osb, ac,
+ INODE_ALLOC_SYSTEM_INODE,
+ slot, NOT_ALLOC_NEW_GROUP);
+ if (status >= 0) {
+ ocfs2_set_inode_steal_slot(osb, slot);
+ break;
+ }
+
+ ocfs2_free_ac_resource(ac);
+ }
+
+ return status;
+}
+
int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
- struct ocfs2_journal_handle *handle,
struct ocfs2_alloc_context **ac)
{
int status;
- struct inode *alloc_inode = NULL;
+ s16 slot = ocfs2_get_inode_steal_slot(osb);
- *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+ *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
if (!(*ac)) {
status = -ENOMEM;
mlog_errno(status);
}
(*ac)->ac_bits_wanted = 1;
- (*ac)->ac_handle = handle;
(*ac)->ac_which = OCFS2_AC_USE_INODE;
- alloc_inode = ocfs2_get_system_file_inode(osb,
- INODE_ALLOC_SYSTEM_INODE,
- osb->slot_num);
- if (!alloc_inode) {
- status = -ENOMEM;
+ (*ac)->ac_group_search = ocfs2_block_group_search;
+
+ /*
+ * slot is set when we successfully steal inode from other nodes.
+ * It is reset in 3 places:
+ * 1. when we flush the truncate log
+ * 2. when we complete local alloc recovery.
+ * 3. when we successfully allocate from our own slot.
+ * After it is set, we will go on stealing inodes until we find the
+ * need to check our slots to see whether there is some space for us.
+ */
+ if (slot != OCFS2_INVALID_SLOT &&
+ atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_INODES_TO_STEAL)
+ goto inode_steal;
+
+ atomic_set(&osb->s_num_inodes_stolen, 0);
+ status = ocfs2_reserve_suballoc_bits(osb, *ac,
+ INODE_ALLOC_SYSTEM_INODE,
+ osb->slot_num, ALLOC_NEW_GROUP);
+ if (status >= 0) {
+ status = 0;
+
+ /*
+ * Some inodes must be freed by us, so try to allocate
+ * from our own next time.
+ */
+ if (slot != OCFS2_INVALID_SLOT)
+ ocfs2_init_inode_steal_slot(osb);
+ goto bail;
+ } else if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
goto bail;
}
- (*ac)->ac_inode = igrab(alloc_inode);
- (*ac)->ac_group_search = ocfs2_block_group_search;
+ ocfs2_free_ac_resource(*ac);
- status = ocfs2_reserve_suballoc_bits(osb, *ac);
+inode_steal:
+ status = ocfs2_steal_inode_from_other_nodes(osb, *ac);
+ atomic_inc(&osb->s_num_inodes_stolen);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
*ac = NULL;
}
- if (alloc_inode)
- iput(alloc_inode);
-
mlog_exit(status);
return status;
}
{
int status;
- ac->ac_inode = ocfs2_get_system_file_inode(osb,
- GLOBAL_BITMAP_SYSTEM_INODE,
- OCFS2_INVALID_SLOT);
- if (!ac->ac_inode) {
- status = -EINVAL;
- mlog(ML_ERROR, "Could not get bitmap inode!\n");
- goto bail;
- }
ac->ac_which = OCFS2_AC_USE_MAIN;
ac->ac_group_search = ocfs2_cluster_group_search;
- status = ocfs2_reserve_suballoc_bits(osb, ac);
- if (status < 0 && status != -ENOSPC)
+ status = ocfs2_reserve_suballoc_bits(osb, ac,
+ GLOBAL_BITMAP_SYSTEM_INODE,
+ OCFS2_INVALID_SLOT,
+ ALLOC_NEW_GROUP);
+ if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
+ goto bail;
+ }
+
bail:
return status;
}
* use so we figure it out for them, but unfortunately this clutters
* things a bit. */
int ocfs2_reserve_clusters(struct ocfs2_super *osb,
- struct ocfs2_journal_handle *handle,
u32 bits_wanted,
struct ocfs2_alloc_context **ac)
{
mlog_entry_void();
- BUG_ON(!handle);
-
- *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
+ *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
if (!(*ac)) {
status = -ENOMEM;
mlog_errno(status);
}
(*ac)->ac_bits_wanted = bits_wanted;
- (*ac)->ac_handle = handle;
status = -ENOSPC;
if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
status = ocfs2_reserve_local_alloc_bits(osb,
- handle,
bits_wanted,
*ac);
if ((status < 0) && (status != -ENOSPC)) {
* sync-data inodes."
*
* Note: OCFS2 already does this differently for metadata vs data
- * allocations, as those bitmaps are seperate and undo access is never
+ * allocations, as those bitmaps are separate and undo access is never
* called on a metadata group descriptor.
*/
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
struct buffer_head *bg_bh,
unsigned int bits_wanted,
+ unsigned int total_bits,
u16 *bit_off,
u16 *bits_found)
{
found = start = best_offset = best_size = 0;
bitmap = bg->bg_bitmap;
- while((offset = ocfs2_find_next_zero_bit(bitmap,
- le16_to_cpu(bg->bg_bits),
- start)) != -1) {
- if (offset == le16_to_cpu(bg->bg_bits))
+ while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
+ if (offset == total_bits)
break;
if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
return status;
}
-static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
+static inline int ocfs2_block_group_set_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
return best;
}
-static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
+static int ocfs2_relink_block_group(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *fe_bh,
struct buffer_head *bg_bh,
goto out;
}
- mlog(0, "In suballoc %"MLFu64", chain %u, move group %"MLFu64" to "
- "top, prev = %"MLFu64"\n",
- fe->i_blkno, chain, bg->bg_blkno, prev_bg->bg_blkno);
+ mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
+ (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
+ (unsigned long long)le64_to_cpu(bg->bg_blkno),
+ (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
bg_ptr = le64_to_cpu(bg->bg_next_group);
{
int search = -ENOSPC;
int ret;
- struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
+ struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
u16 tmp_off, tmp_found;
+ unsigned int max_bits, gd_cluster_off;
BUG_ON(!ocfs2_is_cluster_bitmap(inode));
- if (bg->bg_free_bits_count) {
+ if (gd->bg_free_bits_count) {
+ max_bits = le16_to_cpu(gd->bg_bits);
+
+ /* Tail groups in cluster bitmaps which aren't cpg
+ * aligned are prone to partial extention by a failed
+ * fs resize. If the file system resize never got to
+ * update the dinode cluster count, then we don't want
+ * to trust any clusters past it, regardless of what
+ * the group descriptor says. */
+ gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
+ le64_to_cpu(gd->bg_blkno));
+ if ((gd_cluster_off + max_bits) >
+ OCFS2_I(inode)->ip_clusters) {
+ max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
+ mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
+ (unsigned long long)le64_to_cpu(gd->bg_blkno),
+ le16_to_cpu(gd->bg_bits),
+ OCFS2_I(inode)->ip_clusters, max_bits);
+ }
+
ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
+ max_bits,
&tmp_off, &tmp_found);
if (ret)
return ret;
if (bg->bg_free_bits_count)
ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
+ le16_to_cpu(bg->bg_bits),
bit_off, bits_found);
return ret;
}
+static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+ handle_t *handle,
+ struct buffer_head *di_bh,
+ u32 num_bits,
+ u16 chain)
+{
+ int ret;
+ u32 tmp_used;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+ struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
+
+ ret = ocfs2_journal_access(handle, inode, di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
+ di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
+ le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
+
+ ret = ocfs2_journal_dirty(handle, di_bh);
+ if (ret < 0)
+ mlog_errno(ret);
+
+out:
+ return ret;
+}
+
+static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
+ handle_t *handle,
+ u32 bits_wanted,
+ u32 min_bits,
+ u16 *bit_off,
+ unsigned int *num_bits,
+ u64 gd_blkno,
+ u16 *bits_left)
+{
+ int ret;
+ u16 found;
+ struct buffer_head *group_bh = NULL;
+ struct ocfs2_group_desc *gd;
+ struct inode *alloc_inode = ac->ac_inode;
+
+ ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
+ &group_bh, OCFS2_BH_CACHED, alloc_inode);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ gd = (struct ocfs2_group_desc *) group_bh->b_data;
+ if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+ OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
+ ret = -EIO;
+ goto out;
+ }
+
+ ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
+ bit_off, &found);
+ if (ret < 0) {
+ if (ret != -ENOSPC)
+ mlog_errno(ret);
+ goto out;
+ }
+
+ *num_bits = found;
+
+ ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
+ *num_bits,
+ le16_to_cpu(gd->bg_chain));
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
+ *bit_off, *num_bits);
+ if (ret < 0)
+ mlog_errno(ret);
+
+ *bits_left = le16_to_cpu(gd->bg_free_bits_count);
+
+out:
+ brelse(group_bh);
+
+ return ret;
+}
+
static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
+ handle_t *handle,
u32 bits_wanted,
u32 min_bits,
u16 *bit_off,
unsigned int *num_bits,
- u64 *bg_blkno)
+ u64 *bg_blkno,
+ u16 *bits_left)
{
int status;
u16 chain, tmp_bits;
u32 tmp_used;
u64 next_group;
- struct ocfs2_journal_handle *handle = ac->ac_handle;
struct inode *alloc_inode = ac->ac_inode;
struct buffer_head *group_bh = NULL;
struct buffer_head *prev_group_bh = NULL;
struct ocfs2_group_desc *bg;
chain = ac->ac_chain;
- mlog(0, "trying to alloc %u bits from chain %u, inode %"MLFu64"\n",
- bits_wanted, chain, OCFS2_I(alloc_inode)->ip_blkno);
+ mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
+ bits_wanted, chain,
+ (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
le64_to_cpu(cl->cl_recs[chain].c_blkno),
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
+ status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+ if (status) {
+ mlog_errno(status);
goto bail;
}
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
+ status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+ if (status) {
+ mlog_errno(status);
goto bail;
}
}
goto bail;
}
- mlog(0, "alloc succeeds: we give %u bits from block group %"MLFu64"\n",
- tmp_bits, bg->bg_blkno);
+ mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
+ tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
*num_bits = tmp_bits;
goto bail;
}
- mlog(0, "Allocated %u bits from suballocator %"MLFu64"\n",
- *num_bits, fe->i_blkno);
+ mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
+ (unsigned long long)le64_to_cpu(fe->i_blkno));
*bg_blkno = le64_to_cpu(bg->bg_blkno);
+ *bits_left = le16_to_cpu(bg->bg_free_bits_count);
bail:
if (group_bh)
brelse(group_bh);
/* will give out up to bits_wanted contiguous bits. */
static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
struct ocfs2_alloc_context *ac,
+ handle_t *handle,
u32 bits_wanted,
u32 min_bits,
u16 *bit_off,
{
int status;
u16 victim, i;
+ u16 bits_left = 0;
+ u64 hint_blkno = ac->ac_last_group;
struct ocfs2_chain_list *cl;
struct ocfs2_dinode *fe;
}
if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
le32_to_cpu(fe->id1.bitmap1.i_total)) {
- ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u"
- "used bits but only %u total.",
- le64_to_cpu(fe->i_blkno),
+ ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
+ "bits but only %u total.",
+ (unsigned long long)le64_to_cpu(fe->i_blkno),
le32_to_cpu(fe->id1.bitmap1.i_used),
le32_to_cpu(fe->id1.bitmap1.i_total));
status = -EIO;
goto bail;
}
+ if (hint_blkno) {
+ /* Attempt to short-circuit the usual search mechanism
+ * by jumping straight to the most recently used
+ * allocation group. This helps us mantain some
+ * contiguousness across allocations. */
+ status = ocfs2_search_one_group(ac, handle, bits_wanted,
+ min_bits, bit_off, num_bits,
+ hint_blkno, &bits_left);
+ if (!status) {
+ /* Be careful to update *bg_blkno here as the
+ * caller is expecting it to be filled in, and
+ * ocfs2_search_one_group() won't do that for
+ * us. */
+ *bg_blkno = hint_blkno;
+ goto set_hint;
+ }
+ if (status < 0 && status != -ENOSPC) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
victim = ocfs2_find_victim_chain(cl);
ac->ac_chain = victim;
ac->ac_allow_chain_relink = 1;
- status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off,
- num_bits, bg_blkno);
+ status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off,
+ num_bits, bg_blkno, &bits_left);
if (!status)
- goto bail;
+ goto set_hint;
if (status < 0 && status != -ENOSPC) {
mlog_errno(status);
goto bail;
continue;
ac->ac_chain = i;
- status = ocfs2_search_chain(ac, bits_wanted, min_bits,
- bit_off, num_bits,
- bg_blkno);
+ status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
+ bit_off, num_bits, bg_blkno,
+ &bits_left);
if (!status)
break;
if (status < 0 && status != -ENOSPC) {
goto bail;
}
}
-bail:
+set_hint:
+ if (status != -ENOSPC) {
+ /* If the next search of this group is not likely to
+ * yield a suitable extent, then we reset the last
+ * group hint so as to not waste a disk read */
+ if (bits_left < min_bits)
+ ac->ac_last_group = 0;
+ else
+ ac->ac_last_group = *bg_blkno;
+ }
+
+bail:
mlog_exit(status);
return status;
}
int ocfs2_claim_metadata(struct ocfs2_super *osb,
- struct ocfs2_journal_handle *handle,
+ handle_t *handle,
struct ocfs2_alloc_context *ac,
u32 bits_wanted,
u16 *suballoc_bit_start,
BUG_ON(!ac);
BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
- BUG_ON(ac->ac_handle != handle);
status = ocfs2_claim_suballoc_bits(osb,
ac,
+ handle,
bits_wanted,
1,
suballoc_bit_start,
}
int ocfs2_claim_new_inode(struct ocfs2_super *osb,
- struct ocfs2_journal_handle *handle,
+ handle_t *handle,
struct ocfs2_alloc_context *ac,
u16 *suballoc_bit,
u64 *fe_blkno)
BUG_ON(ac->ac_bits_given != 0);
BUG_ON(ac->ac_bits_wanted != 1);
BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
- BUG_ON(ac->ac_handle != handle);
status = ocfs2_claim_suballoc_bits(osb,
ac,
+ handle,
1,
1,
suballoc_bit,
/* given a cluster offset, calculate which block group it belongs to
* and return that block offset. */
-static inline u64 ocfs2_which_cluster_group(struct inode *inode,
- u32 cluster)
+u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
u32 group_no;
* contig. allocation, set to '1' to indicate we can deal with extents
* of any size.
*/
-int ocfs2_claim_clusters(struct ocfs2_super *osb,
- struct ocfs2_journal_handle *handle,
- struct ocfs2_alloc_context *ac,
- u32 min_clusters,
- u32 *cluster_start,
- u32 *num_clusters)
+int __ocfs2_claim_clusters(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 min_clusters,
+ u32 max_clusters,
+ u32 *cluster_start,
+ u32 *num_clusters)
{
int status;
- unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
- u64 bg_blkno;
+ unsigned int bits_wanted = max_clusters;
+ u64 bg_blkno = 0;
u16 bg_bit_off;
mlog_entry_void();
- BUG_ON(!ac);
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
&& ac->ac_which != OCFS2_AC_USE_MAIN);
- BUG_ON(ac->ac_handle != handle);
if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
status = ocfs2_claim_local_alloc_bits(osb,
if (min_clusters > (osb->bitmap_cpg - 1)) {
/* The only paths asking for contiguousness
* should know about this already. */
- mlog(ML_ERROR, "minimum allocation requested exceeds "
- "group bitmap size!");
+ mlog(ML_ERROR, "minimum allocation requested %u exceeds "
+ "group bitmap size %u!\n", min_clusters,
+ osb->bitmap_cpg);
status = -ENOSPC;
goto bail;
}
status = ocfs2_claim_suballoc_bits(osb,
ac,
+ handle,
bits_wanted,
min_clusters,
&bg_bit_off,
return status;
}
-static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle,
+int ocfs2_claim_clusters(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 min_clusters,
+ u32 *cluster_start,
+ u32 *num_clusters)
+{
+ unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+
+ return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
+ bits_wanted, cluster_start, num_clusters);
+}
+
+static inline int ocfs2_block_group_clear_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
/*
* expects the suballoc inode to already be locked.
*/
-static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
- struct inode *alloc_inode,
- struct buffer_head *alloc_bh,
- unsigned int start_bit,
- u64 bg_blkno,
- unsigned int count)
+int ocfs2_free_suballoc_bits(handle_t *handle,
+ struct inode *alloc_inode,
+ struct buffer_head *alloc_bh,
+ unsigned int start_bit,
+ u64 bg_blkno,
+ unsigned int count)
{
int status = 0;
u32 tmp_used;
}
BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
- mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64
- ", starting at %u\n",
- OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno,
- start_bit);
+ mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
+ (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
+ (unsigned long long)bg_blkno, start_bit);
status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
alloc_inode);
}
group = (struct ocfs2_group_desc *) group_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(group)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group);
- status = -EIO;
+ status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
+ if (status) {
+ mlog_errno(status);
goto bail;
}
BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
return status;
}
-static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
-{
- u64 group = block - (u64) bit;
-
- return group;
-}
-
-int ocfs2_free_dinode(struct ocfs2_journal_handle *handle,
+int ocfs2_free_dinode(handle_t *handle,
struct inode *inode_alloc_inode,
struct buffer_head *inode_alloc_bh,
struct ocfs2_dinode *di)
inode_alloc_bh, bit, bg_blkno, 1);
}
-int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle,
- struct inode *eb_alloc_inode,
- struct buffer_head *eb_alloc_bh,
- struct ocfs2_extent_block *eb)
-{
- u64 blk = le64_to_cpu(eb->h_blkno);
- u16 bit = le16_to_cpu(eb->h_suballoc_bit);
- u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
-
- return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh,
- bit, bg_blkno, 1);
-}
-
-int ocfs2_free_clusters(struct ocfs2_journal_handle *handle,
+int ocfs2_free_clusters(handle_t *handle,
struct inode *bitmap_inode,
struct buffer_head *bitmap_bh,
u64 start_blk,
ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
&bg_start_bit);
- mlog(0, "want to free %u clusters starting at block %"MLFu64"\n",
- num_clusters, start_blk);
- mlog(0, "bg_blkno = %"MLFu64", bg_start_bit = %u\n",
- bg_blkno, bg_start_bit);
+ mlog(0, "want to free %u clusters starting at block %llu\n",
+ num_clusters, (unsigned long long)start_blk);
+ mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
+ (unsigned long long)bg_blkno, bg_start_bit);
status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
bg_start_bit, bg_blkno,
printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
printk("bg_chain: %u\n", bg->bg_chain);
printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation));
- printk("bg_next_group: %"MLFu64"\n", bg->bg_next_group);
- printk("bg_parent_dinode: %"MLFu64"\n", bg->bg_parent_dinode);
- printk("bg_blkno: %"MLFu64"\n", bg->bg_blkno);
+ printk("bg_next_group: %llu\n",
+ (unsigned long long)bg->bg_next_group);
+ printk("bg_parent_dinode: %llu\n",
+ (unsigned long long)bg->bg_parent_dinode);
+ printk("bg_blkno: %llu\n",
+ (unsigned long long)bg->bg_blkno);
}
static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
{
int i;
- printk("Suballoc Inode %"MLFu64":\n", fe->i_blkno);
+ printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
printk("i_signature: %s\n", fe->i_signature);
- printk("i_size: %"MLFu64"\n", fe->i_size);
+ printk("i_size: %llu\n",
+ (unsigned long long)fe->i_size);
printk("i_clusters: %u\n", fe->i_clusters);
printk("i_generation: %u\n",
le32_to_cpu(fe->i_generation));
fe->id2.i_chain.cl_recs[i].c_free);
printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
fe->id2.i_chain.cl_recs[i].c_total);
- printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %"MLFu64"\n", i,
- fe->id2.i_chain.cl_recs[i].c_blkno);
+ printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
+ (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
}
}