pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jan 2009 01:14:59 +0000 (17:14 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jan 2009 01:14:59 +0000 (17:14 -0800)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (57 commits)
  jbd2: Fix oops in jbd2_journal_init_inode() on corrupted fs
  ext4: Remove "extents" mount option
  block: Add Kconfig help which notes that ext4 needs CONFIG_LBD
  ext4: Make printk's consistently prefixed with "EXT4-fs: "
  ext4: Add sanity checks for the superblock before mounting the filesystem
  ext4: Add mount option to set kjournald's I/O priority
  jbd2: Submit writes to the journal using WRITE_SYNC
  jbd2: Add pid and journal device name to the "kjournald2 starting" message
  ext4: Add markers for better debuggability
  ext4: Remove code to create the journal inode
  ext4: provide function to release metadata pages under memory pressure
  ext3: provide function to release metadata pages under memory pressure
  add releasepage hooks to block devices which can be used by file systems
  ext4: Fix s_dirty_blocks_counter if block allocation failed with nodelalloc
  ext4: Init the complete page while building buddy cache
  ext4: Don't allow new groups to be added during block allocation
  ext4: mark the blocks/inode bitmap beyond end of group as used
  ext4: Use new buffer_head flag to check uninit group bitmaps initialization
  ext4: Fix the race between read_inode_bitmap() and ext4_new_inode()
  ext4: code cleanup
  ...

16 files changed:
fs/block_dev.c
fs/ext3/namei.c
fs/ext3/super.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/inode.c
fs/ext4/namei.c
fs/ext4/super.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/super.c
include/linux/ext3_fs.h
include/linux/ext3_fs_sb.h
include/linux/fs.h
include/linux/jbd2.h

diff --combined fs/block_dev.c
index 8ebbfdf708c24c9d70bc4cc88e8266202d56ec06,1dd07e66e98acf06f0999bea1926c7799a4a67f9..ac7031f12ea51b66177a3277b5cd48be3f4d6dc1
@@@ -1005,7 -1005,6 +1005,7 @@@ static int __blkdev_get(struct block_de
        }
  
        lock_kernel();
 + restart:
  
        ret = -ENXIO;
        disk = get_gendisk(bdev->bd_dev, &partno);
  
                        if (disk->fops->open) {
                                ret = disk->fops->open(bdev, mode);
 +                              if (ret == -ERESTARTSYS) {
 +                                      /* Lost a race with 'disk' being
 +                                       * deleted, try again.
 +                                       * See md.c
 +                                       */
 +                                      disk_put_part(bdev->bd_part);
 +                                      bdev->bd_part = NULL;
 +                                      module_put(disk->fops->owner);
 +                                      put_disk(disk);
 +                                      bdev->bd_disk = NULL;
 +                                      mutex_unlock(&bdev->bd_mutex);
 +                                      goto restart;
 +                              }
                                if (ret)
                                        goto out_clear;
                        }
@@@ -1234,6 -1220,20 +1234,20 @@@ static long block_ioctl(struct file *fi
        return blkdev_ioctl(bdev, mode, cmd, arg);
  }
  
+ /*
+  * Try to release a page associated with block device when the system
+  * is under memory pressure.
+  */
+ static int blkdev_releasepage(struct page *page, gfp_t wait)
+ {
+       struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
+       if (super && super->s_op->bdev_try_to_free_page)
+               return super->s_op->bdev_try_to_free_page(super, page, wait);
+       return try_to_free_buffers(page);
+ }
  static const struct address_space_operations def_blk_aops = {
        .readpage       = blkdev_readpage,
        .writepage      = blkdev_writepage,
        .write_begin    = blkdev_write_begin,
        .write_end      = blkdev_write_end,
        .writepages     = generic_writepages,
+       .releasepage    = blkdev_releasepage,
        .direct_IO      = blkdev_direct_IO,
  };
  
@@@ -1276,7 -1277,7 +1291,7 @@@ EXPORT_SYMBOL(ioctl_by_bdev)
  
  /**
   * lookup_bdev  - lookup a struct block_device by name
 - * @path:     special file representing the block device
 + * @pathname: special file representing the block device
   *
   * Get a reference to the blockdevice at @pathname in the current
   * namespace if possible and return it.  Return ERR_PTR(error)
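
The fs/block_dev.c hunks above add a ->releasepage handler for block devices that defers to the owning filesystem's bdev_try_to_free_page hook when one is registered, and otherwise falls back to try_to_free_buffers(). The stand-alone sketch below illustrates the same optional-hook-with-fallback dispatch outside the kernel; every name in it (fake_super, fake_page, generic_release and so on) is invented for illustration and is not the kernel API.

#include <stdio.h>

struct fake_page { int has_busy_buffers; };

struct fake_super_ops {
        /* optional hook; may be NULL */
        int (*bdev_try_to_free_page)(struct fake_page *page);
};

struct fake_super { const struct fake_super_ops *s_op; };

/* generic fallback: can only free a page whose buffers are not pinned */
static int generic_release(struct fake_page *page)
{
        return !page->has_busy_buffers;         /* 1 = freed, 0 = still pinned */
}

/* a journaling fs could drop whatever pins the buffers, then free them */
static int journal_aware_release(struct fake_page *page)
{
        page->has_busy_buffers = 0;
        return generic_release(page);
}

/* dispatch: use the filesystem hook if present, else the generic path */
static int release_page(struct fake_super *super, struct fake_page *page)
{
        if (super && super->s_op && super->s_op->bdev_try_to_free_page)
                return super->s_op->bdev_try_to_free_page(page);
        return generic_release(page);
}

int main(void)
{
        struct fake_super_ops ops = { .bdev_try_to_free_page = journal_aware_release };
        struct fake_super sb = { .s_op = &ops };
        struct fake_page busy = { .has_busy_buffers = 1 };

        printf("generic path: %d\n", release_page(NULL, &busy));  /* 0 */
        printf("hooked path:  %d\n", release_page(&sb, &busy));   /* 1 */
        return 0;
}

The generic path reports the pinned page as unreleasable, while the hooked path frees it, which mirrors how a journaling filesystem can release journal-pinned buffers that the plain buffer-head path cannot.
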
diff --combined fs/ext3/namei.c
index 8d6f965e502cd9d56969fefcf347dbeec37a42c7,2c2d700c1ccfcb112d7df6ee9303b2cfc1716116..69a3d19ca9fd4dfbb80cf4de27b0fe10e9518120
@@@ -74,6 -74,10 +74,6 @@@ static struct buffer_head *ext3_append(
  #define assert(test) J_ASSERT(test)
  #endif
  
 -#ifndef swap
 -#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
 -#endif
 -
  #ifdef DX_DEBUG
  #define dxtrace(command) command
  #else
@@@ -364,6 -368,8 +364,8 @@@ dx_probe(struct qstr *entry, struct ino
                goto fail;
        }
        hinfo->hash_version = root->info.hash_version;
+       if (hinfo->hash_version <= DX_HASH_TEA)
+               hinfo->hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned;
        hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
        if (entry)
                ext3fs_dirhash(entry->name, entry->len, hinfo);
@@@ -632,6 -638,9 +634,9 @@@ int ext3_htree_fill_tree(struct file *d
        dir = dir_file->f_path.dentry->d_inode;
        if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
                hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+               if (hinfo.hash_version <= DX_HASH_TEA)
+                       hinfo.hash_version +=
+                               EXT3_SB(dir->i_sb)->s_hash_unsigned;
                hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
                count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
                                               start_hash, start_minor_hash);
@@@ -1152,9 -1161,9 +1157,9 @@@ static struct ext3_dir_entry_2 *do_spli
        u32 hash2;
        struct dx_map_entry *map;
        char *data1 = (*bh)->b_data, *data2;
-       unsigned split, move, size, i;
+       unsigned split, move, size;
        struct ext3_dir_entry_2 *de = NULL, *de2;
-       int     err = 0;
+       int     err = 0, i;
  
        bh2 = ext3_append (handle, dir, &newblock, &err);
        if (!(bh2)) {
@@@ -1394,6 -1403,8 +1399,8 @@@ static int make_indexed_dir(handle_t *h
  
        /* Initialize as for dx_probe */
        hinfo.hash_version = root->info.hash_version;
+       if (hinfo.hash_version <= DX_HASH_TEA)
+               hinfo.hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned;
        hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
        ext3fs_dirhash(name, namelen, &hinfo);
        frame = frames;
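
The three ext3/namei.c hunks above bump hash_version by s_hash_unsigned for the legacy hash algorithms, selecting an explicitly unsigned-char variant of the directory hash. The old code hashed filename bytes through the platform's plain char, so directories created on an architecture where char is signed could hash differently on one where it is unsigned. The toy program below uses a made-up hash rather than the real ext3 dirhash, but shows how a single byte >= 0x80 produces diverging values under the two interpretations.

#include <stdio.h>

static unsigned int toy_hash_signed(const char *s, int len)
{
        unsigned int h = 0x12a3fe2d;
        int i;

        for (i = 0; i < len; i++)
                h = h * 31 + (signed char)s[i];     /* sign-extends 0x80..0xff */
        return h;
}

static unsigned int toy_hash_unsigned(const char *s, int len)
{
        unsigned int h = 0x12a3fe2d;
        int i;

        for (i = 0; i < len; i++)
                h = h * 31 + (unsigned char)s[i];   /* always 0..255 */
        return h;
}

int main(void)
{
        const char name[] = "f\xe9vrier";           /* contains a byte >= 0x80 */
        int len = sizeof(name) - 1;

        printf("signed:   %#x\n", toy_hash_signed(name, len));
        printf("unsigned: %#x\n", toy_hash_unsigned(name, len));
        return 0;
}

Both functions walk the same bytes; only the cast differs, and that ambiguity is exactly what the new unsigned hash versions pin down on disk.
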
diff --combined fs/ext3/super.c
index 01c235bc2054422a0bf1134d6af7214ab459689a,6900ff05e3ab1d392def2418f909251762533814..5d047a030a73d0570a9ec992f549a4e4d2fb7c6a
@@@ -439,7 -439,6 +439,7 @@@ static void ext3_put_super (struct supe
                ext3_blkdev_remove(sbi);
        }
        sb->s_fs_info = NULL;
 +      kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
        return;
  }
@@@ -683,6 -682,26 +683,26 @@@ static struct dentry *ext3_fh_to_parent
                                    ext3_nfs_get_inode);
  }
  
+ /*
+  * Try to release metadata pages (indirect blocks, directories) which are
+  * mapped via the block device.  Since these pages could have journal heads
+  * which would prevent try_to_free_buffers() from freeing them, we must use
+  * jbd layer's try_to_free_buffers() function to release them.
+  */
+ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
+                                gfp_t wait)
+ {
+       journal_t *journal = EXT3_SB(sb)->s_journal;
+       WARN_ON(PageChecked(page));
+       if (!page_has_buffers(page))
+               return 0;
+       if (journal)
+               return journal_try_to_free_buffers(journal, page, 
+                                                  wait & ~__GFP_WAIT);
+       return try_to_free_buffers(page);
+ }
  #ifdef CONFIG_QUOTA
  #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
  #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
@@@ -714,9 -733,7 +734,9 @@@ static struct dquot_operations ext3_quo
        .acquire_dquot  = ext3_acquire_dquot,
        .release_dquot  = ext3_release_dquot,
        .mark_dirty     = ext3_mark_dquot_dirty,
 -      .write_info     = ext3_write_info
 +      .write_info     = ext3_write_info,
 +      .alloc_dquot    = dquot_alloc,
 +      .destroy_dquot  = dquot_destroy,
  };
  
  static struct quotactl_ops ext3_qctl_operations = {
@@@ -749,6 -766,7 +769,7 @@@ static const struct super_operations ex
        .quota_read     = ext3_quota_read,
        .quota_write    = ext3_quota_write,
  #endif
+       .bdev_try_to_free_page = bdev_try_to_free_page,
  };
  
  static const struct export_operations ext3_export_ops = {
@@@ -1038,7 -1056,8 +1059,7 @@@ static int parse_options (char *options
                case Opt_grpjquota:
                        qtype = GRPQUOTA;
  set_qf_name:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            !sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR
                                        "EXT3-fs: Cannot change journaled "
                case Opt_offgrpjquota:
                        qtype = GRPQUOTA;
  clear_qf_name:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR "EXT3-fs: Cannot change "
                                        "journaled quota options when "
                case Opt_jqfmt_vfsv0:
                        qfmt = QFMT_VFS_V0;
  set_qf_format:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            sbi->s_jquota_fmt != qfmt) {
                                printk(KERN_ERR "EXT3-fs: Cannot change "
                                        "journaled quota options when "
                        set_opt(sbi->s_mount_opt, GRPQUOTA);
                        break;
                case Opt_noquota:
 -                      if (sb_any_quota_enabled(sb) ||
 -                          sb_any_quota_suspended(sb)) {
 +                      if (sb_any_quota_loaded(sb)) {
                                printk(KERN_ERR "EXT3-fs: Cannot change quota "
                                        "options when quota turned on.\n");
                                return 0;
@@@ -1547,13 -1569,6 +1568,13 @@@ static int ext3_fill_super (struct supe
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
                return -ENOMEM;
 +
 +      sbi->s_blockgroup_lock =
 +              kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
 +      if (!sbi->s_blockgroup_lock) {
 +              kfree(sbi);
 +              return -ENOMEM;
 +      }
        sb->s_fs_info = sbi;
        sbi->s_mount_opt = 0;
        sbi->s_resuid = EXT3_DEF_RESUID;
        for (i=0; i < 4; i++)
                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
        sbi->s_def_hash_version = es->s_def_hash_version;
+       i = le32_to_cpu(es->s_flags);
+       if (i & EXT2_FLAGS_UNSIGNED_HASH)
+               sbi->s_hash_unsigned = 3;
+       else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+ #ifdef __CHAR_UNSIGNED__
+               es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+               sbi->s_hash_unsigned = 3;
+ #else
+               es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+ #endif
+               sb->s_dirt = 1;
+       }
  
        if (sbi->s_blocks_per_group > blocksize * 8) {
                printk (KERN_ERR
                goto failed_mount;
        }
  
 -      bgl_lock_init(&sbi->s_blockgroup_lock);
 +      bgl_lock_init(sbi->s_blockgroup_lock);
  
        for (i = 0; i < db_count; i++) {
                block = descriptor_loc(sb, logic_sb_block, i);
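
In the ext3_fill_super() hunk above, a superblock carrying neither EXT2_FLAGS_SIGNED_HASH nor EXT2_FLAGS_UNSIGNED_HASH is treated as written by an old kernel whose hash followed the signedness of the building compiler's plain char, so the mount code falls back to __CHAR_UNSIGNED__ and records the choice in the superblock (the +3 in s_hash_unsigned matches the DX_HASH_*_UNSIGNED values added in the ext4.h diff later in this page). The small stand-alone program below sketches that probe; __CHAR_UNSIGNED__ is GCC's predefined macro, and CHAR_MIN from <limits.h> gives the same answer.

#include <stdio.h>
#include <limits.h>

int main(void)
{
#ifdef __CHAR_UNSIGNED__
        const char *compile_time = "unsigned";  /* GCC: plain char is unsigned */
#else
        const char *compile_time = "signed";
#endif
        /* CHAR_MIN is 0 exactly when plain char is unsigned */
        const char *run_time = (CHAR_MIN == 0) ? "unsigned" : "signed";

        printf("plain char is %s (preprocessor), %s (limits.h)\n",
               compile_time, run_time);
        return 0;
}
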
diff --combined fs/ext4/ext4.h
index 6c46c648430d051ad8a0b25569df02cd1da60f26,db1718833f5817db2051922c0f08b4550a1b43df..c668e4377d76027a87f95eb168d6e54b024a1a69
@@@ -19,6 -19,7 +19,7 @@@
  #include <linux/types.h>
  #include <linux/blkdev.h>
  #include <linux/magic.h>
+ #include <linux/jbd2.h>
  #include "ext4_i.h"
  
  /*
@@@ -94,9 -95,9 +95,9 @@@ struct ext4_allocation_request 
        /* phys. block for ^^^ */
        ext4_fsblk_t pright;
        /* how many blocks we want to allocate */
-       unsigned long len;
+       unsigned int len;
        /* flags. see above EXT4_MB_HINT_* */
-       unsigned long flags;
+       unsigned int flags;
  };
  
  /*
@@@ -156,12 -157,12 +157,12 @@@ struct ext4_group_des
        __le32  bg_block_bitmap_lo;     /* Blocks bitmap block */
        __le32  bg_inode_bitmap_lo;     /* Inodes bitmap block */
        __le32  bg_inode_table_lo;      /* Inodes table block */
-       __le16  bg_free_blocks_count;   /* Free blocks count */
-       __le16  bg_free_inodes_count;   /* Free inodes count */
-       __le16  bg_used_dirs_count;     /* Directories count */
+       __le16  bg_free_blocks_count_lo;/* Free blocks count */
+       __le16  bg_free_inodes_count_lo;/* Free inodes count */
+       __le16  bg_used_dirs_count_lo;  /* Directories count */
        __le16  bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
        __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
-       __le16  bg_itable_unused;       /* Unused inodes count */
+       __le16  bg_itable_unused_lo;    /* Unused inodes count */
        __le16  bg_checksum;            /* crc16(sb_uuid+group+desc) */
        __le32  bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
        __le32  bg_inode_bitmap_hi;     /* Inodes bitmap block MSB */
        __le16  bg_free_blocks_count_hi;/* Free blocks count MSB */
        __le16  bg_free_inodes_count_hi;/* Free inodes count MSB */
        __le16  bg_used_dirs_count_hi;  /* Directories count MSB */
-       __le16  bg_itable_unused_hi;    /* Unused inodes count MSB */
+       __le16  bg_itable_unused_hi;    /* Unused inodes count MSB */
        __u32   bg_reserved2[3];
  };
  
@@@ -328,6 -329,7 +329,7 @@@ struct ext4_mount_options 
        uid_t s_resuid;
        gid_t s_resgid;
        unsigned long s_commit_interval;
+       u32 s_min_batch_time, s_max_batch_time;
  #ifdef CONFIG_QUOTA
        int s_jquota_fmt;
        char *s_qf_names[MAXQUOTAS];
@@@ -534,7 -536,6 +536,6 @@@ do {                                                                              
  #define EXT4_MOUNT_QUOTA              0x80000 /* Some quota option set */
  #define EXT4_MOUNT_USRQUOTA           0x100000 /* "old" user quota */
  #define EXT4_MOUNT_GRPQUOTA           0x200000 /* "old" group quota */
- #define EXT4_MOUNT_EXTENTS            0x400000 /* Extents support */
  #define EXT4_MOUNT_JOURNAL_CHECKSUM   0x800000 /* Journal checksums */
  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT       0x1000000 /* Journal Async Commit */
  #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
@@@ -726,11 -727,11 +727,11 @@@ static inline int ext4_valid_inum(struc
   */
  
  #define EXT4_HAS_COMPAT_FEATURE(sb,mask)                      \
-       (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask))
+       ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0)
  #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask)                   \
-       (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask))
+       ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0)
  #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask)                    \
-       (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask))
+       ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0)
  #define EXT4_SET_COMPAT_FEATURE(sb,mask)                      \
        EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
  #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask)                   \
  #define EXT4_DEFM_JMODE_ORDERED       0x0040
  #define EXT4_DEFM_JMODE_WBACK 0x0060
  
+ /*
+  * Default journal batch times
+  */
+ #define EXT4_DEF_MIN_BATCH_TIME       0
+ #define EXT4_DEF_MAX_BATCH_TIME       15000 /* 15ms */
  /*
   * Structure of a directory entry
   */
@@@ -891,6 -898,9 +898,9 @@@ static inline __le16 ext4_rec_len_to_di
  #define DX_HASH_LEGACY                0
  #define DX_HASH_HALF_MD4      1
  #define DX_HASH_TEA           2
+ #define DX_HASH_LEGACY_UNSIGNED       3
+ #define DX_HASH_HALF_MD4_UNSIGNED     4
+ #define DX_HASH_TEA_UNSIGNED          5
  
  #ifdef __KERNEL__
  
@@@ -955,7 -965,7 +965,7 @@@ ext4_group_first_block_no(struct super_
  #define ERR_BAD_DX_DIR        -75000
  
  void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
-                       unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
+                       ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
  
  extern struct proc_dir_entry *ext4_proc_root;
  
@@@ -987,6 -997,9 +997,9 @@@ do {                                                                       
  # define ATTRIB_NORET __attribute__((noreturn))
  # define NORET_AND    noreturn,
  
+ /* bitmap.c */
+ extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
  /* balloc.c */
  extern unsigned int ext4_block_group(struct super_block *sb,
                        ext4_fsblk_t blocknr);
@@@ -995,20 -1008,14 +1008,14 @@@ extern ext4_grpblk_t ext4_block_group_o
  extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
  extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
                        ext4_group_t group);
- extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, int *errp);
  extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp);
- extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
-                                       ext4_lblk_t iblock, ext4_fsblk_t goal,
-                                       unsigned long *count, int *errp);
  extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
  extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
  extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t block, unsigned long count, int metadata);
- extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
-                               ext4_fsblk_t block, unsigned long count,
-                               unsigned long *pdquot_freed_blocks);
+ extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+                               ext4_fsblk_t block, unsigned long count);
  extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
  extern void ext4_check_blocks_bitmap(struct super_block *);
  extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
@@@ -1019,7 -1026,7 +1026,7 @@@ extern int ext4_should_retry_alloc(stru
  /* dir.c */
  extern int ext4_check_dir_entry(const char *, struct inode *,
                                struct ext4_dir_entry_2 *,
-                               struct buffer_head *, unsigned long);
+                               struct buffer_head *, unsigned int);
  extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
                                    __u32 minor_hash,
                                    struct ext4_dir_entry_2 *dirent);
@@@ -1039,7 -1046,6 +1046,6 @@@ extern struct inode * ext4_orphan_get(s
  extern unsigned long ext4_count_free_inodes(struct super_block *);
  extern unsigned long ext4_count_dirs(struct super_block *);
  extern void ext4_check_inodes_bitmap(struct super_block *);
- extern unsigned long ext4_count_free(struct buffer_head *, unsigned);
  
  /* mballoc.c */
  extern long ext4_mb_stats;
@@@ -1054,12 -1060,13 +1060,13 @@@ extern int __init init_ext4_mballoc(voi
  extern void exit_ext4_mballoc(void);
  extern void ext4_mb_free_blocks(handle_t *, struct inode *,
                unsigned long, unsigned long, int, unsigned long *);
- extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
+ extern int ext4_mb_add_groupinfo(struct super_block *sb,
                ext4_group_t i, struct ext4_group_desc *desc);
  extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
                ext4_grpblk_t add);
+ extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
+ extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
+                                               ext4_group_t, int);
  /* inode.c */
  int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                struct buffer_head *bh, ext4_fsblk_t blocknr);
@@@ -1069,10 -1076,6 +1076,6 @@@ struct buffer_head *ext4_bread(handle_
                                                ext4_lblk_t, int, int *);
  int ext4_get_block(struct inode *inode, sector_t iblock,
                                struct buffer_head *bh_result, int create);
- int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
-                               ext4_lblk_t iblock, unsigned long maxblocks,
-                               struct buffer_head *bh_result,
-                               int create, int extend_disksize);
  
  extern struct inode *ext4_iget(struct super_block *, unsigned long);
  extern int  ext4_write_inode(struct inode *, int);
@@@ -1123,6 -1126,9 +1126,9 @@@ extern void ext4_abort(struct super_blo
        __attribute__ ((format (printf, 3, 4)));
  extern void ext4_warning(struct super_block *, const char *, const char *, ...)
        __attribute__ ((format (printf, 3, 4)));
+ extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
+                               const char *, const char *, ...)
+       __attribute__ ((format (printf, 4, 5)));
  extern void ext4_update_dynamic_rev(struct super_block *sb);
  extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
                                        __u32 compat);
@@@ -1136,12 -1142,28 +1142,28 @@@ extern ext4_fsblk_t ext4_inode_bitmap(s
                                      struct ext4_group_desc *bg);
  extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                                     struct ext4_group_desc *bg);
+ extern __u32 ext4_free_blks_count(struct super_block *sb,
+                               struct ext4_group_desc *bg);
+ extern __u32 ext4_free_inodes_count(struct super_block *sb,
+                                struct ext4_group_desc *bg);
+ extern __u32 ext4_used_dirs_count(struct super_block *sb,
+                               struct ext4_group_desc *bg);
+ extern __u32 ext4_itable_unused_count(struct super_block *sb,
+                                  struct ext4_group_desc *bg);
  extern void ext4_block_bitmap_set(struct super_block *sb,
                                  struct ext4_group_desc *bg, ext4_fsblk_t blk);
  extern void ext4_inode_bitmap_set(struct super_block *sb,
                                  struct ext4_group_desc *bg, ext4_fsblk_t blk);
  extern void ext4_inode_table_set(struct super_block *sb,
                                 struct ext4_group_desc *bg, ext4_fsblk_t blk);
+ extern void ext4_free_blks_set(struct super_block *sb,
+                              struct ext4_group_desc *bg, __u32 count);
+ extern void ext4_free_inodes_set(struct super_block *sb,
+                               struct ext4_group_desc *bg, __u32 count);
+ extern void ext4_used_dirs_set(struct super_block *sb,
+                               struct ext4_group_desc *bg, __u32 count);
+ extern void ext4_itable_unused_set(struct super_block *sb,
+                                  struct ext4_group_desc *bg, __u32 count);
  
  static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
  {
@@@ -1225,11 -1247,11 +1247,11 @@@ do {                                                         
  } while (0)
  
  #ifdef CONFIG_SMP
 -/* Each CPU can accumulate FBC_BATCH blocks in their local
 +/* Each CPU can accumulate percpu_counter_batch blocks in their local
   * counters. So we need to make sure we have free blocks more
 - * than FBC_BATCH  * nr_cpu_ids. Also add a window of 4 times.
 + * than percpu_counter_batch  * nr_cpu_ids. Also add a window of 4 times.
   */
 -#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
 +#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
  #else
  #define EXT4_FREEBLOCKS_WATERMARK 0
  #endif
@@@ -1246,6 -1268,50 +1268,50 @@@ static inline void ext4_update_i_disksi
        return ;
  }
  
+ struct ext4_group_info {
+       unsigned long   bb_state;
+       struct rb_root  bb_free_root;
+       unsigned short  bb_first_free;
+       unsigned short  bb_free;
+       unsigned short  bb_fragments;
+       struct          list_head bb_prealloc_list;
+ #ifdef DOUBLE_CHECK
+       void            *bb_bitmap;
+ #endif
+       struct rw_semaphore alloc_sem;
+       unsigned short  bb_counters[];
+ };
+ #define EXT4_GROUP_INFO_NEED_INIT_BIT 0
+ #define EXT4_GROUP_INFO_LOCKED_BIT    1
+ #define EXT4_MB_GRP_NEED_INIT(grp)    \
+       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+ static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+ {
+       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+       bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+ }
+ static inline void ext4_unlock_group(struct super_block *sb,
+                                       ext4_group_t group)
+ {
+       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+       bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+ }
+ static inline int ext4_is_group_locked(struct super_block *sb,
+                                       ext4_group_t group)
+ {
+       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+       return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
+                                               &(grinfo->bb_state));
+ }
  /*
   * Inodes and files operations
   */
@@@ -1271,18 -1337,38 +1337,38 @@@ extern int ext4_ext_writepage_trans_blo
  extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
                                       int chunk);
  extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
-                       ext4_lblk_t iblock,
-                       unsigned long max_blocks, struct buffer_head *bh_result,
-                       int create, int extend_disksize);
+                              ext4_lblk_t iblock, unsigned int max_blocks,
+                              struct buffer_head *bh_result,
+                              int create, int extend_disksize);
  extern void ext4_ext_truncate(struct inode *);
  extern void ext4_ext_init(struct super_block *);
  extern void ext4_ext_release(struct super_block *);
  extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
                          loff_t len);
  extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
-                       sector_t block, unsigned long max_blocks,
+                       sector_t block, unsigned int max_blocks,
                        struct buffer_head *bh, int create,
                        int extend_disksize, int flag);
+ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+                       __u64 start, __u64 len);
+ /*
+  * Add new method to test whether block and inode bitmaps are properly
+  * initialized. With uninit_bg reading the block from disk is not enough
+  * to mark the bitmap uptodate. We need to also zero-out the bitmap
+  */
+ #define BH_BITMAP_UPTODATE BH_JBDPrivateStart
+ static inline int bitmap_uptodate(struct buffer_head *bh)
+ {
+       return (buffer_uptodate(bh) &&
+                       test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state));
+ }
+ static inline void set_bitmap_uptodate(struct buffer_head *bh)
+ {
+       set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
+ }
  #endif        /* __KERNEL__ */
  
  #endif        /* _EXT4_H */
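
The ext4.h hunks above rename the 16-bit group-descriptor counters to *_lo and declare accessor helpers (ext4_free_blks_count(), ext4_free_blks_set() and friends) that pair them with the *_hi halves used by larger descriptors. The sketch below shows the lo/hi split-and-combine such accessors perform, using simplified stand-in types; the real helpers additionally do little-endian conversion and only touch the high half when the descriptor size allows it.

#include <stdio.h>
#include <stdint.h>

/* simplified stand-in for the on-disk group descriptor fields */
struct toy_group_desc {
        uint16_t bg_free_blocks_count_lo;
        uint16_t bg_free_blocks_count_hi;
};

static uint32_t toy_free_blks_count(const struct toy_group_desc *bg)
{
        return ((uint32_t)bg->bg_free_blocks_count_hi << 16) |
               bg->bg_free_blocks_count_lo;
}

static void toy_free_blks_set(struct toy_group_desc *bg, uint32_t count)
{
        bg->bg_free_blocks_count_lo = (uint16_t)(count & 0xffff);
        bg->bg_free_blocks_count_hi = (uint16_t)(count >> 16);
}

int main(void)
{
        struct toy_group_desc bg;

        toy_free_blks_set(&bg, 70000);          /* does not fit in 16 bits */
        printf("lo=%u hi=%u combined=%u\n",
               (unsigned)bg.bg_free_blocks_count_lo,
               (unsigned)bg.bg_free_blocks_count_hi,
               (unsigned)toy_free_blks_count(&bg));
        return 0;
}
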
diff --combined fs/ext4/extents.c
index 3f54db31cdc233c4627a6956f2b47228ebffed2f,240cf0daad4b2771696af4f8ae257abce5527b32..54bf0623a9ae31f4db4dbf6cecc0a13c72038ce5
@@@ -97,6 -97,8 +97,8 @@@ static int ext4_ext_journal_restart(han
  {
        int err;
  
+       if (!ext4_handle_valid(handle))
+               return 0;
        if (handle->h_buffer_credits > needed)
                return 0;
        err = ext4_journal_extend(handle, needed);
@@@ -134,7 -136,7 +136,7 @@@ static int ext4_ext_dirty(handle_t *han
        int err;
        if (path->p_bh) {
                /* path points to block */
-               err = ext4_journal_dirty_metadata(handle, path->p_bh);
+               err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
        } else {
                /* path points to leaf/index in inode body */
                err = ext4_mark_inode_dirty(handle, inode);
@@@ -191,7 -193,7 +193,7 @@@ ext4_ext_new_meta_block(handle_t *handl
        ext4_fsblk_t goal, newblock;
  
        goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
-       newblock = ext4_new_meta_block(handle, inode, goal, err);
+       newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
        return newblock;
  }
  
@@@ -780,7 -782,7 +782,7 @@@ static int ext4_ext_split(handle_t *han
        set_buffer_uptodate(bh);
        unlock_buffer(bh);
  
-       err = ext4_journal_dirty_metadata(handle, bh);
+       err = ext4_handle_dirty_metadata(handle, inode, bh);
        if (err)
                goto cleanup;
        brelse(bh);
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
  
-               err = ext4_journal_dirty_metadata(handle, bh);
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
                if (err)
                        goto cleanup;
                brelse(bh);
@@@ -955,7 -957,7 +957,7 @@@ static int ext4_ext_grow_indepth(handle
        set_buffer_uptodate(bh);
        unlock_buffer(bh);
  
-       err = ext4_journal_dirty_metadata(handle, bh);
+       err = ext4_handle_dirty_metadata(handle, inode, bh);
        if (err)
                goto out;
  
@@@ -1160,15 -1162,13 +1162,13 @@@ ext4_ext_search_right(struct inode *ino
        while (--depth >= 0) {
                ix = path[depth].p_idx;
                if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
-                       break;
+                       goto got_index;
        }
  
-       if (depth < 0) {
-               /* we've gone up to the root and
-                * found no index to the right */
-               return 0;
-       }
+       /* we've gone up to the root and found no index to the right */
+       return 0;
  
+ got_index:
        /* we've found index to the right, let's
         * follow it and find the closest allocated
         * block to the right */
        *phys = ext_pblock(ex);
        put_bh(bh);
        return 0;
  }
  
  /*
@@@ -1622,7 -1621,6 +1621,6 @@@ cleanup
                ext4_ext_drop_refs(npath);
                kfree(npath);
        }
-       ext4_ext_tree_changed(inode);
        ext4_ext_invalidate_cache(inode);
        return err;
  }
@@@ -2233,7 -2231,6 +2231,6 @@@ static int ext4_ext_remove_space(struc
                }
        }
  out:
-       ext4_ext_tree_changed(inode);
        ext4_ext_drop_refs(path);
        kfree(path);
        ext4_journal_stop(handle);
@@@ -2250,7 -2247,7 +2247,7 @@@ void ext4_ext_init(struct super_block *
         * possible initialization would be here
         */
  
-       if (test_opt(sb, EXTENTS)) {
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
                printk(KERN_INFO "EXT4-fs: file extents enabled");
  #ifdef AGGRESSIVE_TEST
                printk(", aggressive tests");
   */
  void ext4_ext_release(struct super_block *sb)
  {
-       if (!test_opt(sb, EXTENTS))
+       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
                return;
  
  #ifdef EXTENTS_STATS
@@@ -2380,7 -2377,7 +2377,7 @@@ static int ext4_ext_convert_to_initiali
                                                struct inode *inode,
                                                struct ext4_ext_path *path,
                                                ext4_lblk_t iblock,
-                                               unsigned long max_blocks)
+                                               unsigned int max_blocks)
  {
        struct ext4_extent *ex, newex, orig_ex;
        struct ext4_extent *ex1 = NULL;
                 */
                newdepth = ext_depth(inode);
                /*
 -               * update the extent length after successfull insert of the
 +               * update the extent length after successful insert of the
                 * split extent
                 */
                orig_ex.ee_len = cpu_to_le16(ee_len -
@@@ -2678,26 -2675,26 +2675,26 @@@ fix_extent_len
   */
  int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        ext4_lblk_t iblock,
-                       unsigned long max_blocks, struct buffer_head *bh_result,
+                       unsigned int max_blocks, struct buffer_head *bh_result,
                        int create, int extend_disksize)
  {
        struct ext4_ext_path *path = NULL;
        struct ext4_extent_header *eh;
        struct ext4_extent newex, *ex;
-       ext4_fsblk_t goal, newblock;
-       int err = 0, depth, ret;
-       unsigned long allocated = 0;
+       ext4_fsblk_t newblock;
+       int err = 0, depth, ret, cache_type;
+       unsigned int allocated = 0;
        struct ext4_allocation_request ar;
        loff_t disksize;
  
        __clear_bit(BH_New, &bh_result->b_state);
-       ext_debug("blocks %u/%lu requested for inode %u\n",
+       ext_debug("blocks %u/%u requested for inode %u\n",
                        iblock, max_blocks, inode->i_ino);
  
        /* check in cache */
-       goal = ext4_ext_in_cache(inode, iblock, &newex);
-       if (goal) {
-               if (goal == EXT4_EXT_CACHE_GAP) {
+       cache_type = ext4_ext_in_cache(inode, iblock, &newex);
+       if (cache_type) {
+               if (cache_type == EXT4_EXT_CACHE_GAP) {
                        if (!create) {
                                /*
                                 * block isn't allocated yet and
                                goto out2;
                        }
                        /* we should allocate requested block */
-               } else if (goal == EXT4_EXT_CACHE_EXTENT) {
+               } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
                        /* block is already allocated */
                        newblock = iblock
                                   - le32_to_cpu(newex.ee_block)
        if (!newblock)
                goto out2;
        ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
-                       goal, newblock, allocated);
+                 ar.goal, newblock, allocated);
  
        /* try to insert new extent into found leaf and return */
        ext4_ext_store_pblock(&newex, newblock);
@@@ -2950,7 -2947,7 +2947,7 @@@ void ext4_ext_truncate(struct inode *in
         * transaction synchronous.
         */
        if (IS_SYNC(inode))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
  out_stop:
        up_write(&EXT4_I(inode)->i_data_sem);
@@@ -3004,7 -3001,7 +3001,7 @@@ long ext4_fallocate(struct inode *inode
        handle_t *handle;
        ext4_lblk_t block;
        loff_t new_size;
-       unsigned long max_blocks;
+       unsigned int max_blocks;
        int ret = 0;
        int ret2 = 0;
        int retries = 0;
@@@ -3083,7 -3080,7 +3080,7 @@@ retry
  /*
   * Callback function called for each extent to gather FIEMAP information.
   */
- int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
+ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
                       struct ext4_ext_cache *newex, struct ext4_extent *ex,
                       void *data)
  {
  /* fiemap flags we can handle specified here */
  #define EXT4_FIEMAP_FLAGS     (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
  
- int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
+ static int ext4_xattr_fiemap(struct inode *inode,
+                               struct fiemap_extent_info *fieinfo)
  {
        __u64 physical = 0;
        __u64 length;
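
Two related changes meet in the extents code above: ext4_ext_init() and ext4_ext_release() now key off the on-disk EXT4_FEATURE_INCOMPAT_EXTENTS flag instead of the removed "extents" mount option, and the EXT4_HAS_*_FEATURE() macros in the ext4.h diff were normalized to yield 0 or 1 by appending "!= 0". The sketch below shows one hazard the normalization guards against: a raw "flags & mask" result silently truncates to 0 when stored in a narrower variable. The flag value used here is arbitrary and only for illustration.

#include <stdio.h>
#include <stdint.h>

#define TOY_FEATURE_EXTENTS     0x0100u         /* a bit above the low byte */

int main(void)
{
        uint32_t incompat = TOY_FEATURE_EXTENTS;

        unsigned char raw  = incompat & TOY_FEATURE_EXTENTS;         /* 0x100 truncates to 0 */
        unsigned char norm = (incompat & TOY_FEATURE_EXTENTS) != 0;  /* always 0 or 1 */

        printf("raw=%u norm=%u\n", (unsigned)raw, (unsigned)norm);   /* raw=0 norm=1 */
        return 0;
}
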
diff --combined fs/ext4/inode.c
index 98d3fe7057efcd8a0a60048e37c900b20bb5a7c0,4cac8da4e0c18ae58549f4dc507eb0902fc8d3b7..a6444cee0c7e086c4b76b6906011d11b32de7c7f
@@@ -72,12 -72,17 +72,17 @@@ static int ext4_inode_is_fast_symlink(s
   * "bh" may be NULL: a metadata block may have been freed from memory
   * but there may still be a record of it in the journal, and that record
   * still needs to be revoked.
+  *
+  * If the handle isn't valid we're not journaling so there's nothing to do.
   */
  int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                        struct buffer_head *bh, ext4_fsblk_t blocknr)
  {
        int err;
  
+       if (!ext4_handle_valid(handle))
+               return 0;
        might_sleep();
  
        BUFFER_TRACE(bh, "enter");
@@@ -170,7 -175,9 +175,9 @@@ static handle_t *start_transaction(stru
   */
  static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
  {
-       if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+       if (!ext4_handle_valid(handle))
+               return 0;
+       if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
                return 0;
        if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
                return 0;
   */
  static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
  {
+       BUG_ON(EXT4_JOURNAL(inode) == NULL);
        jbd_debug(2, "restarting handle %p\n", handle);
        return ext4_journal_restart(handle, blocks_for_truncate(inode));
  }
@@@ -216,7 -224,7 +224,7 @@@ void ext4_delete_inode(struct inode *in
        }
  
        if (IS_SYNC(inode))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
        inode->i_size = 0;
        err = ext4_mark_inode_dirty(handle, inode);
        if (err) {
         * enough credits left in the handle to remove the inode from
         * the orphan list and set the dtime field.
         */
-       if (handle->h_buffer_credits < 3) {
+       if (!ext4_handle_has_enough_credits(handle, 3)) {
                err = ext4_journal_extend(handle, 3);
                if (err > 0)
                        err = ext4_journal_restart(handle, 3);
@@@ -506,10 -514,10 +514,10 @@@ static ext4_fsblk_t ext4_find_goal(stru
   *    return the total number of blocks to be allocate, including the
   *    direct and indirect blocks.
   */
- static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
+ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
                int blocks_to_boundary)
  {
-       unsigned long count = 0;
+       unsigned int count = 0;
  
        /*
         * Simple case, [t,d]Indirect block(s) has not allocated yet
@@@ -547,6 -555,7 +555,7 @@@ static int ext4_alloc_blocks(handle_t *
                                int indirect_blks, int blks,
                                ext4_fsblk_t new_blocks[4], int *err)
  {
+       struct ext4_allocation_request ar;
        int target, i;
        unsigned long count = 0, blk_allocated = 0;
        int index = 0;
        if (!target)
                goto allocated;
        /* Now allocate data blocks */
-       count = target;
-       /* allocating blocks for data blocks */
-       current_block = ext4_new_blocks(handle, inode, iblock,
-                                               goal, &count, err);
+       memset(&ar, 0, sizeof(ar));
+       ar.inode = inode;
+       ar.goal = goal;
+       ar.len = target;
+       ar.logical = iblock;
+       if (S_ISREG(inode->i_mode))
+               /* enable in-core preallocation only for regular files */
+               ar.flags = EXT4_MB_HINT_DATA;
+       current_block = ext4_mb_new_blocks(handle, &ar, err);
        if (*err && (target == blks)) {
                /*
                 * if the allocation failed and we didn't allocate
                 */
                        new_blocks[index] = current_block;
                }
-               blk_allocated += count;
+               blk_allocated += ar.len;
        }
  allocated:
        /* total number of blocks allocated for direct blocks */
@@@ -709,8 -725,8 +725,8 @@@ static int ext4_alloc_branch(handle_t *
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
  
-               BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-               err = ext4_journal_dirty_metadata(handle, bh);
+               BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
                if (err)
                        goto failed;
        }
@@@ -792,8 -808,8 +808,8 @@@ static int ext4_splice_branch(handle_t 
                 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
                 */
                jbd_debug(5, "splicing indirect only\n");
-               BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata");
-               err = ext4_journal_dirty_metadata(handle, where->bh);
+               BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, inode, where->bh);
                if (err)
                        goto err_out;
        } else {
@@@ -840,10 -856,10 +856,10 @@@ err_out
   * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
   * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
   */
- int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
-               ext4_lblk_t iblock, unsigned long maxblocks,
-               struct buffer_head *bh_result,
-               int create, int extend_disksize)
+ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
+                                 ext4_lblk_t iblock, unsigned int maxblocks,
+                                 struct buffer_head *bh_result,
+                                 int create, int extend_disksize)
  {
        int err = -EIO;
        ext4_lblk_t offsets[4];
@@@ -1045,7 -1061,7 +1061,7 @@@ static void ext4_da_update_reserve_spac
   * It returns the error in case of allocation failure.
   */
  int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
-                       unsigned long max_blocks, struct buffer_head *bh,
+                       unsigned int max_blocks, struct buffer_head *bh,
                        int create, int extend_disksize, int flag)
  {
        int retval;
@@@ -1221,8 -1237,8 +1237,8 @@@ struct buffer_head *ext4_getblk(handle_
                                set_buffer_uptodate(bh);
                        }
                        unlock_buffer(bh);
-                       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-                       err = ext4_journal_dirty_metadata(handle, bh);
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+                       err = ext4_handle_dirty_metadata(handle, inode, bh);
                        if (!fatal)
                                fatal = err;
                } else {
@@@ -1335,6 -1351,10 +1351,10 @@@ static int ext4_write_begin(struct fil
        pgoff_t index;
        unsigned from, to;
  
+       trace_mark(ext4_write_begin,
+                  "dev %s ino %lu pos %llu len %u flags %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, flags);
        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;
@@@ -1387,7 -1407,7 +1407,7 @@@ static int write_end_fn(handle_t *handl
        if (!buffer_mapped(bh) || buffer_freed(bh))
                return 0;
        set_buffer_uptodate(bh);
-       return ext4_journal_dirty_metadata(handle, bh);
+       return ext4_handle_dirty_metadata(handle, NULL, bh);
  }
  
  /*
@@@ -1406,6 -1426,10 +1426,10 @@@ static int ext4_ordered_write_end(struc
        struct inode *inode = mapping->host;
        int ret = 0, ret2;
  
+       trace_mark(ext4_ordered_write_end,
+                  "dev %s ino %lu pos %llu len %u copied %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, copied);
        ret = ext4_jbd2_file_inode(handle, inode);
  
        if (ret == 0) {
@@@ -1444,6 -1468,10 +1468,10 @@@ static int ext4_writeback_write_end(str
        int ret = 0, ret2;
        loff_t new_i_size;
  
+       trace_mark(ext4_writeback_write_end,
+                  "dev %s ino %lu pos %llu len %u copied %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, copied);
        new_i_size = pos + copied;
        if (new_i_size > EXT4_I(inode)->i_disksize) {
                ext4_update_i_disksize(inode, new_i_size);
@@@ -1479,6 -1507,10 +1507,10 @@@ static int ext4_journalled_write_end(st
        unsigned from, to;
        loff_t new_i_size;
  
+       trace_mark(ext4_journalled_write_end,
+                  "dev %s ino %lu pos %llu len %u copied %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, copied);
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;
  
@@@ -1625,7 -1657,7 +1657,7 @@@ struct mpage_da_data 
        get_block_t *get_block;
        struct writeback_control *wbc;
        int io_done;
-       long pages_written;
+       int pages_written;
        int retval;
  };
  
   */
  static int mpage_da_submit_io(struct mpage_da_data *mpd)
  {
-       struct address_space *mapping = mpd->inode->i_mapping;
-       int ret = 0, err, nr_pages, i;
-       unsigned long index, end;
-       struct pagevec pvec;
        long pages_skipped;
+       struct pagevec pvec;
+       unsigned long index, end;
+       int ret = 0, err, nr_pages, i;
+       struct inode *inode = mpd->inode;
+       struct address_space *mapping = inode->i_mapping;
  
        BUG_ON(mpd->next_page <= mpd->first_page);
-       pagevec_init(&pvec, 0);
+       /*
+        * We need to start from the first_page to the next_page - 1
+        * to make sure we also write the mapped dirty buffer_heads.
+        * If we look at mpd->lbh.b_blocknr we would only be looking
+        * at the currently mapped buffer_heads.
+        */
        index = mpd->first_page;
        end = mpd->next_page - 1;
  
+       pagevec_init(&pvec, 0);
        while (index <= end) {
-               /*
-                * We can use PAGECACHE_TAG_DIRTY lookup here because
-                * even though we have cleared the dirty flag on the page
-                * We still keep the page in the radix tree with tag
-                * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
-                * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
-                * which is called via the below writepage callback.
-                */
-               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                                       PAGECACHE_TAG_DIRTY,
-                                       min(end - index,
-                                       (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
  
+                       index = page->index;
+                       if (index > end)
+                               break;
+                       index++;
+                       BUG_ON(!PageLocked(page));
+                       BUG_ON(PageWriteback(page));
                        pages_skipped = mpd->wbc->pages_skipped;
                        err = mapping->a_ops->writepage(page, mpd->wbc);
                        if (!err && (pages_skipped == mpd->wbc->pages_skipped))
@@@ -1831,13 -1867,13 +1867,13 @@@ static void ext4_print_free_blocks(stru
                        ext4_count_free_blocks(inode->i_sb));
        printk(KERN_EMERG "Free/Dirty block details\n");
        printk(KERN_EMERG "free_blocks=%lld\n",
-                       percpu_counter_sum(&sbi->s_freeblocks_counter));
+                       (long long)percpu_counter_sum(&sbi->s_freeblocks_counter));
        printk(KERN_EMERG "dirty_blocks=%lld\n",
-                       percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+                       (long long)percpu_counter_sum(&sbi->s_dirtyblocks_counter));
        printk(KERN_EMERG "Block reservation details\n");
-       printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
+       printk(KERN_EMERG "i_reserved_data_blocks=%u\n",
                        EXT4_I(inode)->i_reserved_data_blocks);
-       printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
+       printk(KERN_EMERG "i_reserved_meta_blocks=%u\n",
                        EXT4_I(inode)->i_reserved_meta_blocks);
        return;
  }
@@@ -2087,11 -2123,29 +2123,29 @@@ static int __mpage_da_writepage(struct 
                bh = head;
                do {
                        BUG_ON(buffer_locked(bh));
+                       /*
+                        * We need to try to allocate
+                        * unmapped blocks in the same page.
+                        * Otherwise we won't make progress
+                        * with the page in ext4_da_writepage
+                        */
                        if (buffer_dirty(bh) &&
                                (!buffer_mapped(bh) || buffer_delay(bh))) {
                                mpage_add_bh_to_extent(mpd, logical, bh);
                                if (mpd->io_done)
                                        return MPAGE_DA_EXTENT_TAIL;
+                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+                               /*
+                                * mapped dirty buffer. We need to update
+                                * the b_state because we look at
+                                * b_state in mpage_da_map_blocks. We don't
+                                * update b_size because if we find an
+                                * unmapped buffer_head later we need to
+                                * use the b_state flag of that buffer_head.
+                                */
+                               if (mpd->lbh.b_size == 0)
+                                       mpd->lbh.b_state =
+                                               bh->b_state & BH_FLAGS;
                        }
                        logical++;
                } while ((bh = bh->b_this_page) != head);
@@@ -2269,10 -2323,13 +2323,13 @@@ static int ext4_da_writepage(struct pag
  {
        int ret = 0;
        loff_t size;
-       unsigned long len;
+       unsigned int len;
        struct buffer_head *page_bufs;
        struct inode *inode = page->mapping->host;
  
+       trace_mark(ext4_da_writepage,
+                  "dev %s ino %lu page_index %lu",
+                  inode->i_sb->s_id, inode->i_ino, page->index);
        size = i_size_read(inode);
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
@@@ -2378,10 -2435,25 +2435,25 @@@ static int ext4_da_writepages(struct ad
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
        int no_nrwrite_index_update;
-       long pages_written = 0, pages_skipped;
+       int pages_written = 0;
+       long pages_skipped;
        int needed_blocks, ret = 0, nr_to_writebump = 0;
        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
  
+       trace_mark(ext4_da_writepages,
+                  "dev %s ino %lu nr_t_write %ld "
+                  "pages_skipped %ld range_start %llu "
+                  "range_end %llu nonblocking %d "
+                  "for_kupdate %d for_reclaim %d "
+                  "for_writepages %d range_cyclic %d",
+                  inode->i_sb->s_id, inode->i_ino,
+                  wbc->nr_to_write, wbc->pages_skipped,
+                  (unsigned long long) wbc->range_start,
+                  (unsigned long long) wbc->range_end,
+                  wbc->nonblocking, wbc->for_kupdate,
+                  wbc->for_reclaim, wbc->for_writepages,
+                  wbc->range_cyclic);
        /*
         * No pages to write? This is mainly a kludge to avoid starting
         * a transaction for special inodes like journal inode on last iput()
         */
        if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
                return 0;
+       /*
+        * If the filesystem has aborted, it is read-only, so return
+        * right away instead of dumping stack traces later on that
+        * will obscure the real source of the problem.  We test
+        * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
+        * the latter could be true if the filesystem is mounted
+        * read-only, and in that case, ext4_da_writepages should
+        * *never* be called, so if that ever happens, we would want
+        * the stack trace.
+        */
+       if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
+               return -EROFS;
        /*
         * Make sure nr_to_write is >= sbi->s_mb_stream_request
         * This make sure small files blocks are allocated in
                handle = ext4_journal_start(inode, needed_blocks);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
-                       printk(KERN_EMERG "%s: jbd2_start: "
+                       printk(KERN_CRIT "%s: jbd2_start: "
                               "%ld pages, ino %lu; err %d\n", __func__,
                                wbc->nr_to_write, inode->i_ino, ret);
                        dump_stack();
@@@ -2486,6 -2572,14 +2572,14 @@@ out_writepages
        if (!no_nrwrite_index_update)
                wbc->no_nrwrite_index_update = 0;
        wbc->nr_to_write -= nr_to_writebump;
+       trace_mark(ext4_da_writepage_result,
+                  "dev %s ino %lu ret %d pages_written %d "
+                  "pages_skipped %ld congestion %d "
+                  "more_io %d no_nrwrite_index_update %d",
+                  inode->i_sb->s_id, inode->i_ino, ret,
+                  pages_written, wbc->pages_skipped,
+                  wbc->encountered_congestion, wbc->more_io,
+                  wbc->no_nrwrite_index_update);
        return ret;
  }
  
@@@ -2498,7 -2592,7 +2592,7 @@@ static int ext4_nonda_switch(struct sup
        /*
         * switch to non delalloc mode if we are running low
         * on free block. The free block accounting via percpu
 -       * counters can get slightly wrong with FBC_BATCH getting
 +       * counters can get slightly wrong with percpu_counter_batch getting
         * accumulated on each CPU without updating global counters
         * Delalloc need an accurate free block accounting. So switch
         * to non delalloc when we are near to error range.
@@@ -2537,6 -2631,11 +2631,11 @@@ static int ext4_da_write_begin(struct f
                                        len, flags, pagep, fsdata);
        }
        *fsdata = (void *)0;
+       trace_mark(ext4_da_write_begin,
+                  "dev %s ino %lu pos %llu len %u flags %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, flags);
  retry:
        /*
         * With delayed allocation, we don't log the i_disksize update
@@@ -2626,6 -2725,10 +2725,10 @@@ static int ext4_da_write_end(struct fil
                }
        }
  
+       trace_mark(ext4_da_write_end,
+                  "dev %s ino %lu pos %llu len %u copied %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, copied);
        start = pos & (PAGE_CACHE_SIZE - 1);
        end = start + copied - 1;
  
@@@ -2718,7 -2821,10 +2821,10 @@@ static sector_t ext4_bmap(struct addres
                filemap_write_and_wait(mapping);
        }
  
-       if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
+       BUG_ON(!EXT4_JOURNAL(inode) &&
+              EXT4_I(inode)->i_state & EXT4_STATE_JDATA);
+       if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
                /*
                 * This is a REALLY heavyweight approach, but the use of
                 * bmap on dirty files is expected to be extremely rare:
@@@ -2836,6 -2942,9 +2942,9 @@@ static int ext4_normal_writepage(struc
        loff_t size = i_size_read(inode);
        loff_t len;
  
+       trace_mark(ext4_normal_writepage,
+                  "dev %s ino %lu page_index %lu",
+                  inode->i_sb->s_id, inode->i_ino, page->index);
        J_ASSERT(PageLocked(page));
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
@@@ -2921,6 -3030,9 +3030,9 @@@ static int ext4_journalled_writepage(st
        loff_t size = i_size_read(inode);
        loff_t len;
  
+       trace_mark(ext4_journalled_writepage,
+                  "dev %s ino %lu page_index %lu",
+                  inode->i_sb->s_id, inode->i_ino, page->index);
        J_ASSERT(PageLocked(page));
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
@@@ -2989,7 -3101,10 +3101,10 @@@ static void ext4_invalidatepage(struct 
        if (offset == 0)
                ClearPageChecked(page);
  
-       jbd2_journal_invalidatepage(journal, page, offset);
+       if (journal)
+               jbd2_journal_invalidatepage(journal, page, offset);
+       else
+               block_invalidatepage(page, offset);
  }
  
  static int ext4_releasepage(struct page *page, gfp_t wait)
        WARN_ON(PageChecked(page));
        if (!page_has_buffers(page))
                return 0;
-       return jbd2_journal_try_to_free_buffers(journal, page, wait);
+       if (journal)
+               return jbd2_journal_try_to_free_buffers(journal, page, wait);
+       else
+               return try_to_free_buffers(page);
  }
  
  /*
@@@ -3271,7 -3389,7 +3389,7 @@@ int ext4_block_truncate_page(handle_t *
  
        err = 0;
        if (ext4_should_journal_data(inode)) {
-               err = ext4_journal_dirty_metadata(handle, bh);
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
        } else {
                if (ext4_should_order_data(inode))
                        err = ext4_jbd2_file_inode(handle, inode);
@@@ -3395,8 -3513,8 +3513,8 @@@ static void ext4_clear_blocks(handle_t 
        __le32 *p;
        if (try_to_extend_transaction(handle, inode)) {
                if (bh) {
-                       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-                       ext4_journal_dirty_metadata(handle, bh);
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+                       ext4_handle_dirty_metadata(handle, inode, bh);
                }
                ext4_mark_inode_dirty(handle, inode);
                ext4_journal_test_restart(handle, inode);
@@@ -3496,7 -3614,7 +3614,7 @@@ static void ext4_free_data(handle_t *ha
                                  count, block_to_free_p, p);
  
        if (this_bh) {
-               BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata");
+               BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
  
                /*
                 * The buffer head should have an attached journal head at this
                 * the block was cleared. Check for this instead of OOPSing.
                 */
                if (bh2jh(this_bh))
-                       ext4_journal_dirty_metadata(handle, this_bh);
+                       ext4_handle_dirty_metadata(handle, inode, this_bh);
                else
                        ext4_error(inode->i_sb, __func__,
                                   "circular indirect block detected, "
@@@ -3535,7 -3653,7 +3653,7 @@@ static void ext4_free_branches(handle_
        ext4_fsblk_t nr;
        __le32 *p;
  
-       if (is_handle_aborted(handle))
+       if (ext4_handle_is_aborted(handle))
                return;
  
        if (depth--) {
                         * will merely complain about releasing a free block,
                         * rather than leaking blocks.
                         */
-                       if (is_handle_aborted(handle))
+                       if (ext4_handle_is_aborted(handle))
                                return;
                        if (try_to_extend_transaction(handle, inode)) {
                                ext4_mark_inode_dirty(handle, inode);
                                                                   parent_bh)){
                                        *p = 0;
                                        BUFFER_TRACE(parent_bh,
-                                       "call ext4_journal_dirty_metadata");
-                                       ext4_journal_dirty_metadata(handle,
-                                                                   parent_bh);
+                                       "call ext4_handle_dirty_metadata");
+                                       ext4_handle_dirty_metadata(handle,
+                                                                  inode,
+                                                                  parent_bh);
                                }
                        }
                }
@@@ -3814,7 -3933,7 +3933,7 @@@ do_indirects
         * synchronous
         */
        if (IS_SYNC(inode))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  out_stop:
        /*
         * If this was a simple ftruncate(), and the file will remain alive
@@@ -3844,7 -3963,7 +3963,7 @@@ static int __ext4_get_inode_loc(struct 
        ext4_fsblk_t            block;
        int                     inodes_per_block, inode_offset;
  
-       iloc->bh = 0;
+       iloc->bh = NULL;
        if (!ext4_valid_inum(sb, inode->i_ino))
                return -EIO;
  
@@@ -3951,7 -4070,7 +4070,7 @@@ make_io
                        num = EXT4_INODES_PER_GROUP(sb);
                        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                       EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
-                               num -= le16_to_cpu(gdp->bg_itable_unused);
+                               num -= ext4_itable_unused_count(sb, gdp);
                        table += num / inodes_per_block;
                        if (end > table)
                                end = table;
@@@ -4313,8 -4432,8 +4432,8 @@@ static int ext4_do_update_inode(handle_
                        EXT4_SET_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
                        sb->s_dirt = 1;
-                       handle->h_sync = 1;
-                       err = ext4_journal_dirty_metadata(handle,
+                       ext4_handle_sync(handle);
+                       err = ext4_handle_dirty_metadata(handle, inode,
                                        EXT4_SB(sb)->s_sbh);
                }
        }
                raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
        }
  
-       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-       rc = ext4_journal_dirty_metadata(handle, bh);
+       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+       rc = ext4_handle_dirty_metadata(handle, inode, bh);
        if (!err)
                err = rc;
        ei->i_state &= ~EXT4_STATE_NEW;
@@@ -4406,6 -4524,25 +4524,25 @@@ int ext4_write_inode(struct inode *inod
        return ext4_force_commit(inode->i_sb);
  }
  
+ int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
+ {
+       int err = 0;
+       mark_buffer_dirty(bh);
+       if (inode && inode_needs_sync(inode)) {
+               sync_dirty_buffer(bh);
+               if (buffer_req(bh) && !buffer_uptodate(bh)) {
+                       ext4_error(inode->i_sb, __func__,
+                                  "IO error syncing inode, "
+                                  "inode=%lu, block=%llu",
+                                  inode->i_ino,
+                                  (unsigned long long)bh->b_blocknr);
+                       err = -EIO;
+               }
+       }
+       return err;
+ }
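
__ext4_write_dirty_metadata above marks the buffer dirty and, only when the inode demands synchronous semantics, forces it out and reports -EIO if the write-back did not complete. Below is a userspace analogy of that mark-then-maybe-sync shape built on write()/fsync(); it is not the kernel buffer-head path, and the file name and helper are made up for illustration.

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Userspace analogy: queue the data as "dirty" with write(), and only
     * force it to stable storage when the caller asked for synchronous
     * semantics, mapping a failed flush to -EIO. */
    static int write_metadata(int fd, const void *buf, size_t len, int needs_sync)
    {
            if (write(fd, buf, len) != (ssize_t)len)
                    return -EIO;
            if (needs_sync && fsync(fd) != 0)
                    return -EIO;            /* like the !buffer_uptodate() case */
            return 0;
    }

    int main(void)
    {
            int fd = open("metadata.bin", O_CREAT | O_WRONLY | O_TRUNC, 0600);
            char block[64];

            if (fd < 0)
                    return 1;
            memset(block, 0xab, sizeof(block));
            printf("ret=%d\n", write_metadata(fd, block, sizeof(block), 1));
            close(fd);
            return 0;
    }
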
  /*
   * ext4_setattr()
   *
  ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
                         struct ext4_iloc *iloc)
  {
-       int err = 0;
-       if (handle) {
-               err = ext4_get_inode_loc(inode, iloc);
-               if (!err) {
-                       BUFFER_TRACE(iloc->bh, "get_write_access");
-                       err = ext4_journal_get_write_access(handle, iloc->bh);
-                       if (err) {
-                               brelse(iloc->bh);
-                               iloc->bh = NULL;
-                       }
+       int err;
+       err = ext4_get_inode_loc(inode, iloc);
+       if (!err) {
+               BUFFER_TRACE(iloc->bh, "get_write_access");
+               err = ext4_journal_get_write_access(handle, iloc->bh);
+               if (err) {
+                       brelse(iloc->bh);
+                       iloc->bh = NULL;
                }
        }
        ext4_std_error(inode->i_sb, err);
@@@ -4791,7 -4927,8 +4927,8 @@@ int ext4_mark_inode_dirty(handle_t *han
  
        might_sleep();
        err = ext4_reserve_inode_write(handle, inode, &iloc);
-       if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+       if (ext4_handle_valid(handle) &&
+           EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
            !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
                /*
                 * We need extra buffer credits since we may write into EA block
@@@ -4843,6 -4980,11 +4980,11 @@@ void ext4_dirty_inode(struct inode *ino
        handle_t *current_handle = ext4_journal_current_handle();
        handle_t *handle;
  
+       if (!ext4_handle_valid(current_handle)) {
+               ext4_mark_inode_dirty(current_handle, inode);
+               return;
+       }
        handle = ext4_journal_start(inode, 2);
        if (IS_ERR(handle))
                goto out;
@@@ -4880,8 -5022,9 +5022,9 @@@ static int ext4_pin_inode(handle_t *han
                        BUFFER_TRACE(iloc.bh, "get_write_access");
                        err = jbd2_journal_get_write_access(handle, iloc.bh);
                        if (!err)
-                               err = ext4_journal_dirty_metadata(handle,
-                                                                 iloc.bh);
+                               err = ext4_handle_dirty_metadata(handle,
+                                                                inode,
+                                                                iloc.bh);
                        brelse(iloc.bh);
                }
        }
@@@ -4907,6 -5050,8 +5050,8 @@@ int ext4_change_inode_journal_flag(stru
         */
  
        journal = EXT4_JOURNAL(inode);
+       if (!journal)
+               return 0;
        if (is_journal_aborted(journal))
                return -EROFS;
  
                return PTR_ERR(handle);
  
        err = ext4_mark_inode_dirty(handle, inode);
-       handle->h_sync = 1;
+       ext4_handle_sync(handle);
        ext4_journal_stop(handle);
        ext4_std_error(inode->i_sb, err);
  
diff --combined fs/ext4/namei.c
index 4b8d431d7dff8b6763f6433db2fa4ce1d2f90235,183a09a8b14e63346d5ff5e00719addfe390d48c..fec0b4c2f5f1904c89436c1f825e82338489232f
@@@ -74,6 -74,10 +74,6 @@@ static struct buffer_head *ext4_append(
  #define assert(test) J_ASSERT(test)
  #endif
  
 -#ifndef swap
 -#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
 -#endif
 -
  #ifdef DX_DEBUG
  #define dxtrace(command) command
  #else
@@@ -368,6 -372,8 +368,8 @@@ dx_probe(const struct qstr *d_name, str
                goto fail;
        }
        hinfo->hash_version = root->info.hash_version;
+       if (hinfo->hash_version <= DX_HASH_TEA)
+               hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
        hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
        if (d_name)
                ext4fs_dirhash(d_name->name, d_name->len, hinfo);
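
The hash_version += s_hash_unsigned adjustment above exists because the legacy and TEA dirhash variants fed the file name through plain char, so directories created on platforms where char is unsigned hash bytes above 0x7f differently than on platforms where char is signed. A standalone sketch of that portability trap follows, with a toy accumulator standing in for ext4fs_dirhash (the real hash functions are different; only the signedness effect is the point).

    #include <stdio.h>

    /* Toy accumulator, not the real dirhash. */
    static unsigned int toy_hash_signed(const char *name, int len)
    {
            unsigned int h = 0;
            const signed char *p = (const signed char *)name;

            while (len--)
                    h = h * 31 + (unsigned int)*p++;   /* 0xe9 -> 0xffffffe9 */
            return h;
    }

    static unsigned int toy_hash_unsigned(const char *name, int len)
    {
            unsigned int h = 0;
            const unsigned char *p = (const unsigned char *)name;

            while (len--)
                    h = h * 31 + *p++;                 /* 0xe9 -> 0x000000e9 */
            return h;
    }

    int main(void)
    {
            const char name[] = "caf\xe9";             /* "café" in latin-1 */

            printf("signed-char hash:   %#x\n", toy_hash_signed(name, 4));
            printf("unsigned-char hash: %#x\n", toy_hash_unsigned(name, 4));
            return 0;
    }
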
@@@ -637,6 -643,9 +639,9 @@@ int ext4_htree_fill_tree(struct file *d
        dir = dir_file->f_path.dentry->d_inode;
        if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
                hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+               if (hinfo.hash_version <= DX_HASH_TEA)
+                       hinfo.hash_version +=
+                               EXT4_SB(dir->i_sb)->s_hash_unsigned;
                hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
                count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
                                               start_hash, start_minor_hash);
@@@ -802,7 -811,7 +807,7 @@@ static inline int ext4_match (int len, 
  static inline int search_dirblock(struct buffer_head *bh,
                                  struct inode *dir,
                                  const struct qstr *d_name,
-                                 unsigned long offset,
+                                 unsigned int offset,
                                  struct ext4_dir_entry_2 ** res_dir)
  {
        struct ext4_dir_entry_2 * de;
@@@ -1039,11 -1048,11 +1044,11 @@@ static struct dentry *ext4_lookup(struc
        bh = ext4_find_entry(dir, &dentry->d_name, &de);
        inode = NULL;
        if (bh) {
-               unsigned long ino = le32_to_cpu(de->inode);
+               __u32 ino = le32_to_cpu(de->inode);
                brelse(bh);
                if (!ext4_valid_inum(dir->i_sb, ino)) {
                        ext4_error(dir->i_sb, "ext4_lookup",
-                                  "bad inode number: %lu", ino);
+                                  "bad inode number: %u", ino);
                        return ERR_PTR(-EIO);
                }
                inode = ext4_iget(dir->i_sb, ino);
  
  struct dentry *ext4_get_parent(struct dentry *child)
  {
-       unsigned long ino;
+       __u32 ino;
        struct inode *inode;
        static const struct qstr dotdot = {
                .name = "..",
  
        if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
                ext4_error(child->d_inode->i_sb, "ext4_get_parent",
-                          "bad inode number: %lu", ino);
+                          "bad inode number: %u", ino);
                return ERR_PTR(-EIO);
        }
  
@@@ -1162,9 -1171,9 +1167,9 @@@ static struct ext4_dir_entry_2 *do_spli
        u32 hash2;
        struct dx_map_entry *map;
        char *data1 = (*bh)->b_data, *data2;
-       unsigned split, move, size, i;
+       unsigned split, move, size;
        struct ext4_dir_entry_2 *de = NULL, *de2;
-       int     err = 0;
+       int     err = 0, i;
  
        bh2 = ext4_append (handle, dir, &newblock, &err);
        if (!(bh2)) {
                de = de2;
        }
        dx_insert_block(frame, hash2 + continued, newblock);
-       err = ext4_journal_dirty_metadata(handle, bh2);
+       err = ext4_handle_dirty_metadata(handle, dir, bh2);
        if (err)
                goto journal_error;
-       err = ext4_journal_dirty_metadata(handle, frame->bh);
+       err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
        if (err)
                goto journal_error;
        brelse(bh2);
@@@ -1262,7 -1271,7 +1267,7 @@@ static int add_dirent_to_buf(handle_t *
        struct inode    *dir = dentry->d_parent->d_inode;
        const char      *name = dentry->d_name.name;
        int             namelen = dentry->d_name.len;
-       unsigned long   offset = 0;
+       unsigned int    offset = 0;
        unsigned short  reclen;
        int             nlen, rlen, err;
        char            *top;
        ext4_update_dx_flag(dir);
        dir->i_version++;
        ext4_mark_inode_dirty(handle, dir);
-       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-       err = ext4_journal_dirty_metadata(handle, bh);
+       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+       err = ext4_handle_dirty_metadata(handle, dir, bh);
        if (err)
                ext4_std_error(dir->i_sb, err);
        brelse(bh);
@@@ -1404,6 -1413,8 +1409,8 @@@ static int make_indexed_dir(handle_t *h
  
        /* Initialize as for dx_probe */
        hinfo.hash_version = root->info.hash_version;
+       if (hinfo.hash_version <= DX_HASH_TEA)
+               hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
        hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
        ext4fs_dirhash(name, namelen, &hinfo);
        frame = frames;
@@@ -1433,7 -1444,6 +1440,6 @@@ static int ext4_add_entry(handle_t *han
                          struct inode *inode)
  {
        struct inode *dir = dentry->d_parent->d_inode;
-       unsigned long offset;
        struct buffer_head *bh;
        struct ext4_dir_entry_2 *de;
        struct super_block *sb;
                ext4_mark_inode_dirty(handle, dir);
        }
        blocks = dir->i_size >> sb->s_blocksize_bits;
-       for (block = 0, offset = 0; block < blocks; block++) {
+       for (block = 0; block < blocks; block++) {
                bh = ext4_bread(handle, dir, block, 0, &retval);
                if(!bh)
                        return retval;
@@@ -1570,7 -1580,7 +1576,7 @@@ static int ext4_dx_add_entry(handle_t *
                        dxtrace(dx_show_index("node", frames[1].entries));
                        dxtrace(dx_show_index("node",
                               ((struct dx_node *) bh2->b_data)->entries));
-                       err = ext4_journal_dirty_metadata(handle, bh2);
+                       err = ext4_handle_dirty_metadata(handle, inode, bh2);
                        if (err)
                                goto journal_error;
                        brelse (bh2);
                        if (err)
                                goto journal_error;
                }
-               ext4_journal_dirty_metadata(handle, frames[0].bh);
+               ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
        }
        de = do_split(handle, dir, &bh, frame, &hinfo, &err);
        if (!de)
@@@ -1642,8 -1652,8 +1648,8 @@@ static int ext4_delete_entry(handle_t *
                        else
                                de->inode = 0;
                        dir->i_version++;
-                       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-                       ext4_journal_dirty_metadata(handle, bh);
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+                       ext4_handle_dirty_metadata(handle, dir, bh);
                        return 0;
                }
                i += ext4_rec_len_from_disk(de->rec_len);
@@@ -1721,7 -1731,7 +1727,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = ext4_new_inode (handle, dir, mode);
        err = PTR_ERR(inode);
@@@ -1755,7 -1765,7 +1761,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, mode);
        err = PTR_ERR(inode);
@@@ -1791,7 -1801,7 +1797,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
        err = PTR_ERR(inode);
        strcpy(de->name, "..");
        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
        inode->i_nlink = 2;
-       BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
-       ext4_journal_dirty_metadata(handle, dir_block);
+       BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
+       ext4_handle_dirty_metadata(handle, dir, dir_block);
        brelse(dir_block);
        ext4_mark_inode_dirty(handle, inode);
        err = ext4_add_entry(handle, dentry, inode);
@@@ -1850,7 -1860,7 +1856,7 @@@ out_stop
   */
  static int empty_dir(struct inode *inode)
  {
-       unsigned long offset;
+       unsigned int offset;
        struct buffer_head *bh;
        struct ext4_dir_entry_2 *de, *de1;
        struct super_block *sb;
                                if (err)
                                        ext4_error(sb, __func__,
                                                   "error %d reading directory"
-                                                  " #%lu offset %lu",
+                                                  " #%lu offset %u",
                                                   err, inode->i_ino, offset);
                                offset += sb->s_blocksize;
                                continue;
@@@ -1933,6 -1943,9 +1939,9 @@@ int ext4_orphan_add(handle_t *handle, s
        struct ext4_iloc iloc;
        int err = 0, rc;
  
+       if (!ext4_handle_valid(handle))
+               return 0;
        lock_super(sb);
        if (!list_empty(&EXT4_I(inode)->i_orphan))
                goto out_unlock;
        /* Insert this inode at the head of the on-disk orphan list... */
        NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
        EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
-       err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+       err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
        rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
        if (!err)
                err = rc;
@@@ -1995,10 -2008,13 +2004,13 @@@ int ext4_orphan_del(handle_t *handle, s
        struct list_head *prev;
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct ext4_sb_info *sbi;
-       unsigned long ino_next;
+       __u32 ino_next;
        struct ext4_iloc iloc;
        int err = 0;
  
+       if (!ext4_handle_valid(handle))
+               return 0;
        lock_super(inode->i_sb);
        if (list_empty(&ei->i_orphan)) {
                unlock_super(inode->i_sb);
         * transaction handle with which to update the orphan list on
         * disk, but we still need to remove the inode from the linked
         * list in memory. */
-       if (!handle)
+       if (sbi->s_journal && !handle)
                goto out;
  
        err = ext4_reserve_inode_write(handle, inode, &iloc);
                goto out_err;
  
        if (prev == &sbi->s_orphan) {
-               jbd_debug(4, "superblock will point to %lu\n", ino_next);
+               jbd_debug(4, "superblock will point to %u\n", ino_next);
                BUFFER_TRACE(sbi->s_sbh, "get_write_access");
                err = ext4_journal_get_write_access(handle, sbi->s_sbh);
                if (err)
                        goto out_brelse;
                sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
-               err = ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+               err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
        } else {
                struct ext4_iloc iloc2;
                struct inode *i_prev =
                        &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
  
-               jbd_debug(4, "orphan inode %lu will point to %lu\n",
+               jbd_debug(4, "orphan inode %lu will point to %u\n",
                          i_prev->i_ino, ino_next);
                err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
                if (err)
@@@ -2082,7 -2098,7 +2094,7 @@@ static int ext4_rmdir(struct inode *dir
                goto end_rmdir;
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = dentry->d_inode;
  
@@@ -2136,7 -2152,7 +2148,7 @@@ static int ext4_unlink(struct inode *di
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        retval = -ENOENT;
        bh = ext4_find_entry(dir, &dentry->d_name, &de);
@@@ -2193,7 -2209,7 +2205,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
        err = PTR_ERR(inode);
@@@ -2256,7 -2272,7 +2268,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode->i_ctime = ext4_current_time(inode);
        ext4_inc_count(handle, inode);
@@@ -2305,7 -2321,7 +2317,7 @@@ static int ext4_rename(struct inode *ol
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
        /*
                new_dir->i_ctime = new_dir->i_mtime =
                                        ext4_current_time(new_dir);
                ext4_mark_inode_dirty(handle, new_dir);
-               BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
-               ext4_journal_dirty_metadata(handle, new_bh);
+               BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
+               ext4_handle_dirty_metadata(handle, new_dir, new_bh);
                brelse(new_bh);
                new_bh = NULL;
        }
                BUFFER_TRACE(dir_bh, "get_write_access");
                ext4_journal_get_write_access(handle, dir_bh);
                PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
-               BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
-               ext4_journal_dirty_metadata(handle, dir_bh);
+               BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
+               ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
                ext4_dec_count(handle, old_dir);
                if (new_inode) {
                        /* checked empty_dir above, can't have another parent,
diff --combined fs/ext4/super.c
index 9494bb2493901b353ad2f3be6f93535600896624,acb69c00fd424462bfb93be640174aa9d67d2bf7..8f7e0be8ab1b6928be0c9d3d4ee64f5806e90d4a
@@@ -51,8 -51,6 +51,6 @@@ struct proc_dir_entry *ext4_proc_root
  
  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
- static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
-                              unsigned int);
  static void ext4_commit_super(struct super_block *sb,
                              struct ext4_super_block *es, int sync);
  static void ext4_mark_recovery_complete(struct super_block *sb,
@@@ -93,6 -91,38 +91,38 @@@ ext4_fsblk_t ext4_inode_table(struct su
                (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
  }
  
+ __u32 ext4_free_blks_count(struct super_block *sb,
+                             struct ext4_group_desc *bg)
+ {
+       return le16_to_cpu(bg->bg_free_blocks_count_lo) |
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+               (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
+ }
+ __u32 ext4_free_inodes_count(struct super_block *sb,
+                             struct ext4_group_desc *bg)
+ {
+       return le16_to_cpu(bg->bg_free_inodes_count_lo) |
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+               (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+ }
+ __u32 ext4_used_dirs_count(struct super_block *sb,
+                             struct ext4_group_desc *bg)
+ {
+       return le16_to_cpu(bg->bg_used_dirs_count_lo) |
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+               (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
+ }
+ __u32 ext4_itable_unused_count(struct super_block *sb,
+                             struct ext4_group_desc *bg)
+ {
+       return le16_to_cpu(bg->bg_itable_unused_lo) |
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+               (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
+ }
  void ext4_block_bitmap_set(struct super_block *sb,
                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
  {
@@@ -117,6 -147,38 +147,38 @@@ void ext4_inode_table_set(struct super_
                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
  }
  
+ void ext4_free_blks_set(struct super_block *sb,
+                         struct ext4_group_desc *bg, __u32 count)
+ {
+       bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
+       if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+               bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
+ }
+ void ext4_free_inodes_set(struct super_block *sb,
+                         struct ext4_group_desc *bg, __u32 count)
+ {
+       bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
+       if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+               bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
+ }
+ void ext4_used_dirs_set(struct super_block *sb,
+                         struct ext4_group_desc *bg, __u32 count)
+ {
+       bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
+       if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+               bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
+ }
+ void ext4_itable_unused_set(struct super_block *sb,
+                         struct ext4_group_desc *bg, __u32 count)
+ {
+       bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
+       if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+               bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
+ }
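
The new accessors above widen the per-group counters to 32 bits by keeping the low 16 bits in the original descriptor field and the high 16 bits in a field that is only meaningful when the descriptor size reaches the 64-bit minimum. Here is a minimal standalone sketch of the split-and-reassemble arithmetic; it deliberately leaves out the on-disk little-endian conversions (le16_to_cpu/cpu_to_le16) that the real helpers perform, and the struct is a toy, not the ext4 descriptor layout.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    struct toy_group_desc {
            uint16_t free_blocks_lo;        /* always present           */
            uint16_t free_blocks_hi;        /* only valid on big descs  */
    };

    static void set_free_blks(struct toy_group_desc *bg, int big_desc, uint32_t count)
    {
            bg->free_blocks_lo = (uint16_t)count;
            if (big_desc)
                    bg->free_blocks_hi = (uint16_t)(count >> 16);
    }

    static uint32_t get_free_blks(const struct toy_group_desc *bg, int big_desc)
    {
            return bg->free_blocks_lo |
                   (big_desc ? (uint32_t)bg->free_blocks_hi << 16 : 0);
    }

    int main(void)
    {
            struct toy_group_desc bg = { 0, 0 };

            set_free_blks(&bg, 1, 0x12345);
            assert(get_free_blks(&bg, 1) == 0x12345);

            /* With a small (32-byte) descriptor the hi half is ignored,
             * so counts silently truncate to 16 bits. */
            set_free_blks(&bg, 0, 0x12345);
            printf("small desc reads back %#x\n", get_free_blks(&bg, 0));
            return 0;
    }
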
  /*
   * Wrappers for jbd2_journal_start/end.
   *
@@@ -136,13 -198,19 +198,19 @@@ handle_t *ext4_journal_start_sb(struct 
         * backs (eg. EIO in the commit thread), then we still need to
         * take the FS itself readonly cleanly. */
        journal = EXT4_SB(sb)->s_journal;
-       if (is_journal_aborted(journal)) {
-               ext4_abort(sb, __func__,
-                          "Detected aborted journal");
-               return ERR_PTR(-EROFS);
+       if (journal) {
+               if (is_journal_aborted(journal)) {
+                       ext4_abort(sb, __func__,
+                                  "Detected aborted journal");
+                       return ERR_PTR(-EROFS);
+               }
+               return jbd2_journal_start(journal, nblocks);
        }
-       return jbd2_journal_start(journal, nblocks);
+       /*
+        * We're not journaling, return the appropriate indication.
+        */
+       current->journal_info = EXT4_NOJOURNAL_HANDLE;
+       return current->journal_info;
  }
  
  /*
@@@ -157,6 -225,14 +225,14 @@@ int __ext4_journal_stop(const char *whe
        int err;
        int rc;
  
+       if (!ext4_handle_valid(handle)) {
+               /*
+                * Do this here since we don't call jbd2_journal_stop() in
+                * no-journal mode.
+                */
+               current->journal_info = NULL;
+               return 0;
+       }
        sb = handle->h_transaction->t_journal->j_private;
        err = handle->h_err;
        rc = jbd2_journal_stop(handle);
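
With no journal present, ext4_journal_start_sb hands back the reserved EXT4_NOJOURNAL_HANDLE value through current->journal_info instead of a real jbd2 handle, and __ext4_journal_stop clears it again; ext4_handle_valid() (defined elsewhere in this series) is what tells the two cases apart. The sketch below is a standalone illustration of that sentinel-pointer pattern, using a thread-local slot as a stand-in for current->journal_info; the names and the validity test are illustrative, not the ext4 definitions.

    #include <stdio.h>

    struct handle;                               /* opaque journal handle     */

    /* Reserved non-NULL value meaning "running without a journal". */
    #define NOJOURNAL_HANDLE ((struct handle *)0x1)

    static _Thread_local struct handle *journal_info; /* ~ current->journal_info */

    static int handle_valid(struct handle *h)
    {
            return (unsigned long)h > 1;         /* neither NULL nor sentinel */
    }

    static struct handle *journal_start(struct handle *journal)
    {
            if (journal)
                    return journal;              /* real handle path          */
            journal_info = NOJOURNAL_HANDLE;     /* remember no-journal mode  */
            return journal_info;
    }

    static int journal_stop(struct handle *h)
    {
            if (!handle_valid(h)) {
                    journal_info = NULL;         /* nothing to commit         */
                    return 0;
            }
            /* real jbd2_journal_stop() would run here */
            return 0;
    }

    int main(void)
    {
            struct handle *h = journal_start(NULL);

            printf("valid=%d\n", handle_valid(h));   /* prints valid=0 */
            return journal_stop(h);
    }
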
@@@ -174,6 -250,8 +250,8 @@@ void ext4_journal_abort_handle(const ch
        char nbuf[16];
        const char *errstr = ext4_decode_error(NULL, err, nbuf);
  
+       BUG_ON(!ext4_handle_valid(handle));
        if (bh)
                BUFFER_TRACE(bh, "abort");
  
@@@ -350,6 -428,44 +428,44 @@@ void ext4_warning(struct super_block *s
        va_end(args);
  }
  
+ void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
+                               const char *function, const char *fmt, ...)
+ __releases(bitlock)
+ __acquires(bitlock)
+ {
+       va_list args;
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+       va_start(args, fmt);
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+       vprintk(fmt, args);
+       printk("\n");
+       va_end(args);
+       if (test_opt(sb, ERRORS_CONT)) {
+               EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+               es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+               ext4_commit_super(sb, es, 0);
+               return;
+       }
+       ext4_unlock_group(sb, grp);
+       ext4_handle_error(sb);
+       /*
+        * We only get here in the ERRORS_RO case; relocking the group
+        * may be dangerous, but nothing bad will happen since the
+        * filesystem will have already been marked read/only and the
+        * journal has been aborted.  We return 1 as a hint to callers
+        * who might want to use the return value from
+        * ext4_grp_locked_error() to distinguish between the
+        * ERRORS_CONT and ERRORS_RO case, and perhaps return more
+        * aggressively from the ext4 function in question, with a
+        * more appropriate error code.
+        */
+       ext4_lock_group(sb, grp);
+       return;
+ }
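
ext4_grp_locked_error is entered with the group's lock held, and in the errors=remount-ro case it has to drop that lock before escalating (the error path can block), then retake it so the caller's own unlock still balances. The following is a small pthread sketch of that drop-report-reacquire shape, under the assumption that the caller always holds the lock on entry; it uses an ordinary mutex where ext4 uses the group bit-lock.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;

    /* May sleep or do heavyweight work, so it must not run under group_lock. */
    static void handle_error(const char *msg)
    {
            fprintf(stderr, "filesystem error: %s\n", msg);
    }

    /* Called with group_lock held; returns with group_lock held again. */
    static void grp_locked_error(const char *msg)
    {
            pthread_mutex_unlock(&group_lock);   /* drop before blocking work */
            handle_error(msg);
            pthread_mutex_lock(&group_lock);     /* rebalance for the caller  */
    }

    int main(void)
    {
            pthread_mutex_lock(&group_lock);
            grp_locked_error("bitmap corrupted");
            pthread_mutex_unlock(&group_lock);   /* caller's unlock still valid */
            return 0;
    }
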
  void ext4_update_dynamic_rev(struct super_block *sb)
  {
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@@ -389,7 -505,7 +505,7 @@@ static struct block_device *ext4_blkdev
        return bdev;
  
  fail:
-       printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n",
+       printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
                        __bdevname(dev, b), PTR_ERR(bdev));
        return NULL;
  }
@@@ -448,11 -564,13 +564,13 @@@ static void ext4_put_super(struct super
        ext4_mb_release(sb);
        ext4_ext_release(sb);
        ext4_xattr_put_super(sb);
-       err = jbd2_journal_destroy(sbi->s_journal);
-       sbi->s_journal = NULL;
-       if (err < 0)
-               ext4_abort(sb, __func__, "Couldn't clean up the journal");
+       if (sbi->s_journal) {
+               err = jbd2_journal_destroy(sbi->s_journal);
+               sbi->s_journal = NULL;
+               if (err < 0)
+                       ext4_abort(sb, __func__,
+                                  "Couldn't clean up the journal");
+       }
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
@@@ -522,6 -640,11 +640,11 @@@ static struct inode *ext4_alloc_inode(s
        memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
        INIT_LIST_HEAD(&ei->i_prealloc_list);
        spin_lock_init(&ei->i_prealloc_lock);
+       /*
+        * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
+        * therefore it can be null here.  Don't check it, just initialize
+        * jinode.
+        */
        jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
        ei->i_reserved_data_blocks = 0;
        ei->i_reserved_meta_blocks = 0;
@@@ -588,7 -711,8 +711,8 @@@ static void ext4_clear_inode(struct ino
        }
  #endif
        ext4_discard_preallocations(inode);
-       jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
+       if (EXT4_JOURNAL(inode))
+               jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
                                       &EXT4_I(inode)->jinode);
  }
  
@@@ -681,10 -805,19 +805,19 @@@ static int ext4_show_options(struct seq
  #endif
        if (!test_opt(sb, RESERVATION))
                seq_puts(seq, ",noreservation");
-       if (sbi->s_commit_interval) {
+       if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
                seq_printf(seq, ",commit=%u",
                           (unsigned) (sbi->s_commit_interval / HZ));
        }
+       if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
+               seq_printf(seq, ",min_batch_time=%u",
+                          (unsigned) sbi->s_min_batch_time);
+       }
+       if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
+               seq_printf(seq, ",max_batch_time=%u",
+                          (unsigned) sbi->s_max_batch_time);
+       }
        /*
         * We're changing the default of barrier mount option, so
         * let's always display its mount state so it's clear what its
                seq_puts(seq, ",journal_async_commit");
        if (test_opt(sb, NOBH))
                seq_puts(seq, ",nobh");
-       if (!test_opt(sb, EXTENTS))
-               seq_puts(seq, ",noextents");
        if (test_opt(sb, I_VERSION))
                seq_puts(seq, ",i_version");
        if (!test_opt(sb, DELALLOC))
@@@ -772,6 -903,25 +903,25 @@@ static struct dentry *ext4_fh_to_parent
                                    ext4_nfs_get_inode);
  }
  
+ /*
+  * Try to release metadata pages (indirect blocks, directories) which are
+  * mapped via the block device.  Since these pages could have journal heads
+  * which would prevent try_to_free_buffers() from freeing them, we must use
+  * jbd2 layer's try_to_free_buffers() function to release them.
+  */
+ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
+ {
+       journal_t *journal = EXT4_SB(sb)->s_journal;
+       WARN_ON(PageChecked(page));
+       if (!page_has_buffers(page))
+               return 0;
+       if (journal)
+               return jbd2_journal_try_to_free_buffers(journal, page,
+                                                       wait & ~__GFP_WAIT);
+       return try_to_free_buffers(page);
+ }
  #ifdef CONFIG_QUOTA
  #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
  #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
@@@ -803,9 -953,7 +953,9 @@@ static struct dquot_operations ext4_quo
        .acquire_dquot  = ext4_acquire_dquot,
        .release_dquot  = ext4_release_dquot,
        .mark_dirty     = ext4_mark_dquot_dirty,
 -      .write_info     = ext4_write_info
 +      .write_info     = ext4_write_info,
 +      .alloc_dquot    = dquot_alloc,
 +      .destroy_dquot  = dquot_destroy,
  };
  
  static struct quotactl_ops ext4_qctl_operations = {
@@@ -838,6 -986,7 +988,7 @@@ static const struct super_operations ex
        .quota_read     = ext4_quota_read,
        .quota_write    = ext4_quota_write,
  #endif
+       .bdev_try_to_free_page = bdev_try_to_free_page,
  };
  
  static const struct export_operations ext4_export_ops = {
@@@ -852,16 -1001,17 +1003,17 @@@ enum 
        Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
        Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
-       Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
+       Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
+       Opt_journal_update, Opt_journal_dev,
        Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_data_err_abort, Opt_data_err_ignore,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
-       Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
+       Opt_grpquota, Opt_i_version,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc,
-       Opt_inode_readahead_blks
+       Opt_inode_readahead_blks, Opt_journal_ioprio
  };
  
  static const match_table_t tokens = {
        {Opt_nobh, "nobh"},
        {Opt_bh, "bh"},
        {Opt_commit, "commit=%u"},
+       {Opt_min_batch_time, "min_batch_time=%u"},
+       {Opt_max_batch_time, "max_batch_time=%u"},
        {Opt_journal_update, "journal=update"},
-       {Opt_journal_inum, "journal=%u"},
        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_journal_checksum, "journal_checksum"},
        {Opt_journal_async_commit, "journal_async_commit"},
        {Opt_quota, "quota"},
        {Opt_usrquota, "usrquota"},
        {Opt_barrier, "barrier=%u"},
-       {Opt_extents, "extents"},
-       {Opt_noextents, "noextents"},
        {Opt_i_version, "i_version"},
        {Opt_stripe, "stripe=%u"},
        {Opt_resize, "resize"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
+       {Opt_journal_ioprio, "journal_ioprio=%u"},
        {Opt_err, NULL},
  };
  
@@@ -945,8 -1095,11 +1097,11 @@@ static ext4_fsblk_t get_sb_block(void *
        return sb_block;
  }
  
+ #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
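
DEFAULT_JOURNAL_IOPRIO and the journal_ioprio=%u option parsed further down both build an I/O priority value by packing a scheduling class into the high bits and a 0..7 level into the low bits. The standalone sketch below mirrors my reading of include/linux/ioprio.h; treat the exact class values and shift as assumptions rather than a copy of the kernel header.

    #include <stdio.h>

    /* Local mirror of the ioprio encoding: class in the top bits,
     * priority level in the low bits. */
    enum { IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE };

    #define IOPRIO_CLASS_SHIFT 13
    #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | (data))
    #define IOPRIO_PRIO_CLASS(v) ((v) >> IOPRIO_CLASS_SHIFT)
    #define IOPRIO_PRIO_DATA(v)  ((v) & ((1 << IOPRIO_CLASS_SHIFT) - 1))

    int main(void)
    {
            int option = 3;                  /* what journal_ioprio=3 parses to */
            unsigned int prio;

            if (option < 0 || option > 7)    /* same range check as the patch  */
                    return 1;
            prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, option);
            printf("ioprio=%#x class=%u level=%u\n",
                   prio, IOPRIO_PRIO_CLASS(prio), IOPRIO_PRIO_DATA(prio));
            return 0;
    }
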
  static int parse_options(char *options, struct super_block *sb,
-                        unsigned int *inum, unsigned long *journal_devnum,
+                        unsigned long *journal_devnum,
+                        unsigned int *journal_ioprio,
                         ext4_fsblk_t *n_blocks_count, int is_remount)
  {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int qtype, qfmt;
        char *qname;
  #endif
-       ext4_fsblk_t last_block;
  
        if (!options)
                return 1;
                        }
                        set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
                        break;
-               case Opt_journal_inum:
-                       if (is_remount) {
-                               printk(KERN_ERR "EXT4-fs: cannot specify "
-                                      "journal on remount\n");
-                               return 0;
-                       }
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       *inum = option;
-                       break;
                case Opt_journal_dev:
                        if (is_remount) {
                                printk(KERN_ERR "EXT4-fs: cannot specify "
                                option = JBD2_DEFAULT_MAX_COMMIT_AGE;
                        sbi->s_commit_interval = HZ * option;
                        break;
+               case Opt_max_batch_time:
+                       if (match_int(&args[0], &option))
+                               return 0;
+                       if (option < 0)
+                               return 0;
+                       if (option == 0)
+                               option = EXT4_DEF_MAX_BATCH_TIME;
+                       sbi->s_max_batch_time = option;
+                       break;
+               case Opt_min_batch_time:
+                       if (match_int(&args[0], &option))
+                               return 0;
+                       if (option < 0)
+                               return 0;
+                       sbi->s_min_batch_time = option;
+                       break;
                case Opt_data_journal:
                        data_opt = EXT4_MOUNT_JOURNAL_DATA;
                        goto datacheck;
                case Opt_grpjquota:
                        qtype = GRPQUOTA;
  set_qf_name:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            !sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR
                                       "EXT4-fs: Cannot change journaled "
                case Opt_offgrpjquota:
                        qtype = GRPQUOTA;
  clear_qf_name:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR "EXT4-fs: Cannot change "
                                        "journaled quota options when "
                case Opt_jqfmt_vfsv0:
                        qfmt = QFMT_VFS_V0;
  set_qf_format:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            sbi->s_jquota_fmt != qfmt) {
                                printk(KERN_ERR "EXT4-fs: Cannot change "
                                        "journaled quota options when "
                        set_opt(sbi->s_mount_opt, GRPQUOTA);
                        break;
                case Opt_noquota:
 -                      if (sb_any_quota_enabled(sb)) {
 +                      if (sb_any_quota_loaded(sb)) {
                                printk(KERN_ERR "EXT4-fs: Cannot change quota "
                                        "options when quota turned on.\n");
                                return 0;
                case Opt_bh:
                        clear_opt(sbi->s_mount_opt, NOBH);
                        break;
-               case Opt_extents:
-                       if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
-                                       EXT4_FEATURE_INCOMPAT_EXTENTS)) {
-                               ext4_warning(sb, __func__,
-                                       "extents feature not enabled "
-                                       "on this filesystem, use tune2fs\n");
-                               return 0;
-                       }
-                       set_opt(sbi->s_mount_opt, EXTENTS);
-                       break;
-               case Opt_noextents:
-                       /*
-                        * When e2fsprogs support resizing an already existing
-                        * ext3 file system to greater than 2**32 we need to
-                        * add support to block allocator to handle growing
-                        * already existing block  mapped inode so that blocks
-                        * allocated for them fall within 2**32
-                        */
-                       last_block = ext4_blocks_count(sbi->s_es) - 1;
-                       if (last_block  > 0xffffffffULL) {
-                               printk(KERN_ERR "EXT4-fs: Filesystem too "
-                                               "large to mount with "
-                                               "-o noextents options\n");
-                               return 0;
-                       }
-                       clear_opt(sbi->s_mount_opt, EXTENTS);
-                       break;
                case Opt_i_version:
                        set_opt(sbi->s_mount_opt, I_VERSION);
                        sb->s_flags |= MS_I_VERSION;
                                return 0;
                        sbi->s_inode_readahead_blks = option;
                        break;
+               case Opt_journal_ioprio:
+                       if (match_int(&args[0], &option))
+                               return 0;
+                       if (option < 0 || option > 7)
+                               break;
+                       *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
+                                                           option);
+                       break;
                default:
                        printk(KERN_ERR
                               "EXT4-fs: Unrecognized mount option \"%s\" "
@@@ -1405,24 -1547,19 +1546,19 @@@ static int ext4_setup_super(struct supe
                printk(KERN_WARNING
                       "EXT4-fs warning: checktime reached, "
                       "running e2fsck is recommended\n");
- #if 0
-               /* @@@ We _will_ want to clear the valid bit if we find
-                * inconsistencies, to force a fsck at reboot.  But for
-                * a plain journaled filesystem we can keep it set as
-                * valid forever! :)
-                */
-       es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
- #endif
+       if (!sbi->s_journal)
+               es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
        le16_add_cpu(&es->s_mnt_count, 1);
        es->s_mtime = cpu_to_le32(get_seconds());
        ext4_update_dynamic_rev(sb);
-       EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+       if (sbi->s_journal)
+               EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
  
        ext4_commit_super(sb, es, 1);
        if (test_opt(sb, DEBUG))
-               printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, "
+               printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
                                "bpg=%lu, ipg=%lu, mo=%04lx]\n",
                        sb->s_blocksize,
                        sbi->s_groups_count,
                        EXT4_INODES_PER_GROUP(sb),
                        sbi->s_mount_opt);
  
-       printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
-              sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
-              "external", EXT4_SB(sb)->s_journal->j_devname);
+       if (EXT4_SB(sb)->s_journal) {
+               printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
+                      sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+                      "external", EXT4_SB(sb)->s_journal->j_devname);
+       } else {
+               printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
+       }
        return res;
  }
  
@@@ -1444,7 -1585,6 +1584,6 @@@ static int ext4_fill_flex_info(struct s
        ext4_group_t flex_group_count;
        ext4_group_t flex_group;
        int groups_per_flex = 0;
-       __u64 block_bitmap = 0;
        int i;
  
        if (!sbi->s_es->s_log_groups_per_flex) {
                                     sizeof(struct flex_groups), GFP_KERNEL);
        if (sbi->s_flex_groups == NULL) {
                printk(KERN_ERR "EXT4-fs: not enough memory for "
-                               "%lu flex groups\n", flex_group_count);
+                               "%u flex groups\n", flex_group_count);
                goto failed;
        }
  
-       gdp = ext4_get_group_desc(sb, 1, &bh);
-       block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
        for (i = 0; i < sbi->s_groups_count; i++) {
                gdp = ext4_get_group_desc(sb, i, &bh);
  
                flex_group = ext4_flex_group(sbi, i);
                sbi->s_flex_groups[flex_group].free_inodes +=
-                       le16_to_cpu(gdp->bg_free_inodes_count);
+                       ext4_free_inodes_count(sb, gdp);
                sbi->s_flex_groups[flex_group].free_blocks +=
-                       le16_to_cpu(gdp->bg_free_blocks_count);
+                       ext4_free_blks_count(sb, gdp);
        }
  
        return 1;
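
The loop above folds the per-group counters into coarser flex-group buckets; a flex group is simply 2^s_log_groups_per_flex adjacent block groups, so the bucket index is the group number shifted right by that log (which is what ext4_flex_group() computes). A standalone sketch of the aggregation with made-up group counts:

    #include <stdio.h>

    #define GROUPS              16
    #define LOG_GROUPS_PER_FLEX 2            /* 4 block groups per flex group */

    int main(void)
    {
            unsigned int free_inodes[GROUPS], free_blocks[GROUPS];
            unsigned int flex_inodes[GROUPS >> LOG_GROUPS_PER_FLEX] = { 0 };
            unsigned int flex_blocks[GROUPS >> LOG_GROUPS_PER_FLEX] = { 0 };
            unsigned int i;

            for (i = 0; i < GROUPS; i++) {   /* made-up per-group counters */
                    free_inodes[i] = 100 + i;
                    free_blocks[i] = 1000 + i;
            }

            for (i = 0; i < GROUPS; i++) {
                    unsigned int flex = i >> LOG_GROUPS_PER_FLEX;

                    flex_inodes[flex] += free_inodes[i];
                    flex_blocks[flex] += free_blocks[i];
            }

            for (i = 0; i < GROUPS >> LOG_GROUPS_PER_FLEX; i++)
                    printf("flex %u: %u free inodes, %u free blocks\n",
                           i, flex_inodes[i], flex_blocks[i]);
            return 0;
    }
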
@@@ -1551,14 -1688,14 +1687,14 @@@ static int ext4_check_descriptors(struc
                block_bitmap = ext4_block_bitmap(sb, gdp);
                if (block_bitmap < first_block || block_bitmap > last_block) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                              "Block bitmap for group %lu not in group "
+                              "Block bitmap for group %u not in group "
                               "(block %llu)!\n", i, block_bitmap);
                        return 0;
                }
                inode_bitmap = ext4_inode_bitmap(sb, gdp);
                if (inode_bitmap < first_block || inode_bitmap > last_block) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                              "Inode bitmap for group %lu not in group "
+                              "Inode bitmap for group %u not in group "
                               "(block %llu)!\n", i, inode_bitmap);
                        return 0;
                }
                if (inode_table < first_block ||
                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                              "Inode table for group %lu not in group "
+                              "Inode table for group %u not in group "
                               "(block %llu)!\n", i, inode_table);
                        return 0;
                }
                spin_lock(sb_bgl_lock(sbi, i));
                if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                              "Checksum for group %lu failed (%u!=%u)\n",
+                              "Checksum for group %u failed (%u!=%u)\n",
                               i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
                               gdp)), le16_to_cpu(gdp->bg_checksum));
                        if (!(sb->s_flags & MS_RDONLY)) {
@@@ -1865,19 -2002,20 +2001,20 @@@ static int ext4_fill_super(struct super
        ext4_fsblk_t sb_block = get_sb_block(&data);
        ext4_fsblk_t logical_sb_block;
        unsigned long offset = 0;
-       unsigned int journal_inum = 0;
        unsigned long journal_devnum = 0;
        unsigned long def_mount_opts;
        struct inode *root;
        char *cp;
+       const char *descr;
        int ret = -EINVAL;
        int blocksize;
-       int db_count;
-       int i;
+       unsigned int db_count;
+       unsigned int i;
        int needs_recovery, has_huge_files;
-       __le32 features;
+       int features;
        __u64 blocks_count;
        int err;
+       unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
  
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
  
        sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
        sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+       sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
+       sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
+       sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
  
        set_opt(sbi->s_mount_opt, RESERVATION);
        set_opt(sbi->s_mount_opt, BARRIER);
  
-       /*
-        * turn on extents feature by default in ext4 filesystem
-        * only if feature flag already set by mkfs or tune2fs.
-        * Use -o noextents to turn it off
-        */
-       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
-               set_opt(sbi->s_mount_opt, EXTENTS);
-       else
-               ext4_warning(sb, __func__,
-                       "extents feature not enabled on this filesystem, "
-                       "use tune2fs.\n");
        /*
         * enable delayed allocation by default
         * Use -o nodelalloc to turn it off
        set_opt(sbi->s_mount_opt, DELALLOC);
  
  
-       if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
-                          NULL, 0))
+       if (!parse_options((char *) data, sb, &journal_devnum,
+                          &journal_ioprio, NULL, 0))
                goto failed_mount;
  
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
        features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
        if (features) {
                printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
-                      "unsupported optional features (%x).\n",
-                      sb->s_id, le32_to_cpu(features));
+                      "unsupported optional features (%x).\n", sb->s_id,
+                       (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
+                       ~EXT4_FEATURE_INCOMPAT_SUPP));
                goto failed_mount;
        }
        features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
        if (!(sb->s_flags & MS_RDONLY) && features) {
                printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
-                      "unsupported optional features (%x).\n",
-                      sb->s_id, le32_to_cpu(features));
+                      "unsupported optional features (%x).\n", sb->s_id,
+                       (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
+                       ~EXT4_FEATURE_RO_COMPAT_SUPP));
                goto failed_mount;
        }
        has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
        for (i = 0; i < 4; i++)
                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
        sbi->s_def_hash_version = es->s_def_hash_version;
+       i = le32_to_cpu(es->s_flags);
+       if (i & EXT2_FLAGS_UNSIGNED_HASH)
+               sbi->s_hash_unsigned = 3;
+       else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+ #ifdef __CHAR_UNSIGNED__
+               es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+               sbi->s_hash_unsigned = 3;
+ #else
+               es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+ #endif
+               sb->s_dirt = 1;
+       }
  
        if (sbi->s_blocks_per_group > blocksize * 8) {
                printk(KERN_ERR
        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
                goto cantfind_ext4;
  
-       /* ensure blocks_count calculation below doesn't sign-extend */
-       if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) <
-           le32_to_cpu(es->s_first_data_block) + 1) {
-               printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, "
-                      "first data block %u, blocks per group %lu\n",
-                       ext4_blocks_count(es),
-                       le32_to_cpu(es->s_first_data_block),
-                       EXT4_BLOCKS_PER_GROUP(sb));
+         /*
+          * It makes no sense for the first data block to be beyond the end
+          * of the filesystem.
+          */
+         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
+                 printk(KERN_WARNING "EXT4-fs: bad geometry: first data "
+                      "block %u is beyond end of filesystem (%llu)\n",
+                      le32_to_cpu(es->s_first_data_block),
+                      ext4_blocks_count(es));
                goto failed_mount;
        }
        blocks_count = (ext4_blocks_count(es) -
                        le32_to_cpu(es->s_first_data_block) +
                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
+       if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
+               printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
+                      "(block count %llu, first data block %u, "
+                      "blocks per group %lu)\n", sbi->s_groups_count,
+                      ext4_blocks_count(es),
+                      le32_to_cpu(es->s_first_data_block),
+                      EXT4_BLOCKS_PER_GROUP(sb));
+               goto failed_mount;
+       }
        sbi->s_groups_count = blocks_count;
        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
                   EXT4_DESC_PER_BLOCK(sb);
                                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
                                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
                                ext4_commit_super(sb, es, 1);
-                               printk(KERN_CRIT
-                                      "EXT4-fs (device %s): mount failed\n",
-                                     sb->s_id);
                                goto failed_mount4;
                        }
                }
-       } else if (journal_inum) {
-               if (ext4_create_journal(sb, es, journal_inum))
-                       goto failed_mount3;
+       } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
+             EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
+               printk(KERN_ERR "EXT4-fs: required journal recovery "
+                      "suppressed and not mounted read-only\n");
+               goto failed_mount4;
        } else {
-               if (!silent)
-                       printk(KERN_ERR
-                              "ext4: No journal on filesystem on %s\n",
-                              sb->s_id);
-               goto failed_mount3;
+               clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+               set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+               sbi->s_journal = NULL;
+               needs_recovery = 0;
+               goto no_journal;
        }
  
        if (ext4_blocks_count(es) > 0xffffffffULL &&
            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
-               printk(KERN_ERR "ext4: Failed to set 64-bit journal feature\n");
+               printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
                goto failed_mount4;
        }
  
        default:
                break;
        }
+       set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
+ no_journal:
  
        if (test_opt(sb, NOBH)) {
                if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
        ext4_orphan_cleanup(sb, es);
        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
-       if (needs_recovery)
+       if (needs_recovery) {
                printk(KERN_INFO "EXT4-fs: recovery complete.\n");
-       ext4_mark_recovery_complete(sb, es);
-       printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
-              test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
-              test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
-              "writeback");
+               ext4_mark_recovery_complete(sb, es);
+       }
+       if (EXT4_SB(sb)->s_journal) {
+               if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+                       descr = " journalled data mode";
+               else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+                       descr = " ordered data mode";
+               else
+                       descr = " writeback data mode";
+       } else
+               descr = "out journal";
+       printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
+              sb->s_id, descr);
  
        lock_kernel();
        return 0;
@@@ -2437,8 -2601,11 +2600,11 @@@ cantfind_ext4
        goto failed_mount;
  
  failed_mount4:
-       jbd2_journal_destroy(sbi->s_journal);
-       sbi->s_journal = NULL;
+       printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
+       if (sbi->s_journal) {
+               jbd2_journal_destroy(sbi->s_journal);
+               sbi->s_journal = NULL;
+       }
  failed_mount3:
        percpu_counter_destroy(&sbi->s_freeblocks_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@@ -2475,11 -2642,9 +2641,9 @@@ static void ext4_init_journal_params(st
  {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
  
-       if (sbi->s_commit_interval)
-               journal->j_commit_interval = sbi->s_commit_interval;
-       /* We could also set up an ext4-specific default for the commit
-        * interval here, but for now we'll just fall back to the jbd
-        * default. */
+       journal->j_commit_interval = sbi->s_commit_interval;
+       journal->j_min_batch_time = sbi->s_min_batch_time;
+       journal->j_max_batch_time = sbi->s_max_batch_time;
  
        spin_lock(&journal->j_state_lock);
        if (test_opt(sb, BARRIER))
@@@ -2499,6 -2664,8 +2663,8 @@@ static journal_t *ext4_get_journal(stru
        struct inode *journal_inode;
        journal_t *journal;
  
+       BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
        /* First, test for the existence of a valid inode on disk.  Bad
         * things happen if we iget() an unused inode, as the subsequent
         * iput() will try to delete it. */
@@@ -2547,13 -2714,15 +2713,15 @@@ static journal_t *ext4_get_dev_journal(
        struct ext4_super_block *es;
        struct block_device *bdev;
  
+       BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
        bdev = ext4_blkdev_get(j_dev);
        if (bdev == NULL)
                return NULL;
  
        if (bd_claim(bdev, sb)) {
                printk(KERN_ERR
-                       "EXT4: failed to claim external journal device.\n");
+                       "EXT4-fs: failed to claim external journal device.\n");
                blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
                return NULL;
        }
@@@ -2634,6 -2803,8 +2802,8 @@@ static int ext4_load_journal(struct sup
        int err = 0;
        int really_read_only;
  
+       BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
        if (journal_devnum &&
            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
                printk(KERN_INFO "EXT4-fs: external journal device major/minor "
        return 0;
  }
  
- static int ext4_create_journal(struct super_block *sb,
-                              struct ext4_super_block *es,
-                              unsigned int journal_inum)
- {
-       journal_t *journal;
-       int err;
-       if (sb->s_flags & MS_RDONLY) {
-               printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to "
-                               "create journal.\n");
-               return -EROFS;
-       }
-       journal = ext4_get_journal(sb, journal_inum);
-       if (!journal)
-               return -EINVAL;
-       printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n",
-              journal_inum);
-       err = jbd2_journal_create(journal);
-       if (err) {
-               printk(KERN_ERR "EXT4-fs: error creating journal.\n");
-               jbd2_journal_destroy(journal);
-               return -EIO;
-       }
-       EXT4_SB(sb)->s_journal = journal;
-       ext4_update_dynamic_rev(sb);
-       EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-       EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL);
-       es->s_journal_inum = cpu_to_le32(journal_inum);
-       sb->s_dirt = 1;
-       /* Make sure we flush the recovery flag to disk. */
-       ext4_commit_super(sb, es, 1);
-       return 0;
- }
  static void ext4_commit_super(struct super_block *sb,
                              struct ext4_super_block *es, int sync)
  {
                 * be remapped.  Nothing we can do but to retry the
                 * write and hope for the best.
                 */
-               printk(KERN_ERR "ext4: previous I/O error to "
+               printk(KERN_ERR "EXT4-fs: previous I/O error to "
                       "superblock detected for %s.\n", sb->s_id);
                clear_buffer_write_io_error(sbh);
                set_buffer_uptodate(sbh);
        }
        es->s_wtime = cpu_to_le32(get_seconds());
-       ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
-       es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
+       ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+                                       &EXT4_SB(sb)->s_freeblocks_counter));
+       es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
+                                       &EXT4_SB(sb)->s_freeinodes_counter));
        BUFFER_TRACE(sbh, "marking dirty");
        mark_buffer_dirty(sbh);
        if (sync) {
                sync_dirty_buffer(sbh);
                if (buffer_write_io_error(sbh)) {
-                       printk(KERN_ERR "ext4: I/O error while writing "
+                       printk(KERN_ERR "EXT4-fs: I/O error while writing "
                               "superblock for %s.\n", sb->s_id);
                        clear_buffer_write_io_error(sbh);
                        set_buffer_uptodate(sbh);
@@@ -2808,6 -2940,10 +2939,10 @@@ static void ext4_mark_recovery_complete
  {
        journal_t *journal = EXT4_SB(sb)->s_journal;
  
+       if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
+               BUG_ON(journal != NULL);
+               return;
+       }
        jbd2_journal_lock_updates(journal);
        if (jbd2_journal_flush(journal) < 0)
                goto out;
@@@ -2837,6 -2973,8 +2972,8 @@@ static void ext4_clear_journal_err(stru
        int j_errno;
        const char *errstr;
  
+       BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
        journal = EXT4_SB(sb)->s_journal;
  
        /*
  int ext4_force_commit(struct super_block *sb)
  {
        journal_t *journal;
-       int ret;
+       int ret = 0;
  
        if (sb->s_flags & MS_RDONLY)
                return 0;
  
        journal = EXT4_SB(sb)->s_journal;
-       sb->s_dirt = 0;
-       ret = ext4_journal_force_commit(journal);
+       if (journal) {
+               sb->s_dirt = 0;
+               ret = ext4_journal_force_commit(journal);
+       }
        return ret;
  }
  
   */
  static void ext4_write_super(struct super_block *sb)
  {
-       if (mutex_trylock(&sb->s_lock) != 0)
-               BUG();
-       sb->s_dirt = 0;
+       if (EXT4_SB(sb)->s_journal) {
+               if (mutex_trylock(&sb->s_lock) != 0)
+                       BUG();
+               sb->s_dirt = 0;
+       } else {
+               ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+       }
  }
  
  static int ext4_sync_fs(struct super_block *sb, int wait)
  
        trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
        sb->s_dirt = 0;
-       if (wait)
-               ret = ext4_force_commit(sb);
-       else
-               jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+       if (EXT4_SB(sb)->s_journal) {
+               if (wait)
+                       ret = ext4_force_commit(sb);
+               else
+                       jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+       } else {
+               ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
+       }
        return ret;
  }
  
@@@ -2917,15 -3066,17 +3065,17 @@@ static void ext4_write_super_lockfs(str
        if (!(sb->s_flags & MS_RDONLY)) {
                journal_t *journal = EXT4_SB(sb)->s_journal;
  
-               /* Now we set up the journal barrier. */
-               jbd2_journal_lock_updates(journal);
+               if (journal) {
+                       /* Now we set up the journal barrier. */
+                       jbd2_journal_lock_updates(journal);
  
-               /*
-                * We don't want to clear needs_recovery flag when we failed
-                * to flush the journal.
-                */
-               if (jbd2_journal_flush(journal) < 0)
-                       return;
+                       /*
+                        * We don't want to clear needs_recovery flag when we
+                        * failed to flush the journal.
+                        */
+                       if (jbd2_journal_flush(journal) < 0)
+                               return;
+               }
  
                /* Journal blocked and flushed, clear needs_recovery flag. */
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
   */
  static void ext4_unlockfs(struct super_block *sb)
  {
-       if (!(sb->s_flags & MS_RDONLY)) {
+       if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
                lock_super(sb);
                /* Reset the needs_recovery flag before the fs is unlocked. */
                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@@ -2957,6 -3108,7 +3107,7 @@@ static int ext4_remount(struct super_bl
        unsigned long old_sb_flags;
        struct ext4_mount_options old_opts;
        ext4_group_t g;
+       unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
        int err;
  #ifdef CONFIG_QUOTA
        int i;
        old_opts.s_resuid = sbi->s_resuid;
        old_opts.s_resgid = sbi->s_resgid;
        old_opts.s_commit_interval = sbi->s_commit_interval;
+       old_opts.s_min_batch_time = sbi->s_min_batch_time;
+       old_opts.s_max_batch_time = sbi->s_max_batch_time;
  #ifdef CONFIG_QUOTA
        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
        for (i = 0; i < MAXQUOTAS; i++)
                old_opts.s_qf_names[i] = sbi->s_qf_names[i];
  #endif
+       if (sbi->s_journal && sbi->s_journal->j_task->io_context)
+               journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
  
        /*
         * Allow the "check" option to be passed as a remount option.
         */
-       if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
+       if (!parse_options(data, sb, NULL, &journal_ioprio,
+                          &n_blocks_count, 1)) {
                err = -EINVAL;
                goto restore_opts;
        }
  
        es = sbi->s_es;
  
-       ext4_init_journal_params(sb, sbi->s_journal);
+       if (sbi->s_journal) {
+               ext4_init_journal_params(sb, sbi->s_journal);
+               set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
+       }
  
        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
                n_blocks_count > ext4_blocks_count(es)) {
                         * We have to unlock super so that we can wait for
                         * transactions.
                         */
-                       unlock_super(sb);
-                       ext4_mark_recovery_complete(sb, es);
-                       lock_super(sb);
+                       if (sbi->s_journal) {
+                               unlock_super(sb);
+                               ext4_mark_recovery_complete(sb, es);
+                               lock_super(sb);
+                       }
                } else {
-                       __le32 ret;
+                       int ret;
                        if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                        ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
                                printk(KERN_WARNING "EXT4-fs: %s: couldn't "
                                       "remount RDWR because of unsupported "
-                                      "optional features (%x).\n",
-                                      sb->s_id, le32_to_cpu(ret));
+                                      "optional features (%x).\n", sb->s_id,
+                               (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
+                                       ~EXT4_FEATURE_RO_COMPAT_SUPP));
                                err = -EROFS;
                                goto restore_opts;
                        }
                                if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
                                        printk(KERN_ERR
               "EXT4-fs: ext4_remount: "
-               "Checksum for group %lu failed (%u!=%u)\n",
+               "Checksum for group %u failed (%u!=%u)\n",
                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
                                               le16_to_cpu(gdp->bg_checksum));
                                        err = -EINVAL;
                         * been changed by e2fsck since we originally mounted
                         * the partition.)
                         */
-                       ext4_clear_journal_err(sb, es);
+                       if (sbi->s_journal)
+                               ext4_clear_journal_err(sb, es);
                        sbi->s_mount_state = le16_to_cpu(es->s_state);
                        if ((err = ext4_group_extend(sb, es, n_blocks_count)))
                                goto restore_opts;
                                sb->s_flags &= ~MS_RDONLY;
                }
        }
+       if (sbi->s_journal == NULL)
+               ext4_commit_super(sb, es, 1);
  #ifdef CONFIG_QUOTA
        /* Release old quota file names */
        for (i = 0; i < MAXQUOTAS; i++)
@@@ -3097,6 -3264,8 +3263,8 @@@ restore_opts
        sbi->s_resuid = old_opts.s_resuid;
        sbi->s_resgid = old_opts.s_resgid;
        sbi->s_commit_interval = old_opts.s_commit_interval;
+       sbi->s_min_batch_time = old_opts.s_min_batch_time;
+       sbi->s_max_batch_time = old_opts.s_max_batch_time;
  #ifdef CONFIG_QUOTA
        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
        for (i = 0; i < MAXQUOTAS; i++) {
@@@ -3359,7 -3528,8 +3527,8 @@@ static int ext4_quota_on(struct super_b
         * When we journal data on quota file, we have to flush journal to see
         * all updates to the file when we bypass pagecache...
         */
-       if (ext4_should_journal_data(path.dentry->d_inode)) {
+       if (EXT4_SB(sb)->s_journal &&
+           ext4_should_journal_data(path.dentry->d_inode)) {
                /*
                 * We don't need to lock updates but journal_flush() could
                 * otherwise be livelocked...
@@@ -3433,7 -3603,7 +3602,7 @@@ static ssize_t ext4_quota_write(struct 
        struct buffer_head *bh;
        handle_t *handle = journal_current_handle();
  
-       if (!handle) {
+       if (EXT4_SB(sb)->s_journal && !handle) {
                printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
                        " cancelled because transaction is not started.\n",
                        (unsigned long long)off, (unsigned long long)len);
                flush_dcache_page(bh->b_page);
                unlock_buffer(bh);
                if (journal_quota)
-                       err = ext4_journal_dirty_metadata(handle, bh);
+                       err = ext4_handle_dirty_metadata(handle, NULL, bh);
                else {
                        /* Always do at least ordered writes for quotas */
                        err = ext4_jbd2_file_inode(handle, inode);
@@@ -3512,18 -3682,15 +3681,15 @@@ static int ext4_ui_proc_open(struct ino
  static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
                               size_t cnt, loff_t *ppos)
  {
-       unsigned int *p = PDE(file->f_path.dentry->d_inode)->data;
+       unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
        char str[32];
-       unsigned long value;
  
        if (cnt >= sizeof(str))
                return -EINVAL;
        if (copy_from_user(str, buf, cnt))
                return -EFAULT;
-       value = simple_strtol(str, NULL, 0);
-       if (value < 0)
-               return -ERANGE;
-       *p = value;
+       *p = simple_strtoul(str, NULL, 0);
        return cnt;
  }
  
@@@ -3614,7 -3781,7 +3780,7 @@@ static void __exit exit_ext4_fs(void
  }
  
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
- MODULE_DESCRIPTION("Fourth Extended Filesystem with extents");
+ MODULE_DESCRIPTION("Fourth Extended Filesystem");
  MODULE_LICENSE("GPL");
  module_init(init_ext4_fs)
  module_exit(exit_ext4_fs)
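
The ext4 mount-path hunks above add two geometry checks: the first data block must lie inside the filesystem, and the derived group count must still fit in 32 bits once a descriptor block's worth of headroom is subtracted. The standalone C sketch below mirrors only that arithmetic; check_geometry() and the sample numbers are invented for illustration and are not ext4 code.

#include <stdint.h>
#include <stdio.h>

/* Userspace mirror of the mount-time geometry checks shown above. */
static int check_geometry(uint64_t blocks_count, uint32_t first_data_block,
                          uint32_t blocks_per_group, uint32_t desc_per_block)
{
        if (first_data_block >= blocks_count)
                return -1;      /* first data block beyond end of filesystem */

        uint64_t groups = (blocks_count - first_data_block +
                           blocks_per_group - 1) / blocks_per_group;
        if (groups > ((uint64_t)1 << 32) - desc_per_block)
                return -1;      /* group count would not fit in 32 bits */

        return 0;
}

int main(void)
{
        /* e.g. 2^30 blocks, first data block 1, 32768 blocks/group, 128 desc/block */
        printf("geometry %s\n",
               check_geometry((uint64_t)1 << 30, 1, 32768, 128) ? "bad" : "ok");
        return 0;
}
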
diff --combined fs/jbd2/commit.c
index c8a1bace685a83b1b6944de6e7e5ad13faa4f61b,073124a29b8c4cc869aeaf106fef38b23bb5ca10..62804e57a44caf2f893d16cc481f7ac347efe19a
@@@ -25,6 -25,7 +25,7 @@@
  #include <linux/crc32.h>
  #include <linux/writeback.h>
  #include <linux/backing-dev.h>
+ #include <linux/bio.h>
  
  /*
   * Default IO end handler for temporary BJ_IO buffer_heads.
@@@ -137,7 -138,7 +138,7 @@@ static int journal_submit_commit_record
                set_buffer_ordered(bh);
                barrier_done = 1;
        }
-       ret = submit_bh(WRITE, bh);
+       ret = submit_bh(WRITE_SYNC, bh);
        if (barrier_done)
                clear_buffer_ordered(bh);
  
                lock_buffer(bh);
                set_buffer_uptodate(bh);
                clear_buffer_dirty(bh);
-               ret = submit_bh(WRITE, bh);
+               ret = submit_bh(WRITE_SYNC, bh);
        }
        *cbh = bh;
        return ret;
   * This function along with journal_submit_commit_record
   * allows to write the commit record asynchronously.
   */
- static int journal_wait_on_commit_record(struct buffer_head *bh)
+ static int journal_wait_on_commit_record(journal_t *journal,
+                                        struct buffer_head *bh)
  {
        int ret = 0;
  
+ retry:
        clear_buffer_dirty(bh);
        wait_on_buffer(bh);
+       if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
+               printk(KERN_WARNING
+                      "JBD2: wait_on_commit_record: sync failed on %s - "
+                      "disabling barriers\n", journal->j_devname);
+               spin_lock(&journal->j_state_lock);
+               journal->j_flags &= ~JBD2_BARRIER;
+               spin_unlock(&journal->j_state_lock);
+               lock_buffer(bh);
+               clear_buffer_dirty(bh);
+               set_buffer_uptodate(bh);
+               bh->b_end_io = journal_end_buffer_io_sync;
+               ret = submit_bh(WRITE_SYNC, bh);
+               if (ret) {
+                       unlock_buffer(bh);
+                       return ret;
+               }
+               goto retry;
+       }
  
        if (unlikely(!buffer_uptodate(bh)))
                ret = -EIO;
@@@ -332,13 -355,15 +355,15 @@@ void jbd2_journal_commit_transaction(jo
        int flags;
        int err;
        unsigned long long blocknr;
+       ktime_t start_time;
+       u64 commit_time;
        char *tagp = NULL;
        journal_header_t *header;
        journal_block_tag_t *tag = NULL;
        int space_left = 0;
        int first_tag = 0;
        int tag_flag;
-       int i;
+       int i, to_free = 0;
        int tag_bytes = journal_tag_bytes(journal);
        struct buffer_head *cbh = NULL; /* For transactional checksums */
        __u32 crc32_sum = ~0;
        commit_transaction->t_state = T_FLUSH;
        journal->j_committing_transaction = commit_transaction;
        journal->j_running_transaction = NULL;
+       start_time = ktime_get();
        commit_transaction->t_log_start = journal->j_head;
        wake_up(&journal->j_wait_transaction_locked);
        spin_unlock(&journal->j_state_lock);
                if (is_journal_aborted(journal)) {
                        clear_buffer_jbddirty(jh2bh(jh));
                        JBUFFER_TRACE(jh, "journal is aborting: refile");
 +                      jbd2_buffer_abort_trigger(jh,
 +                                                jh->b_frozen_data ?
 +                                                jh->b_frozen_triggers :
 +                                                jh->b_triggers);
                        jbd2_journal_refile_buffer(journal, jh);
                        /* If that was the last one, we need to clean up
                         * any descriptor buffers which may have been
@@@ -803,7 -825,7 +829,7 @@@ wait_for_iobuf
                        __jbd2_journal_abort_hard(journal);
        }
        if (!err && !is_journal_aborted(journal))
-               err = journal_wait_on_commit_record(cbh);
+               err = journal_wait_on_commit_record(journal, cbh);
  
        if (err)
                jbd2_journal_abort(journal, err);
@@@ -848,9 -870,6 +874,9 @@@ restart_loop
                 * data.
                 *
                 * Otherwise, we can just throw away the frozen data now.
 +               *
 +               * We also know that the frozen data has already fired
 +               * its triggers if they exist, so we can clear that too.
                 */
                if (jh->b_committed_data) {
                        jbd2_free(jh->b_committed_data, bh->b_size);
                        if (jh->b_frozen_data) {
                                jh->b_committed_data = jh->b_frozen_data;
                                jh->b_frozen_data = NULL;
 +                              jh->b_frozen_triggers = NULL;
                        }
                } else if (jh->b_frozen_data) {
                        jbd2_free(jh->b_frozen_data, bh->b_size);
                        jh->b_frozen_data = NULL;
 +                      jh->b_frozen_triggers = NULL;
                }
  
                spin_lock(&journal->j_list_lock);
        J_ASSERT(commit_transaction == journal->j_committing_transaction);
        journal->j_commit_sequence = commit_transaction->t_tid;
        journal->j_committing_transaction = NULL;
-       spin_unlock(&journal->j_state_lock);
+       commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
  
-       if (journal->j_commit_callback)
-               journal->j_commit_callback(journal, commit_transaction);
+       /*
+        * weight the existing average more heavily than the latest commit
+        * time so we don't react too strongly to vast changes in the
+        * commit time
+        */
+       if (likely(journal->j_average_commit_time))
+               journal->j_average_commit_time = (commit_time +
+                               journal->j_average_commit_time*3) / 4;
+       else
+               journal->j_average_commit_time = commit_time;
+       spin_unlock(&journal->j_state_lock);
  
        if (commit_transaction->t_checkpoint_list == NULL &&
            commit_transaction->t_checkpoint_io_list == NULL) {
                __jbd2_journal_drop_transaction(journal, commit_transaction);
+               to_free = 1;
        } else {
                if (journal->j_checkpoint_transactions == NULL) {
                        journal->j_checkpoint_transactions = commit_transaction;
        }
        spin_unlock(&journal->j_list_lock);
  
+       if (journal->j_commit_callback)
+               journal->j_commit_callback(journal, commit_transaction);
        trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
-                  journal->j_devname, journal->j_commit_sequence,
+                  journal->j_devname, commit_transaction->t_tid,
                   journal->j_tail_sequence);
        jbd_debug(1, "JBD: commit %d complete, head %d\n",
                  journal->j_commit_sequence, journal->j_tail_sequence);
+       if (to_free)
+               kfree(commit_transaction);
  
        wake_up(&journal->j_wait_done_commit);
  }
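
The j_average_commit_time bookkeeping added to jbd2_journal_commit_transaction() above is a 3:1 weighted moving average: existing history counts three times, the newest commit once. A small userspace sketch (update_average() is an invented name, not a jbd2 function) shows how a single slow commit only nudges the estimate instead of replacing it:

#include <stdint.h>
#include <stdio.h>

/* Same weighting as above: history counts three times, the new sample once. */
static uint64_t update_average(uint64_t average_ns, uint64_t commit_ns)
{
        if (average_ns == 0)
                return commit_ns;       /* first commit seeds the average */
        return (commit_ns + average_ns * 3) / 4;
}

int main(void)
{
        uint64_t samples[] = { 2000000, 2100000, 50000000, 2200000 };
        uint64_t avg = 0;

        for (int i = 0; i < 4; i++) {
                avg = update_average(avg, samples[i]);
                printf("commit %8llu ns -> average %8llu ns\n",
                       (unsigned long long)samples[i],
                       (unsigned long long)avg);
        }
        return 0;
}
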
diff --combined fs/jbd2/journal.c
index f6bff9d6f8df8193e6fc13f1a2b1763124d29cb6,2932c8f55199768c3887663cd264e1a6e5aabf99..56675306ed817eacb654a3f245a14d81cc4d6971
@@@ -40,6 -40,7 +40,7 @@@
  
  #include <asm/uaccess.h>
  #include <asm/page.h>
+ #include <linux/math64.h>
  
  EXPORT_SYMBOL(jbd2_journal_start);
  EXPORT_SYMBOL(jbd2_journal_restart);
@@@ -50,7 -51,6 +51,7 @@@ EXPORT_SYMBOL(jbd2_journal_unlock_updat
  EXPORT_SYMBOL(jbd2_journal_get_write_access);
  EXPORT_SYMBOL(jbd2_journal_get_create_access);
  EXPORT_SYMBOL(jbd2_journal_get_undo_access);
 +EXPORT_SYMBOL(jbd2_journal_set_triggers);
  EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
  EXPORT_SYMBOL(jbd2_journal_release_buffer);
  EXPORT_SYMBOL(jbd2_journal_forget);
@@@ -66,7 -66,6 +67,6 @@@ EXPORT_SYMBOL(jbd2_journal_update_forma
  EXPORT_SYMBOL(jbd2_journal_check_used_features);
  EXPORT_SYMBOL(jbd2_journal_check_available_features);
  EXPORT_SYMBOL(jbd2_journal_set_features);
- EXPORT_SYMBOL(jbd2_journal_create);
  EXPORT_SYMBOL(jbd2_journal_load);
  EXPORT_SYMBOL(jbd2_journal_destroy);
  EXPORT_SYMBOL(jbd2_journal_abort);
@@@ -132,8 -131,9 +132,9 @@@ static int kjournald2(void *arg
        journal->j_task = current;
        wake_up(&journal->j_wait_done_commit);
  
-       printk(KERN_INFO "kjournald2 starting.  Commit interval %ld seconds\n",
-                       journal->j_commit_interval / HZ);
+       printk(KERN_INFO "kjournald2 starting: pid %d, dev %s, "
+              "commit interval %ld seconds\n", current->pid,
+              journal->j_devname, journal->j_commit_interval / HZ);
  
        /*
         * And now, wait forever for commit wakeup events.
@@@ -291,7 -291,6 +292,7 @@@ int jbd2_journal_write_metadata_buffer(
        struct page *new_page;
        unsigned int new_offset;
        struct buffer_head *bh_in = jh2bh(jh_in);
 +      struct jbd2_buffer_trigger_type *triggers;
  
        /*
         * The buffer really shouldn't be locked: only the current committing
@@@ -316,22 -315,12 +317,22 @@@ repeat
                done_copy_out = 1;
                new_page = virt_to_page(jh_in->b_frozen_data);
                new_offset = offset_in_page(jh_in->b_frozen_data);
 +              triggers = jh_in->b_frozen_triggers;
        } else {
                new_page = jh2bh(jh_in)->b_page;
                new_offset = offset_in_page(jh2bh(jh_in)->b_data);
 +              triggers = jh_in->b_triggers;
        }
  
        mapped_data = kmap_atomic(new_page, KM_USER0);
 +      /*
 +       * Fire any commit trigger.  Do this before checking for escaping,
 +       * as the trigger may modify the magic offset.  If a copy-out
 +       * happens afterwards, it will have the correct data in the buffer.
 +       */
 +      jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
 +                                 triggers);
 +
        /*
         * Check for escaping
         */
                new_page = virt_to_page(tmp);
                new_offset = offset_in_page(tmp);
                done_copy_out = 1;
 +
 +              /*
 +               * This isn't strictly necessary, as we're using frozen
 +               * data for the escaping, but it keeps consistency with
 +               * b_frozen_data usage.
 +               */
 +              jh_in->b_frozen_triggers = jh_in->b_triggers;
        }
  
        /*
@@@ -650,6 -632,8 +651,8 @@@ struct journal_head *jbd2_journal_get_d
                return NULL;
  
        bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+       if (!bh)
+               return NULL;
        lock_buffer(bh);
        memset(bh->b_data, 0, journal->j_blocksize);
        set_buffer_uptodate(bh);
@@@ -843,6 -827,8 +846,8 @@@ static int jbd2_seq_info_show(struct se
            jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid));
        seq_printf(seq, "  %ums logging transaction\n",
            jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid));
+       seq_printf(seq, "  %lluus average transaction commit time\n",
+           (unsigned long long)div_u64(s->journal->j_average_commit_time, 1000));
        seq_printf(seq, "  %lu handles per transaction\n",
            s->stats->u.run.rs_handle_count / s->stats->ts_tid);
        seq_printf(seq, "  %lu blocks per transaction\n",
@@@ -980,6 -966,8 +985,8 @@@ static journal_t * journal_init_common 
        spin_lock_init(&journal->j_state_lock);
  
        journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
+       journal->j_min_batch_time = 0;
+       journal->j_max_batch_time = 15000; /* 15ms */
  
        /* The journal is marked for error until we succeed with recovery! */
        journal->j_flags = JBD2_ABORT;
@@@ -1035,15 -1023,14 +1042,14 @@@ journal_t * jbd2_journal_init_dev(struc
  
        /* journal descriptor can store up to n blocks -bzzz */
        journal->j_blocksize = blocksize;
+       jbd2_stats_proc_init(journal);
        n = journal->j_blocksize / sizeof(journal_block_tag_t);
        journal->j_wbufsize = n;
        journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
        if (!journal->j_wbuf) {
                printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
                        __func__);
-               kfree(journal);
-               journal = NULL;
-               goto out;
+               goto out_err;
        }
        journal->j_dev = bdev;
        journal->j_fs_dev = fs_dev;
        p = journal->j_devname;
        while ((p = strchr(p, '/')))
                *p = '!';
-       jbd2_stats_proc_init(journal);
  
        bh = __getblk(journal->j_dev, start, journal->j_blocksize);
-       J_ASSERT(bh != NULL);
+       if (!bh) {
+               printk(KERN_ERR
+                      "%s: Cannot get buffer for journal superblock\n",
+                      __func__);
+               goto out_err;
+       }
        journal->j_sb_buffer = bh;
        journal->j_superblock = (journal_superblock_t *)bh->b_data;
- out:
        return journal;
+ out_err:
+       jbd2_stats_proc_exit(journal);
+       kfree(journal);
+       return NULL;
  }
  
  /**
@@@ -1108,9 -1103,7 +1122,7 @@@ journal_t * jbd2_journal_init_inode (st
        if (!journal->j_wbuf) {
                printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
                        __func__);
-               jbd2_stats_proc_exit(journal);
-               kfree(journal);
-               return NULL;
+               goto out_err;
        }
  
        err = jbd2_journal_bmap(journal, 0, &blocknr);
        if (err) {
                printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
                       __func__);
-               jbd2_stats_proc_exit(journal);
-               kfree(journal);
-               return NULL;
+               goto out_err;
        }
  
        bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
-       J_ASSERT(bh != NULL);
+       if (!bh) {
+               printk(KERN_ERR
+                      "%s: Cannot get buffer for journal superblock\n",
+                      __func__);
+               goto out_err;
+       }
        journal->j_sb_buffer = bh;
        journal->j_superblock = (journal_superblock_t *)bh->b_data;
  
        return journal;
+ out_err:
+       jbd2_stats_proc_exit(journal);
+       kfree(journal);
+       return NULL;
  }
  
  /*
@@@ -1176,77 -1176,6 +1195,6 @@@ static int journal_reset(journal_t *jou
        return jbd2_journal_start_thread(journal);
  }
  
- /**
-  * int jbd2_journal_create() - Initialise the new journal file
-  * @journal: Journal to create. This structure must have been initialised
-  *
-  * Given a journal_t structure which tells us which disk blocks we can
-  * use, create a new journal superblock and initialise all of the
-  * journal fields from scratch.
-  **/
- int jbd2_journal_create(journal_t *journal)
- {
-       unsigned long long blocknr;
-       struct buffer_head *bh;
-       journal_superblock_t *sb;
-       int i, err;
-       if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) {
-               printk (KERN_ERR "Journal length (%d blocks) too short.\n",
-                       journal->j_maxlen);
-               journal_fail_superblock(journal);
-               return -EINVAL;
-       }
-       if (journal->j_inode == NULL) {
-               /*
-                * We don't know what block to start at!
-                */
-               printk(KERN_EMERG
-                      "%s: creation of journal on external device!\n",
-                      __func__);
-               BUG();
-       }
-       /* Zero out the entire journal on disk.  We cannot afford to
-          have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */
-       jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
-       for (i = 0; i < journal->j_maxlen; i++) {
-               err = jbd2_journal_bmap(journal, i, &blocknr);
-               if (err)
-                       return err;
-               bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
-               lock_buffer(bh);
-               memset (bh->b_data, 0, journal->j_blocksize);
-               BUFFER_TRACE(bh, "marking dirty");
-               mark_buffer_dirty(bh);
-               BUFFER_TRACE(bh, "marking uptodate");
-               set_buffer_uptodate(bh);
-               unlock_buffer(bh);
-               __brelse(bh);
-       }
-       sync_blockdev(journal->j_dev);
-       jbd_debug(1, "JBD: journal cleared.\n");
-       /* OK, fill in the initial static fields in the new superblock */
-       sb = journal->j_superblock;
-       sb->s_header.h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
-       sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
-       sb->s_blocksize = cpu_to_be32(journal->j_blocksize);
-       sb->s_maxlen    = cpu_to_be32(journal->j_maxlen);
-       sb->s_first     = cpu_to_be32(1);
-       journal->j_transaction_sequence = 1;
-       journal->j_flags &= ~JBD2_ABORT;
-       journal->j_format_version = 2;
-       return journal_reset(journal);
- }
  /**
   * void jbd2_journal_update_superblock() - Update journal sb on disk.
   * @journal: The journal to update.
@@@ -1491,7 -1420,9 +1439,9 @@@ int jbd2_journal_destroy(journal_t *jou
        spin_lock(&journal->j_list_lock);
        while (journal->j_checkpoint_transactions != NULL) {
                spin_unlock(&journal->j_list_lock);
+               mutex_lock(&journal->j_checkpoint_mutex);
                jbd2_log_do_checkpoint(journal);
+               mutex_unlock(&journal->j_checkpoint_mutex);
                spin_lock(&journal->j_list_lock);
        }
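
jbd2_journal_write_metadata_buffer() above fires the buffer's commit trigger on the mapped data before checking for escaping, using whichever trigger set was saved alongside the frozen data. The callback plumbing itself is plain C; the userspace model below (all demo_* names are invented, and the real t_commit hook also receives the buffer_head) only sketches the shape of it: a client registers a struct of function pointers, and the commit path invokes t_commit right before write-out.

#include <stdio.h>
#include <string.h>
#include <stddef.h>

/* Userspace stand-in for struct jbd2_buffer_trigger_type. */
struct demo_trigger_type {
        void (*t_commit)(struct demo_trigger_type *type,
                         void *mapped_data, size_t size);
};

struct demo_buffer {
        char data[16];                          /* stands in for the mapped page */
        struct demo_trigger_type *triggers;     /* like jh->b_triggers */
};

/* A client callback: e.g. recompute a checksum right before write-out. */
static void demo_commit(struct demo_trigger_type *type,
                        void *mapped_data, size_t size)
{
        (void)type;
        memcpy(mapped_data, "checksummed", size < 12 ? size : 12);
}

static struct demo_trigger_type demo_triggers = {
        .t_commit = demo_commit,
};

/* Analogue of jbd2_buffer_commit_trigger(): fire the hook only if one is set. */
static void demo_fire_commit_trigger(struct demo_buffer *b)
{
        if (b->triggers && b->triggers->t_commit)
                b->triggers->t_commit(b->triggers, b->data, sizeof(b->data));
}

int main(void)
{
        struct demo_buffer b = { "raw metadata", &demo_triggers };

        demo_fire_commit_trigger(&b);
        printf("buffer now holds: %s\n", b.data);
        return 0;
}
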
  
diff --combined fs/jbd2/transaction.c
index 4f925a4f3d05051ec7b1edd65c50ecdabe27000a,48c21bac5a567f5ae27dd683c2b119ca2c7c7544..46b4e347ed7d9f3949df7b2b475bda7d7f2b8fef
@@@ -25,6 -25,7 +25,7 @@@
  #include <linux/timer.h>
  #include <linux/mm.h>
  #include <linux/highmem.h>
+ #include <linux/hrtimer.h>
  
  static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
  
@@@ -48,6 -49,7 +49,7 @@@ jbd2_get_transaction(journal_t *journal
  {
        transaction->t_journal = journal;
        transaction->t_state = T_RUNNING;
+       transaction->t_start_time = ktime_get();
        transaction->t_tid = journal->j_transaction_sequence++;
        transaction->t_expires = jiffies + journal->j_commit_interval;
        spin_lock_init(&transaction->t_handle_lock);
@@@ -741,12 -743,6 +743,12 @@@ done
                source = kmap_atomic(page, KM_USER0);
                memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
                kunmap_atomic(source, KM_USER0);
 +
 +              /*
 +               * Now that the frozen data is saved off, we need to store
 +               * any matching triggers.
 +               */
 +              jh->b_frozen_triggers = jh->b_triggers;
        }
        jbd_unlock_bh_state(bh);
  
@@@ -949,47 -945,6 +951,47 @@@ out
        return err;
  }
  
 +/**
 + * void jbd2_journal_set_triggers() - Add triggers for commit writeout
 + * @bh: buffer to trigger on
 + * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
 + *
 + * Set any triggers on this journal_head.  This is always safe, because
 + * triggers for a committing buffer will be saved off, and triggers for
 + * a running transaction will match the buffer in that transaction.
 + *
 + * Call with NULL to clear the triggers.
 + */
 +void jbd2_journal_set_triggers(struct buffer_head *bh,
 +                             struct jbd2_buffer_trigger_type *type)
 +{
 +      struct journal_head *jh = bh2jh(bh);
 +
 +      jh->b_triggers = type;
 +}
 +
 +void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
 +                              struct jbd2_buffer_trigger_type *triggers)
 +{
 +      struct buffer_head *bh = jh2bh(jh);
 +
 +      if (!triggers || !triggers->t_commit)
 +              return;
 +
 +      triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
 +}
 +
 +void jbd2_buffer_abort_trigger(struct journal_head *jh,
 +                             struct jbd2_buffer_trigger_type *triggers)
 +{
 +      if (!triggers || !triggers->t_abort)
 +              return;
 +
 +      triggers->t_abort(triggers, jh2bh(jh));
 +}
 +
 +
 +
  /**
   * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
   * @handle: transaction to add buffer to.
@@@ -1240,7 -1195,7 +1242,7 @@@ int jbd2_journal_stop(handle_t *handle
  {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
-       int old_handle_count, err;
+       int err;
        pid_t pid;
  
        J_ASSERT(journal_current_handle() == handle);
        /*
         * Implement synchronous transaction batching.  If the handle
         * was synchronous, don't force a commit immediately.  Let's
-        * yield and let another thread piggyback onto this transaction.
-        * Keep doing that while new threads continue to arrive.
-        * It doesn't cost much - we're about to run a commit and sleep
-        * on IO anyway.  Speeds up many-threaded, many-dir operations
-        * by 30x or more...
+        * yield and let another thread piggyback onto this
+        * transaction.  Keep doing that while new threads continue to
+        * arrive.  It doesn't cost much - we're about to run a commit
+        * and sleep on IO anyway.  Speeds up many-threaded, many-dir
+        * operations by 30x or more...
+        *
+        * We try and optimize the sleep time against what the
+        * underlying disk can do, instead of having a static sleep
+        * time.  This is useful for the case where our storage is so
+        * fast that it is more optimal to go ahead and force a flush
+        * and wait for the transaction to be committed than it is to
+        * wait for an arbitrary amount of time for new writers to
+        * join the transaction.  We achieve this by measuring how
+        * long it takes to commit a transaction, and compare it with
+        * how long this transaction has been running, and if run time
+        * < commit time then we sleep for the delta and commit.  This
+        * greatly helps super fast disks that would see slowdowns as
+        * more threads started doing fsyncs.
         *
-        * But don't do this if this process was the most recent one to
-        * perform a synchronous write.  We do this to detect the case where a
-        * single process is doing a stream of sync writes.  No point in waiting
-        * for joiners in that case.
+        * But don't do this if this process was the most recent one
+        * to perform a synchronous write.  We do this to detect the
+        * case where a single process is doing a stream of sync
+        * writes.  No point in waiting for joiners in that case.
         */
        pid = current->pid;
        if (handle->h_sync && journal->j_last_sync_writer != pid) {
+               u64 commit_time, trans_time;
                journal->j_last_sync_writer = pid;
-               do {
-                       old_handle_count = transaction->t_handle_count;
-                       schedule_timeout_uninterruptible(1);
-               } while (old_handle_count != transaction->t_handle_count);
+               spin_lock(&journal->j_state_lock);
+               commit_time = journal->j_average_commit_time;
+               spin_unlock(&journal->j_state_lock);
+               trans_time = ktime_to_ns(ktime_sub(ktime_get(),
+                                                  transaction->t_start_time));
+               commit_time = max_t(u64, commit_time,
+                                   1000*journal->j_min_batch_time);
+               commit_time = min_t(u64, commit_time,
+                                   1000*journal->j_max_batch_time);
+               if (trans_time < commit_time) {
+                       ktime_t expires = ktime_add_ns(ktime_get(),
+                                                      commit_time);
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+               }
        }
  
        current->journal_info = NULL;
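
The batching logic added to jbd2_journal_stop() above clamps the measured average commit time between j_min_batch_time and j_max_batch_time (microseconds, scaled to nanoseconds) and only sleeps when the transaction is younger than that window. Below is a userspace sketch of just that decision; batch_wait_ns() and the figures in main() are made up for illustration and are not jbd2 code.

#include <stdint.h>
#include <stdio.h>

/* Returns how long (ns) a synchronous handle would wait; 0 means commit now. */
static uint64_t batch_wait_ns(uint64_t avg_commit_ns, uint64_t trans_age_ns,
                              uint32_t min_batch_us, uint32_t max_batch_us)
{
        uint64_t commit_ns = avg_commit_ns;

        if (commit_ns < 1000ULL * min_batch_us)
                commit_ns = 1000ULL * min_batch_us;
        if (commit_ns > 1000ULL * max_batch_us)
                commit_ns = 1000ULL * max_batch_us;

        /* Mirror the code above: sleep the clamped window if still young. */
        return trans_age_ns < commit_ns ? commit_ns : 0;
}

int main(void)
{
        /* 2 ms average commit, transaction 0.5 ms old, default 0..15 ms window */
        printf("wait %llu ns\n",
               (unsigned long long)batch_wait_ns(2000000, 500000, 0, 15000));
        return 0;
}
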
diff --combined fs/super.c
index 7d67387496cb3348d81795c694a84cb136d655aa,d5fd4498548a0c31d0711b0cf04dae4d22a0203f..ed080c41716757a3c0a71c714c5a7db8ec489287
@@@ -38,7 -38,6 +38,7 @@@
  #include <linux/kobject.h>
  #include <linux/mutex.h>
  #include <linux/file.h>
 +#include <linux/async.h>
  #include <asm/uaccess.h>
  #include "internal.h"
  
@@@ -72,7 -71,6 +72,7 @@@ static struct super_block *alloc_super(
                INIT_HLIST_HEAD(&s->s_anon);
                INIT_LIST_HEAD(&s->s_inodes);
                INIT_LIST_HEAD(&s->s_dentry_lru);
 +              INIT_LIST_HEAD(&s->s_async_list);
                init_rwsem(&s->s_umount);
                mutex_init(&s->s_lock);
                lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@@ -291,18 -289,11 +291,18 @@@ void generic_shutdown_super(struct supe
  {
        const struct super_operations *sop = sb->s_op;
  
 +
        if (sb->s_root) {
                shrink_dcache_for_umount(sb);
                fsync_super(sb);
                lock_super(sb);
                sb->s_flags &= ~MS_ACTIVE;
 +
 +              /*
 +               * wait for asynchronous fs operations to finish before going further
 +               */
 +              async_synchronize_full_special(&sb->s_async_list);
 +
                /* bad name - it should be evict_inodes() */
                invalidate_inodes(sb);
                lock_kernel();
@@@ -470,7 -461,6 +470,7 @@@ restart
                sb->s_count++;
                spin_unlock(&sb_lock);
                down_read(&sb->s_umount);
 +              async_synchronize_full_special(&sb->s_async_list);
                if (sb->s_root && (wait || sb->s_dirt))
                        sb->s_op->sync_fs(sb, wait);
                up_read(&sb->s_umount);
@@@ -810,6 -800,7 +810,7 @@@ int get_sb_bdev(struct file_system_typ
                }
  
                s->s_flags |= MS_ACTIVE;
+               bdev->bd_super = s;
        }
  
        return simple_set_mnt(mnt, s);
@@@ -829,6 -820,7 +830,7 @@@ void kill_block_super(struct super_bloc
        struct block_device *bdev = sb->s_bdev;
        fmode_t mode = sb->s_mode;
  
+       bdev->bd_super = NULL;
        generic_shutdown_super(sb);
        sync_blockdev(bdev);
        close_bdev_exclusive(bdev, mode);
diff --combined include/linux/ext3_fs.h
index d76800f6ecf0fb927bf16f144fa6516f5ac62111,9004794a35fea1096d99154c81c06e6230052456..dd495b8c3091e21b3b55a25e8c8294e07f611113
@@@ -178,30 -178,6 +178,30 @@@ struct ext3_group_des
  #define EXT3_FL_USER_VISIBLE          0x0003DFFF /* User visible flags */
  #define EXT3_FL_USER_MODIFIABLE               0x000380FF /* User modifiable flags */
  
 +/* Flags that should be inherited by new inodes from their parent. */
 +#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
 +                         EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
 +                         EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
 +                         EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
 +                         EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
 +
 +/* Flags that are appropriate for regular files (all but dir-specific ones). */
 +#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
 +
 +/* Flags that are appropriate for non-directories/regular files. */
 +#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
 +
 +/* Mask out flags that are inappropriate for the given type of inode. */
 +static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
 +{
 +      if (S_ISDIR(mode))
 +              return flags;
 +      else if (S_ISREG(mode))
 +              return flags & EXT3_REG_FLMASK;
 +      else
 +              return flags & EXT3_OTHER_FLMASK;
 +}
 +
  /*
   * Inode dynamic state flags
   */
@@@ -377,6 -353,13 +377,13 @@@ struct ext3_inode 
  #define       EXT3_ERROR_FS                   0x0002  /* Errors detected */
  #define       EXT3_ORPHAN_FS                  0x0004  /* Orphans being recovered */
  
+ /*
+  * Misc. filesystem flags
+  */
+ #define EXT2_FLAGS_SIGNED_HASH                0x0001  /* Signed dirhash in use */
+ #define EXT2_FLAGS_UNSIGNED_HASH      0x0002  /* Unsigned dirhash in use */
+ #define EXT2_FLAGS_TEST_FILESYS               0x0004  /* to test development code */
  /*
   * Mount flags
   */
@@@ -513,7 -496,23 +520,23 @@@ struct ext3_super_block 
        __u16   s_reserved_word_pad;
        __le32  s_default_mount_opts;
        __le32  s_first_meta_bg;        /* First metablock block group */
-       __u32   s_reserved[190];        /* Padding to the end of the block */
+       __le32  s_mkfs_time;            /* When the filesystem was created */
+       __le32  s_jnl_blocks[17];       /* Backup of the journal inode */
+       /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
+ /*150*/       __le32  s_blocks_count_hi;      /* Blocks count */
+       __le32  s_r_blocks_count_hi;    /* Reserved blocks count */
+       __le32  s_free_blocks_count_hi; /* Free blocks count */
+       __le16  s_min_extra_isize;      /* All inodes have at least # bytes */
+       __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
+       __le32  s_flags;                /* Miscellaneous flags */
+       __le16  s_raid_stride;          /* RAID stride */
+       __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
+       __le64  s_mmp_block;            /* Block for multi-mount protection */
+       __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
+       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
+       __u8    s_reserved_char_pad2;
+       __le16  s_reserved_pad;
+       __u32   s_reserved[162];        /* Padding to the end of the block */
  };
  
  #ifdef __KERNEL__
@@@ -718,6 -717,9 +741,9 @@@ static inline __le16 ext3_rec_len_to_di
  #define DX_HASH_LEGACY                0
  #define DX_HASH_HALF_MD4      1
  #define DX_HASH_TEA           2
+ #define DX_HASH_LEGACY_UNSIGNED       3
+ #define DX_HASH_HALF_MD4_UNSIGNED     4
+ #define DX_HASH_TEA_UNSIGNED          5
  
  #ifdef __KERNEL__
  
diff --combined include/linux/ext3_fs_sb.h
index 76fdc0f4b0287f3e3b0450a4357de6519d69f0c0,a4e9216b3a6dd7894d53b6b1636acb360973861b..f07f34de2f0ecb7ed987c49c604ce3efce4690e6
@@@ -57,10 -57,11 +57,11 @@@ struct ext3_sb_info 
        u32 s_next_generation;
        u32 s_hash_seed[4];
        int s_def_hash_version;
+       int s_hash_unsigned;    /* 3 if hash should be unsigned, 0 if not */
        struct percpu_counter s_freeblocks_counter;
        struct percpu_counter s_freeinodes_counter;
        struct percpu_counter s_dirs_counter;
 -      struct blockgroup_lock s_blockgroup_lock;
 +      struct blockgroup_lock *s_blockgroup_lock;
  
        /* root of the per fs reservation window tree */
        spinlock_t s_rsv_window_lock;
@@@ -86,7 -87,7 +87,7 @@@
  static inline spinlock_t *
  sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
  {
 -      return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
 +      return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
  }
  
  #endif        /* _LINUX_EXT3_FS_SB */
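
The new s_hash_unsigned field is set to 3 because the unsigned dirhash variants added to ext3_fs.h above (DX_HASH_LEGACY_UNSIGNED and friends) sit exactly three slots after their signed counterparts, so the value can simply be added to a hash version to pick the unsigned flavour (that use lives outside this diff). The sketch below only illustrates the build-time choice the mount code above makes, keyed off whether plain char is unsigned on the architecture; it is not filesystem code.

#include <limits.h>
#include <stdio.h>

int main(void)
{
        int hash_unsigned;

#if CHAR_MIN == 0
        /* plain char is unsigned here (typical of arm, powerpc, s390) */
        hash_unsigned = 3;
#else
        /* plain char is signed here (typical of x86) */
        hash_unsigned = 0;
#endif
        printf("dirhash version offset on this build: %d\n", hash_unsigned);
        return 0;
}
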
diff --combined include/linux/fs.h
index e38a64d71efff910fbc9b6a8ad578a0cd509aea4,0f54ae0f0ccde915e2d50c873bf51462f3843460..0b87b29f4797fa9960373a24194501f0ebc5fab9
@@@ -565,6 -565,7 +565,7 @@@ struct address_space 
  struct block_device {
        dev_t                   bd_dev;  /* not a kdev_t - it's a search key */
        struct inode *          bd_inode;       /* will die */
+       struct super_block *    bd_super;
        int                     bd_openers;
        struct mutex            bd_mutex;       /* open/close mutex */
        struct semaphore        bd_mount_sem;
@@@ -1133,6 -1134,7 +1134,6 @@@ struct super_block 
        struct rw_semaphore     s_umount;
        struct mutex            s_lock;
        int                     s_count;
 -      int                     s_syncing;
        int                     s_need_sync_fs;
        atomic_t                s_active;
  #ifdef CONFIG_SECURITY
         * generic_show_options()
         */
        char *s_options;
 +
 +      /*
 +       * storage for asynchronous operations
 +       */
 +      struct list_head s_async_list;
  };
  
  extern struct timespec current_fs_time(struct super_block *sb);
@@@ -1389,6 -1386,7 +1390,7 @@@ struct super_operations 
        ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
        ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
  #endif
+       int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
  };
  
  /*
@@@ -1834,7 -1832,7 +1836,7 @@@ extern int __filemap_fdatawrite_range(s
  extern int filemap_fdatawrite_range(struct address_space *mapping,
                                loff_t start, loff_t end);
  
 -extern long do_fsync(struct file *file, int datasync);
 +extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
  extern void sync_supers(void);
  extern void sync_filesystems(int wait);
  extern void __fsync_super(struct super_block *sb);
@@@ -2063,9 -2061,6 +2065,9 @@@ extern int vfs_fstat(unsigned int, stru
  
  extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
                    unsigned long arg);
 +extern int __generic_block_fiemap(struct inode *inode,
 +                                struct fiemap_extent_info *fieinfo, u64 start,
 +                                u64 len, get_block_t *get_block);
  extern int generic_block_fiemap(struct inode *inode,
                                struct fiemap_extent_info *fieinfo, u64 start,
                                u64 len, get_block_t *get_block);
diff --combined include/linux/jbd2.h
index 34456476e761330686a4e6d6afa98cf570fc94de,adef1c9940d3719730aa68d5647e049b084c51f5..b45109c61fba90451a0396f6c845e7c55964ccb3
@@@ -637,6 -637,11 +637,11 @@@ struct transaction_
         */
        unsigned long           t_expires;
  
+       /*
+        * When this transaction started, in nanoseconds [no locking]
+        */
+       ktime_t                 t_start_time;
        /*
         * How many handles used this transaction? [t_handle_lock]
         */
@@@ -682,6 -687,8 +687,8 @@@ jbd2_time_diff(unsigned long start, uns
        return end + (MAX_JIFFY_OFFSET - start);
  }
  
+ #define JBD2_NR_BATCH 64
  /**
   * struct journal_s - The journal_s type is the concrete type associated with
   *     journal_t.
@@@ -825,6 -832,14 +832,14 @@@ struct journal_
        /* Semaphore for locking against concurrent checkpoints */
        struct mutex            j_checkpoint_mutex;
  
+       /*
+        * List of buffer heads used by the checkpoint routine.  This
+        * was moved from jbd2_log_do_checkpoint() to reduce stack
+        * usage.  Access to this array is controlled by the
+        * j_checkpoint_mutex.  [j_checkpoint_mutex]
+        */
+       struct buffer_head      *j_chkpt_bhs[JBD2_NR_BATCH];
+       
        /*
         * Journal head: identifies the first unused block in the journal.
         * [j_state_lock]
        struct buffer_head      **j_wbuf;
        int                     j_wbufsize;
  
+       /*
+       * this is the pid of the last person to run a synchronous operation
+        * through the journal
+        */
        pid_t                   j_last_sync_writer;
  
+       /*
+        * the average amount of time in nanoseconds it takes to commit a
+        * transaction to disk. [j_state_lock]
+        */
+       u64                     j_average_commit_time;
+       /*
+        * minimum and maximum times that we should wait for
+        * additional filesystem operations to get batched into a
+        * synchronous handle in microseconds
+        */
+       u32                     j_min_batch_time;
+       u32                     j_max_batch_time;
        /* This function is called when a transaction is closed */
        void                    (*j_commit_callback)(journal_t *,
                                                     transaction_t *);
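
j_min_batch_time and j_max_batch_time bound, in microseconds, how long a synchronous handle may wait for other operations to join the same transaction, while j_average_commit_time tracks commit cost in nanoseconds. The sketch below only illustrates clamping a delay to that window; pick_batch_delay_ns is a hypothetical helper, and for brevity it reads j_average_commit_time without taking j_state_lock.

#include <linux/jbd2.h>
#include <linux/time.h>

/* Sketch: choose a batching delay (ns) bounded by the configured window.
 * Locking of j_average_commit_time (j_state_lock) is omitted here. */
static u64 pick_batch_delay_ns(journal_t *journal)
{
        u64 delay  = journal->j_average_commit_time;
        u64 min_ns = (u64)journal->j_min_batch_time * NSEC_PER_USEC;
        u64 max_ns = (u64)journal->j_max_batch_time * NSEC_PER_USEC;

        if (delay < min_ns)
                delay = min_ns;
        if (delay > max_ns)
                delay = max_ns;
        return delay;
}
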
@@@ -1008,35 -1041,6 +1041,35 @@@ int __jbd2_journal_clean_checkpoint_lis
  int __jbd2_journal_remove_checkpoint(struct journal_head *);
  void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
  
 +
 +/*
 + * Triggers
 + */
 +
 +struct jbd2_buffer_trigger_type {
 +      /*
 +       * Fired just before a buffer is written to the journal.
 +       * mapped_data is a mapped buffer that is the frozen data for
 +       * commit.
 +       */
 +      void (*t_commit)(struct jbd2_buffer_trigger_type *type,
 +                       struct buffer_head *bh, void *mapped_data,
 +                       size_t size);
 +
 +      /*
 +       * Fired during journal abort for dirty buffers that will not be
 +       * committed.
 +       */
 +      void (*t_abort)(struct jbd2_buffer_trigger_type *type,
 +                      struct buffer_head *bh);
 +};
 +
 +extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
 +                                     void *mapped_data,
 +                                     struct jbd2_buffer_trigger_type *triggers);
 +extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
 +                                    struct jbd2_buffer_trigger_type *triggers);
 +
  /* Buffer IO */
  extern int
  jbd2_journal_write_metadata_buffer(transaction_t        *transaction,
@@@ -1075,8 -1079,6 +1108,8 @@@ extern int       jbd2_journal_extend (handle
  extern int     jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
  extern int     jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
  extern int     jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
 +void           jbd2_journal_set_triggers(struct buffer_head *,
 +                                         struct jbd2_buffer_trigger_type *type);
  extern int     jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
  extern void    jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
  extern int     jbd2_journal_forget (handle_t *, struct buffer_head *);
@@@ -1102,7 -1104,6 +1135,6 @@@ extern int         jbd2_journal_set_feature
                   (journal_t *, unsigned long, unsigned long, unsigned long);
  extern void      jbd2_journal_clear_features
                   (journal_t *, unsigned long, unsigned long, unsigned long);
- extern int       jbd2_journal_create     (journal_t *);
  extern int       jbd2_journal_load       (journal_t *journal);
  extern int       jbd2_journal_destroy    (journal_t *);
  extern int       jbd2_journal_recover    (journal_t *journal);
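
The trigger hooks declared above let a jbd2 client adjust the frozen copy of a metadata buffer just before it is written to the journal, for example to refresh an in-block checksum. A hedged sketch of wiring this up; my_commit_trigger, my_triggers and my_dirty_metadata are illustrative names, and the checksum step is only a comment.

#include <linux/jbd2.h>

/* Fired on the frozen data right before it goes to the journal; a real
 * trigger would recompute checksums or similar inside mapped_data. */
static void my_commit_trigger(struct jbd2_buffer_trigger_type *type,
                              struct buffer_head *bh, void *mapped_data,
                              size_t size)
{
        /* update derived fields in mapped_data here */
}

static struct jbd2_buffer_trigger_type my_triggers = {
        .t_commit = my_commit_trigger,
};

/* Typical call sequence: get write access, attach the triggers, modify
 * the buffer, then mark it dirty as usual. */
static int my_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
        int err = jbd2_journal_get_write_access(handle, bh);
        if (err)
                return err;
        jbd2_journal_set_triggers(bh, &my_triggers);
        /* ... modify bh->b_data under the handle ... */
        return jbd2_journal_dirty_metadata(handle, bh);
}
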
@@@ -1177,8 -1178,8 +1209,8 @@@ int jbd2_log_wait_commit(journal_t *jou
  int jbd2_log_do_checkpoint(journal_t *journal);
  
  void __jbd2_log_wait_for_space(journal_t *journal);
- extern void   __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
- extern int    jbd2_cleanup_journal_tail(journal_t *);
+ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
+ extern int jbd2_cleanup_journal_tail(journal_t *);
  
  /* Debugging code only: */