pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jan 2009 01:14:59 +0000 (17:14 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jan 2009 01:14:59 +0000 (17:14 -0800)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (57 commits)
  jbd2: Fix oops in jbd2_journal_init_inode() on corrupted fs
  ext4: Remove "extents" mount option
  block: Add Kconfig help which notes that ext4 needs CONFIG_LBD
  ext4: Make printk's consistently prefixed with "EXT4-fs: "
  ext4: Add sanity checks for the superblock before mounting the filesystem
  ext4: Add mount option to set kjournald's I/O priority
  jbd2: Submit writes to the journal using WRITE_SYNC
  jbd2: Add pid and journal device name to the "kjournald2 starting" message
  ext4: Add markers for better debuggability
  ext4: Remove code to create the journal inode
  ext4: provide function to release metadata pages under memory pressure
  ext3: provide function to release metadata pages under memory pressure
  add releasepage hooks to block devices which can be used by file systems
  ext4: Fix s_dirty_blocks_counter if block allocation failed with nodelalloc
  ext4: Init the complete page while building buddy cache
  ext4: Don't allow new groups to be added during block allocation
  ext4: mark the blocks/inode bitmap beyond end of group as used
  ext4: Use new buffer_head flag to check uninit group bitmaps initialization
  ext4: Fix the race between read_inode_bitmap() and ext4_new_inode()
  ext4: code cleanup
  ...

16 files changed:
fs/block_dev.c
fs/ext3/namei.c
fs/ext3/super.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/inode.c
fs/ext4/namei.c
fs/ext4/super.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/super.c
include/linux/ext3_fs.h
include/linux/ext3_fs_sb.h
include/linux/fs.h
include/linux/jbd2.h

diff --combined fs/block_dev.c
index 8ebbfdf708c24c9d70bc4cc88e8266202d56ec06,1dd07e66e98acf06f0999bea1926c7799a4a67f9..ac7031f12ea51b66177a3277b5cd48be3f4d6dc1
@@@ -1005,7 -1005,6 +1005,7 @@@ static int __blkdev_get(struct block_de
        }
  
        lock_kernel();
 + restart:
  
        ret = -ENXIO;
        disk = get_gendisk(bdev->bd_dev, &partno);
  
                        if (disk->fops->open) {
                                ret = disk->fops->open(bdev, mode);
 +                              if (ret == -ERESTARTSYS) {
 +                                      /* Lost a race with 'disk' being
 +                                       * deleted, try again.
 +                                       * See md.c
 +                                       */
 +                                      disk_put_part(bdev->bd_part);
 +                                      bdev->bd_part = NULL;
 +                                      module_put(disk->fops->owner);
 +                                      put_disk(disk);
 +                                      bdev->bd_disk = NULL;
 +                                      mutex_unlock(&bdev->bd_mutex);
 +                                      goto restart;
 +                              }
                                if (ret)
                                        goto out_clear;
                        }
@@@ -1234,6 -1220,20 +1234,20 @@@ static long block_ioctl(struct file *fi
        return blkdev_ioctl(bdev, mode, cmd, arg);
  }
  
+ /*
+  * Try to release a page associated with block device when the system
+  * is under memory pressure.
+  */
+ static int blkdev_releasepage(struct page *page, gfp_t wait)
+ {
+       struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;
+       if (super && super->s_op->bdev_try_to_free_page)
+               return super->s_op->bdev_try_to_free_page(super, page, wait);
+       return try_to_free_buffers(page);
+ }
  static const struct address_space_operations def_blk_aops = {
        .readpage       = blkdev_readpage,
        .writepage      = blkdev_writepage,
        .write_begin    = blkdev_write_begin,
        .write_end      = blkdev_write_end,
        .writepages     = generic_writepages,
+       .releasepage    = blkdev_releasepage,
        .direct_IO      = blkdev_direct_IO,
  };
  
@@@ -1276,7 -1277,7 +1291,7 @@@ EXPORT_SYMBOL(ioctl_by_bdev)
  
  /**
   * lookup_bdev  - lookup a struct block_device by name
 - * @path:     special file representing the block device
 + * @pathname: special file representing the block device
   *
   * Get a reference to the blockdevice at @pathname in the current
   * namespace if possible and return it.  Return ERR_PTR(error)
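
The fs/block_dev.c hunks above add a ->releasepage handler for block devices that defers to the owning filesystem's bdev_try_to_free_page hook when one is registered, and otherwise falls back to try_to_free_buffers(). The stand-alone sketch below illustrates the same optional-hook-with-fallback dispatch outside the kernel; every name in it (fake_super, fake_page, generic_release and so on) is invented for illustration and is not the kernel API.

#include <stdio.h>

struct fake_page { int has_busy_buffers; };

struct fake_super_ops {
        /* optional hook; may be NULL */
        int (*bdev_try_to_free_page)(struct fake_page *page);
};

struct fake_super { const struct fake_super_ops *s_op; };

/* generic fallback: can only free a page whose buffers are not pinned */
static int generic_release(struct fake_page *page)
{
        return !page->has_busy_buffers;         /* 1 = freed, 0 = still pinned */
}

/* a journaling fs could drop whatever pins the buffers, then free them */
static int journal_aware_release(struct fake_page *page)
{
        page->has_busy_buffers = 0;
        return generic_release(page);
}

/* dispatch: use the filesystem hook if present, else the generic path */
static int release_page(struct fake_super *super, struct fake_page *page)
{
        if (super && super->s_op && super->s_op->bdev_try_to_free_page)
                return super->s_op->bdev_try_to_free_page(page);
        return generic_release(page);
}

int main(void)
{
        struct fake_super_ops ops = { .bdev_try_to_free_page = journal_aware_release };
        struct fake_super sb = { .s_op = &ops };
        struct fake_page busy = { .has_busy_buffers = 1 };

        printf("generic path: %d\n", release_page(NULL, &busy));  /* 0 */
        printf("hooked path:  %d\n", release_page(&sb, &busy));   /* 1 */
        return 0;
}

The generic path reports the pinned page as unreleasable, while the hooked path frees it, which mirrors how a journaling filesystem can release journal-pinned buffers that the plain buffer-head path cannot.
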
diff --combined fs/ext3/namei.c
index 8d6f965e502cd9d56969fefcf347dbeec37a42c7,2c2d700c1ccfcb112d7df6ee9303b2cfc1716116..69a3d19ca9fd4dfbb80cf4de27b0fe10e9518120
@@@ -74,6 -74,10 +74,6 @@@ static struct buffer_head *ext3_append(
  #define assert(test) J_ASSERT(test)
  #endif
  
 -#ifndef swap
 -#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
 -#endif
 -
  #ifdef DX_DEBUG
  #define dxtrace(command) command
  #else
@@@ -364,6 -368,8 +364,8 @@@ dx_probe(struct qstr *entry, struct ino
                goto fail;
        }
        hinfo->hash_version = root->info.hash_version;
+       if (hinfo->hash_version <= DX_HASH_TEA)
+               hinfo->hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned;
        hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
        if (entry)
                ext3fs_dirhash(entry->name, entry->len, hinfo);
@@@ -632,6 -638,9 +634,9 @@@ int ext3_htree_fill_tree(struct file *d
        dir = dir_file->f_path.dentry->d_inode;
        if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
                hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+               if (hinfo.hash_version <= DX_HASH_TEA)
+                       hinfo.hash_version +=
+                               EXT3_SB(dir->i_sb)->s_hash_unsigned;
                hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
                count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
                                               start_hash, start_minor_hash);
@@@ -1152,9 -1161,9 +1157,9 @@@ static struct ext3_dir_entry_2 *do_spli
        u32 hash2;
        struct dx_map_entry *map;
        char *data1 = (*bh)->b_data, *data2;
-       unsigned split, move, size, i;
+       unsigned split, move, size;
        struct ext3_dir_entry_2 *de = NULL, *de2;
-       int     err = 0;
+       int     err = 0, i;
  
        bh2 = ext3_append (handle, dir, &newblock, &err);
        if (!(bh2)) {
@@@ -1394,6 -1403,8 +1399,8 @@@ static int make_indexed_dir(handle_t *h
  
        /* Initialize as for dx_probe */
        hinfo.hash_version = root->info.hash_version;
+       if (hinfo.hash_version <= DX_HASH_TEA)
+               hinfo.hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned;
        hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
        ext3fs_dirhash(name, namelen, &hinfo);
        frame = frames;
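
The three ext3/namei.c hunks above bump hash_version by s_hash_unsigned for the legacy hash algorithms, selecting an explicitly unsigned-char variant of the directory hash. The old code hashed filename bytes through the platform's plain char, so directories created on an architecture where char is signed could hash differently on one where it is unsigned. The toy program below uses a made-up hash rather than the real ext3 dirhash, but shows how a single byte >= 0x80 produces diverging values under the two interpretations.

#include <stdio.h>

static unsigned int toy_hash_signed(const char *s, int len)
{
        unsigned int h = 0x12a3fe2d;
        int i;

        for (i = 0; i < len; i++)
                h = h * 31 + (signed char)s[i];     /* sign-extends 0x80..0xff */
        return h;
}

static unsigned int toy_hash_unsigned(const char *s, int len)
{
        unsigned int h = 0x12a3fe2d;
        int i;

        for (i = 0; i < len; i++)
                h = h * 31 + (unsigned char)s[i];   /* always 0..255 */
        return h;
}

int main(void)
{
        const char name[] = "f\xe9vrier";           /* contains a byte >= 0x80 */
        int len = sizeof(name) - 1;

        printf("signed:   %#x\n", toy_hash_signed(name, len));
        printf("unsigned: %#x\n", toy_hash_unsigned(name, len));
        return 0;
}

Both functions walk the same bytes; only the cast differs, and that ambiguity is exactly what the new unsigned hash versions pin down on disk.
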
diff --combined fs/ext3/super.c
index 01c235bc2054422a0bf1134d6af7214ab459689a,6900ff05e3ab1d392def2418f909251762533814..5d047a030a73d0570a9ec992f549a4e4d2fb7c6a
@@@ -439,7 -439,6 +439,7 @@@ static void ext3_put_super (struct supe
                ext3_blkdev_remove(sbi);
        }
        sb->s_fs_info = NULL;
 +      kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
        return;
  }
@@@ -683,6 -682,26 +683,26 @@@ static struct dentry *ext3_fh_to_parent
                                    ext3_nfs_get_inode);
  }
  
+ /*
+  * Try to release metadata pages (indirect blocks, directories) which are
+  * mapped via the block device.  Since these pages could have journal heads
+  * which would prevent try_to_free_buffers() from freeing them, we must use
+  * jbd layer's try_to_free_buffers() function to release them.
+  */
+ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
+                                gfp_t wait)
+ {
+       journal_t *journal = EXT3_SB(sb)->s_journal;
+       WARN_ON(PageChecked(page));
+       if (!page_has_buffers(page))
+               return 0;
+       if (journal)
+               return journal_try_to_free_buffers(journal, page, 
+                                                  wait & ~__GFP_WAIT);
+       return try_to_free_buffers(page);
+ }
  #ifdef CONFIG_QUOTA
  #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
  #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
@@@ -714,9 -733,7 +734,9 @@@ static struct dquot_operations ext3_quo
        .acquire_dquot  = ext3_acquire_dquot,
        .release_dquot  = ext3_release_dquot,
        .mark_dirty     = ext3_mark_dquot_dirty,
 -      .write_info     = ext3_write_info
 +      .write_info     = ext3_write_info,
 +      .alloc_dquot    = dquot_alloc,
 +      .destroy_dquot  = dquot_destroy,
  };
  
  static struct quotactl_ops ext3_qctl_operations = {
@@@ -749,6 -766,7 +769,7 @@@ static const struct super_operations ex
        .quota_read     = ext3_quota_read,
        .quota_write    = ext3_quota_write,
  #endif
+       .bdev_try_to_free_page = bdev_try_to_free_page,
  };
  
  static const struct export_operations ext3_export_ops = {
@@@ -1038,7 -1056,8 +1059,7 @@@ static int parse_options (char *options
                case Opt_grpjquota:
                        qtype = GRPQUOTA;
  set_qf_name:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            !sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR
                                        "EXT3-fs: Cannot change journaled "
                case Opt_offgrpjquota:
                        qtype = GRPQUOTA;
  clear_qf_name:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR "EXT3-fs: Cannot change "
                                        "journaled quota options when "
                case Opt_jqfmt_vfsv0:
                        qfmt = QFMT_VFS_V0;
  set_qf_format:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            sbi->s_jquota_fmt != qfmt) {
                                printk(KERN_ERR "EXT3-fs: Cannot change "
                                        "journaled quota options when "
                        set_opt(sbi->s_mount_opt, GRPQUOTA);
                        break;
                case Opt_noquota:
 -                      if (sb_any_quota_enabled(sb) ||
 -                          sb_any_quota_suspended(sb)) {
 +                      if (sb_any_quota_loaded(sb)) {
                                printk(KERN_ERR "EXT3-fs: Cannot change quota "
                                        "options when quota turned on.\n");
                                return 0;
@@@ -1547,13 -1569,6 +1568,13 @@@ static int ext3_fill_super (struct supe
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
                return -ENOMEM;
 +
 +      sbi->s_blockgroup_lock =
 +              kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
 +      if (!sbi->s_blockgroup_lock) {
 +              kfree(sbi);
 +              return -ENOMEM;
 +      }
        sb->s_fs_info = sbi;
        sbi->s_mount_opt = 0;
        sbi->s_resuid = EXT3_DEF_RESUID;
        for (i=0; i < 4; i++)
                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
        sbi->s_def_hash_version = es->s_def_hash_version;
+       i = le32_to_cpu(es->s_flags);
+       if (i & EXT2_FLAGS_UNSIGNED_HASH)
+               sbi->s_hash_unsigned = 3;
+       else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+ #ifdef __CHAR_UNSIGNED__
+               es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+               sbi->s_hash_unsigned = 3;
+ #else
+               es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+ #endif
+               sb->s_dirt = 1;
+       }
  
        if (sbi->s_blocks_per_group > blocksize * 8) {
                printk (KERN_ERR
                goto failed_mount;
        }
  
 -      bgl_lock_init(&sbi->s_blockgroup_lock);
 +      bgl_lock_init(sbi->s_blockgroup_lock);
  
        for (i = 0; i < db_count; i++) {
                block = descriptor_loc(sb, logic_sb_block, i);
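
In the ext3_fill_super() hunk above, a superblock carrying neither EXT2_FLAGS_SIGNED_HASH nor EXT2_FLAGS_UNSIGNED_HASH is treated as written by an old kernel whose hash followed the signedness of the building compiler's plain char, so the mount code falls back to __CHAR_UNSIGNED__ and records the choice in the superblock (the +3 in s_hash_unsigned matches the DX_HASH_*_UNSIGNED values added in the ext4.h diff later in this page). The small stand-alone program below sketches that probe; __CHAR_UNSIGNED__ is GCC's predefined macro, and CHAR_MIN from <limits.h> gives the same answer.

#include <stdio.h>
#include <limits.h>

int main(void)
{
#ifdef __CHAR_UNSIGNED__
        const char *compile_time = "unsigned";  /* GCC: plain char is unsigned */
#else
        const char *compile_time = "signed";
#endif
        /* CHAR_MIN is 0 exactly when plain char is unsigned */
        const char *run_time = (CHAR_MIN == 0) ? "unsigned" : "signed";

        printf("plain char is %s (preprocessor), %s (limits.h)\n",
               compile_time, run_time);
        return 0;
}
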
diff --combined fs/ext4/ext4.h
index 6c46c648430d051ad8a0b25569df02cd1da60f26,db1718833f5817db2051922c0f08b4550a1b43df..c668e4377d76027a87f95eb168d6e54b024a1a69
@@@ -19,6 -19,7 +19,7 @@@
  #include <linux/types.h>
  #include <linux/blkdev.h>
  #include <linux/magic.h>
+ #include <linux/jbd2.h>
  #include "ext4_i.h"
  
  /*
@@@ -94,9 -95,9 +95,9 @@@ struct ext4_allocation_request 
        /* phys. block for ^^^ */
        ext4_fsblk_t pright;
        /* how many blocks we want to allocate */
-       unsigned long len;
+       unsigned int len;
        /* flags. see above EXT4_MB_HINT_* */
-       unsigned long flags;
+       unsigned int flags;
  };
  
  /*
@@@ -156,12 -157,12 +157,12 @@@ struct ext4_group_des
        __le32  bg_block_bitmap_lo;     /* Blocks bitmap block */
        __le32  bg_inode_bitmap_lo;     /* Inodes bitmap block */
        __le32  bg_inode_table_lo;      /* Inodes table block */
-       __le16  bg_free_blocks_count;   /* Free blocks count */
-       __le16  bg_free_inodes_count;   /* Free inodes count */
-       __le16  bg_used_dirs_count;     /* Directories count */
+       __le16  bg_free_blocks_count_lo;/* Free blocks count */
+       __le16  bg_free_inodes_count_lo;/* Free inodes count */
+       __le16  bg_used_dirs_count_lo;  /* Directories count */
        __le16  bg_flags;               /* EXT4_BG_flags (INODE_UNINIT, etc) */
        __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
-       __le16  bg_itable_unused;       /* Unused inodes count */
+       __le16  bg_itable_unused_lo;    /* Unused inodes count */
        __le16  bg_checksum;            /* crc16(sb_uuid+group+desc) */
        __le32  bg_block_bitmap_hi;     /* Blocks bitmap block MSB */
        __le32  bg_inode_bitmap_hi;     /* Inodes bitmap block MSB */
        __le16  bg_free_blocks_count_hi;/* Free blocks count MSB */
        __le16  bg_free_inodes_count_hi;/* Free inodes count MSB */
        __le16  bg_used_dirs_count_hi;  /* Directories count MSB */
-       __le16  bg_itable_unused_hi;    /* Unused inodes count MSB */
+       __le16  bg_itable_unused_hi;    /* Unused inodes count MSB */
        __u32   bg_reserved2[3];
  };
  
@@@ -328,6 -329,7 +329,7 @@@ struct ext4_mount_options 
        uid_t s_resuid;
        gid_t s_resgid;
        unsigned long s_commit_interval;
+       u32 s_min_batch_time, s_max_batch_time;
  #ifdef CONFIG_QUOTA
        int s_jquota_fmt;
        char *s_qf_names[MAXQUOTAS];
@@@ -534,7 -536,6 +536,6 @@@ do {                                                                              
  #define EXT4_MOUNT_QUOTA              0x80000 /* Some quota option set */
  #define EXT4_MOUNT_USRQUOTA           0x100000 /* "old" user quota */
  #define EXT4_MOUNT_GRPQUOTA           0x200000 /* "old" group quota */
- #define EXT4_MOUNT_EXTENTS            0x400000 /* Extents support */
  #define EXT4_MOUNT_JOURNAL_CHECKSUM   0x800000 /* Journal checksums */
  #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT       0x1000000 /* Journal Async Commit */
  #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
@@@ -726,11 -727,11 +727,11 @@@ static inline int ext4_valid_inum(struc
   */
  
  #define EXT4_HAS_COMPAT_FEATURE(sb,mask)                      \
-       (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask))
+       ((EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) != 0)
  #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask)                   \
-       (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask))
+       ((EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) != 0)
  #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask)                    \
-       (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask))
+       ((EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) != 0)
  #define EXT4_SET_COMPAT_FEATURE(sb,mask)                      \
        EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
  #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask)                   \
  #define EXT4_DEFM_JMODE_ORDERED       0x0040
  #define EXT4_DEFM_JMODE_WBACK 0x0060
  
+ /*
+  * Default journal batch times
+  */
+ #define EXT4_DEF_MIN_BATCH_TIME       0
+ #define EXT4_DEF_MAX_BATCH_TIME       15000 /* 15ms */
  /*
   * Structure of a directory entry
   */
@@@ -891,6 -898,9 +898,9 @@@ static inline __le16 ext4_rec_len_to_di
  #define DX_HASH_LEGACY                0
  #define DX_HASH_HALF_MD4      1
  #define DX_HASH_TEA           2
+ #define DX_HASH_LEGACY_UNSIGNED       3
+ #define DX_HASH_HALF_MD4_UNSIGNED     4
+ #define DX_HASH_TEA_UNSIGNED          5
  
  #ifdef __KERNEL__
  
@@@ -955,7 -965,7 +965,7 @@@ ext4_group_first_block_no(struct super_
  #define ERR_BAD_DX_DIR        -75000
  
  void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
-                       unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
+                       ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
  
  extern struct proc_dir_entry *ext4_proc_root;
  
@@@ -987,6 -997,9 +997,9 @@@ do {                                                                       
  # define ATTRIB_NORET __attribute__((noreturn))
  # define NORET_AND    noreturn,
  
+ /* bitmap.c */
+ extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
  /* balloc.c */
  extern unsigned int ext4_block_group(struct super_block *sb,
                        ext4_fsblk_t blocknr);
@@@ -995,20 -1008,14 +1008,14 @@@ extern ext4_grpblk_t ext4_block_group_o
  extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
  extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
                        ext4_group_t group);
- extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, int *errp);
  extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t goal, unsigned long *count, int *errp);
- extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
-                                       ext4_lblk_t iblock, ext4_fsblk_t goal,
-                                       unsigned long *count, int *errp);
  extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
  extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
  extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
                        ext4_fsblk_t block, unsigned long count, int metadata);
- extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
-                               ext4_fsblk_t block, unsigned long count,
-                               unsigned long *pdquot_freed_blocks);
+ extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+                               ext4_fsblk_t block, unsigned long count);
  extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
  extern void ext4_check_blocks_bitmap(struct super_block *);
  extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
@@@ -1019,7 -1026,7 +1026,7 @@@ extern int ext4_should_retry_alloc(stru
  /* dir.c */
  extern int ext4_check_dir_entry(const char *, struct inode *,
                                struct ext4_dir_entry_2 *,
-                               struct buffer_head *, unsigned long);
+                               struct buffer_head *, unsigned int);
  extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
                                    __u32 minor_hash,
                                    struct ext4_dir_entry_2 *dirent);
@@@ -1039,7 -1046,6 +1046,6 @@@ extern struct inode * ext4_orphan_get(s
  extern unsigned long ext4_count_free_inodes(struct super_block *);
  extern unsigned long ext4_count_dirs(struct super_block *);
  extern void ext4_check_inodes_bitmap(struct super_block *);
- extern unsigned long ext4_count_free(struct buffer_head *, unsigned);
  
  /* mballoc.c */
  extern long ext4_mb_stats;
@@@ -1054,12 -1060,13 +1060,13 @@@ extern int __init init_ext4_mballoc(voi
  extern void exit_ext4_mballoc(void);
  extern void ext4_mb_free_blocks(handle_t *, struct inode *,
                unsigned long, unsigned long, int, unsigned long *);
- extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
+ extern int ext4_mb_add_groupinfo(struct super_block *sb,
                ext4_group_t i, struct ext4_group_desc *desc);
  extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
                ext4_grpblk_t add);
+ extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
+ extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
+                                               ext4_group_t, int);
  /* inode.c */
  int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                struct buffer_head *bh, ext4_fsblk_t blocknr);
@@@ -1069,10 -1076,6 +1076,6 @@@ struct buffer_head *ext4_bread(handle_
                                                ext4_lblk_t, int, int *);
  int ext4_get_block(struct inode *inode, sector_t iblock,
                                struct buffer_head *bh_result, int create);
- int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
-                               ext4_lblk_t iblock, unsigned long maxblocks,
-                               struct buffer_head *bh_result,
-                               int create, int extend_disksize);
  
  extern struct inode *ext4_iget(struct super_block *, unsigned long);
  extern int  ext4_write_inode(struct inode *, int);
@@@ -1123,6 -1126,9 +1126,9 @@@ extern void ext4_abort(struct super_blo
        __attribute__ ((format (printf, 3, 4)));
  extern void ext4_warning(struct super_block *, const char *, const char *, ...)
        __attribute__ ((format (printf, 3, 4)));
+ extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
+                               const char *, const char *, ...)
+       __attribute__ ((format (printf, 4, 5)));
  extern void ext4_update_dynamic_rev(struct super_block *sb);
  extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
                                        __u32 compat);
@@@ -1136,12 -1142,28 +1142,28 @@@ extern ext4_fsblk_t ext4_inode_bitmap(s
                                      struct ext4_group_desc *bg);
  extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
                                     struct ext4_group_desc *bg);
+ extern __u32 ext4_free_blks_count(struct super_block *sb,
+                               struct ext4_group_desc *bg);
+ extern __u32 ext4_free_inodes_count(struct super_block *sb,
+                                struct ext4_group_desc *bg);
+ extern __u32 ext4_used_dirs_count(struct super_block *sb,
+                               struct ext4_group_desc *bg);
+ extern __u32 ext4_itable_unused_count(struct super_block *sb,
+                                  struct ext4_group_desc *bg);
  extern void ext4_block_bitmap_set(struct super_block *sb,
                                  struct ext4_group_desc *bg, ext4_fsblk_t blk);
  extern void ext4_inode_bitmap_set(struct super_block *sb,
                                  struct ext4_group_desc *bg, ext4_fsblk_t blk);
  extern void ext4_inode_table_set(struct super_block *sb,
                                 struct ext4_group_desc *bg, ext4_fsblk_t blk);
+ extern void ext4_free_blks_set(struct super_block *sb,
+                              struct ext4_group_desc *bg, __u32 count);
+ extern void ext4_free_inodes_set(struct super_block *sb,
+                               struct ext4_group_desc *bg, __u32 count);
+ extern void ext4_used_dirs_set(struct super_block *sb,
+                               struct ext4_group_desc *bg, __u32 count);
+ extern void ext4_itable_unused_set(struct super_block *sb,
+                                  struct ext4_group_desc *bg, __u32 count);
  
  static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
  {
@@@ -1225,11 -1247,11 +1247,11 @@@ do {                                                         
  } while (0)
  
  #ifdef CONFIG_SMP
 -/* Each CPU can accumulate FBC_BATCH blocks in their local
 +/* Each CPU can accumulate percpu_counter_batch blocks in their local
   * counters. So we need to make sure we have free blocks more
 - * than FBC_BATCH  * nr_cpu_ids. Also add a window of 4 times.
 + * than percpu_counter_batch  * nr_cpu_ids. Also add a window of 4 times.
   */
 -#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
 +#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
  #else
  #define EXT4_FREEBLOCKS_WATERMARK 0
  #endif
@@@ -1246,6 -1268,50 +1268,50 @@@ static inline void ext4_update_i_disksi
        return ;
  }
  
+ struct ext4_group_info {
+       unsigned long   bb_state;
+       struct rb_root  bb_free_root;
+       unsigned short  bb_first_free;
+       unsigned short  bb_free;
+       unsigned short  bb_fragments;
+       struct          list_head bb_prealloc_list;
+ #ifdef DOUBLE_CHECK
+       void            *bb_bitmap;
+ #endif
+       struct rw_semaphore alloc_sem;
+       unsigned short  bb_counters[];
+ };
+ #define EXT4_GROUP_INFO_NEED_INIT_BIT 0
+ #define EXT4_GROUP_INFO_LOCKED_BIT    1
+ #define EXT4_MB_GRP_NEED_INIT(grp)    \
+       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+ static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+ {
+       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+       bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+ }
+ static inline void ext4_unlock_group(struct super_block *sb,
+                                       ext4_group_t group)
+ {
+       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+       bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+ }
+ static inline int ext4_is_group_locked(struct super_block *sb,
+                                       ext4_group_t group)
+ {
+       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+       return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
+                                               &(grinfo->bb_state));
+ }
  /*
   * Inodes and files operations
   */
@@@ -1271,18 -1337,38 +1337,38 @@@ extern int ext4_ext_writepage_trans_blo
  extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
                                       int chunk);
  extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
-                       ext4_lblk_t iblock,
-                       unsigned long max_blocks, struct buffer_head *bh_result,
-                       int create, int extend_disksize);
+                              ext4_lblk_t iblock, unsigned int max_blocks,
+                              struct buffer_head *bh_result,
+                              int create, int extend_disksize);
  extern void ext4_ext_truncate(struct inode *);
  extern void ext4_ext_init(struct super_block *);
  extern void ext4_ext_release(struct super_block *);
  extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
                          loff_t len);
  extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
-                       sector_t block, unsigned long max_blocks,
+                       sector_t block, unsigned int max_blocks,
                        struct buffer_head *bh, int create,
                        int extend_disksize, int flag);
+ extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+                       __u64 start, __u64 len);
+ /*
+  * Add new method to test whether block and inode bitmaps are properly
+  * initialized. With uninit_bg reading the block from disk is not enough
+  * to mark the bitmap uptodate. We need to also zero-out the bitmap
+  */
+ #define BH_BITMAP_UPTODATE BH_JBDPrivateStart
+ static inline int bitmap_uptodate(struct buffer_head *bh)
+ {
+       return (buffer_uptodate(bh) &&
+                       test_bit(BH_BITMAP_UPTODATE, &(bh)->b_state));
+ }
+ static inline void set_bitmap_uptodate(struct buffer_head *bh)
+ {
+       set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
+ }
  #endif        /* __KERNEL__ */
  
  #endif        /* _EXT4_H */
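
The ext4.h hunks above rename the 16-bit group-descriptor counters to *_lo and declare accessor helpers (ext4_free_blks_count(), ext4_free_blks_set() and friends) that pair them with the *_hi halves used by larger descriptors. The sketch below shows the lo/hi split-and-combine such accessors perform, using simplified stand-in types; the real helpers additionally do little-endian conversion and only touch the high half when the descriptor size allows it.

#include <stdio.h>
#include <stdint.h>

/* simplified stand-in for the on-disk group descriptor fields */
struct toy_group_desc {
        uint16_t bg_free_blocks_count_lo;
        uint16_t bg_free_blocks_count_hi;
};

static uint32_t toy_free_blks_count(const struct toy_group_desc *bg)
{
        return ((uint32_t)bg->bg_free_blocks_count_hi << 16) |
               bg->bg_free_blocks_count_lo;
}

static void toy_free_blks_set(struct toy_group_desc *bg, uint32_t count)
{
        bg->bg_free_blocks_count_lo = (uint16_t)(count & 0xffff);
        bg->bg_free_blocks_count_hi = (uint16_t)(count >> 16);
}

int main(void)
{
        struct toy_group_desc bg;

        toy_free_blks_set(&bg, 70000);          /* does not fit in 16 bits */
        printf("lo=%u hi=%u combined=%u\n",
               (unsigned)bg.bg_free_blocks_count_lo,
               (unsigned)bg.bg_free_blocks_count_hi,
               (unsigned)toy_free_blks_count(&bg));
        return 0;
}
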
diff --combined fs/ext4/extents.c
index 3f54db31cdc233c4627a6956f2b47228ebffed2f,240cf0daad4b2771696af4f8ae257abce5527b32..54bf0623a9ae31f4db4dbf6cecc0a13c72038ce5
@@@ -97,6 -97,8 +97,8 @@@ static int ext4_ext_journal_restart(han
  {
        int err;
  
+       if (!ext4_handle_valid(handle))
+               return 0;
        if (handle->h_buffer_credits > needed)
                return 0;
        err = ext4_journal_extend(handle, needed);
@@@ -134,7 -136,7 +136,7 @@@ static int ext4_ext_dirty(handle_t *han
        int err;
        if (path->p_bh) {
                /* path points to block */
-               err = ext4_journal_dirty_metadata(handle, path->p_bh);
+               err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
        } else {
                /* path points to leaf/index in inode body */
                err = ext4_mark_inode_dirty(handle, inode);
@@@ -191,7 -193,7 +193,7 @@@ ext4_ext_new_meta_block(handle_t *handl
        ext4_fsblk_t goal, newblock;
  
        goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
-       newblock = ext4_new_meta_block(handle, inode, goal, err);
+       newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
        return newblock;
  }
  
@@@ -780,7 -782,7 +782,7 @@@ static int ext4_ext_split(handle_t *han
        set_buffer_uptodate(bh);
        unlock_buffer(bh);
  
-       err = ext4_journal_dirty_metadata(handle, bh);
+       err = ext4_handle_dirty_metadata(handle, inode, bh);
        if (err)
                goto cleanup;
        brelse(bh);
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
  
-               err = ext4_journal_dirty_metadata(handle, bh);
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
                if (err)
                        goto cleanup;
                brelse(bh);
@@@ -955,7 -957,7 +957,7 @@@ static int ext4_ext_grow_indepth(handle
        set_buffer_uptodate(bh);
        unlock_buffer(bh);
  
-       err = ext4_journal_dirty_metadata(handle, bh);
+       err = ext4_handle_dirty_metadata(handle, inode, bh);
        if (err)
                goto out;
  
@@@ -1160,15 -1162,13 +1162,13 @@@ ext4_ext_search_right(struct inode *ino
        while (--depth >= 0) {
                ix = path[depth].p_idx;
                if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
-                       break;
+                       goto got_index;
        }
  
-       if (depth < 0) {
-               /* we've gone up to the root and
-                * found no index to the right */
-               return 0;
-       }
+       /* we've gone up to the root and found no index to the right */
+       return 0;
  
+ got_index:
        /* we've found index to the right, let's
         * follow it and find the closest allocated
         * block to the right */
        *phys = ext_pblock(ex);
        put_bh(bh);
        return 0;
  }
  
  /*
@@@ -1622,7 -1621,6 +1621,6 @@@ cleanup
                ext4_ext_drop_refs(npath);
                kfree(npath);
        }
-       ext4_ext_tree_changed(inode);
        ext4_ext_invalidate_cache(inode);
        return err;
  }
@@@ -2233,7 -2231,6 +2231,6 @@@ static int ext4_ext_remove_space(struc
                }
        }
  out:
-       ext4_ext_tree_changed(inode);
        ext4_ext_drop_refs(path);
        kfree(path);
        ext4_journal_stop(handle);
@@@ -2250,7 -2247,7 +2247,7 @@@ void ext4_ext_init(struct super_block *
         * possible initialization would be here
         */
  
-       if (test_opt(sb, EXTENTS)) {
+       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
                printk(KERN_INFO "EXT4-fs: file extents enabled");
  #ifdef AGGRESSIVE_TEST
                printk(", aggressive tests");
   */
  void ext4_ext_release(struct super_block *sb)
  {
-       if (!test_opt(sb, EXTENTS))
+       if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
                return;
  
  #ifdef EXTENTS_STATS
@@@ -2380,7 -2377,7 +2377,7 @@@ static int ext4_ext_convert_to_initiali
                                                struct inode *inode,
                                                struct ext4_ext_path *path,
                                                ext4_lblk_t iblock,
-                                               unsigned long max_blocks)
+                                               unsigned int max_blocks)
  {
        struct ext4_extent *ex, newex, orig_ex;
        struct ext4_extent *ex1 = NULL;
                 */
                newdepth = ext_depth(inode);
                /*
 -               * update the extent length after successfull insert of the
 +               * update the extent length after successful insert of the
                 * split extent
                 */
                orig_ex.ee_len = cpu_to_le16(ee_len -
@@@ -2678,26 -2675,26 +2675,26 @@@ fix_extent_len
   */
  int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
                        ext4_lblk_t iblock,
-                       unsigned long max_blocks, struct buffer_head *bh_result,
+                       unsigned int max_blocks, struct buffer_head *bh_result,
                        int create, int extend_disksize)
  {
        struct ext4_ext_path *path = NULL;
        struct ext4_extent_header *eh;
        struct ext4_extent newex, *ex;
-       ext4_fsblk_t goal, newblock;
-       int err = 0, depth, ret;
-       unsigned long allocated = 0;
+       ext4_fsblk_t newblock;
+       int err = 0, depth, ret, cache_type;
+       unsigned int allocated = 0;
        struct ext4_allocation_request ar;
        loff_t disksize;
  
        __clear_bit(BH_New, &bh_result->b_state);
-       ext_debug("blocks %u/%lu requested for inode %u\n",
+       ext_debug("blocks %u/%u requested for inode %u\n",
                        iblock, max_blocks, inode->i_ino);
  
        /* check in cache */
-       goal = ext4_ext_in_cache(inode, iblock, &newex);
-       if (goal) {
-               if (goal == EXT4_EXT_CACHE_GAP) {
+       cache_type = ext4_ext_in_cache(inode, iblock, &newex);
+       if (cache_type) {
+               if (cache_type == EXT4_EXT_CACHE_GAP) {
                        if (!create) {
                                /*
                                 * block isn't allocated yet and
                                goto out2;
                        }
                        /* we should allocate requested block */
-               } else if (goal == EXT4_EXT_CACHE_EXTENT) {
+               } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
                        /* block is already allocated */
                        newblock = iblock
                                   - le32_to_cpu(newex.ee_block)
        if (!newblock)
                goto out2;
        ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
-                       goal, newblock, allocated);
+                 ar.goal, newblock, allocated);
  
        /* try to insert new extent into found leaf and return */
        ext4_ext_store_pblock(&newex, newblock);
@@@ -2950,7 -2947,7 +2947,7 @@@ void ext4_ext_truncate(struct inode *in
         * transaction synchronous.
         */
        if (IS_SYNC(inode))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
  out_stop:
        up_write(&EXT4_I(inode)->i_data_sem);
@@@ -3004,7 -3001,7 +3001,7 @@@ long ext4_fallocate(struct inode *inode
        handle_t *handle;
        ext4_lblk_t block;
        loff_t new_size;
-       unsigned long max_blocks;
+       unsigned int max_blocks;
        int ret = 0;
        int ret2 = 0;
        int retries = 0;
@@@ -3083,7 -3080,7 +3080,7 @@@ retry
  /*
   * Callback function called for each extent to gather FIEMAP information.
   */
- int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
+ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
                       struct ext4_ext_cache *newex, struct ext4_extent *ex,
                       void *data)
  {
  /* fiemap flags we can handle specified here */
  #define EXT4_FIEMAP_FLAGS     (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
  
- int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo)
+ static int ext4_xattr_fiemap(struct inode *inode,
+                               struct fiemap_extent_info *fieinfo)
  {
        __u64 physical = 0;
        __u64 length;
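
Two related changes meet in the extents code above: ext4_ext_init() and ext4_ext_release() now key off the on-disk EXT4_FEATURE_INCOMPAT_EXTENTS flag instead of the removed "extents" mount option, and the EXT4_HAS_*_FEATURE() macros in the ext4.h diff were normalized to yield 0 or 1 by appending "!= 0". The sketch below shows one hazard the normalization guards against: a raw "flags & mask" result silently truncates to 0 when stored in a narrower variable. The flag value used here is arbitrary and only for illustration.

#include <stdio.h>
#include <stdint.h>

#define TOY_FEATURE_EXTENTS     0x0100u         /* a bit above the low byte */

int main(void)
{
        uint32_t incompat = TOY_FEATURE_EXTENTS;

        unsigned char raw  = incompat & TOY_FEATURE_EXTENTS;         /* 0x100 truncates to 0 */
        unsigned char norm = (incompat & TOY_FEATURE_EXTENTS) != 0;  /* always 0 or 1 */

        printf("raw=%u norm=%u\n", (unsigned)raw, (unsigned)norm);   /* raw=0 norm=1 */
        return 0;
}
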
diff --combined fs/ext4/inode.c
index 98d3fe7057efcd8a0a60048e37c900b20bb5a7c0,4cac8da4e0c18ae58549f4dc507eb0902fc8d3b7..a6444cee0c7e086c4b76b6906011d11b32de7c7f
@@@ -72,12 -72,17 +72,17 @@@ static int ext4_inode_is_fast_symlink(s
   * "bh" may be NULL: a metadata block may have been freed from memory
   * but there may still be a record of it in the journal, and that record
   * still needs to be revoked.
+  *
+  * If the handle isn't valid we're not journaling so there's nothing to do.
   */
  int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                        struct buffer_head *bh, ext4_fsblk_t blocknr)
  {
        int err;
  
+       if (!ext4_handle_valid(handle))
+               return 0;
        might_sleep();
  
        BUFFER_TRACE(bh, "enter");
@@@ -170,7 -175,9 +175,9 @@@ static handle_t *start_transaction(stru
   */
  static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
  {
-       if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+       if (!ext4_handle_valid(handle))
+               return 0;
+       if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
                return 0;
        if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
                return 0;
   */
  static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
  {
+       BUG_ON(EXT4_JOURNAL(inode) == NULL);
        jbd_debug(2, "restarting handle %p\n", handle);
        return ext4_journal_restart(handle, blocks_for_truncate(inode));
  }
@@@ -216,7 -224,7 +224,7 @@@ void ext4_delete_inode(struct inode *in
        }
  
        if (IS_SYNC(inode))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
        inode->i_size = 0;
        err = ext4_mark_inode_dirty(handle, inode);
        if (err) {
         * enough credits left in the handle to remove the inode from
         * the orphan list and set the dtime field.
         */
-       if (handle->h_buffer_credits < 3) {
+       if (!ext4_handle_has_enough_credits(handle, 3)) {
                err = ext4_journal_extend(handle, 3);
                if (err > 0)
                        err = ext4_journal_restart(handle, 3);
@@@ -506,10 -514,10 +514,10 @@@ static ext4_fsblk_t ext4_find_goal(stru
   *    return the total number of blocks to be allocate, including the
   *    direct and indirect blocks.
   */
- static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
+ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
                int blocks_to_boundary)
  {
-       unsigned long count = 0;
+       unsigned int count = 0;
  
        /*
         * Simple case, [t,d]Indirect block(s) has not allocated yet
@@@ -547,6 -555,7 +555,7 @@@ static int ext4_alloc_blocks(handle_t *
                                int indirect_blks, int blks,
                                ext4_fsblk_t new_blocks[4], int *err)
  {
+       struct ext4_allocation_request ar;
        int target, i;
        unsigned long count = 0, blk_allocated = 0;
        int index = 0;
        if (!target)
                goto allocated;
        /* Now allocate data blocks */
-       count = target;
-       /* allocating blocks for data blocks */
-       current_block = ext4_new_blocks(handle, inode, iblock,
-                                               goal, &count, err);
+       memset(&ar, 0, sizeof(ar));
+       ar.inode = inode;
+       ar.goal = goal;
+       ar.len = target;
+       ar.logical = iblock;
+       if (S_ISREG(inode->i_mode))
+               /* enable in-core preallocation only for regular files */
+               ar.flags = EXT4_MB_HINT_DATA;
+       current_block = ext4_mb_new_blocks(handle, &ar, err);
        if (*err && (target == blks)) {
                /*
                 * if the allocation failed and we didn't allocate
                 */
                        new_blocks[index] = current_block;
                }
-               blk_allocated += count;
+               blk_allocated += ar.len;
        }
  allocated:
        /* total number of blocks allocated for direct blocks */
@@@ -709,8 -725,8 +725,8 @@@ static int ext4_alloc_branch(handle_t *
                set_buffer_uptodate(bh);
                unlock_buffer(bh);
  
-               BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-               err = ext4_journal_dirty_metadata(handle, bh);
+               BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
                if (err)
                        goto failed;
        }
@@@ -792,8 -808,8 +808,8 @@@ static int ext4_splice_branch(handle_t 
                 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
                 */
                jbd_debug(5, "splicing indirect only\n");
-               BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata");
-               err = ext4_journal_dirty_metadata(handle, where->bh);
+               BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
+               err = ext4_handle_dirty_metadata(handle, inode, where->bh);
                if (err)
                        goto err_out;
        } else {
@@@ -840,10 -856,10 +856,10 @@@ err_out
   * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
   * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
   */
- int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
-               ext4_lblk_t iblock, unsigned long maxblocks,
-               struct buffer_head *bh_result,
-               int create, int extend_disksize)
+ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
+                                 ext4_lblk_t iblock, unsigned int maxblocks,
+                                 struct buffer_head *bh_result,
+                                 int create, int extend_disksize)
  {
        int err = -EIO;
        ext4_lblk_t offsets[4];
@@@ -1045,7 -1061,7 +1061,7 @@@ static void ext4_da_update_reserve_spac
   * It returns the error in case of allocation failure.
   */
  int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
-                       unsigned long max_blocks, struct buffer_head *bh,
+                       unsigned int max_blocks, struct buffer_head *bh,
                        int create, int extend_disksize, int flag)
  {
        int retval;
@@@ -1221,8 -1237,8 +1237,8 @@@ struct buffer_head *ext4_getblk(handle_
                                set_buffer_uptodate(bh);
                        }
                        unlock_buffer(bh);
-                       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-                       err = ext4_journal_dirty_metadata(handle, bh);
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+                       err = ext4_handle_dirty_metadata(handle, inode, bh);
                        if (!fatal)
                                fatal = err;
                } else {
@@@ -1335,6 -1351,10 +1351,10 @@@ static int ext4_write_begin(struct fil
        pgoff_t index;
        unsigned from, to;
  
+       trace_mark(ext4_write_begin,
+                  "dev %s ino %lu pos %llu len %u flags %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, flags);
        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;
@@@ -1387,7 -1407,7 +1407,7 @@@ static int write_end_fn(handle_t *handl
        if (!buffer_mapped(bh) || buffer_freed(bh))
                return 0;
        set_buffer_uptodate(bh);
-       return ext4_journal_dirty_metadata(handle, bh);
+       return ext4_handle_dirty_metadata(handle, NULL, bh);
  }
  
  /*
@@@ -1406,6 -1426,10 +1426,10 @@@ static int ext4_ordered_write_end(struc
        struct inode *inode = mapping->host;
        int ret = 0, ret2;
  
+       trace_mark(ext4_ordered_write_end,
+                  "dev %s ino %lu pos %llu len %u copied %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, copied);
        ret = ext4_jbd2_file_inode(handle, inode);
  
        if (ret == 0) {
@@@ -1444,6 -1468,10 +1468,10 @@@ static int ext4_writeback_write_end(str
        int ret = 0, ret2;
        loff_t new_i_size;
  
+       trace_mark(ext4_writeback_write_end,
+                  "dev %s ino %lu pos %llu len %u copied %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, copied);
        new_i_size = pos + copied;
        if (new_i_size > EXT4_I(inode)->i_disksize) {
                ext4_update_i_disksize(inode, new_i_size);
@@@ -1479,6 -1507,10 +1507,10 @@@ static int ext4_journalled_write_end(st
        unsigned from, to;
        loff_t new_i_size;
  
+       trace_mark(ext4_journalled_write_end,
+                  "dev %s ino %lu pos %llu len %u copied %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, copied);
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;
  
@@@ -1625,7 -1657,7 +1657,7 @@@ struct mpage_da_data 
        get_block_t *get_block;
        struct writeback_control *wbc;
        int io_done;
-       long pages_written;
+       int pages_written;
        int retval;
  };
  
   */
  static int mpage_da_submit_io(struct mpage_da_data *mpd)
  {
-       struct address_space *mapping = mpd->inode->i_mapping;
-       int ret = 0, err, nr_pages, i;
-       unsigned long index, end;
-       struct pagevec pvec;
        long pages_skipped;
+       struct pagevec pvec;
+       unsigned long index, end;
+       int ret = 0, err, nr_pages, i;
+       struct inode *inode = mpd->inode;
+       struct address_space *mapping = inode->i_mapping;
  
        BUG_ON(mpd->next_page <= mpd->first_page);
-       pagevec_init(&pvec, 0);
+       /*
+        * We need to start from the first_page to the next_page - 1
+        * to make sure we also write the mapped dirty buffer_heads.
+        * If we look at mpd->lbh.b_blocknr we would only be looking
+        * at the currently mapped buffer_heads.
+        */
        index = mpd->first_page;
        end = mpd->next_page - 1;
  
+       pagevec_init(&pvec, 0);
        while (index <= end) {
-               /*
-                * We can use PAGECACHE_TAG_DIRTY lookup here because
-                * even though we have cleared the dirty flag on the page
-                * We still keep the page in the radix tree with tag
-                * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
-                * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
-                * which is called via the below writepage callback.
-                */
-               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                                       PAGECACHE_TAG_DIRTY,
-                                       min(end - index,
-                                       (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
  
+                       index = page->index;
+                       if (index > end)
+                               break;
+                       index++;
+                       BUG_ON(!PageLocked(page));
+                       BUG_ON(PageWriteback(page));
                        pages_skipped = mpd->wbc->pages_skipped;
                        err = mapping->a_ops->writepage(page, mpd->wbc);
                        if (!err && (pages_skipped == mpd->wbc->pages_skipped))
@@@ -1831,13 -1867,13 +1867,13 @@@ static void ext4_print_free_blocks(stru
                        ext4_count_free_blocks(inode->i_sb));
        printk(KERN_EMERG "Free/Dirty block details\n");
        printk(KERN_EMERG "free_blocks=%lld\n",
-                       percpu_counter_sum(&sbi->s_freeblocks_counter));
+                       (long long)percpu_counter_sum(&sbi->s_freeblocks_counter));
        printk(KERN_EMERG "dirty_blocks=%lld\n",
-                       percpu_counter_sum(&sbi->s_dirtyblocks_counter));
+                       (long long)percpu_counter_sum(&sbi->s_dirtyblocks_counter));
        printk(KERN_EMERG "Block reservation details\n");
-       printk(KERN_EMERG "i_reserved_data_blocks=%lu\n",
+       printk(KERN_EMERG "i_reserved_data_blocks=%u\n",
                        EXT4_I(inode)->i_reserved_data_blocks);
-       printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n",
+       printk(KERN_EMERG "i_reserved_meta_blocks=%u\n",
                        EXT4_I(inode)->i_reserved_meta_blocks);
        return;
  }
@@@ -2087,11 -2123,29 +2123,29 @@@ static int __mpage_da_writepage(struct 
                bh = head;
                do {
                        BUG_ON(buffer_locked(bh));
+                       /*
+                        * We need to try to allocate
+                        * unmapped blocks in the same page.
+                        * Otherwise we won't make progress
+                        * with the page in ext4_da_writepage
+                        */
                        if (buffer_dirty(bh) &&
                                (!buffer_mapped(bh) || buffer_delay(bh))) {
                                mpage_add_bh_to_extent(mpd, logical, bh);
                                if (mpd->io_done)
                                        return MPAGE_DA_EXTENT_TAIL;
+                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+                               /*
+                                * mapped dirty buffer. We need to update
+                                * the b_state because we look at
+                                * b_state in mpage_da_map_blocks. We don't
+                                * update b_size because if we find an
+                                * unmapped buffer_head later we need to
+                                * use the b_state flag of that buffer_head.
+                                */
+                               if (mpd->lbh.b_size == 0)
+                                       mpd->lbh.b_state =
+                                               bh->b_state & BH_FLAGS;
                        }
                        logical++;
                } while ((bh = bh->b_this_page) != head);
@@@ -2269,10 -2323,13 +2323,13 @@@ static int ext4_da_writepage(struct pag
  {
        int ret = 0;
        loff_t size;
-       unsigned long len;
+       unsigned int len;
        struct buffer_head *page_bufs;
        struct inode *inode = page->mapping->host;
  
+       trace_mark(ext4_da_writepage,
+                  "dev %s ino %lu page_index %lu",
+                  inode->i_sb->s_id, inode->i_ino, page->index);
        size = i_size_read(inode);
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
@@@ -2378,10 -2435,25 +2435,25 @@@ static int ext4_da_writepages(struct ad
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
        int no_nrwrite_index_update;
-       long pages_written = 0, pages_skipped;
+       int pages_written = 0;
+       long pages_skipped;
        int needed_blocks, ret = 0, nr_to_writebump = 0;
        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
  
+       trace_mark(ext4_da_writepages,
+                  "dev %s ino %lu nr_t_write %ld "
+                  "pages_skipped %ld range_start %llu "
+                  "range_end %llu nonblocking %d "
+                  "for_kupdate %d for_reclaim %d "
+                  "for_writepages %d range_cyclic %d",
+                  inode->i_sb->s_id, inode->i_ino,
+                  wbc->nr_to_write, wbc->pages_skipped,
+                  (unsigned long long) wbc->range_start,
+                  (unsigned long long) wbc->range_end,
+                  wbc->nonblocking, wbc->for_kupdate,
+                  wbc->for_reclaim, wbc->for_writepages,
+                  wbc->range_cyclic);
        /*
         * No pages to write? This is mainly a kludge to avoid starting
         * a transaction for special inodes like journal inode on last iput()
         */
        if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
                return 0;
+       /*
+        * If the filesystem has aborted, it is read-only, so return
+        * right away instead of dumping stack traces later on that
+        * will obscure the real source of the problem.  We test
+        * EXT4_MOUNT_ABORT instead of sb->s_flag's MS_RDONLY because
+        * the latter could be true if the filesystem is mounted
+        * read-only, and in that case, ext4_da_writepages should
+        * *never* be called, so if that ever happens, we would want
+        * the stack trace.
+        */
+       if (unlikely(sbi->s_mount_opt & EXT4_MOUNT_ABORT))
+               return -EROFS;
        /*
         * Make sure nr_to_write is >= sbi->s_mb_stream_request
         * This make sure small files blocks are allocated in
                handle = ext4_journal_start(inode, needed_blocks);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
-                       printk(KERN_EMERG "%s: jbd2_start: "
+                       printk(KERN_CRIT "%s: jbd2_start: "
                               "%ld pages, ino %lu; err %d\n", __func__,
                                wbc->nr_to_write, inode->i_ino, ret);
                        dump_stack();
@@@ -2486,6 -2572,14 +2572,14 @@@ out_writepages
        if (!no_nrwrite_index_update)
                wbc->no_nrwrite_index_update = 0;
        wbc->nr_to_write -= nr_to_writebump;
+       trace_mark(ext4_da_writepage_result,
+                  "dev %s ino %lu ret %d pages_written %d "
+                  "pages_skipped %ld congestion %d "
+                  "more_io %d no_nrwrite_index_update %d",
+                  inode->i_sb->s_id, inode->i_ino, ret,
+                  pages_written, wbc->pages_skipped,
+                  wbc->encountered_congestion, wbc->more_io,
+                  wbc->no_nrwrite_index_update);
        return ret;
  }
  
@@@ -2498,7 -2592,7 +2592,7 @@@ static int ext4_nonda_switch(struct sup
        /*
         * switch to non delalloc mode if we are running low
         * on free block. The free block accounting via percpu
 -       * counters can get slightly wrong with FBC_BATCH getting
 +       * counters can get slightly wrong with percpu_counter_batch getting
         * accumulated on each CPU without updating global counters
         * Delalloc need an accurate free block accounting. So switch
         * to non delalloc when we are near to error range.
@@@ -2537,6 -2631,11 +2631,11 @@@ static int ext4_da_write_begin(struct f
                                        len, flags, pagep, fsdata);
        }
        *fsdata = (void *)0;
+       trace_mark(ext4_da_write_begin,
+                  "dev %s ino %lu pos %llu len %u flags %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, flags);
  retry:
        /*
         * With delayed allocation, we don't log the i_disksize update
@@@ -2626,6 -2725,10 +2725,10 @@@ static int ext4_da_write_end(struct fil
                }
        }
  
+       trace_mark(ext4_da_write_end,
+                  "dev %s ino %lu pos %llu len %u copied %u",
+                  inode->i_sb->s_id, inode->i_ino,
+                  (unsigned long long) pos, len, copied);
        start = pos & (PAGE_CACHE_SIZE - 1);
        end = start + copied - 1;
  
@@@ -2718,7 -2821,10 +2821,10 @@@ static sector_t ext4_bmap(struct addres
                filemap_write_and_wait(mapping);
        }
  
-       if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
+       BUG_ON(!EXT4_JOURNAL(inode) &&
+              EXT4_I(inode)->i_state & EXT4_STATE_JDATA);
+       if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
                /*
                 * This is a REALLY heavyweight approach, but the use of
                 * bmap on dirty files is expected to be extremely rare:
@@@ -2836,6 -2942,9 +2942,9 @@@ static int ext4_normal_writepage(struc
        loff_t size = i_size_read(inode);
        loff_t len;
  
+       trace_mark(ext4_normal_writepage,
+                  "dev %s ino %lu page_index %lu",
+                  inode->i_sb->s_id, inode->i_ino, page->index);
        J_ASSERT(PageLocked(page));
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
@@@ -2921,6 -3030,9 +3030,9 @@@ static int ext4_journalled_writepage(st
        loff_t size = i_size_read(inode);
        loff_t len;
  
+       trace_mark(ext4_journalled_writepage,
+                  "dev %s ino %lu page_index %lu",
+                  inode->i_sb->s_id, inode->i_ino, page->index);
        J_ASSERT(PageLocked(page));
        if (page->index == size >> PAGE_CACHE_SHIFT)
                len = size & ~PAGE_CACHE_MASK;
@@@ -2989,7 -3101,10 +3101,10 @@@ static void ext4_invalidatepage(struct 
        if (offset == 0)
                ClearPageChecked(page);
  
-       jbd2_journal_invalidatepage(journal, page, offset);
+       if (journal)
+               jbd2_journal_invalidatepage(journal, page, offset);
+       else
+               block_invalidatepage(page, offset);
  }
  
  static int ext4_releasepage(struct page *page, gfp_t wait)
        WARN_ON(PageChecked(page));
        if (!page_has_buffers(page))
                return 0;
-       return jbd2_journal_try_to_free_buffers(journal, page, wait);
+       if (journal)
+               return jbd2_journal_try_to_free_buffers(journal, page, wait);
+       else
+               return try_to_free_buffers(page);
  }
  
  /*
@@@ -3271,7 -3389,7 +3389,7 @@@ int ext4_block_truncate_page(handle_t *
  
        err = 0;
        if (ext4_should_journal_data(inode)) {
-               err = ext4_journal_dirty_metadata(handle, bh);
+               err = ext4_handle_dirty_metadata(handle, inode, bh);
        } else {
                if (ext4_should_order_data(inode))
                        err = ext4_jbd2_file_inode(handle, inode);
@@@ -3395,8 -3513,8 +3513,8 @@@ static void ext4_clear_blocks(handle_t 
        __le32 *p;
        if (try_to_extend_transaction(handle, inode)) {
                if (bh) {
-                       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-                       ext4_journal_dirty_metadata(handle, bh);
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+                       ext4_handle_dirty_metadata(handle, inode, bh);
                }
                ext4_mark_inode_dirty(handle, inode);
                ext4_journal_test_restart(handle, inode);
@@@ -3496,7 -3614,7 +3614,7 @@@ static void ext4_free_data(handle_t *ha
                                  count, block_to_free_p, p);
  
        if (this_bh) {
-               BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata");
+               BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
  
                /*
                 * The buffer head should have an attached journal head at this
                 * the block was cleared. Check for this instead of OOPSing.
                 */
                if (bh2jh(this_bh))
-                       ext4_journal_dirty_metadata(handle, this_bh);
+                       ext4_handle_dirty_metadata(handle, inode, this_bh);
                else
                        ext4_error(inode->i_sb, __func__,
                                   "circular indirect block detected, "
@@@ -3535,7 -3653,7 +3653,7 @@@ static void ext4_free_branches(handle_
        ext4_fsblk_t nr;
        __le32 *p;
  
-       if (is_handle_aborted(handle))
+       if (ext4_handle_is_aborted(handle))
                return;
  
        if (depth--) {
                         * will merely complain about releasing a free block,
                         * rather than leaking blocks.
                         */
-                       if (is_handle_aborted(handle))
+                       if (ext4_handle_is_aborted(handle))
                                return;
                        if (try_to_extend_transaction(handle, inode)) {
                                ext4_mark_inode_dirty(handle, inode);
                                                                   parent_bh)){
                                        *p = 0;
                                        BUFFER_TRACE(parent_bh,
-                                       "call ext4_journal_dirty_metadata");
-                                       ext4_journal_dirty_metadata(handle,
-                                                                   parent_bh);
+                                       "call ext4_handle_dirty_metadata");
+                                       ext4_handle_dirty_metadata(handle,
+                                                                  inode,
+                                                                  parent_bh);
                                }
                        }
                }
@@@ -3814,7 -3933,7 +3933,7 @@@ do_indirects
         * synchronous
         */
        if (IS_SYNC(inode))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  out_stop:
        /*
         * If this was a simple ftruncate(), and the file will remain alive
@@@ -3844,7 -3963,7 +3963,7 @@@ static int __ext4_get_inode_loc(struct 
        ext4_fsblk_t            block;
        int                     inodes_per_block, inode_offset;
  
-       iloc->bh = 0;
+       iloc->bh = NULL;
        if (!ext4_valid_inum(sb, inode->i_ino))
                return -EIO;
  
@@@ -3951,7 -4070,7 +4070,7 @@@ make_io
                        num = EXT4_INODES_PER_GROUP(sb);
                        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                       EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
-                               num -= le16_to_cpu(gdp->bg_itable_unused);
+                               num -= ext4_itable_unused_count(sb, gdp);
                        table += num / inodes_per_block;
                        if (end > table)
                                end = table;
@@@ -4313,8 -4432,8 +4432,8 @@@ static int ext4_do_update_inode(handle_
                        EXT4_SET_RO_COMPAT_FEATURE(sb,
                                        EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
                        sb->s_dirt = 1;
-                       handle->h_sync = 1;
-                       err = ext4_journal_dirty_metadata(handle,
+                       ext4_handle_sync(handle);
+                       err = ext4_handle_dirty_metadata(handle, inode,
                                        EXT4_SB(sb)->s_sbh);
                }
        }
                raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
        }
  
-       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-       rc = ext4_journal_dirty_metadata(handle, bh);
+       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+       rc = ext4_handle_dirty_metadata(handle, inode, bh);
        if (!err)
                err = rc;
        ei->i_state &= ~EXT4_STATE_NEW;
@@@ -4406,6 -4524,25 +4524,25 @@@ int ext4_write_inode(struct inode *inod
        return ext4_force_commit(inode->i_sb);
  }
  
+ int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
+ {
+       int err = 0;
+       mark_buffer_dirty(bh);
+       if (inode && inode_needs_sync(inode)) {
+               sync_dirty_buffer(bh);
+               if (buffer_req(bh) && !buffer_uptodate(bh)) {
+                       ext4_error(inode->i_sb, __func__,
+                                  "IO error syncing inode, "
+                                  "inode=%lu, block=%llu",
+                                  inode->i_ino,
+                                  (unsigned long long)bh->b_blocknr);
+                       err = -EIO;
+               }
+       }
+       return err;
+ }
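
__ext4_write_dirty_metadata above marks the buffer dirty and, only when the inode demands synchronous semantics, forces it out and reports -EIO if the write-back did not complete. Below is a userspace analogy of that mark-then-maybe-sync shape built on write()/fsync(); it is not the kernel buffer-head path, and the file name and helper are made up for illustration.

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Userspace analogy: queue the data as "dirty" with write(), and only
     * force it to stable storage when the caller asked for synchronous
     * semantics, mapping a failed flush to -EIO. */
    static int write_metadata(int fd, const void *buf, size_t len, int needs_sync)
    {
            if (write(fd, buf, len) != (ssize_t)len)
                    return -EIO;
            if (needs_sync && fsync(fd) != 0)
                    return -EIO;            /* like the !buffer_uptodate() case */
            return 0;
    }

    int main(void)
    {
            int fd = open("metadata.bin", O_CREAT | O_WRONLY | O_TRUNC, 0600);
            char block[64];

            if (fd < 0)
                    return 1;
            memset(block, 0xab, sizeof(block));
            printf("ret=%d\n", write_metadata(fd, block, sizeof(block), 1));
            close(fd);
            return 0;
    }
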
  /*
   * ext4_setattr()
   *
  ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
                         struct ext4_iloc *iloc)
  {
-       int err = 0;
-       if (handle) {
-               err = ext4_get_inode_loc(inode, iloc);
-               if (!err) {
-                       BUFFER_TRACE(iloc->bh, "get_write_access");
-                       err = ext4_journal_get_write_access(handle, iloc->bh);
-                       if (err) {
-                               brelse(iloc->bh);
-                               iloc->bh = NULL;
-                       }
+       int err;
+       err = ext4_get_inode_loc(inode, iloc);
+       if (!err) {
+               BUFFER_TRACE(iloc->bh, "get_write_access");
+               err = ext4_journal_get_write_access(handle, iloc->bh);
+               if (err) {
+                       brelse(iloc->bh);
+                       iloc->bh = NULL;
                }
        }
        ext4_std_error(inode->i_sb, err);
@@@ -4791,7 -4927,8 +4927,8 @@@ int ext4_mark_inode_dirty(handle_t *han
  
        might_sleep();
        err = ext4_reserve_inode_write(handle, inode, &iloc);
-       if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+       if (ext4_handle_valid(handle) &&
+           EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
            !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
                /*
                 * We need extra buffer credits since we may write into EA block
@@@ -4843,6 -4980,11 +4980,11 @@@ void ext4_dirty_inode(struct inode *ino
        handle_t *current_handle = ext4_journal_current_handle();
        handle_t *handle;
  
+       if (!ext4_handle_valid(current_handle)) {
+               ext4_mark_inode_dirty(current_handle, inode);
+               return;
+       }
        handle = ext4_journal_start(inode, 2);
        if (IS_ERR(handle))
                goto out;
@@@ -4880,8 -5022,9 +5022,9 @@@ static int ext4_pin_inode(handle_t *han
                        BUFFER_TRACE(iloc.bh, "get_write_access");
                        err = jbd2_journal_get_write_access(handle, iloc.bh);
                        if (!err)
-                               err = ext4_journal_dirty_metadata(handle,
-                                                                 iloc.bh);
+                               err = ext4_handle_dirty_metadata(handle,
+                                                                inode,
+                                                                iloc.bh);
                        brelse(iloc.bh);
                }
        }
@@@ -4907,6 -5050,8 +5050,8 @@@ int ext4_change_inode_journal_flag(stru
         */
  
        journal = EXT4_JOURNAL(inode);
+       if (!journal)
+               return 0;
        if (is_journal_aborted(journal))
                return -EROFS;
  
                return PTR_ERR(handle);
  
        err = ext4_mark_inode_dirty(handle, inode);
-       handle->h_sync = 1;
+       ext4_handle_sync(handle);
        ext4_journal_stop(handle);
        ext4_std_error(inode->i_sb, err);
  
diff --combined fs/ext4/namei.c
index 4b8d431d7dff8b6763f6433db2fa4ce1d2f90235,183a09a8b14e63346d5ff5e00719addfe390d48c..fec0b4c2f5f1904c89436c1f825e82338489232f
@@@ -74,6 -74,10 +74,6 @@@ static struct buffer_head *ext4_append(
  #define assert(test) J_ASSERT(test)
  #endif
  
 -#ifndef swap
 -#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
 -#endif
 -
  #ifdef DX_DEBUG
  #define dxtrace(command) command
  #else
@@@ -368,6 -372,8 +368,8 @@@ dx_probe(const struct qstr *d_name, str
                goto fail;
        }
        hinfo->hash_version = root->info.hash_version;
+       if (hinfo->hash_version <= DX_HASH_TEA)
+               hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
        hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
        if (d_name)
                ext4fs_dirhash(d_name->name, d_name->len, hinfo);
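
The hash_version += s_hash_unsigned adjustment above exists because the legacy and TEA dirhash variants fed the file name through plain char, so directories created on platforms where char is unsigned hash bytes above 0x7f differently than on platforms where char is signed. A standalone sketch of that portability trap follows, with a toy accumulator standing in for ext4fs_dirhash (the real hash functions are different; only the signedness effect is the point).

    #include <stdio.h>

    /* Toy accumulator, not the real dirhash. */
    static unsigned int toy_hash_signed(const char *name, int len)
    {
            unsigned int h = 0;
            const signed char *p = (const signed char *)name;

            while (len--)
                    h = h * 31 + (unsigned int)*p++;   /* 0xe9 -> 0xffffffe9 */
            return h;
    }

    static unsigned int toy_hash_unsigned(const char *name, int len)
    {
            unsigned int h = 0;
            const unsigned char *p = (const unsigned char *)name;

            while (len--)
                    h = h * 31 + *p++;                 /* 0xe9 -> 0x000000e9 */
            return h;
    }

    int main(void)
    {
            const char name[] = "caf\xe9";             /* "café" in latin-1 */

            printf("signed-char hash:   %#x\n", toy_hash_signed(name, 4));
            printf("unsigned-char hash: %#x\n", toy_hash_unsigned(name, 4));
            return 0;
    }
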
@@@ -637,6 -643,9 +639,9 @@@ int ext4_htree_fill_tree(struct file *d
        dir = dir_file->f_path.dentry->d_inode;
        if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) {
                hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+               if (hinfo.hash_version <= DX_HASH_TEA)
+                       hinfo.hash_version +=
+                               EXT4_SB(dir->i_sb)->s_hash_unsigned;
                hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
                count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
                                               start_hash, start_minor_hash);
@@@ -802,7 -811,7 +807,7 @@@ static inline int ext4_match (int len, 
  static inline int search_dirblock(struct buffer_head *bh,
                                  struct inode *dir,
                                  const struct qstr *d_name,
-                                 unsigned long offset,
+                                 unsigned int offset,
                                  struct ext4_dir_entry_2 ** res_dir)
  {
        struct ext4_dir_entry_2 * de;
@@@ -1039,11 -1048,11 +1044,11 @@@ static struct dentry *ext4_lookup(struc
        bh = ext4_find_entry(dir, &dentry->d_name, &de);
        inode = NULL;
        if (bh) {
-               unsigned long ino = le32_to_cpu(de->inode);
+               __u32 ino = le32_to_cpu(de->inode);
                brelse(bh);
                if (!ext4_valid_inum(dir->i_sb, ino)) {
                        ext4_error(dir->i_sb, "ext4_lookup",
-                                  "bad inode number: %lu", ino);
+                                  "bad inode number: %u", ino);
                        return ERR_PTR(-EIO);
                }
                inode = ext4_iget(dir->i_sb, ino);
  
  struct dentry *ext4_get_parent(struct dentry *child)
  {
-       unsigned long ino;
+       __u32 ino;
        struct inode *inode;
        static const struct qstr dotdot = {
                .name = "..",
  
        if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
                ext4_error(child->d_inode->i_sb, "ext4_get_parent",
-                          "bad inode number: %lu", ino);
+                          "bad inode number: %u", ino);
                return ERR_PTR(-EIO);
        }
  
@@@ -1162,9 -1171,9 +1167,9 @@@ static struct ext4_dir_entry_2 *do_spli
        u32 hash2;
        struct dx_map_entry *map;
        char *data1 = (*bh)->b_data, *data2;
-       unsigned split, move, size, i;
+       unsigned split, move, size;
        struct ext4_dir_entry_2 *de = NULL, *de2;
-       int     err = 0;
+       int     err = 0, i;
  
        bh2 = ext4_append (handle, dir, &newblock, &err);
        if (!(bh2)) {
                de = de2;
        }
        dx_insert_block(frame, hash2 + continued, newblock);
-       err = ext4_journal_dirty_metadata(handle, bh2);
+       err = ext4_handle_dirty_metadata(handle, dir, bh2);
        if (err)
                goto journal_error;
-       err = ext4_journal_dirty_metadata(handle, frame->bh);
+       err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
        if (err)
                goto journal_error;
        brelse(bh2);
@@@ -1262,7 -1271,7 +1267,7 @@@ static int add_dirent_to_buf(handle_t *
        struct inode    *dir = dentry->d_parent->d_inode;
        const char      *name = dentry->d_name.name;
        int             namelen = dentry->d_name.len;
-       unsigned long   offset = 0;
+       unsigned int    offset = 0;
        unsigned short  reclen;
        int             nlen, rlen, err;
        char            *top;
        ext4_update_dx_flag(dir);
        dir->i_version++;
        ext4_mark_inode_dirty(handle, dir);
-       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-       err = ext4_journal_dirty_metadata(handle, bh);
+       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+       err = ext4_handle_dirty_metadata(handle, dir, bh);
        if (err)
                ext4_std_error(dir->i_sb, err);
        brelse(bh);
@@@ -1404,6 -1413,8 +1409,8 @@@ static int make_indexed_dir(handle_t *h
  
        /* Initialize as for dx_probe */
        hinfo.hash_version = root->info.hash_version;
+       if (hinfo.hash_version <= DX_HASH_TEA)
+               hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
        hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
        ext4fs_dirhash(name, namelen, &hinfo);
        frame = frames;
@@@ -1433,7 -1444,6 +1440,6 @@@ static int ext4_add_entry(handle_t *han
                          struct inode *inode)
  {
        struct inode *dir = dentry->d_parent->d_inode;
-       unsigned long offset;
        struct buffer_head *bh;
        struct ext4_dir_entry_2 *de;
        struct super_block *sb;
                ext4_mark_inode_dirty(handle, dir);
        }
        blocks = dir->i_size >> sb->s_blocksize_bits;
-       for (block = 0, offset = 0; block < blocks; block++) {
+       for (block = 0; block < blocks; block++) {
                bh = ext4_bread(handle, dir, block, 0, &retval);
                if(!bh)
                        return retval;
@@@ -1570,7 -1580,7 +1576,7 @@@ static int ext4_dx_add_entry(handle_t *
                        dxtrace(dx_show_index("node", frames[1].entries));
                        dxtrace(dx_show_index("node",
                               ((struct dx_node *) bh2->b_data)->entries));
-                       err = ext4_journal_dirty_metadata(handle, bh2);
+                       err = ext4_handle_dirty_metadata(handle, inode, bh2);
                        if (err)
                                goto journal_error;
                        brelse (bh2);
                        if (err)
                                goto journal_error;
                }
-               ext4_journal_dirty_metadata(handle, frames[0].bh);
+               ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
        }
        de = do_split(handle, dir, &bh, frame, &hinfo, &err);
        if (!de)
@@@ -1642,8 -1652,8 +1648,8 @@@ static int ext4_delete_entry(handle_t *
                        else
                                de->inode = 0;
                        dir->i_version++;
-                       BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
-                       ext4_journal_dirty_metadata(handle, bh);
+                       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+                       ext4_handle_dirty_metadata(handle, dir, bh);
                        return 0;
                }
                i += ext4_rec_len_from_disk(de->rec_len);
@@@ -1721,7 -1731,7 +1727,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = ext4_new_inode (handle, dir, mode);
        err = PTR_ERR(inode);
@@@ -1755,7 -1765,7 +1761,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, mode);
        err = PTR_ERR(inode);
@@@ -1791,7 -1801,7 +1797,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
        err = PTR_ERR(inode);
        strcpy(de->name, "..");
        ext4_set_de_type(dir->i_sb, de, S_IFDIR);
        inode->i_nlink = 2;
-       BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
-       ext4_journal_dirty_metadata(handle, dir_block);
+       BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
+       ext4_handle_dirty_metadata(handle, dir, dir_block);
        brelse(dir_block);
        ext4_mark_inode_dirty(handle, inode);
        err = ext4_add_entry(handle, dentry, inode);
@@@ -1850,7 -1860,7 +1856,7 @@@ out_stop
   */
  static int empty_dir(struct inode *inode)
  {
-       unsigned long offset;
+       unsigned int offset;
        struct buffer_head *bh;
        struct ext4_dir_entry_2 *de, *de1;
        struct super_block *sb;
                                if (err)
                                        ext4_error(sb, __func__,
                                                   "error %d reading directory"
-                                                  " #%lu offset %lu",
+                                                  " #%lu offset %u",
                                                   err, inode->i_ino, offset);
                                offset += sb->s_blocksize;
                                continue;
@@@ -1933,6 -1943,9 +1939,9 @@@ int ext4_orphan_add(handle_t *handle, s
        struct ext4_iloc iloc;
        int err = 0, rc;
  
+       if (!ext4_handle_valid(handle))
+               return 0;
        lock_super(sb);
        if (!list_empty(&EXT4_I(inode)->i_orphan))
                goto out_unlock;
        /* Insert this inode at the head of the on-disk orphan list... */
        NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
        EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
-       err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+       err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
        rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
        if (!err)
                err = rc;
@@@ -1995,10 -2008,13 +2004,13 @@@ int ext4_orphan_del(handle_t *handle, s
        struct list_head *prev;
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct ext4_sb_info *sbi;
-       unsigned long ino_next;
+       __u32 ino_next;
        struct ext4_iloc iloc;
        int err = 0;
  
+       if (!ext4_handle_valid(handle))
+               return 0;
        lock_super(inode->i_sb);
        if (list_empty(&ei->i_orphan)) {
                unlock_super(inode->i_sb);
         * transaction handle with which to update the orphan list on
         * disk, but we still need to remove the inode from the linked
         * list in memory. */
-       if (!handle)
+       if (sbi->s_journal && !handle)
                goto out;
  
        err = ext4_reserve_inode_write(handle, inode, &iloc);
                goto out_err;
  
        if (prev == &sbi->s_orphan) {
-               jbd_debug(4, "superblock will point to %lu\n", ino_next);
+               jbd_debug(4, "superblock will point to %u\n", ino_next);
                BUFFER_TRACE(sbi->s_sbh, "get_write_access");
                err = ext4_journal_get_write_access(handle, sbi->s_sbh);
                if (err)
                        goto out_brelse;
                sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
-               err = ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+               err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
        } else {
                struct ext4_iloc iloc2;
                struct inode *i_prev =
                        &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
  
-               jbd_debug(4, "orphan inode %lu will point to %lu\n",
+               jbd_debug(4, "orphan inode %lu will point to %u\n",
                          i_prev->i_ino, ino_next);
                err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
                if (err)
@@@ -2082,7 -2098,7 +2094,7 @@@ static int ext4_rmdir(struct inode *dir
                goto end_rmdir;
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = dentry->d_inode;
  
@@@ -2136,7 -2152,7 +2148,7 @@@ static int ext4_unlink(struct inode *di
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        retval = -ENOENT;
        bh = ext4_find_entry(dir, &dentry->d_name, &de);
@@@ -2193,7 -2209,7 +2205,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
        err = PTR_ERR(inode);
@@@ -2256,7 -2272,7 +2268,7 @@@ retry
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        inode->i_ctime = ext4_current_time(inode);
        ext4_inc_count(handle, inode);
@@@ -2305,7 -2321,7 +2317,7 @@@ static int ext4_rename(struct inode *ol
                return PTR_ERR(handle);
  
        if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
-               handle->h_sync = 1;
+               ext4_handle_sync(handle);
  
        old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
        /*
                new_dir->i_ctime = new_dir->i_mtime =
                                        ext4_current_time(new_dir);
                ext4_mark_inode_dirty(handle, new_dir);
-               BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
-               ext4_journal_dirty_metadata(handle, new_bh);
+               BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
+               ext4_handle_dirty_metadata(handle, new_dir, new_bh);
                brelse(new_bh);
                new_bh = NULL;
        }
                BUFFER_TRACE(dir_bh, "get_write_access");
                ext4_journal_get_write_access(handle, dir_bh);
                PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
-               BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
-               ext4_journal_dirty_metadata(handle, dir_bh);
+               BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
+               ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
                ext4_dec_count(handle, old_dir);
                if (new_inode) {
                        /* checked empty_dir above, can't have another parent,
diff --combined fs/ext4/super.c
index 9494bb2493901b353ad2f3be6f93535600896624,acb69c00fd424462bfb93be640174aa9d67d2bf7..8f7e0be8ab1b6928be0c9d3d4ee64f5806e90d4a
@@@ -51,8 -51,6 +51,6 @@@ struct proc_dir_entry *ext4_proc_root
  
  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
- static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
-                              unsigned int);
  static void ext4_commit_super(struct super_block *sb,
                              struct ext4_super_block *es, int sync);
  static void ext4_mark_recovery_complete(struct super_block *sb,
@@@ -93,6 -91,38 +91,38 @@@ ext4_fsblk_t ext4_inode_table(struct su
                (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
  }
  
+ __u32 ext4_free_blks_count(struct super_block *sb,
+                             struct ext4_group_desc *bg)
+ {
+       return le16_to_cpu(bg->bg_free_blocks_count_lo) |
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+               (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
+ }
+ __u32 ext4_free_inodes_count(struct super_block *sb,
+                             struct ext4_group_desc *bg)
+ {
+       return le16_to_cpu(bg->bg_free_inodes_count_lo) |
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+               (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
+ }
+ __u32 ext4_used_dirs_count(struct super_block *sb,
+                             struct ext4_group_desc *bg)
+ {
+       return le16_to_cpu(bg->bg_used_dirs_count_lo) |
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+               (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
+ }
+ __u32 ext4_itable_unused_count(struct super_block *sb,
+                             struct ext4_group_desc *bg)
+ {
+       return le16_to_cpu(bg->bg_itable_unused_lo) |
+               (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
+               (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
+ }
  void ext4_block_bitmap_set(struct super_block *sb,
                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
  {
@@@ -117,6 -147,38 +147,38 @@@ void ext4_inode_table_set(struct super_
                bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
  }
  
+ void ext4_free_blks_set(struct super_block *sb,
+                         struct ext4_group_desc *bg, __u32 count)
+ {
+       bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
+       if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+               bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
+ }
+ void ext4_free_inodes_set(struct super_block *sb,
+                         struct ext4_group_desc *bg, __u32 count)
+ {
+       bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
+       if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+               bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
+ }
+ void ext4_used_dirs_set(struct super_block *sb,
+                         struct ext4_group_desc *bg, __u32 count)
+ {
+       bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
+       if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+               bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
+ }
+ void ext4_itable_unused_set(struct super_block *sb,
+                         struct ext4_group_desc *bg, __u32 count)
+ {
+       bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
+       if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
+               bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
+ }
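
The new accessors above widen the per-group counters to 32 bits by keeping the low 16 bits in the original descriptor field and the high 16 bits in a field that is only meaningful when the descriptor size reaches the 64-bit minimum. Here is a minimal standalone sketch of the split-and-reassemble arithmetic; it deliberately leaves out the on-disk little-endian conversions (le16_to_cpu/cpu_to_le16) that the real helpers perform, and the struct is a toy, not the ext4 descriptor layout.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    struct toy_group_desc {
            uint16_t free_blocks_lo;        /* always present           */
            uint16_t free_blocks_hi;        /* only valid on big descs  */
    };

    static void set_free_blks(struct toy_group_desc *bg, int big_desc, uint32_t count)
    {
            bg->free_blocks_lo = (uint16_t)count;
            if (big_desc)
                    bg->free_blocks_hi = (uint16_t)(count >> 16);
    }

    static uint32_t get_free_blks(const struct toy_group_desc *bg, int big_desc)
    {
            return bg->free_blocks_lo |
                   (big_desc ? (uint32_t)bg->free_blocks_hi << 16 : 0);
    }

    int main(void)
    {
            struct toy_group_desc bg = { 0, 0 };

            set_free_blks(&bg, 1, 0x12345);
            assert(get_free_blks(&bg, 1) == 0x12345);

            /* With a small (32-byte) descriptor the hi half is ignored,
             * so counts silently truncate to 16 bits. */
            set_free_blks(&bg, 0, 0x12345);
            printf("small desc reads back %#x\n", get_free_blks(&bg, 0));
            return 0;
    }
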
  /*
   * Wrappers for jbd2_journal_start/end.
   *
@@@ -136,13 -198,19 +198,19 @@@ handle_t *ext4_journal_start_sb(struct 
         * backs (eg. EIO in the commit thread), then we still need to
         * take the FS itself readonly cleanly. */
        journal = EXT4_SB(sb)->s_journal;
-       if (is_journal_aborted(journal)) {
-               ext4_abort(sb, __func__,
-                          "Detected aborted journal");
-               return ERR_PTR(-EROFS);
+       if (journal) {
+               if (is_journal_aborted(journal)) {
+                       ext4_abort(sb, __func__,
+                                  "Detected aborted journal");
+                       return ERR_PTR(-EROFS);
+               }
+               return jbd2_journal_start(journal, nblocks);
        }
-       return jbd2_journal_start(journal, nblocks);
+       /*
+        * We're not journaling, return the appropriate indication.
+        */
+       current->journal_info = EXT4_NOJOURNAL_HANDLE;
+       return current->journal_info;
  }
  
  /*
@@@ -157,6 -225,14 +225,14 @@@ int __ext4_journal_stop(const char *whe
        int err;
        int rc;
  
+       if (!ext4_handle_valid(handle)) {
+               /*
+                * Do this here since we don't call jbd2_journal_stop() in
+                * no-journal mode.
+                */
+               current->journal_info = NULL;
+               return 0;
+       }
        sb = handle->h_transaction->t_journal->j_private;
        err = handle->h_err;
        rc = jbd2_journal_stop(handle);
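
With no journal present, ext4_journal_start_sb hands back the reserved EXT4_NOJOURNAL_HANDLE value through current->journal_info instead of a real jbd2 handle, and __ext4_journal_stop clears it again; ext4_handle_valid() (defined elsewhere in this series) is what tells the two cases apart. The sketch below is a standalone illustration of that sentinel-pointer pattern, using a thread-local slot as a stand-in for current->journal_info; the names and the validity test are illustrative, not the ext4 definitions.

    #include <stdio.h>

    struct handle;                               /* opaque journal handle     */

    /* Reserved non-NULL value meaning "running without a journal". */
    #define NOJOURNAL_HANDLE ((struct handle *)0x1)

    static _Thread_local struct handle *journal_info; /* ~ current->journal_info */

    static int handle_valid(struct handle *h)
    {
            return (unsigned long)h > 1;         /* neither NULL nor sentinel */
    }

    static struct handle *journal_start(struct handle *journal)
    {
            if (journal)
                    return journal;              /* real handle path          */
            journal_info = NOJOURNAL_HANDLE;     /* remember no-journal mode  */
            return journal_info;
    }

    static int journal_stop(struct handle *h)
    {
            if (!handle_valid(h)) {
                    journal_info = NULL;         /* nothing to commit         */
                    return 0;
            }
            /* real jbd2_journal_stop() would run here */
            return 0;
    }

    int main(void)
    {
            struct handle *h = journal_start(NULL);

            printf("valid=%d\n", handle_valid(h));   /* prints valid=0 */
            return journal_stop(h);
    }
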
@@@ -174,6 -250,8 +250,8 @@@ void ext4_journal_abort_handle(const ch
        char nbuf[16];
        const char *errstr = ext4_decode_error(NULL, err, nbuf);
  
+       BUG_ON(!ext4_handle_valid(handle));
        if (bh)
                BUFFER_TRACE(bh, "abort");
  
@@@ -350,6 -428,44 +428,44 @@@ void ext4_warning(struct super_block *s
        va_end(args);
  }
  
+ void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
+                               const char *function, const char *fmt, ...)
+ __releases(bitlock)
+ __acquires(bitlock)
+ {
+       va_list args;
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+       va_start(args, fmt);
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
+       vprintk(fmt, args);
+       printk("\n");
+       va_end(args);
+       if (test_opt(sb, ERRORS_CONT)) {
+               EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+               es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+               ext4_commit_super(sb, es, 0);
+               return;
+       }
+       ext4_unlock_group(sb, grp);
+       ext4_handle_error(sb);
+       /*
+        * We only get here in the ERRORS_RO case; relocking the group
+        * may be dangerous, but nothing bad will happen since the
+        * filesystem will have already been marked read/only and the
+        * journal has been aborted.  We return 1 as a hint to callers
+        * who might want to use the return value from
+        * ext4_grp_locked_error() to distinguish between the
+        * ERRORS_CONT and ERRORS_RO case, and perhaps return more
+        * aggressively from the ext4 function in question, with a
+        * more appropriate error code.
+        */
+       ext4_lock_group(sb, grp);
+       return;
+ }
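
ext4_grp_locked_error is entered with the group's lock held, and in the errors=remount-ro case it has to drop that lock before escalating (the error path can block), then retake it so the caller's own unlock still balances. The following is a small pthread sketch of that drop-report-reacquire shape, under the assumption that the caller always holds the lock on entry; it uses an ordinary mutex where ext4 uses the group bit-lock.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;

    /* May sleep or do heavyweight work, so it must not run under group_lock. */
    static void handle_error(const char *msg)
    {
            fprintf(stderr, "filesystem error: %s\n", msg);
    }

    /* Called with group_lock held; returns with group_lock held again. */
    static void grp_locked_error(const char *msg)
    {
            pthread_mutex_unlock(&group_lock);   /* drop before blocking work */
            handle_error(msg);
            pthread_mutex_lock(&group_lock);     /* rebalance for the caller  */
    }

    int main(void)
    {
            pthread_mutex_lock(&group_lock);
            grp_locked_error("bitmap corrupted");
            pthread_mutex_unlock(&group_lock);   /* caller's unlock still valid */
            return 0;
    }
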
  void ext4_update_dynamic_rev(struct super_block *sb)
  {
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@@ -389,7 -505,7 +505,7 @@@ static struct block_device *ext4_blkdev
        return bdev;
  
  fail:
-       printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n",
+       printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
                        __bdevname(dev, b), PTR_ERR(bdev));
        return NULL;
  }
@@@ -448,11 -564,13 +564,13 @@@ static void ext4_put_super(struct super
        ext4_mb_release(sb);
        ext4_ext_release(sb);
        ext4_xattr_put_super(sb);
-       err = jbd2_journal_destroy(sbi->s_journal);
-       sbi->s_journal = NULL;
-       if (err < 0)
-               ext4_abort(sb, __func__, "Couldn't clean up the journal");
+       if (sbi->s_journal) {
+               err = jbd2_journal_destroy(sbi->s_journal);
+               sbi->s_journal = NULL;
+               if (err < 0)
+                       ext4_abort(sb, __func__,
+                                  "Couldn't clean up the journal");
+       }
        if (!(sb->s_flags & MS_RDONLY)) {
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
@@@ -522,6 -640,11 +640,11 @@@ static struct inode *ext4_alloc_inode(s
        memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
        INIT_LIST_HEAD(&ei->i_prealloc_list);
        spin_lock_init(&ei->i_prealloc_lock);
+       /*
+        * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
+        * therefore it can be null here.  Don't check it, just initialize
+        * jinode.
+        */
        jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
        ei->i_reserved_data_blocks = 0;
        ei->i_reserved_meta_blocks = 0;
@@@ -588,7 -711,8 +711,8 @@@ static void ext4_clear_inode(struct ino
        }
  #endif
        ext4_discard_preallocations(inode);
-       jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
+       if (EXT4_JOURNAL(inode))
+               jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
                                       &EXT4_I(inode)->jinode);
  }
  
@@@ -681,10 -805,19 +805,19 @@@ static int ext4_show_options(struct seq
  #endif
        if (!test_opt(sb, RESERVATION))
                seq_puts(seq, ",noreservation");
-       if (sbi->s_commit_interval) {
+       if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
                seq_printf(seq, ",commit=%u",
                           (unsigned) (sbi->s_commit_interval / HZ));
        }
+       if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
+               seq_printf(seq, ",min_batch_time=%u",
+                          (unsigned) sbi->s_min_batch_time);
+       }
+       if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
+               seq_printf(seq, ",max_batch_time=%u",
+                          (unsigned) sbi->s_max_batch_time);
+       }
        /*
         * We're changing the default of barrier mount option, so
         * let's always display its mount state so it's clear what its
                seq_puts(seq, ",journal_async_commit");
        if (test_opt(sb, NOBH))
                seq_puts(seq, ",nobh");
-       if (!test_opt(sb, EXTENTS))
-               seq_puts(seq, ",noextents");
        if (test_opt(sb, I_VERSION))
                seq_puts(seq, ",i_version");
        if (!test_opt(sb, DELALLOC))
@@@ -772,6 -903,25 +903,25 @@@ static struct dentry *ext4_fh_to_parent
                                    ext4_nfs_get_inode);
  }
  
+ /*
+  * Try to release metadata pages (indirect blocks, directories) which are
+  * mapped via the block device.  Since these pages could have journal heads
+  * which would prevent try_to_free_buffers() from freeing them, we must use
+  * jbd2 layer's try_to_free_buffers() function to release them.
+  */
+ static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
+ {
+       journal_t *journal = EXT4_SB(sb)->s_journal;
+       WARN_ON(PageChecked(page));
+       if (!page_has_buffers(page))
+               return 0;
+       if (journal)
+               return jbd2_journal_try_to_free_buffers(journal, page,
+                                                       wait & ~__GFP_WAIT);
+       return try_to_free_buffers(page);
+ }
  #ifdef CONFIG_QUOTA
  #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
  #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
@@@ -803,9 -953,7 +953,9 @@@ static struct dquot_operations ext4_quo
        .acquire_dquot  = ext4_acquire_dquot,
        .release_dquot  = ext4_release_dquot,
        .mark_dirty     = ext4_mark_dquot_dirty,
 -      .write_info     = ext4_write_info
 +      .write_info     = ext4_write_info,
 +      .alloc_dquot    = dquot_alloc,
 +      .destroy_dquot  = dquot_destroy,
  };
  
  static struct quotactl_ops ext4_qctl_operations = {
@@@ -838,6 -986,7 +988,7 @@@ static const struct super_operations ex
        .quota_read     = ext4_quota_read,
        .quota_write    = ext4_quota_write,
  #endif
+       .bdev_try_to_free_page = bdev_try_to_free_page,
  };
  
  static const struct export_operations ext4_export_ops = {
@@@ -852,16 -1001,17 +1003,17 @@@ enum 
        Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
        Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
-       Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
+       Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
+       Opt_journal_update, Opt_journal_dev,
        Opt_journal_checksum, Opt_journal_async_commit,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_data_err_abort, Opt_data_err_ignore,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
-       Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
+       Opt_grpquota, Opt_i_version,
        Opt_stripe, Opt_delalloc, Opt_nodelalloc,
-       Opt_inode_readahead_blks
+       Opt_inode_readahead_blks, Opt_journal_ioprio
  };
  
  static const match_table_t tokens = {
        {Opt_nobh, "nobh"},
        {Opt_bh, "bh"},
        {Opt_commit, "commit=%u"},
+       {Opt_min_batch_time, "min_batch_time=%u"},
+       {Opt_max_batch_time, "max_batch_time=%u"},
        {Opt_journal_update, "journal=update"},
-       {Opt_journal_inum, "journal=%u"},
        {Opt_journal_dev, "journal_dev=%u"},
        {Opt_journal_checksum, "journal_checksum"},
        {Opt_journal_async_commit, "journal_async_commit"},
        {Opt_quota, "quota"},
        {Opt_usrquota, "usrquota"},
        {Opt_barrier, "barrier=%u"},
-       {Opt_extents, "extents"},
-       {Opt_noextents, "noextents"},
        {Opt_i_version, "i_version"},
        {Opt_stripe, "stripe=%u"},
        {Opt_resize, "resize"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
+       {Opt_journal_ioprio, "journal_ioprio=%u"},
        {Opt_err, NULL},
  };
  
@@@ -945,8 -1095,11 +1097,11 @@@ static ext4_fsblk_t get_sb_block(void *
        return sb_block;
  }
  
+ #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
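
DEFAULT_JOURNAL_IOPRIO and the journal_ioprio=%u option parsed further down both build an I/O priority value by packing a scheduling class into the high bits and a 0..7 level into the low bits. The standalone sketch below mirrors my reading of include/linux/ioprio.h; treat the exact class values and shift as assumptions rather than a copy of the kernel header.

    #include <stdio.h>

    /* Local mirror of the ioprio encoding: class in the top bits,
     * priority level in the low bits. */
    enum { IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE };

    #define IOPRIO_CLASS_SHIFT 13
    #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | (data))
    #define IOPRIO_PRIO_CLASS(v) ((v) >> IOPRIO_CLASS_SHIFT)
    #define IOPRIO_PRIO_DATA(v)  ((v) & ((1 << IOPRIO_CLASS_SHIFT) - 1))

    int main(void)
    {
            int option = 3;                  /* what journal_ioprio=3 parses to */
            unsigned int prio;

            if (option < 0 || option > 7)    /* same range check as the patch  */
                    return 1;
            prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, option);
            printf("ioprio=%#x class=%u level=%u\n",
                   prio, IOPRIO_PRIO_CLASS(prio), IOPRIO_PRIO_DATA(prio));
            return 0;
    }
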
  static int parse_options(char *options, struct super_block *sb,
-                        unsigned int *inum, unsigned long *journal_devnum,
+                        unsigned long *journal_devnum,
+                        unsigned int *journal_ioprio,
                         ext4_fsblk_t *n_blocks_count, int is_remount)
  {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int qtype, qfmt;
        char *qname;
  #endif
-       ext4_fsblk_t last_block;
  
        if (!options)
                return 1;
                        }
                        set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
                        break;
-               case Opt_journal_inum:
-                       if (is_remount) {
-                               printk(KERN_ERR "EXT4-fs: cannot specify "
-                                      "journal on remount\n");
-                               return 0;
-                       }
-                       if (match_int(&args[0], &option))
-                               return 0;
-                       *inum = option;
-                       break;
                case Opt_journal_dev:
                        if (is_remount) {
                                printk(KERN_ERR "EXT4-fs: cannot specify "
                                option = JBD2_DEFAULT_MAX_COMMIT_AGE;
                        sbi->s_commit_interval = HZ * option;
                        break;
+               case Opt_max_batch_time:
+                       if (match_int(&args[0], &option))
+                               return 0;
+                       if (option < 0)
+                               return 0;
+                       if (option == 0)
+                               option = EXT4_DEF_MAX_BATCH_TIME;
+                       sbi->s_max_batch_time = option;
+                       break;
+               case Opt_min_batch_time:
+                       if (match_int(&args[0], &option))
+                               return 0;
+                       if (option < 0)
+                               return 0;
+                       sbi->s_min_batch_time = option;
+                       break;
                case Opt_data_journal:
                        data_opt = EXT4_MOUNT_JOURNAL_DATA;
                        goto datacheck;
                case Opt_grpjquota:
                        qtype = GRPQUOTA;
  set_qf_name:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            !sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR
                                       "EXT4-fs: Cannot change journaled "
                case Opt_offgrpjquota:
                        qtype = GRPQUOTA;
  clear_qf_name:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            sbi->s_qf_names[qtype]) {
                                printk(KERN_ERR "EXT4-fs: Cannot change "
                                        "journaled quota options when "
                case Opt_jqfmt_vfsv0:
                        qfmt = QFMT_VFS_V0;
  set_qf_format:
 -                      if ((sb_any_quota_enabled(sb) ||
 -                           sb_any_quota_suspended(sb)) &&
 +                      if (sb_any_quota_loaded(sb) &&
                            sbi->s_jquota_fmt != qfmt) {
                                printk(KERN_ERR "EXT4-fs: Cannot change "
                                        "journaled quota options when "
                        set_opt(sbi->s_mount_opt, GRPQUOTA);
                        break;
                case Opt_noquota:
 -                      if (sb_any_quota_enabled(sb)) {
 +                      if (sb_any_quota_loaded(sb)) {
                                printk(KERN_ERR "EXT4-fs: Cannot change quota "
                                        "options when quota turned on.\n");
                                return 0;
                case Opt_bh:
                        clear_opt(sbi->s_mount_opt, NOBH);
                        break;
-               case Opt_extents:
-                       if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
-                                       EXT4_FEATURE_INCOMPAT_EXTENTS)) {
-                               ext4_warning(sb, __func__,
-                                       "extents feature not enabled "
-                                       "on this filesystem, use tune2fs\n");
-                               return 0;
-                       }
-                       set_opt(sbi->s_mount_opt, EXTENTS);
-                       break;
-               case Opt_noextents:
-                       /*
-                        * When e2fsprogs support resizing an already existing
-                        * ext3 file system to greater than 2**32 we need to
-                        * add support to block allocator to handle growing
-                        * already existing block  mapped inode so that blocks
-                        * allocated for them fall within 2**32
-                        */
-                       last_block = ext4_blocks_count(sbi->s_es) - 1;
-                       if (last_block  > 0xffffffffULL) {
-                               printk(KERN_ERR "EXT4-fs: Filesystem too "
-                                               "large to mount with "
-                                               "-o noextents options\n");
-                               return 0;
-                       }
-                       clear_opt(sbi->s_mount_opt, EXTENTS);
-                       break;
                case Opt_i_version:
                        set_opt(sbi->s_mount_opt, I_VERSION);
                        sb->s_flags |= MS_I_VERSION;
                                return 0;
                        sbi->s_inode_readahead_blks = option;
                        break;
+               case Opt_journal_ioprio:
+                       if (match_int(&args[0], &option))
+                               return 0;
+                       if (option < 0 || option > 7)
+                               break;
+                       *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
+                                                           option);
+                       break;
                default:
                        printk(KERN_ERR
                               "EXT4-fs: Unrecognized mount option \"%s\" "
@@@ -1405,24 -1547,19 +1546,19 @@@ static int ext4_setup_super(struct supe
                printk(KERN_WARNING
                       "EXT4-fs warning: checktime reached, "
                       "running e2fsck is recommended\n");
- #if 0
-               /* @@@ We _will_ want to clear the valid bit if we find
-                * inconsistencies, to force a fsck at reboot.  But for
-                * a plain journaled filesystem we can keep it set as
-                * valid forever! :)
-                */
-       es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
- #endif
+       if (!sbi->s_journal)
+               es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
        if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
                es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
        le16_add_cpu(&es->s_mnt_count, 1);
        es->s_mtime = cpu_to_le32(get_seconds());
        ext4_update_dynamic_rev(sb);
-       EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+       if (sbi->s_journal)
+               EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
  
        ext4_commit_super(sb, es, 1);
        if (test_opt(sb, DEBUG))
-               printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, "
+               printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
                                "bpg=%lu, ipg=%lu, mo=%04lx]\n",
                        sb->s_blocksize,
                        sbi->s_groups_count,
                        EXT4_INODES_PER_GROUP(sb),
                        sbi->s_mount_opt);
  
-       printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
-              sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
-              "external", EXT4_SB(sb)->s_journal->j_devname);
+       if (EXT4_SB(sb)->s_journal) {
+               printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
+                      sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+                      "external", EXT4_SB(sb)->s_journal->j_devname);
+       } else {
+               printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
+       }
        return res;
  }
  
@@@ -1444,7 -1585,6 +1584,6 @@@ static int ext4_fill_flex_info(struct s
        ext4_group_t flex_group_count;
        ext4_group_t flex_group;
        int groups_per_flex = 0;
-       __u64 block_bitmap = 0;
        int i;
  
        if (!sbi->s_es->s_log_groups_per_flex) {
                                     sizeof(struct flex_groups), GFP_KERNEL);
        if (sbi->s_flex_groups == NULL) {
                printk(KERN_ERR "EXT4-fs: not enough memory for "
-                               "%lu flex groups\n", flex_group_count);
+                               "%u flex groups\n", flex_group_count);
                goto failed;
        }
  
-       gdp = ext4_get_group_desc(sb, 1, &bh);
-       block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
        for (i = 0; i < sbi->s_groups_count; i++) {
                gdp = ext4_get_group_desc(sb, i, &bh);
  
                flex_group = ext4_flex_group(sbi, i);
                sbi->s_flex_groups[flex_group].free_inodes +=
-                       le16_to_cpu(gdp->bg_free_inodes_count);
+                       ext4_free_inodes_count(sb, gdp);
                sbi->s_flex_groups[flex_group].free_blocks +=
-                       le16_to_cpu(gdp->bg_free_blocks_count);
+                       ext4_free_blks_count(sb, gdp);
        }
  
        return 1;
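
The loop above folds the per-group counters into coarser flex-group buckets; a flex group is simply 2^s_log_groups_per_flex adjacent block groups, so the bucket index is the group number shifted right by that log (which is what ext4_flex_group() computes). A standalone sketch of the aggregation with made-up group counts:

    #include <stdio.h>

    #define GROUPS              16
    #define LOG_GROUPS_PER_FLEX 2            /* 4 block groups per flex group */

    int main(void)
    {
            unsigned int free_inodes[GROUPS], free_blocks[GROUPS];
            unsigned int flex_inodes[GROUPS >> LOG_GROUPS_PER_FLEX] = { 0 };
            unsigned int flex_blocks[GROUPS >> LOG_GROUPS_PER_FLEX] = { 0 };
            unsigned int i;

            for (i = 0; i < GROUPS; i++) {   /* made-up per-group counters */
                    free_inodes[i] = 100 + i;
                    free_blocks[i] = 1000 + i;
            }

            for (i = 0; i < GROUPS; i++) {
                    unsigned int flex = i >> LOG_GROUPS_PER_FLEX;

                    flex_inodes[flex] += free_inodes[i];
                    flex_blocks[flex] += free_blocks[i];
            }

            for (i = 0; i < GROUPS >> LOG_GROUPS_PER_FLEX; i++)
                    printf("flex %u: %u free inodes, %u free blocks\n",
                           i, flex_inodes[i], flex_blocks[i]);
            return 0;
    }
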
@@@ -1551,14 -1688,14 +1687,14 @@@ static int ext4_check_descriptors(struc
                block_bitmap = ext4_block_bitmap(sb, gdp);
                if (block_bitmap < first_block || block_bitmap > last_block) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                              "Block bitmap for group %lu not in group "
+                              "Block bitmap for group %u not in group "
                               "(block %llu)!\n", i, block_bitmap);
                        return 0;
                }
                inode_bitmap = ext4_inode_bitmap(sb, gdp);
                if (inode_bitmap < first_block || inode_bitmap > last_block) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                              "Inode bitmap for group %lu not in group "
+                              "Inode bitmap for group %u not in group "
                               "(block %llu)!\n", i, inode_bitmap);
                        return 0;
                }
                if (inode_table < first_block ||
                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                              "Inode table for group %lu not in group "
+                              "Inode table for group %u not in group "
                               "(block %llu)!\n", i, inode_table);
                        return 0;
                }
                spin_lock(sb_bgl_lock(sbi, i));
                if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
                        printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
-                              "Checksum for group %lu failed (%u!=%u)\n",
+                              "Checksum for group %u failed (%u!=%u)\n",
                               i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
                               gdp)), le16_to_cpu(gdp->bg_checksum));
                        if (!(sb->s_flags & MS_RDONLY)) {
@@@ -1865,19 -2002,20 +2001,20 @@@ static int ext4_fill_super(struct super
        ext4_fsblk_t sb_block = get_sb_block(&data);
        ext4_fsblk_t logical_sb_block;
        unsigned long offset = 0;
-       unsigned int journal_inum = 0;
        unsigned long journal_devnum = 0;
        unsigned long def_mount_opts;
        struct inode *root;
        char *cp;
+       const char *descr;
        int ret = -EINVAL;
        int blocksize;
-       int db_count;
-       int i;
+       unsigned int db_count;
+       unsigned int i;
        int needs_recovery, has_huge_files;
-       __le32 features;
+       int features;
        __u64 blocks_count;
        int err;
+       unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
  
        sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
        if (!sbi)
  
        sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
        sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+       sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
+       sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
+       sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
  
        set_opt(sbi->s_mount_opt, RESERVATION);
        set_opt(sbi->s_mount_opt, BARRIER);
  
-       /*
-        * turn on extents feature by default in ext4 filesystem
-        * only if feature flag already set by mkfs or tune2fs.
-        * Use -o noextents to turn it off
-        */
-       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
-               set_opt(sbi->s_mount_opt, EXTENTS);
-       else
-               ext4_warning(sb, __func__,
-                       "extents feature not enabled on this filesystem, "
-                       "use tune2fs.\n");
        /*
         * enable delayed allocation by default
         * Use -o nodelalloc to turn it off
        set_opt(sbi->s_mount_opt, DELALLOC);
  
  
-       if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
-                          NULL, 0))
+       if (!parse_options((char *) data, sb, &journal_devnum,
+                          &journal_ioprio, NULL, 0))
                goto failed_mount;
  
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
        features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
        if (features) {
                printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
-                      "unsupported optional features (%x).\n",
-                      sb->s_id, le32_to_cpu(features));
+                      "unsupported optional features (%x).\n", sb->s_id,
+                       (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
+                       ~EXT4_FEATURE_INCOMPAT_SUPP));
                goto failed_mount;
        }
        features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
        if (!(sb->s_flags & MS_RDONLY) && features) {
                printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
-                      "unsupported optional features (%x).\n",
-                      sb->s_id, le32_to_cpu(features));
+                      "unsupported optional features (%x).\n", sb->s_id,
+                       (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
+                       ~EXT4_FEATURE_RO_COMPAT_SUPP));
                goto failed_mount;
        }
        has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
        for (i = 0; i < 4; i++)
                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
        sbi->s_def_hash_version = es->s_def_hash_version;
+       i = le32_to_cpu(es->s_flags);
+       if (i & EXT2_FLAGS_UNSIGNED_HASH)
+               sbi->s_hash_unsigned = 3;
+       else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+ #ifdef __CHAR_UNSIGNED__
+               es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+               sbi->s_hash_unsigned = 3;
+ #else
+               es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+ #endif
+               sb->s_dirt = 1;
+       }
  
        if (sbi->s_blocks_per_group > blocksize * 8) {
                printk(KERN_ERR
        if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
                goto cantfind_ext4;
  
-       /* ensure blocks_count calculation below doesn't sign-extend */
-       if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) <
-           le32_to_cpu(es->s_first_data_block) + 1) {
-               printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, "
-                      "first data block %u, blocks per group %lu\n",
-                       ext4_blocks_count(es),
-                       le32_to_cpu(es->s_first_data_block),
-                       EXT4_BLOCKS_PER_GROUP(sb));
+         /*
+          * It makes no sense for the first data block to be beyond the end
+          * of the filesystem.
+          */
+         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
+                 printk(KERN_WARNING "EXT4-fs: bad geometry: first data "
+                      "block %u is beyond end of filesystem (%llu)\n",
+                      le32_to_cpu(es->s_first_data_block),
+                      ext4_blocks_count(es));
                goto failed_mount;
        }
        blocks_count = (ext4_blocks_count(es) -
                        le32_to_cpu(es->s_first_data_block) +
                        EXT4_BLOCKS_PER_GROUP(sb) - 1);
        do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
+       if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
+               printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
+                      "(block count %llu, first data block %u, "
+                      "blocks per group %lu)\n", sbi->s_groups_count,
+                      ext4_blocks_count(es),
+                      le32_to_cpu(es->s_first_data_block),
+                      EXT4_BLOCKS_PER_GROUP(sb));
+               goto failed_mount;
+       }
        sbi->s_groups_count = blocks_count;
        db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
                   EXT4_DESC_PER_BLOCK(sb);
                                EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
                                es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
                                ext4_commit_super(sb, es, 1);
-                               printk(KERN_CRIT
-                                      "EXT4-fs (device %s): mount failed\n",
-                                     sb->s_id);
                                goto failed_mount4;
                        }
                }
-       } else if (journal_inum) {
-               if (ext4_create_journal(sb, es, journal_inum))
-                       goto failed_mount3;
+       } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
+             EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
+               printk(KERN_ERR "EXT4-fs: required journal recovery "
+                      "suppressed and not mounted read-only\n");
+               goto failed_mount4;
        } else {
-               if (!silent)
-                       printk(KERN_ERR
-                              "ext4: No journal on filesystem on %s\n",
-                              sb->s_id);
-               goto failed_mount3;
+               clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+               set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+               sbi->s_journal = NULL;
+               needs_recovery = 0;
+               goto no_journal;
        }
  
        if (ext4_blocks_count(es) > 0xffffffffULL &&
            !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
                                       JBD2_FEATURE_INCOMPAT_64BIT)) {
-               printk(KERN_ERR "ext4: Failed to set 64-bit journal feature\n");
+               printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
                goto failed_mount4;
        }
  
        default:
                break;
        }
+       set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
+ no_journal:
  
        if (test_opt(sb, NOBH)) {
                if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
        ext4_orphan_cleanup(sb, es);
        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
-       if (needs_recovery)
+       if (needs_recovery) {
                printk(KERN_INFO "EXT4-fs: recovery complete.\n");
-       ext4_mark_recovery_complete(sb, es);
-       printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
-              test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
-              test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
-              "writeback");
+               ext4_mark_recovery_complete(sb, es);
+       }
+       if (EXT4_SB(sb)->s_journal) {
+               if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+                       descr = " journalled data mode";
+               else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+                       descr = " ordered data mode";
+               else
+                       descr = " writeback data mode";
+       } else
+               descr = "out journal";
+       printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
+              sb->s_id, descr);
  
        lock_kernel();
        return 0;
@@@ -2437,8 -2601,11 +2600,11 @@@ cantfind_ext4
        goto failed_mount;
  
  failed_mount4:
-       jbd2_journal_destroy(sbi->s_journal);
-       sbi->s_journal = NULL;
+       printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
+       if (sbi->s_journal) {
+               jbd2_journal_destroy(sbi->s_journal);
+               sbi->s_journal = NULL;
+       }
  failed_mount3:
        percpu_counter_destroy(&sbi->s_freeblocks_counter);
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@@ -2475,11 -2642,9 +2641,9 @@@ static void ext4_init_journal_params(st
  {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
  
-       if (sbi->s_commit_interval)
-               journal->j_commit_interval = sbi->s_commit_interval;
-       /* We could also set up an ext4-specific default for the commit
-        * interval here, but for now we'll just fall back to the jbd
-        * default. */
+       journal->j_commit_interval = sbi->s_commit_interval;
+       journal->j_min_batch_time = sbi->s_min_batch_time;
+       journal->j_max_batch_time = sbi->s_max_batch_time;
  
        spin_lock(&journal->j_state_lock);
        if (test_opt(sb, BARRIER))
@@@ -2499,6 -2664,8 +2663,8 @@@ static journal_t *ext4_get_journal(stru
        struct inode *journal_inode;
        journal_t *journal;
  
+       BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
        /* First, test for the existence of a valid inode on disk.  Bad
         * things happen if we iget() an unused inode, as the subsequent
         * iput() will try to delete it. */
@@@ -2547,13 -2714,15 +2713,15 @@@ static journal_t *ext4_get_dev_journal(
        struct ext4_super_block *es;
        struct block_device *bdev;
  
+       BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
        bdev = ext4_blkdev_get(j_dev);
        if (bdev == NULL)
                return NULL;
  
        if (bd_claim(bdev, sb)) {
                printk(KERN_ERR
-                       "EXT4: failed to claim external journal device.\n");
+                       "EXT4-fs: failed to claim external journal device.\n");
                blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
                return NULL;
        }
@@@ -2634,6 -2803,8 +2802,8 @@@ static int ext4_load_journal(struct sup
        int err = 0;
        int really_read_only;
  
+       BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
        if (journal_devnum &&
            journal_devnum != le32_to_cpu(es->s_journal_dev)) {
                printk(KERN_INFO "EXT4-fs: external journal device major/minor "
        return 0;
  }
  
- static int ext4_create_journal(struct super_block *sb,
-                              struct ext4_super_block *es,
-                              unsigned int journal_inum)
- {
-       journal_t *journal;
-       int err;
-       if (sb->s_flags & MS_RDONLY) {
-               printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to "
-                               "create journal.\n");
-               return -EROFS;
-       }
-       journal = ext4_get_journal(sb, journal_inum);
-       if (!journal)
-               return -EINVAL;
-       printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n",
-              journal_inum);
-       err = jbd2_journal_create(journal);
-       if (err) {
-               printk(KERN_ERR "EXT4-fs: error creating journal.\n");
-               jbd2_journal_destroy(journal);
-               return -EIO;
-       }
-       EXT4_SB(sb)->s_journal = journal;
-       ext4_update_dynamic_rev(sb);
-       EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
-       EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL);
-       es->s_journal_inum = cpu_to_le32(journal_inum);
-       sb->s_dirt = 1;
-       /* Make sure we flush the recovery flag to disk. */
-       ext4_commit_super(sb, es, 1);
-       return 0;
- }
  static void ext4_commit_super(struct super_block *sb,
                              struct ext4_super_block *es, int sync)
  {
                 * be remapped.  Nothing we can do but to retry the
                 * write and hope for the best.
                 */
-               printk(KERN_ERR "ext4: previous I/O error to "
+               printk(KERN_ERR "EXT4-fs: previous I/O error to "
                       "superblock detected for %s.\n", sb->s_id);
                clear_buffer_write_io_error(sbh);
                set_buffer_uptodate(sbh);
        }
        es->s_wtime = cpu_to_le32(get_seconds());
-       ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb));
-       es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
+       ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+                                       &EXT4_SB(sb)->s_freeblocks_counter));
+       es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
+                                       &EXT4_SB(sb)->s_freeinodes_counter));
        BUFFER_TRACE(sbh, "marking dirty");
        mark_buffer_dirty(sbh);
        if (sync) {
                sync_dirty_buffer(sbh);
                if (buffer_write_io_error(sbh)) {
-                       printk(KERN_ERR "ext4: I/O error while writing "
+                       printk(KERN_ERR "EXT4-fs: I/O error while writing "
                               "superblock for %s.\n", sb->s_id);
                        clear_buffer_write_io_error(sbh);
                        set_buffer_uptodate(sbh);
@@@ -2808,6 -2940,10 +2939,10 @@@ static void ext4_mark_recovery_complete
  {
        journal_t *journal = EXT4_SB(sb)->s_journal;
  
+       if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
+               BUG_ON(journal != NULL);
+               return;
+       }
        jbd2_journal_lock_updates(journal);
        if (jbd2_journal_flush(journal) < 0)
                goto out;
@@@ -2837,6 -2973,8 +2972,8 @@@ static void ext4_clear_journal_err(stru
        int j_errno;
        const char *errstr;
  
+       BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
        journal = EXT4_SB(sb)->s_journal;
  
        /*
  int ext4_force_commit(struct super_block *sb)
  {
        journal_t *journal;
-       int ret;
+       int ret = 0;
  
        if (sb->s_flags & MS_RDONLY)
                return 0;
  
        journal = EXT4_SB(sb)->s_journal;
-       sb->s_dirt = 0;
-       ret = ext4_journal_force_commit(journal);
+       if (journal) {
+               sb->s_dirt = 0;
+               ret = ext4_journal_force_commit(journal);
+       }
        return ret;
  }
  
   */
  static void ext4_write_super(struct super_block *sb)
  {
-       if (mutex_trylock(&sb->s_lock) != 0)
-               BUG();
-       sb->s_dirt = 0;
+       if (EXT4_SB(sb)->s_journal) {
+               if (mutex_trylock(&sb->s_lock) != 0)
+                       BUG();
+               sb->s_dirt = 0;
+       } else {
+               ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+       }
  }
  
  static int ext4_sync_fs(struct super_block *sb, int wait)
  
        trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
        sb->s_dirt = 0;
-       if (wait)
-               ret = ext4_force_commit(sb);
-       else
-               jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+       if (EXT4_SB(sb)->s_journal) {
+               if (wait)
+                       ret = ext4_force_commit(sb);
+               else
+                       jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+       } else {
+               ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
+       }
        return ret;
  }
  
@@@ -2917,15 -3066,17 +3065,17 @@@ static void ext4_write_super_lockfs(str
        if (!(sb->s_flags & MS_RDONLY)) {
                journal_t *journal = EXT4_SB(sb)->s_journal;
  
-               /* Now we set up the journal barrier. */
-               jbd2_journal_lock_updates(journal);
+               if (journal) {
+                       /* Now we set up the journal barrier. */
+                       jbd2_journal_lock_updates(journal);
  
-               /*
-                * We don't want to clear needs_recovery flag when we failed
-                * to flush the journal.
-                */
-               if (jbd2_journal_flush(journal) < 0)
-                       return;
+                       /*
+                        * We don't want to clear needs_recovery flag when we
+                        * failed to flush the journal.
+                        */
+                       if (jbd2_journal_flush(journal) < 0)
+                               return;
+               }
  
                /* Journal blocked and flushed, clear needs_recovery flag. */
                EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
   */
  static void ext4_unlockfs(struct super_block *sb)
  {
-       if (!(sb->s_flags & MS_RDONLY)) {
+       if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
                lock_super(sb);
                /* Reset the needs_recovery flag before the fs is unlocked. */
                EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@@ -2957,6 -3108,7 +3107,7 @@@ static int ext4_remount(struct super_bl
        unsigned long old_sb_flags;
        struct ext4_mount_options old_opts;
        ext4_group_t g;
+       unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
        int err;
  #ifdef CONFIG_QUOTA
        int i;
        old_opts.s_resuid = sbi->s_resuid;
        old_opts.s_resgid = sbi->s_resgid;
        old_opts.s_commit_interval = sbi->s_commit_interval;
+       old_opts.s_min_batch_time = sbi->s_min_batch_time;
+       old_opts.s_max_batch_time = sbi->s_max_batch_time;
  #ifdef CONFIG_QUOTA
        old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
        for (i = 0; i < MAXQUOTAS; i++)
                old_opts.s_qf_names[i] = sbi->s_qf_names[i];
  #endif
+       if (sbi->s_journal && sbi->s_journal->j_task->io_context)
+               journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
  
        /*
         * Allow the "check" option to be passed as a remount option.
         */
-       if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
+       if (!parse_options(data, sb, NULL, &journal_ioprio,
+                          &n_blocks_count, 1)) {
                err = -EINVAL;
                goto restore_opts;
        }
  
        es = sbi->s_es;
  
-       ext4_init_journal_params(sb, sbi->s_journal);
+       if (sbi->s_journal) {
+               ext4_init_journal_params(sb, sbi->s_journal);
+               set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
+       }
  
        if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
                n_blocks_count > ext4_blocks_count(es)) {
                         * We have to unlock super so that we can wait for
                         * transactions.
                         */
-                       unlock_super(sb);
-                       ext4_mark_recovery_complete(sb, es);
-                       lock_super(sb);
+                       if (sbi->s_journal) {
+                               unlock_super(sb);
+                               ext4_mark_recovery_complete(sb, es);
+                               lock_super(sb);
+                       }
                } else {
-                       __le32 ret;
+                       int ret;
                        if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                        ~EXT4_FEATURE_RO_COMPAT_SUPP))) {
                                printk(KERN_WARNING "EXT4-fs: %s: couldn't "
                                       "remount RDWR because of unsupported "
-                                      "optional features (%x).\n",
-                                      sb->s_id, le32_to_cpu(ret));
+                                      "optional features (%x).\n", sb->s_id,
+                               (le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
+                                       ~EXT4_FEATURE_RO_COMPAT_SUPP));
                                err = -EROFS;
                                goto restore_opts;
                        }
                                if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
                                        printk(KERN_ERR
               "EXT4-fs: ext4_remount: "
-               "Checksum for group %lu failed (%u!=%u)\n",
+               "Checksum for group %u failed (%u!=%u)\n",
                g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
                                               le16_to_cpu(gdp->bg_checksum));
                                        err = -EINVAL;
                         * been changed by e2fsck since we originally mounted
                         * the partition.)
                         */
-                       ext4_clear_journal_err(sb, es);
+                       if (sbi->s_journal)
+                               ext4_clear_journal_err(sb, es);
                        sbi->s_mount_state = le16_to_cpu(es->s_state);
                        if ((err = ext4_group_extend(sb, es, n_blocks_count)))
                                goto restore_opts;
                                sb->s_flags &= ~MS_RDONLY;
                }
        }
+       if (sbi->s_journal == NULL)
+               ext4_commit_super(sb, es, 1);
  #ifdef CONFIG_QUOTA
        /* Release old quota file names */
        for (i = 0; i < MAXQUOTAS; i++)
@@@ -3097,6 -3264,8 +3263,8 @@@ restore_opts
        sbi->s_resuid = old_opts.s_resuid;
        sbi->s_resgid = old_opts.s_resgid;
        sbi->s_commit_interval = old_opts.s_commit_interval;
+       sbi->s_min_batch_time = old_opts.s_min_batch_time;
+       sbi->s_max_batch_time = old_opts.s_max_batch_time;
  #ifdef CONFIG_QUOTA
        sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
        for (i = 0; i < MAXQUOTAS; i++) {
@@@ -3359,7 -3528,8 +3527,8 @@@ static int ext4_quota_on(struct super_b
         * When we journal data on quota file, we have to flush journal to see
         * all updates to the file when we bypass pagecache...
         */
-       if (ext4_should_journal_data(path.dentry->d_inode)) {
+       if (EXT4_SB(sb)->s_journal &&
+           ext4_should_journal_data(path.dentry->d_inode)) {
                /*
                 * We don't need to lock updates but journal_flush() could
                 * otherwise be livelocked...
@@@ -3433,7 -3603,7 +3602,7 @@@ static ssize_t ext4_quota_write(struct 
        struct buffer_head *bh;
        handle_t *handle = journal_current_handle();
  
-       if (!handle) {
+       if (EXT4_SB(sb)->s_journal && !handle) {
                printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
                        " cancelled because transaction is not started.\n",
                        (unsigned long long)off, (unsigned long long)len);
                flush_dcache_page(bh->b_page);
                unlock_buffer(bh);
                if (journal_quota)
-                       err = ext4_journal_dirty_metadata(handle, bh);
+                       err = ext4_handle_dirty_metadata(handle, NULL, bh);
                else {
                        /* Always do at least ordered writes for quotas */
                        err = ext4_jbd2_file_inode(handle, inode);
@@@ -3512,18 -3682,15 +3681,15 @@@ static int ext4_ui_proc_open(struct ino
  static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
                               size_t cnt, loff_t *ppos)
  {
-       unsigned int *p = PDE(file->f_path.dentry->d_inode)->data;
+       unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
        char str[32];
-       unsigned long value;
  
        if (cnt >= sizeof(str))
                return -EINVAL;
        if (copy_from_user(str, buf, cnt))
                return -EFAULT;
-       value = simple_strtol(str, NULL, 0);
-       if (value < 0)
-               return -ERANGE;
-       *p = value;
+       *p = simple_strtoul(str, NULL, 0);
        return cnt;
  }
  
@@@ -3614,7 -3781,7 +3780,7 @@@ static void __exit exit_ext4_fs(void
  }
  
  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
- MODULE_DESCRIPTION("Fourth Extended Filesystem with extents");
+ MODULE_DESCRIPTION("Fourth Extended Filesystem");
  MODULE_LICENSE("GPL");
  module_init(init_ext4_fs)
  module_exit(exit_ext4_fs)
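
The ext4 mount-path hunks above add two geometry checks: the first data block must lie inside the filesystem, and the derived group count must still fit in 32 bits once a descriptor block's worth of headroom is subtracted. The standalone C sketch below mirrors only that arithmetic; check_geometry() and the sample numbers are invented for illustration and are not ext4 code.

#include <stdint.h>
#include <stdio.h>

/* Userspace mirror of the mount-time geometry checks shown above. */
static int check_geometry(uint64_t blocks_count, uint32_t first_data_block,
                          uint32_t blocks_per_group, uint32_t desc_per_block)
{
        if (first_data_block >= blocks_count)
                return -1;      /* first data block beyond end of filesystem */

        uint64_t groups = (blocks_count - first_data_block +
                           blocks_per_group - 1) / blocks_per_group;
        if (groups > ((uint64_t)1 << 32) - desc_per_block)
                return -1;      /* group count would not fit in 32 bits */

        return 0;
}

int main(void)
{
        /* e.g. 2^30 blocks, first data block 1, 32768 blocks/group, 128 desc/block */
        printf("geometry %s\n",
               check_geometry((uint64_t)1 << 30, 1, 32768, 128) ? "bad" : "ok");
        return 0;
}
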
diff --combined fs/jbd2/commit.c
index c8a1bace685a83b1b6944de6e7e5ad13faa4f61b,073124a29b8c4cc869aeaf106fef38b23bb5ca10..62804e57a44caf2f893d16cc481f7ac347efe19a
@@@ -25,6 -25,7 +25,7 @@@
  #include <linux/crc32.h>
  #include <linux/writeback.h>
  #include <linux/backing-dev.h>
+ #include <linux/bio.h>
  
  /*
   * Default IO end handler for temporary BJ_IO buffer_heads.
@@@ -137,7 -138,7 +138,7 @@@ static int journal_submit_commit_record
                set_buffer_ordered(bh);
                barrier_done = 1;
        }
-       ret = submit_bh(WRITE, bh);
+       ret = submit_bh(WRITE_SYNC, bh);
        if (barrier_done)
                clear_buffer_ordered(bh);
  
                lock_buffer(bh);
                set_buffer_uptodate(bh);
                clear_buffer_dirty(bh);
-               ret = submit_bh(WRITE, bh);
+               ret = submit_bh(WRITE_SYNC, bh);
        }
        *cbh = bh;
        return ret;
   * This function along with journal_submit_commit_record
   * allows to write the commit record asynchronously.
   */
- static int journal_wait_on_commit_record(struct buffer_head *bh)
+ static int journal_wait_on_commit_record(journal_t *journal,
+                                        struct buffer_head *bh)
  {
        int ret = 0;
  
+ retry:
        clear_buffer_dirty(bh);
        wait_on_buffer(bh);
+       if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
+               printk(KERN_WARNING
+                      "JBD2: wait_on_commit_record: sync failed on %s - "
+                      "disabling barriers\n", journal->j_devname);
+               spin_lock(&journal->j_state_lock);
+               journal->j_flags &= ~JBD2_BARRIER;
+               spin_unlock(&journal->j_state_lock);
+               lock_buffer(bh);
+               clear_buffer_dirty(bh);
+               set_buffer_uptodate(bh);
+               bh->b_end_io = journal_end_buffer_io_sync;
+               ret = submit_bh(WRITE_SYNC, bh);
+               if (ret) {
+                       unlock_buffer(bh);
+                       return ret;
+               }
+               goto retry;
+       }
  
        if (unlikely(!buffer_uptodate(bh)))
                ret = -EIO;
@@@ -332,13 -355,15 +355,15 @@@ void jbd2_journal_commit_transaction(jo
        int flags;
        int err;
        unsigned long long blocknr;
+       ktime_t start_time;
+       u64 commit_time;
        char *tagp = NULL;
        journal_header_t *header;
        journal_block_tag_t *tag = NULL;
        int space_left = 0;
        int first_tag = 0;
        int tag_flag;
-       int i;
+       int i, to_free = 0;
        int tag_bytes = journal_tag_bytes(journal);
        struct buffer_head *cbh = NULL; /* For transactional checksums */
        __u32 crc32_sum = ~0;
        commit_transaction->t_state = T_FLUSH;
        journal->j_committing_transaction = commit_transaction;
        journal->j_running_transaction = NULL;
+       start_time = ktime_get();
        commit_transaction->t_log_start = journal->j_head;
        wake_up(&journal->j_wait_transaction_locked);
        spin_unlock(&journal->j_state_lock);
                if (is_journal_aborted(journal)) {
                        clear_buffer_jbddirty(jh2bh(jh));
                        JBUFFER_TRACE(jh, "journal is aborting: refile");
 +                      jbd2_buffer_abort_trigger(jh,
 +                                                jh->b_frozen_data ?
 +                                                jh->b_frozen_triggers :
 +                                                jh->b_triggers);
                        jbd2_journal_refile_buffer(journal, jh);
                        /* If that was the last one, we need to clean up
                         * any descriptor buffers which may have been
@@@ -803,7 -825,7 +829,7 @@@ wait_for_iobuf
                        __jbd2_journal_abort_hard(journal);
        }
        if (!err && !is_journal_aborted(journal))
-               err = journal_wait_on_commit_record(cbh);
+               err = journal_wait_on_commit_record(journal, cbh);
  
        if (err)
                jbd2_journal_abort(journal, err);
@@@ -848,9 -870,6 +874,9 @@@ restart_loop
                 * data.
                 *
                 * Otherwise, we can just throw away the frozen data now.
 +               *
 +               * We also know that the frozen data has already fired
 +               * its triggers if they exist, so we can clear that too.
                 */
                if (jh->b_committed_data) {
                        jbd2_free(jh->b_committed_data, bh->b_size);
                        if (jh->b_frozen_data) {
                                jh->b_committed_data = jh->b_frozen_data;
                                jh->b_frozen_data = NULL;
 +                              jh->b_frozen_triggers = NULL;
                        }
                } else if (jh->b_frozen_data) {
                        jbd2_free(jh->b_frozen_data, bh->b_size);
                        jh->b_frozen_data = NULL;
 +                      jh->b_frozen_triggers = NULL;
                }
  
                spin_lock(&journal->j_list_lock);
        J_ASSERT(commit_transaction == journal->j_committing_transaction);
        journal->j_commit_sequence = commit_transaction->t_tid;
        journal->j_committing_transaction = NULL;
-       spin_unlock(&journal->j_state_lock);
+       commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
  
-       if (journal->j_commit_callback)
-               journal->j_commit_callback(journal, commit_transaction);
+       /*
+        * weight the existing average more heavily than the latest commit
+        * time so we don't react too strongly to vast changes in the
+        * commit time
+        */
+       if (likely(journal->j_average_commit_time))
+               journal->j_average_commit_time = (commit_time +
+                               journal->j_average_commit_time*3) / 4;
+       else
+               journal->j_average_commit_time = commit_time;
+       spin_unlock(&journal->j_state_lock);
  
        if (commit_transaction->t_checkpoint_list == NULL &&
            commit_transaction->t_checkpoint_io_list == NULL) {
                __jbd2_journal_drop_transaction(journal, commit_transaction);
+               to_free = 1;
        } else {
                if (journal->j_checkpoint_transactions == NULL) {
                        journal->j_checkpoint_transactions = commit_transaction;
        }
        spin_unlock(&journal->j_list_lock);
  
+       if (journal->j_commit_callback)
+               journal->j_commit_callback(journal, commit_transaction);
        trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
-                  journal->j_devname, journal->j_commit_sequence,
+                  journal->j_devname, commit_transaction->t_tid,
                   journal->j_tail_sequence);
        jbd_debug(1, "JBD: commit %d complete, head %d\n",
                  journal->j_commit_sequence, journal->j_tail_sequence);
+       if (to_free)
+               kfree(commit_transaction);
  
        wake_up(&journal->j_wait_done_commit);
  }
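
The j_average_commit_time bookkeeping added to jbd2_journal_commit_transaction() above is a 3:1 weighted moving average: existing history counts three times, the newest commit once. A small userspace sketch (update_average() is an invented name, not a jbd2 function) shows how a single slow commit only nudges the estimate instead of replacing it:

#include <stdint.h>
#include <stdio.h>

/* Same weighting as above: history counts three times, the new sample once. */
static uint64_t update_average(uint64_t average_ns, uint64_t commit_ns)
{
        if (average_ns == 0)
                return commit_ns;       /* first commit seeds the average */
        return (commit_ns + average_ns * 3) / 4;
}

int main(void)
{
        uint64_t samples[] = { 2000000, 2100000, 50000000, 2200000 };
        uint64_t avg = 0;

        for (int i = 0; i < 4; i++) {
                avg = update_average(avg, samples[i]);
                printf("commit %8llu ns -> average %8llu ns\n",
                       (unsigned long long)samples[i],
                       (unsigned long long)avg);
        }
        return 0;
}
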
diff --combined fs/jbd2/journal.c
index f6bff9d6f8df8193e6fc13f1a2b1763124d29cb6,2932c8f55199768c3887663cd264e1a6e5aabf99..56675306ed817eacb654a3f245a14d81cc4d6971
@@@ -40,6 -40,7 +40,7 @@@
  
  #include <asm/uaccess.h>
  #include <asm/page.h>
+ #include <linux/math64.h>
  
  EXPORT_SYMBOL(jbd2_journal_start);
  EXPORT_SYMBOL(jbd2_journal_restart);
@@@ -50,7 -51,6 +51,7 @@@ EXPORT_SYMBOL(jbd2_journal_unlock_updat
  EXPORT_SYMBOL(jbd2_journal_get_write_access);
  EXPORT_SYMBOL(jbd2_journal_get_create_access);
  EXPORT_SYMBOL(jbd2_journal_get_undo_access);
 +EXPORT_SYMBOL(jbd2_journal_set_triggers);
  EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
  EXPORT_SYMBOL(jbd2_journal_release_buffer);
  EXPORT_SYMBOL(jbd2_journal_forget);
@@@ -66,7 -66,6 +67,6 @@@ EXPORT_SYMBOL(jbd2_journal_update_forma
  EXPORT_SYMBOL(jbd2_journal_check_used_features);
  EXPORT_SYMBOL(jbd2_journal_check_available_features);
  EXPORT_SYMBOL(jbd2_journal_set_features);
- EXPORT_SYMBOL(jbd2_journal_create);
  EXPORT_SYMBOL(jbd2_journal_load);
  EXPORT_SYMBOL(jbd2_journal_destroy);
  EXPORT_SYMBOL(jbd2_journal_abort);
@@@ -132,8 -131,9 +132,9 @@@ static int kjournald2(void *arg
        journal->j_task = current;
        wake_up(&journal->j_wait_done_commit);
  
-       printk(KERN_INFO "kjournald2 starting.  Commit interval %ld seconds\n",
-                       journal->j_commit_interval / HZ);
+       printk(KERN_INFO "kjournald2 starting: pid %d, dev %s, "
+              "commit interval %ld seconds\n", current->pid,
+              journal->j_devname, journal->j_commit_interval / HZ);
  
        /*
         * And now, wait forever for commit wakeup events.
@@@ -291,7 -291,6 +292,7 @@@ int jbd2_journal_write_metadata_buffer(
        struct page *new_page;
        unsigned int new_offset;
        struct buffer_head *bh_in = jh2bh(jh_in);
 +      struct jbd2_buffer_trigger_type *triggers;
  
        /*
         * The buffer really shouldn't be locked: only the current committing
@@@ -316,22 -315,12 +317,22 @@@ repeat
                done_copy_out = 1;
                new_page = virt_to_page(jh_in->b_frozen_data);
                new_offset = offset_in_page(jh_in->b_frozen_data);
 +              triggers = jh_in->b_frozen_triggers;
        } else {
                new_page = jh2bh(jh_in)->b_page;
                new_offset = offset_in_page(jh2bh(jh_in)->b_data);
 +              triggers = jh_in->b_triggers;
        }
  
        mapped_data = kmap_atomic(new_page, KM_USER0);
 +      /*
 +       * Fire any commit trigger.  Do this before checking for escaping,
 +       * as the trigger may modify the magic offset.  If a copy-out
 +       * happens afterwards, it will have the correct data in the buffer.
 +       */
 +      jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
 +                                 triggers);
 +
        /*
         * Check for escaping
         */
                new_page = virt_to_page(tmp);
                new_offset = offset_in_page(tmp);
                done_copy_out = 1;
 +
 +              /*
 +               * This isn't strictly necessary, as we're using frozen
 +               * data for the escaping, but it keeps consistency with
 +               * b_frozen_data usage.
 +               */
 +              jh_in->b_frozen_triggers = jh_in->b_triggers;
        }
  
        /*
@@@ -650,6 -632,8 +651,8 @@@ struct journal_head *jbd2_journal_get_d
                return NULL;
  
        bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+       if (!bh)
+               return NULL;
        lock_buffer(bh);
        memset(bh->b_data, 0, journal->j_blocksize);
        set_buffer_uptodate(bh);
@@@ -843,6 -827,8 +846,8 @@@ static int jbd2_seq_info_show(struct se
            jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid));
        seq_printf(seq, "  %ums logging transaction\n",
            jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid));
+       seq_printf(seq, "  %lluus average transaction commit time\n",
+           (unsigned long long)div_u64(s->journal->j_average_commit_time, 1000));
        seq_printf(seq, "  %lu handles per transaction\n",
            s->stats->u.run.rs_handle_count / s->stats->ts_tid);
        seq_printf(seq, "  %lu blocks per transaction\n",
@@@ -980,6 -966,8 +985,8 @@@ static journal_t * journal_init_common 
        spin_lock_init(&journal->j_state_lock);
  
        journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
+       journal->j_min_batch_time = 0;
+       journal->j_max_batch_time = 15000; /* 15ms */
  
        /* The journal is marked for error until we succeed with recovery! */
        journal->j_flags = JBD2_ABORT;
@@@ -1035,15 -1023,14 +1042,14 @@@ journal_t * jbd2_journal_init_dev(struc
  
        /* journal descriptor can store up to n blocks -bzzz */
        journal->j_blocksize = blocksize;
+       jbd2_stats_proc_init(journal);
        n = journal->j_blocksize / sizeof(journal_block_tag_t);
        journal->j_wbufsize = n;
        journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
        if (!journal->j_wbuf) {
                printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
                        __func__);
-               kfree(journal);
-               journal = NULL;
-               goto out;
+               goto out_err;
        }
        journal->j_dev = bdev;
        journal->j_fs_dev = fs_dev;
        p = journal->j_devname;
        while ((p = strchr(p, '/')))
                *p = '!';
-       jbd2_stats_proc_init(journal);
  
        bh = __getblk(journal->j_dev, start, journal->j_blocksize);
-       J_ASSERT(bh != NULL);
+       if (!bh) {
+               printk(KERN_ERR
+                      "%s: Cannot get buffer for journal superblock\n",
+                      __func__);
+               goto out_err;
+       }
        journal->j_sb_buffer = bh;
        journal->j_superblock = (journal_superblock_t *)bh->b_data;
- out:
        return journal;
+ out_err:
+       jbd2_stats_proc_exit(journal);
+       kfree(journal);
+       return NULL;
  }
  
  /**
@@@ -1108,9 -1103,7 +1122,7 @@@ journal_t * jbd2_journal_init_inode (st
        if (!journal->j_wbuf) {
                printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
                        __func__);
-               jbd2_stats_proc_exit(journal);
-               kfree(journal);
-               return NULL;
+               goto out_err;
        }
  
        err = jbd2_journal_bmap(journal, 0, &blocknr);
        if (err) {
                printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
                       __func__);
-               jbd2_stats_proc_exit(journal);
-               kfree(journal);
-               return NULL;
+               goto out_err;
        }
  
        bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
-       J_ASSERT(bh != NULL);
+       if (!bh) {
+               printk(KERN_ERR
+                      "%s: Cannot get buffer for journal superblock\n",
+                      __func__);
+               goto out_err;
+       }
        journal->j_sb_buffer = bh;
        journal->j_superblock = (journal_superblock_t *)bh->b_data;
  
        return journal;
+ out_err:
+       jbd2_stats_proc_exit(journal);
+       kfree(journal);
+       return NULL;
  }
  
  /*
@@@ -1176,77 -1176,6 +1195,6 @@@ static int journal_reset(journal_t *jou
        return jbd2_journal_start_thread(journal);
  }
  
- /**
-  * int jbd2_journal_create() - Initialise the new journal file
-  * @journal: Journal to create. This structure must have been initialised
-  *
-  * Given a journal_t structure which tells us which disk blocks we can
-  * use, create a new journal superblock and initialise all of the
-  * journal fields from scratch.
-  **/
- int jbd2_journal_create(journal_t *journal)
- {
-       unsigned long long blocknr;
-       struct buffer_head *bh;
-       journal_superblock_t *sb;
-       int i, err;
-       if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) {
-               printk (KERN_ERR "Journal length (%d blocks) too short.\n",
-                       journal->j_maxlen);
-               journal_fail_superblock(journal);
-               return -EINVAL;
-       }
-       if (journal->j_inode == NULL) {
-               /*
-                * We don't know what block to start at!
-                */
-               printk(KERN_EMERG
-                      "%s: creation of journal on external device!\n",
-                      __func__);
-               BUG();
-       }
-       /* Zero out the entire journal on disk.  We cannot afford to
-          have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */
-       jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
-       for (i = 0; i < journal->j_maxlen; i++) {
-               err = jbd2_journal_bmap(journal, i, &blocknr);
-               if (err)
-                       return err;
-               bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
-               lock_buffer(bh);
-               memset (bh->b_data, 0, journal->j_blocksize);
-               BUFFER_TRACE(bh, "marking dirty");
-               mark_buffer_dirty(bh);
-               BUFFER_TRACE(bh, "marking uptodate");
-               set_buffer_uptodate(bh);
-               unlock_buffer(bh);
-               __brelse(bh);
-       }
-       sync_blockdev(journal->j_dev);
-       jbd_debug(1, "JBD: journal cleared.\n");
-       /* OK, fill in the initial static fields in the new superblock */
-       sb = journal->j_superblock;
-       sb->s_header.h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
-       sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2);
-       sb->s_blocksize = cpu_to_be32(journal->j_blocksize);
-       sb->s_maxlen    = cpu_to_be32(journal->j_maxlen);
-       sb->s_first     = cpu_to_be32(1);
-       journal->j_transaction_sequence = 1;
-       journal->j_flags &= ~JBD2_ABORT;
-       journal->j_format_version = 2;
-       return journal_reset(journal);
- }
  /**
   * void jbd2_journal_update_superblock() - Update journal sb on disk.
   * @journal: The journal to update.
@@@ -1491,7 -1420,9 +1439,9 @@@ int jbd2_journal_destroy(journal_t *jou
        spin_lock(&journal->j_list_lock);
        while (journal->j_checkpoint_transactions != NULL) {
                spin_unlock(&journal->j_list_lock);
+               mutex_lock(&journal->j_checkpoint_mutex);
                jbd2_log_do_checkpoint(journal);
+               mutex_unlock(&journal->j_checkpoint_mutex);
                spin_lock(&journal->j_list_lock);
        }
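
jbd2_journal_write_metadata_buffer() above fires the buffer's commit trigger on the mapped data before checking for escaping, using whichever trigger set was saved alongside the frozen data. The callback plumbing itself is plain C; the userspace model below (all demo_* names are invented, and the real t_commit hook also receives the buffer_head) only sketches the shape of it: a client registers a struct of function pointers, and the commit path invokes t_commit right before write-out.

#include <stdio.h>
#include <string.h>
#include <stddef.h>

/* Userspace stand-in for struct jbd2_buffer_trigger_type. */
struct demo_trigger_type {
        void (*t_commit)(struct demo_trigger_type *type,
                         void *mapped_data, size_t size);
};

struct demo_buffer {
        char data[16];                          /* stands in for the mapped page */
        struct demo_trigger_type *triggers;     /* like jh->b_triggers */
};

/* A client callback: e.g. recompute a checksum right before write-out. */
static void demo_commit(struct demo_trigger_type *type,
                        void *mapped_data, size_t size)
{
        (void)type;
        memcpy(mapped_data, "checksummed", size < 12 ? size : 12);
}

static struct demo_trigger_type demo_triggers = {
        .t_commit = demo_commit,
};

/* Analogue of jbd2_buffer_commit_trigger(): fire the hook only if one is set. */
static void demo_fire_commit_trigger(struct demo_buffer *b)
{
        if (b->triggers && b->triggers->t_commit)
                b->triggers->t_commit(b->triggers, b->data, sizeof(b->data));
}

int main(void)
{
        struct demo_buffer b = { "raw metadata", &demo_triggers };

        demo_fire_commit_trigger(&b);
        printf("buffer now holds: %s\n", b.data);
        return 0;
}
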
  
diff --combined fs/jbd2/transaction.c
index 4f925a4f3d05051ec7b1edd65c50ecdabe27000a,48c21bac5a567f5ae27dd683c2b119ca2c7c7544..46b4e347ed7d9f3949df7b2b475bda7d7f2b8fef
@@@ -25,6 -25,7 +25,7 @@@
  #include <linux/timer.h>
  #include <linux/mm.h>
  #include <linux/highmem.h>
+ #include <linux/hrtimer.h>
  
  static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
  
@@@ -48,6 -49,7 +49,7 @@@ jbd2_get_transaction(journal_t *journal
  {
        transaction->t_journal = journal;
        transaction->t_state = T_RUNNING;
+       transaction->t_start_time = ktime_get();
        transaction->t_tid = journal->j_transaction_sequence++;
        transaction->t_expires = jiffies + journal->j_commit_interval;
        spin_lock_init(&transaction->t_handle_lock);
@@@ -741,12 -743,6 +743,12 @@@ done
                source = kmap_atomic(page, KM_USER0);
                memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
                kunmap_atomic(source, KM_USER0);
 +
 +              /*
 +               * Now that the frozen data is saved off, we need to store
 +               * any matching triggers.
 +               */
 +              jh->b_frozen_triggers = jh->b_triggers;
        }
        jbd_unlock_bh_state(bh);
  
@@@ -949,47 -945,6 +951,47 @@@ out
        return err;
  }
  
 +/**
 + * void jbd2_journal_set_triggers() - Add triggers for commit writeout
 + * @bh: buffer to trigger on
 + * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
 + *
 + * Set any triggers on this journal_head.  This is always safe, because
 + * triggers for a committing buffer will be saved off, and triggers for
 + * a running transaction will match the buffer in that transaction.
 + *
 + * Call with NULL to clear the triggers.
 + */
 +void jbd2_journal_set_triggers(struct buffer_head *bh,
 +                             struct jbd2_buffer_trigger_type *type)
 +{
 +      struct journal_head *jh = bh2jh(bh);
 +
 +      jh->b_triggers = type;
 +}
 +
 +void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
 +                              struct jbd2_buffer_trigger_type *triggers)
 +{
 +      struct buffer_head *bh = jh2bh(jh);
 +
 +      if (!triggers || !triggers->t_commit)
 +              return;
 +
 +      triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
 +}
 +
 +void jbd2_buffer_abort_trigger(struct journal_head *jh,
 +                             struct jbd2_buffer_trigger_type *triggers)
 +{
 +      if (!triggers || !triggers->t_abort)
 +              return;
 +
 +      triggers->t_abort(triggers, jh2bh(jh));
 +}
 +
 +
 +
  /**
   * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
   * @handle: transaction to add buffer to.
@@@ -1240,7 -1195,7 +1242,7 @@@ int jbd2_journal_stop(handle_t *handle
  {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
-       int old_handle_count, err;
+       int err;
        pid_t pid;
  
        J_ASSERT(journal_current_handle() == handle);
        /*
         * Implement synchronous transaction batching.  If the handle
         * was synchronous, don't force a commit immediately.  Let's
-        * yield and let another thread piggyback onto this transaction.
-        * Keep doing that while new threads continue to arrive.
-        * It doesn't cost much - we're about to run a commit and sleep
-        * on IO anyway.  Speeds up many-threaded, many-dir operations
-        * by 30x or more...
+        * yield and let another thread piggyback onto this
+        * transaction.  Keep doing that while new threads continue to
+        * arrive.  It doesn't cost much - we're about to run a commit
+        * and sleep on IO anyway.  Speeds up many-threaded, many-dir
+        * operations by 30x or more...
+        *
+        * We try and optimize the sleep time against what the
+        * underlying disk can do, instead of having a static sleep
+        * time.  This is useful for the case where our storage is so
+        * fast that it is more optimal to go ahead and force a flush
+        * and wait for the transaction to be committed than it is to
+        * wait for an arbitrary amount of time for new writers to
+        * join the transaction.  We achieve this by measuring how
+        * long it takes to commit a transaction, and compare it with
+        * how long this transaction has been running, and if run time
+        * < commit time then we sleep for the delta and commit.  This
+        * greatly helps super fast disks that would see slowdowns as
+        * more threads started doing fsyncs.
         *
-        * But don't do this if this process was the most recent one to
-        * perform a synchronous write.  We do this to detect the case where a
-        * single process is doing a stream of sync writes.  No point in waiting
-        * for joiners in that case.
+        * But don't do this if this process was the most recent one
+        * to perform a synchronous write.  We do this to detect the
+        * case where a single process is doing a stream of sync
+        * writes.  No point in waiting for joiners in that case.
         */
        pid = current->pid;
        if (handle->h_sync && journal->j_last_sync_writer != pid) {
+               u64 commit_time, trans_time;
                journal->j_last_sync_writer = pid;
-               do {
-                       old_handle_count = transaction->t_handle_count;
-                       schedule_timeout_uninterruptible(1);
-               } while (old_handle_count != transaction->t_handle_count);
+               spin_lock(&journal->j_state_lock);
+               commit_time = journal->j_average_commit_time;
+               spin_unlock(&journal->j_state_lock);
+               trans_time = ktime_to_ns(ktime_sub(ktime_get(),
+                                                  transaction->t_start_time));
+               commit_time = max_t(u64, commit_time,
+                                   1000*journal->j_min_batch_time);
+               commit_time = min_t(u64, commit_time,
+                                   1000*journal->j_max_batch_time);
+               if (trans_time < commit_time) {
+                       ktime_t expires = ktime_add_ns(ktime_get(),
+                                                      commit_time);
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
+               }
        }
  
        current->journal_info = NULL;
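
The batching logic added to jbd2_journal_stop() above clamps the measured average commit time between j_min_batch_time and j_max_batch_time (microseconds, scaled to nanoseconds) and only sleeps when the transaction is younger than that window. Below is a userspace sketch of just that decision; batch_wait_ns() and the figures in main() are made up for illustration and are not jbd2 code.

#include <stdint.h>
#include <stdio.h>

/* Returns how long (ns) a synchronous handle would wait; 0 means commit now. */
static uint64_t batch_wait_ns(uint64_t avg_commit_ns, uint64_t trans_age_ns,
                              uint32_t min_batch_us, uint32_t max_batch_us)
{
        uint64_t commit_ns = avg_commit_ns;

        if (commit_ns < 1000ULL * min_batch_us)
                commit_ns = 1000ULL * min_batch_us;
        if (commit_ns > 1000ULL * max_batch_us)
                commit_ns = 1000ULL * max_batch_us;

        /* Mirror the code above: sleep the clamped window if still young. */
        return trans_age_ns < commit_ns ? commit_ns : 0;
}

int main(void)
{
        /* 2 ms average commit, transaction 0.5 ms old, default 0..15 ms window */
        printf("wait %llu ns\n",
               (unsigned long long)batch_wait_ns(2000000, 500000, 0, 15000));
        return 0;
}
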
diff --combined fs/super.c
index 7d67387496cb3348d81795c694a84cb136d655aa,d5fd4498548a0c31d0711b0cf04dae4d22a0203f..ed080c41716757a3c0a71c714c5a7db8ec489287
@@@ -38,7 -38,6 +38,7 @@@
  #include <linux/kobject.h>
  #include <linux/mutex.h>
  #include <linux/file.h>
 +#include <linux/async.h>
  #include <asm/uaccess.h>
  #include "internal.h"
  
@@@ -72,7 -71,6 +72,7 @@@ static struct super_block *alloc_super(
                INIT_HLIST_HEAD(&s->s_anon);
                INIT_LIST_HEAD(&s->s_inodes);
                INIT_LIST_HEAD(&s->s_dentry_lru);
 +              INIT_LIST_HEAD(&s->s_async_list);
                init_rwsem(&s->s_umount);
                mutex_init(&s->s_lock);
                lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@@ -291,18 -289,11 +291,18 @@@ void generic_shutdown_super(struct supe
  {
        const struct super_operations *sop = sb->s_op;
  
 +
        if (sb->s_root) {
                shrink_dcache_for_umount(sb);
                fsync_super(sb);
                lock_super(sb);
                sb->s_flags &= ~MS_ACTIVE;
 +
 +              /*
 +               * wait for asynchronous fs operations to finish before going further
 +               */
 +              async_synchronize_full_special(&sb->s_async_list);
 +
                /* bad name - it should be evict_inodes() */
                invalidate_inodes(sb);
                lock_kernel();
@@@ -470,7 -461,6 +470,7 @@@ restart
                sb->s_count++;
                spin_unlock(&sb_lock);
                down_read(&sb->s_umount);
 +              async_synchronize_full_special(&sb->s_async_list);
                if (sb->s_root && (wait || sb->s_dirt))
                        sb->s_op->sync_fs(sb, wait);
                up_read(&sb->s_umount);
@@@ -810,6 -800,7 +810,7 @@@ int get_sb_bdev(struct file_system_typ
                }
  
                s->s_flags |= MS_ACTIVE;
+               bdev->bd_super = s;
        }
  
        return simple_set_mnt(mnt, s);
@@@ -829,6 -820,7 +830,7 @@@ void kill_block_super(struct super_bloc
        struct block_device *bdev = sb->s_bdev;
        fmode_t mode = sb->s_mode;
  
+       bdev->bd_super = NULL;
        generic_shutdown_super(sb);
        sync_blockdev(bdev);
        close_bdev_exclusive(bdev, mode);
diff --combined include/linux/ext3_fs.h
index d76800f6ecf0fb927bf16f144fa6516f5ac62111,9004794a35fea1096d99154c81c06e6230052456..dd495b8c3091e21b3b55a25e8c8294e07f611113
@@@ -178,30 -178,6 +178,30 @@@ struct ext3_group_des
  #define EXT3_FL_USER_VISIBLE          0x0003DFFF /* User visible flags */
  #define EXT3_FL_USER_MODIFIABLE               0x000380FF /* User modifiable flags */
  
 +/* Flags that should be inherited by new inodes from their parent. */
 +#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
 +                         EXT3_SYNC_FL | EXT3_IMMUTABLE_FL | EXT3_APPEND_FL |\
 +                         EXT3_NODUMP_FL | EXT3_NOATIME_FL | EXT3_COMPRBLK_FL|\
 +                         EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
 +                         EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
 +
 +/* Flags that are appropriate for regular files (all but dir-specific ones). */
 +#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
 +
 +/* Flags that are appropriate for non-directories/regular files. */
 +#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
 +
 +/* Mask out flags that are inappropriate for the given type of inode. */
 +static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
 +{
 +      if (S_ISDIR(mode))
 +              return flags;
 +      else if (S_ISREG(mode))
 +              return flags & EXT3_REG_FLMASK;
 +      else
 +              return flags & EXT3_OTHER_FLMASK;
 +}
 +
  /*
   * Inode dynamic state flags
   */
@@@ -377,6 -353,13 +377,13 @@@ struct ext3_inode 
  #define       EXT3_ERROR_FS                   0x0002  /* Errors detected */
  #define       EXT3_ORPHAN_FS                  0x0004  /* Orphans being recovered */
  
+ /*
+  * Misc. filesystem flags
+  */
+ #define EXT2_FLAGS_SIGNED_HASH                0x0001  /* Signed dirhash in use */
+ #define EXT2_FLAGS_UNSIGNED_HASH      0x0002  /* Unsigned dirhash in use */
+ #define EXT2_FLAGS_TEST_FILESYS               0x0004  /* to test development code */
  /*
   * Mount flags
   */
@@@ -513,7 -496,23 +520,23 @@@ struct ext3_super_block 
        __u16   s_reserved_word_pad;
        __le32  s_default_mount_opts;
        __le32  s_first_meta_bg;        /* First metablock block group */
-       __u32   s_reserved[190];        /* Padding to the end of the block */
+       __le32  s_mkfs_time;            /* When the filesystem was created */
+       __le32  s_jnl_blocks[17];       /* Backup of the journal inode */
+       /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
+ /*150*/       __le32  s_blocks_count_hi;      /* Blocks count */
+       __le32  s_r_blocks_count_hi;    /* Reserved blocks count */
+       __le32  s_free_blocks_count_hi; /* Free blocks count */
+       __le16  s_min_extra_isize;      /* All inodes have at least # bytes */
+       __le16  s_want_extra_isize;     /* New inodes should reserve # bytes */
+       __le32  s_flags;                /* Miscellaneous flags */
+       __le16  s_raid_stride;          /* RAID stride */
+       __le16  s_mmp_interval;         /* # seconds to wait in MMP checking */
+       __le64  s_mmp_block;            /* Block for multi-mount protection */
+       __le32  s_raid_stripe_width;    /* blocks on all data disks (N*stride)*/
+       __u8    s_log_groups_per_flex;  /* FLEX_BG group size */
+       __u8    s_reserved_char_pad2;
+       __le16  s_reserved_pad;
+       __u32   s_reserved[162];        /* Padding to the end of the block */
  };
  
  #ifdef __KERNEL__
@@@ -718,6 -717,9 +741,9 @@@ static inline __le16 ext3_rec_len_to_di
  #define DX_HASH_LEGACY                0
  #define DX_HASH_HALF_MD4      1
  #define DX_HASH_TEA           2
+ #define DX_HASH_LEGACY_UNSIGNED       3
+ #define DX_HASH_HALF_MD4_UNSIGNED     4
+ #define DX_HASH_TEA_UNSIGNED          5
  
  #ifdef __KERNEL__
  
diff --combined include/linux/ext3_fs_sb.h
index 76fdc0f4b0287f3e3b0450a4357de6519d69f0c0,a4e9216b3a6dd7894d53b6b1636acb360973861b..f07f34de2f0ecb7ed987c49c604ce3efce4690e6
@@@ -57,10 -57,11 +57,11 @@@ struct ext3_sb_info 
        u32 s_next_generation;
        u32 s_hash_seed[4];
        int s_def_hash_version;
+       int s_hash_unsigned;    /* 3 if hash should be unsigned, 0 if not */
        struct percpu_counter s_freeblocks_counter;
        struct percpu_counter s_freeinodes_counter;
        struct percpu_counter s_dirs_counter;
 -      struct blockgroup_lock s_blockgroup_lock;
 +      struct blockgroup_lock *s_blockgroup_lock;
  
        /* root of the per fs reservation window tree */
        spinlock_t s_rsv_window_lock;
@@@ -86,7 -87,7 +87,7 @@@
  static inline spinlock_t *
  sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
  {
 -      return bgl_lock_ptr(&sbi->s_blockgroup_lock, block_group);
 +      return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
  }
  
  #endif        /* _LINUX_EXT3_FS_SB */
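
The new s_hash_unsigned field is set to 3 because the unsigned dirhash variants added to ext3_fs.h above (DX_HASH_LEGACY_UNSIGNED and friends) sit exactly three slots after their signed counterparts, so the value can simply be added to a hash version to pick the unsigned flavour (that use lives outside this diff). The sketch below only illustrates the build-time choice the mount code above makes, keyed off whether plain char is unsigned on the architecture; it is not filesystem code.

#include <limits.h>
#include <stdio.h>

int main(void)
{
        int hash_unsigned;

#if CHAR_MIN == 0
        /* plain char is unsigned here (typical of arm, powerpc, s390) */
        hash_unsigned = 3;
#else
        /* plain char is signed here (typical of x86) */
        hash_unsigned = 0;
#endif
        printf("dirhash version offset on this build: %d\n", hash_unsigned);
        return 0;
}
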
diff --combined include/linux/fs.h
index e38a64d71efff910fbc9b6a8ad578a0cd509aea4,0f54ae0f0ccde915e2d50c873bf51462f3843460..0b87b29f4797fa9960373a24194501f0ebc5fab9
@@@ -565,6 -565,7 +565,7 @@@ struct address_space 
  struct block_device {
        dev_t                   bd_dev;  /* not a kdev_t - it's a search key */
        struct inode *          bd_inode;       /* will die */
+       struct super_block *    bd_super;
        int                     bd_openers;
        struct mutex            bd_mutex;       /* open/close mutex */
        struct semaphore        bd_mount_sem;
@@@ -1133,6 -1134,7 +1134,6 @@@ struct super_block 
        struct rw_semaphore     s_umount;
        struct mutex            s_lock;
        int                     s_count;
 -      int                     s_syncing;
        int                     s_need_sync_fs;
        atomic_t                s_active;
  #ifdef CONFIG_SECURITY
         * generic_show_options()
         */
        char *s_options;
 +
 +      /*
 +       * storage for asynchronous operations
 +       */
 +      struct list_head s_async_list;
  };
  
  extern struct timespec current_fs_time(struct super_block *sb);
@@@ -1389,6 -1386,7 +1390,7 @@@ struct super_operations 
        ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
        ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
  #endif
+       int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
  };
  
  /*
@@@ -1834,7 -1832,7 +1836,7 @@@ extern int __filemap_fdatawrite_range(s
  extern int filemap_fdatawrite_range(struct address_space *mapping,
                                loff_t start, loff_t end);
  
 -extern long do_fsync(struct file *file, int datasync);
 +extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync);
  extern void sync_supers(void);
  extern void sync_filesystems(int wait);
  extern void __fsync_super(struct super_block *sb);
@@@ -2063,9 -2061,6 +2065,9 @@@ extern int vfs_fstat(unsigned int, stru
  
  extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
                    unsigned long arg);
 +extern int __generic_block_fiemap(struct inode *inode,
 +                                struct fiemap_extent_info *fieinfo, u64 start,
 +                                u64 len, get_block_t *get_block);
  extern int generic_block_fiemap(struct inode *inode,
                                struct fiemap_extent_info *fieinfo, u64 start,
                                u64 len, get_block_t *get_block);
diff --combined include/linux/jbd2.h
index 34456476e761330686a4e6d6afa98cf570fc94de,adef1c9940d3719730aa68d5647e049b084c51f5..b45109c61fba90451a0396f6c845e7c55964ccb3
@@@ -637,6 -637,11 +637,11 @@@ struct transaction_
         */
        unsigned long           t_expires;
  
+       /*
+        * When this transaction started, in nanoseconds [no locking]
+        */
+       ktime_t                 t_start_time;
        /*
         * How many handles used this transaction? [t_handle_lock]
         */
@@@ -682,6 -687,8 +687,8 @@@ jbd2_time_diff(unsigned long start, uns
        return end + (MAX_JIFFY_OFFSET - start);
  }
  
+ #define JBD2_NR_BATCH 64
  /**
   * struct journal_s - The journal_s type is the concrete type associated with
   *     journal_t.
@@@ -825,6 -832,14 +832,14 @@@ struct journal_
        /* Semaphore for locking against concurrent checkpoints */
        struct mutex            j_checkpoint_mutex;
  
+       /*
+        * List of buffer heads used by the checkpoint routine.  This
+        * was moved from jbd2_log_do_checkpoint() to reduce stack
+        * usage.  Access to this array is controlled by the
+        * j_checkpoint_mutex.  [j_checkpoint_mutex]
+        */
+       struct buffer_head      *j_chkpt_bhs[JBD2_NR_BATCH];
+       
        /*
         * Journal head: identifies the first unused block in the journal.
         * [j_state_lock]
        struct buffer_head      **j_wbuf;
        int                     j_wbufsize;
  
+       /*
+       * this is the pid of the last person to run a synchronous operation
+        * through the journal
+        */
        pid_t                   j_last_sync_writer;
  
+       /*
+        * the average amount of time in nanoseconds it takes to commit a
+        * transaction to disk. [j_state_lock]
+        */
+       u64                     j_average_commit_time;
+       /*
+        * minimum and maximum times that we should wait for
+        * additional filesystem operations to get batched into a
+        * synchronous handle in microseconds
+        */
+       u32                     j_min_batch_time;
+       u32                     j_max_batch_time;
        /* This function is called when a transaction is closed */
        void                    (*j_commit_callback)(journal_t *,
                                                     transaction_t *);
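
j_min_batch_time and j_max_batch_time bound, in microseconds, how long a synchronous handle may wait for other operations to join the same transaction, while j_average_commit_time tracks commit cost in nanoseconds. The sketch below only illustrates clamping a delay to that window; pick_batch_delay_ns is a hypothetical helper, and for brevity it reads j_average_commit_time without taking j_state_lock.

#include <linux/jbd2.h>
#include <linux/time.h>

/* Sketch: choose a batching delay (ns) bounded by the configured window.
 * Locking of j_average_commit_time (j_state_lock) is omitted here. */
static u64 pick_batch_delay_ns(journal_t *journal)
{
        u64 delay  = journal->j_average_commit_time;
        u64 min_ns = (u64)journal->j_min_batch_time * NSEC_PER_USEC;
        u64 max_ns = (u64)journal->j_max_batch_time * NSEC_PER_USEC;

        if (delay < min_ns)
                delay = min_ns;
        if (delay > max_ns)
                delay = max_ns;
        return delay;
}
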
@@@ -1008,35 -1041,6 +1041,35 @@@ int __jbd2_journal_clean_checkpoint_lis
  int __jbd2_journal_remove_checkpoint(struct journal_head *);
  void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
  
 +
 +/*
 + * Triggers
 + */
 +
 +struct jbd2_buffer_trigger_type {
 +      /*
 +       * Fired just before a buffer is written to the journal.
 +       * mapped_data is a mapped buffer that is the frozen data for
 +       * commit.
 +       */
 +      void (*t_commit)(struct jbd2_buffer_trigger_type *type,
 +                       struct buffer_head *bh, void *mapped_data,
 +                       size_t size);
 +
 +      /*
 +       * Fired during journal abort for dirty buffers that will not be
 +       * committed.
 +       */
 +      void (*t_abort)(struct jbd2_buffer_trigger_type *type,
 +                      struct buffer_head *bh);
 +};
 +
 +extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
 +                                     void *mapped_data,
 +                                     struct jbd2_buffer_trigger_type *triggers);
 +extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
 +                                    struct jbd2_buffer_trigger_type *triggers);
 +
  /* Buffer IO */
  extern int
  jbd2_journal_write_metadata_buffer(transaction_t        *transaction,
@@@ -1075,8 -1079,6 +1108,8 @@@ extern int       jbd2_journal_extend (handle
  extern int     jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
  extern int     jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
  extern int     jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
 +void           jbd2_journal_set_triggers(struct buffer_head *,
 +                                         struct jbd2_buffer_trigger_type *type);
  extern int     jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
  extern void    jbd2_journal_release_buffer (handle_t *, struct buffer_head *);
  extern int     jbd2_journal_forget (handle_t *, struct buffer_head *);
@@@ -1102,7 -1104,6 +1135,6 @@@ extern int         jbd2_journal_set_feature
                   (journal_t *, unsigned long, unsigned long, unsigned long);
  extern void      jbd2_journal_clear_features
                   (journal_t *, unsigned long, unsigned long, unsigned long);
- extern int       jbd2_journal_create     (journal_t *);
  extern int       jbd2_journal_load       (journal_t *journal);
  extern int       jbd2_journal_destroy    (journal_t *);
  extern int       jbd2_journal_recover    (journal_t *journal);
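
The trigger hooks declared above let a jbd2 client adjust the frozen copy of a metadata buffer just before it is written to the journal, for example to refresh an in-block checksum. A hedged sketch of wiring this up; my_commit_trigger, my_triggers and my_dirty_metadata are illustrative names, and the checksum step is only a comment.

#include <linux/jbd2.h>

/* Fired on the frozen data right before it goes to the journal; a real
 * trigger would recompute checksums or similar inside mapped_data. */
static void my_commit_trigger(struct jbd2_buffer_trigger_type *type,
                              struct buffer_head *bh, void *mapped_data,
                              size_t size)
{
        /* update derived fields in mapped_data here */
}

static struct jbd2_buffer_trigger_type my_triggers = {
        .t_commit = my_commit_trigger,
};

/* Typical call sequence: get write access, attach the triggers, modify
 * the buffer, then mark it dirty as usual. */
static int my_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
        int err = jbd2_journal_get_write_access(handle, bh);
        if (err)
                return err;
        jbd2_journal_set_triggers(bh, &my_triggers);
        /* ... modify bh->b_data under the handle ... */
        return jbd2_journal_dirty_metadata(handle, bh);
}
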
@@@ -1177,8 -1178,8 +1209,8 @@@ int jbd2_log_wait_commit(journal_t *jou
  int jbd2_log_do_checkpoint(journal_t *journal);
  
  void __jbd2_log_wait_for_space(journal_t *journal);
- extern void   __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
- extern int    jbd2_cleanup_journal_tail(journal_t *);
+ extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
+ extern int jbd2_cleanup_journal_tail(journal_t *);
  
  /* Debugging code only: */