journal_do_submit_data(wbuf, bufs);
 }
 
+/*
+ * Submit all the data buffers of the inodes associated with the transaction
+ * to disk.
+ *
+ * We are in a committing transaction. Therefore no new inode can be added to
+ * our inode list. We use the JI_COMMIT_RUNNING flag to protect the inode we
+ * currently operate on from being released while we write out pages
+ * (jbd2_journal_release_jbd_inode() waits for the flag to be cleared).
+ *
+ * For every inode on commit_transaction->t_inode_list, writeout is started
+ * on its mapping for the byte range from 0 up to the inode's current size.
+ * j_list_lock is dropped around the writeout call and retaken afterwards.
+ *
+ * Returns 0, or the first non-zero value returned by
+ * filemap_fdatawrite_range(). A failure does not stop the walk; the
+ * remaining inodes are still submitted.
+ */
+static int journal_submit_inode_data_buffers(journal_t *journal,
+               transaction_t *commit_transaction)
+{
+       struct jbd2_inode *jinode;
+       int err, ret = 0;
+       struct address_space *mapping;
+
+       spin_lock(&journal->j_list_lock);
+       list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               mapping = jinode->i_vfs_inode->i_mapping;
+               /* Flag the inode as in use by commit before the lock is dropped */
+               jinode->i_flags |= JI_COMMIT_RUNNING;
+               spin_unlock(&journal->j_list_lock);
+               err = filemap_fdatawrite_range(mapping, 0,
+                                       i_size_read(jinode->i_vfs_inode));
+               /* Keep only the first error; keep going with the other inodes */
+               if (!ret)
+                       ret = err;
+               spin_lock(&journal->j_list_lock);
+               J_ASSERT(jinode->i_transaction == commit_transaction);
+               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               /* Let anyone sleeping on the JI_COMMIT_RUNNING bit re-check it */
+               wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
+       }
+       spin_unlock(&journal->j_list_lock);
+       return ret;
+}
+
+/*
+ * Wait for data submitted for writeout, refile inodes to proper
+ * transaction if needed.
+ *
+ * First pass: for every inode on commit_transaction->t_inode_list, wait for
+ * writeback on its mapping with filemap_fdatawait(). j_list_lock is dropped
+ * around the wait and JI_COMMIT_RUNNING is set for its duration.
+ *
+ * Second pass (j_list_lock held throughout): every inode is removed from the
+ * committing transaction's list. An inode with i_next_transaction set is
+ * moved onto that transaction's t_inode_list; otherwise it is left unfiled
+ * with i_transaction == NULL.
+ *
+ * Returns 0, or the first non-zero value returned by filemap_fdatawait().
+ * A failure does not stop either pass.
+ */
+static int journal_finish_inode_data_buffers(journal_t *journal,
+               transaction_t *commit_transaction)
+{
+       struct jbd2_inode *jinode, *next_i;
+       int err, ret = 0;
+
+       /* For locking, see the comment in journal_submit_inode_data_buffers() */
+       spin_lock(&journal->j_list_lock);
+       list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               /* Flag the inode as in use by commit before the lock is dropped */
+               jinode->i_flags |= JI_COMMIT_RUNNING;
+               spin_unlock(&journal->j_list_lock);
+               err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
+               /* Keep only the first error; keep waiting on the other inodes */
+               if (!ret)
+                       ret = err;
+               spin_lock(&journal->j_list_lock);
+               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               /* Let anyone sleeping on the JI_COMMIT_RUNNING bit re-check it */
+               wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
+       }
+
+       /* Now refile inode to proper lists */
+       /* The _safe iterator is required: entries are unlinked while walking */
+       list_for_each_entry_safe(jinode, next_i,
+                                &commit_transaction->t_inode_list, i_list) {
+               list_del(&jinode->i_list);
+               if (jinode->i_next_transaction) {
+                       /* Inode was also filed for a later transaction: hand it over */
+                       jinode->i_transaction = jinode->i_next_transaction;
+                       jinode->i_next_transaction = NULL;
+                       list_add(&jinode->i_list,
+                               &jinode->i_transaction->t_inode_list);
+               } else {
+                       /* Nobody else wants it: leave the inode unfiled */
+                       jinode->i_transaction = NULL;
+               }
+       }
+       spin_unlock(&journal->j_list_lock);
+
+       return ret;
+}
+
 static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
 {
        struct page *page = bh->b_page;
         */
        err = 0;
        journal_submit_data_buffers(journal, commit_transaction);
+       err = journal_submit_inode_data_buffers(journal, commit_transaction);
+       if (err)
+               jbd2_journal_abort(journal, err);
 
        /*
         * Wait for all previously submitted IO to complete if commit
                        __jbd2_journal_abort_hard(journal);
        }
 
+       /*
+        * This is the right place to wait for data buffers both for ASYNC
+        * and !ASYNC commit. If commit is ASYNC, we need to wait only after
+        * the commit block went to disk (which happens above). If commit is
+        * SYNC, we need to wait for data buffers before we start writing
+        * commit block, which happens below in such setting.
+        */
+       err = journal_finish_inode_data_buffers(journal, commit_transaction);
+       if (err)
+               jbd2_journal_abort(journal, err);
+
        /* Lo and behold: we have just managed to send a transaction to
            the log.  Before we can commit it, wait for the IO so far to
            complete.  Control buffers being written are on the
        jbd_debug(3, "JBD: commit phase 7\n");
 
        J_ASSERT(commit_transaction->t_sync_datalist == NULL);
+       J_ASSERT(list_empty(&commit_transaction->t_inode_list));
        J_ASSERT(commit_transaction->t_buffers == NULL);
        J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
        J_ASSERT(commit_transaction->t_iobuf_list == NULL);
 
 EXPORT_SYMBOL(jbd2_journal_invalidatepage);
 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
 EXPORT_SYMBOL(jbd2_journal_force_commit);
+EXPORT_SYMBOL(jbd2_journal_file_inode);
+EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
+EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
+EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
        jbd_unlock_bh_journal_head(bh);
 }
 
+/*
+ * Prepare a jbd2_inode for use: bind it to the given VFS inode, clear its
+ * flags and leave it filed on no transaction (empty list linkage, both
+ * transaction pointers NULL).
+ */
+void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
+{
+       INIT_LIST_HEAD(&jinode->i_list);
+       jinode->i_flags = 0;
+       jinode->i_vfs_inode = inode;
+       jinode->i_next_transaction = NULL;
+       jinode->i_transaction = NULL;
+}
+
+/*
+ * Function to be called before we start removing inode from memory (i.e.,
+ * clear_inode() is a fine place to be called from). It removes inode from
+ * transaction's lists.
+ *
+ * If commit is currently operating on the inode (JI_COMMIT_RUNNING is set),
+ * sleep uninterruptibly until the flag is cleared and then re-check under
+ * j_list_lock. Once the flag is clear, the inode is unlinked from the list
+ * of the transaction it is filed on, if any.
+ *
+ * Does nothing when @journal is NULL.
+ */
+void jbd2_journal_release_jbd_inode(journal_t *journal,
+                                   struct jbd2_inode *jinode)
+{
+       if (!journal)
+               return;
+restart:
+       spin_lock(&journal->j_list_lock);
+       /* Is commit writing out inode - we have to wait */
+       if (jinode->i_flags & JI_COMMIT_RUNNING) {
+               wait_queue_head_t *wq;
+               DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
+               wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
+               /*
+                * Queue ourselves before dropping the lock so that the
+                * wake_up_bit() issued by commit cannot be missed.
+                */
+               prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+               spin_unlock(&journal->j_list_lock);
+               schedule();
+               finish_wait(wq, &wait.wait);
+               /* The flag may have been set again; re-check under the lock */
+               goto restart;
+       }
+
+       /* Unlink the inode from whichever transaction it is filed on */
+       if (jinode->i_transaction) {
+               list_del(&jinode->i_list);
+               jinode->i_transaction = NULL;
+       }
+       spin_unlock(&journal->j_list_lock);
+}
+
 /*
  * debugfs tunables
  */
 
        transaction->t_tid = journal->j_transaction_sequence++;
        transaction->t_expires = jiffies + journal->j_commit_interval;
        spin_lock_init(&transaction->t_handle_lock);
+       INIT_LIST_HEAD(&transaction->t_inode_list);
 
        /* Set up the commit timer for the new transaction. */
        journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
        spin_unlock(&journal->j_list_lock);
        __brelse(bh);
 }
+
+/*
+ * Attach the inode to the transaction the handle belongs to, so that the
+ * inode is on that transaction's inode list (or queued for it via
+ * i_next_transaction when the inode is still filed on another transaction).
+ *
+ * Returns -EIO if the handle is aborted, 0 otherwise.
+ */
+int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
+{
+       transaction_t *txn = handle->h_transaction;
+       journal_t *jnl = txn->t_journal;
+
+       if (is_handle_aborted(handle))
+               return -EIO;
+
+       jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
+                       txn->t_tid);
+
+       /*
+        * Lockless fast path: nothing to do if the inode is already filed
+        * on, or queued for, our transaction. Skipping the lock is safe
+        * because nobody can take the inode off the transaction under us.
+        * Removal happens only in jbd2_journal_release_jbd_inode() and at
+        * commit time. The former cannot run while we hold a reference to
+        * the inode. As for the latter: when i_transaction is our
+        * transaction, commit cannot touch it since we hold a reference to
+        * the transaction; when i_next_transaction is our transaction,
+        * commit will merely file the inode exactly where we want it.
+        */
+       if (jinode->i_transaction == txn || jinode->i_next_transaction == txn)
+               return 0;
+
+       spin_lock(&jnl->j_list_lock);
+
+       /* Repeat the check now that the lock is held */
+       if (jinode->i_transaction != txn &&
+           jinode->i_next_transaction != txn) {
+               if (jinode->i_transaction) {
+                       /*
+                        * Filed on some other transaction, which should be
+                        * the committing one: queue the inode for ours.
+                        */
+                       J_ASSERT(jinode->i_next_transaction == NULL);
+                       J_ASSERT(jinode->i_transaction ==
+                                               jnl->j_committing_transaction);
+                       jinode->i_next_transaction = txn;
+               } else {
+                       /* Not filed anywhere yet: put it on our list */
+                       J_ASSERT(!jinode->i_next_transaction);
+                       jinode->i_transaction = txn;
+                       list_add(&jinode->i_list, &txn->t_inode_list);
+               }
+       }
+
+       spin_unlock(&jnl->j_list_lock);
+
+       return 0;
+}
+
+/*
+ * This function must be called when inode is journaled in ordered mode
+ * before truncation happens. It starts writeout of truncated part in
+ * case it is in the committing transaction so that we stand to ordered
+ * mode consistency guarantees.
+ *
+ * @inode:    jbd2 inode about to be truncated
+ * @new_size: new file size; writeout is started for [new_size, LLONG_MAX]
+ *
+ * Returns 0 when no writeout was needed or it was started successfully.
+ * Otherwise returns the error from filemap_fdatawrite_range(), in which
+ * case the journal is also aborted with that error.
+ */
+int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
+                                       loff_t new_size)
+{
+       journal_t *journal;
+       transaction_t *inode_trans, *commit_trans;
+       int ret = 0;
+
+       /*
+        * Quick unlocked check. Only i_transaction is relevant: it is the
+        * pointer we dereference to find the journal and the one compared
+        * against the committing transaction. Read it exactly once so that
+        * the NULL test and the later uses see the same value even if
+        * commit or release clears the field concurrently.
+        *
+        * NOTE(review): this is still an unlocked read and the transaction
+        * is not pinned here. Closing the window completely would need the
+        * journal to be supplied by the caller - confirm whether callers
+        * guarantee the transaction stays alive.
+        */
+       inode_trans = inode->i_transaction;
+       if (!inode_trans)
+               goto out;
+       journal = inode_trans->t_journal;
+       spin_lock(&journal->j_state_lock);
+       commit_trans = journal->j_committing_transaction;
+       spin_unlock(&journal->j_state_lock);
+       /* Only data belonging to the committing transaction must hit disk */
+       if (inode_trans == commit_trans) {
+               ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
+                       new_size, LLONG_MAX);
+               if (ret)
+                       jbd2_journal_abort(journal, ret);
+       }
+out:
+       return ret;
+}
 
        bit_spin_unlock(BH_JournalHead, &bh->b_state);
 }
 
+/* Flags in jbd2_inode->i_flags */
+/* Bit number of the flag, as passed to the bit-wait helpers */
+#define __JI_COMMIT_RUNNING 0
+/* Commit of the inode data in progress. We use this flag to protect us from
+ * concurrent deletion of inode. We cannot use reference to inode for this
+ * since we cannot afford doing last iput() on behalf of kjournald.
+ */
+#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING)
+
+/**
+ * struct jbd2_inode - links an inode in ordered mode to the transaction
+ *   it is present in, so that we can sync its data during commit.
+ *
+ * Fields annotated with [j_list_lock] are protected by the journal's
+ * j_list_lock.
+ */
+struct jbd2_inode {
+       /* Which transaction does this inode belong to? Either the running
+        * transaction or the committing one. NULL when the inode is not
+        * filed on any transaction. [j_list_lock] */
+       transaction_t *i_transaction;
+
+       /* Pointer to the running transaction modifying inode's data in case
+        * there is already a committing transaction touching it. [j_list_lock] */
+       transaction_t *i_next_transaction;
+
+       /* List of inodes in the i_transaction [j_list_lock] */
+       struct list_head i_list;
+
+       /* VFS inode this inode belongs to [constant during the lifetime
+        * of the structure] */
+       struct inode *i_vfs_inode;
+
+       /* Flags of inode (JI_* bits) [j_list_lock] */
+       unsigned int i_flags;
+};
+
 struct jbd2_revoke_table_s;
 
 /**
         */
        struct journal_head     *t_log_list;
 
+       /*
+        * List of inodes whose data we've modified in data=ordered mode.
+        * [j_list_lock]
+        */
+       struct list_head        t_inode_list;
+
        /*
         * Protects info related to handles
         */
 extern int        jbd2_journal_clear_err  (journal_t *);
 extern int        jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int        jbd2_journal_force_commit(journal_t *);
+extern int        jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
+extern int        jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
+extern void       jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
+extern void       jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
 
 /*
  * journal_head management