]> pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - fs/jbd2/commit.c
driver core: remove device_create()
[linux-2.6-omap-h63xx.git] / fs / jbd2 / commit.c
index 92b6ac3df8ababdd3ee55fa7489ecbca73a952b3..f8b3be8732262f1642c0a1b4e19399bd048b964f 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/crc32.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -37,8 +39,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 }
 
 /*
- * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * When an ext4 file is truncated, it is possible that some pages are not
+ * successfully freed, because they are attached to a committing transaction.
  * After the transaction commits, these pages are left on the LRU, with no
  * ->mapping, and with attached buffers.  These pages are trivially reclaimable
  * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -79,21 +81,6 @@ nope:
        __brelse(bh);
 }
 
-/*
- * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
- * held.  For ranking reasons we must trylock.  If we lose, schedule away and
- * return 0.  j_list_lock is dropped in this case.
- */
-static int inverted_lock(journal_t *journal, struct buffer_head *bh)
-{
-       if (!jbd_trylock_bh_state(bh)) {
-               spin_unlock(&journal->j_list_lock);
-               schedule();
-               return 0;
-       }
-       return 1;
-}
-
 /*
  * Done it all: now submit the commit record.  We should have
  * cleaned up our previous buffers by now, so if we are in abort
@@ -200,159 +187,104 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
 }
 
 /*
- * Wait for all submitted IO to complete.
+ * write the filemap data using writepage() address_space_operations.
+ * We don't do block allocation here even for delalloc. We don't
+ * use writepages() because with dealyed allocation we may be doing
+ * block allocation in writepages().
  */
-static int journal_wait_on_locked_list(journal_t *journal,
-                                      transaction_t *commit_transaction)
+static int journal_submit_inode_data_buffers(struct address_space *mapping)
 {
-       int ret = 0;
-       struct journal_head *jh;
-
-       while (commit_transaction->t_locked_list) {
-               struct buffer_head *bh;
-
-               jh = commit_transaction->t_locked_list->b_tprev;
-               bh = jh2bh(jh);
-               get_bh(bh);
-               if (buffer_locked(bh)) {
-                       spin_unlock(&journal->j_list_lock);
-                       wait_on_buffer(bh);
-                       if (unlikely(!buffer_uptodate(bh)))
-                               ret = -EIO;
-                       spin_lock(&journal->j_list_lock);
-               }
-               if (!inverted_lock(journal, bh)) {
-                       put_bh(bh);
-                       spin_lock(&journal->j_list_lock);
-                       continue;
-               }
-               if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
-                       __jbd2_journal_unfile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
-                       jbd2_journal_remove_journal_head(bh);
-                       put_bh(bh);
-               } else {
-                       jbd_unlock_bh_state(bh);
-               }
-               put_bh(bh);
-               cond_resched_lock(&journal->j_list_lock);
-       }
+       int ret;
+       struct writeback_control wbc = {
+               .sync_mode =  WB_SYNC_ALL,
+               .nr_to_write = mapping->nrpages * 2,
+               .range_start = 0,
+               .range_end = i_size_read(mapping->host),
+               .for_writepages = 1,
+       };
+
+       ret = generic_writepages(mapping, &wbc);
        return ret;
-  }
+}
 
-static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+/*
+ * Submit all the data buffers of inode associated with the transaction to
+ * disk.
+ *
+ * We are in a committing transaction. Therefore no new inode can be added to
+ * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
+ * operate on from being released while we write out pages.
+ */
+static int journal_submit_data_buffers(journal_t *journal,
+               transaction_t *commit_transaction)
 {
-       int i;
+       struct jbd2_inode *jinode;
+       int err, ret = 0;
+       struct address_space *mapping;
 
-       for (i = 0; i < bufs; i++) {
-               wbuf[i]->b_end_io = end_buffer_write_sync;
-               /* We use-up our safety reference in submit_bh() */
-               submit_bh(WRITE, wbuf[i]);
+       spin_lock(&journal->j_list_lock);
+       list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               mapping = jinode->i_vfs_inode->i_mapping;
+               jinode->i_flags |= JI_COMMIT_RUNNING;
+               spin_unlock(&journal->j_list_lock);
+               /*
+                * submit the inode data buffers. We use writepage
+                * instead of writepages. Because writepages can do
+                * block allocation  with delalloc. We need to write
+                * only allocated blocks here.
+                */
+               err = journal_submit_inode_data_buffers(mapping);
+               if (!ret)
+                       ret = err;
+               spin_lock(&journal->j_list_lock);
+               J_ASSERT(jinode->i_transaction == commit_transaction);
+               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
        }
+       spin_unlock(&journal->j_list_lock);
+       return ret;
 }
 
 /*
- *  Submit all the data buffers to disk
+ * Wait for data submitted for writeout, refile inodes to proper
+ * transaction if needed.
+ *
  */
-static void journal_submit_data_buffers(journal_t *journal,
-                               transaction_t *commit_transaction)
+static int journal_finish_inode_data_buffers(journal_t *journal,
+               transaction_t *commit_transaction)
 {
-       struct journal_head *jh;
-       struct buffer_head *bh;
-       int locked;
-       int bufs = 0;
-       struct buffer_head **wbuf = journal->j_wbuf;
+       struct jbd2_inode *jinode, *next_i;
+       int err, ret = 0;
 
-       /*
-        * Whenever we unlock the journal and sleep, things can get added
-        * onto ->t_sync_datalist, so we have to keep looping back to
-        * write_out_data until we *know* that the list is empty.
-        *
-        * Cleanup any flushed data buffers from the data list.  Even in
-        * abort mode, we want to flush this out as soon as possible.
-        */
-write_out_data:
-       cond_resched();
+       /* For locking, see the comment in journal_submit_data_buffers() */
        spin_lock(&journal->j_list_lock);
+       list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+               jinode->i_flags |= JI_COMMIT_RUNNING;
+               spin_unlock(&journal->j_list_lock);
+               err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
+               if (!ret)
+                       ret = err;
+               spin_lock(&journal->j_list_lock);
+               jinode->i_flags &= ~JI_COMMIT_RUNNING;
+               wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
+       }
 
-       while (commit_transaction->t_sync_datalist) {
-               jh = commit_transaction->t_sync_datalist;
-               bh = jh2bh(jh);
-               locked = 0;
-
-               /* Get reference just to make sure buffer does not disappear
-                * when we are forced to drop various locks */
-               get_bh(bh);
-               /* If the buffer is dirty, we need to submit IO and hence
-                * we need the buffer lock. We try to lock the buffer without
-                * blocking. If we fail, we need to drop j_list_lock and do
-                * blocking lock_buffer().
-                */
-               if (buffer_dirty(bh)) {
-                       if (test_set_buffer_locked(bh)) {
-                               BUFFER_TRACE(bh, "needs blocking lock");
-                               spin_unlock(&journal->j_list_lock);
-                               /* Write out all data to prevent deadlocks */
-                               journal_do_submit_data(wbuf, bufs);
-                               bufs = 0;
-                               lock_buffer(bh);
-                               spin_lock(&journal->j_list_lock);
-                       }
-                       locked = 1;
-               }
-               /* We have to get bh_state lock. Again out of order, sigh. */
-               if (!inverted_lock(journal, bh)) {
-                       jbd_lock_bh_state(bh);
-                       spin_lock(&journal->j_list_lock);
-               }
-               /* Someone already cleaned up the buffer? */
-               if (!buffer_jbd(bh)
-                       || jh->b_transaction != commit_transaction
-                       || jh->b_jlist != BJ_SyncData) {
-                       jbd_unlock_bh_state(bh);
-                       if (locked)
-                               unlock_buffer(bh);
-                       BUFFER_TRACE(bh, "already cleaned up");
-                       put_bh(bh);
-                       continue;
-               }
-               if (locked && test_clear_buffer_dirty(bh)) {
-                       BUFFER_TRACE(bh, "needs writeout, adding to array");
-                       wbuf[bufs++] = bh;
-                       __jbd2_journal_file_buffer(jh, commit_transaction,
-                                               BJ_Locked);
-                       jbd_unlock_bh_state(bh);
-                       if (bufs == journal->j_wbufsize) {
-                               spin_unlock(&journal->j_list_lock);
-                               journal_do_submit_data(wbuf, bufs);
-                               bufs = 0;
-                               goto write_out_data;
-                       }
-               } else if (!locked && buffer_locked(bh)) {
-                       __jbd2_journal_file_buffer(jh, commit_transaction,
-                                               BJ_Locked);
-                       jbd_unlock_bh_state(bh);
-                       put_bh(bh);
+       /* Now refile inode to proper lists */
+       list_for_each_entry_safe(jinode, next_i,
+                                &commit_transaction->t_inode_list, i_list) {
+               list_del(&jinode->i_list);
+               if (jinode->i_next_transaction) {
+                       jinode->i_transaction = jinode->i_next_transaction;
+                       jinode->i_next_transaction = NULL;
+                       list_add(&jinode->i_list,
+                               &jinode->i_transaction->t_inode_list);
                } else {
-                       BUFFER_TRACE(bh, "writeout complete: unfile");
-                       __jbd2_journal_unfile_buffer(jh);
-                       jbd_unlock_bh_state(bh);
-                       if (locked)
-                               unlock_buffer(bh);
-                       jbd2_journal_remove_journal_head(bh);
-                       /* Once for our safety reference, once for
-                        * jbd2_journal_remove_journal_head() */
-                       put_bh(bh);
-                       put_bh(bh);
-               }
-
-               if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
-                       spin_unlock(&journal->j_list_lock);
-                       goto write_out_data;
+                       jinode->i_transaction = NULL;
                }
        }
        spin_unlock(&journal->j_list_lock);
-       journal_do_submit_data(wbuf, bufs);
+
+       return ret;
 }
 
 static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
@@ -527,21 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         * Now start flushing things to disk, in the order they appear
         * on the transaction lists.  Data blocks go first.
         */
-       err = 0;
-       journal_submit_data_buffers(journal, commit_transaction);
-
-       /*
-        * Wait for all previously submitted IO to complete if commit
-        * record is to be written synchronously.
-        */
-       spin_lock(&journal->j_list_lock);
-       if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
-               JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
-               err = journal_wait_on_locked_list(journal,
-                                               commit_transaction);
-
-       spin_unlock(&journal->j_list_lock);
-
+       err = journal_submit_data_buffers(journal, commit_transaction);
        if (err)
                jbd2_journal_abort(journal, err);
 
@@ -549,16 +467,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
        jbd_debug(3, "JBD: commit phase 2\n");
 
-       /*
-        * If we found any dirty or locked buffers, then we should have
-        * looped back up to the write_out_data label.  If there weren't
-        * any then journal_clean_data_list should have wiped the list
-        * clean by now, so check that it is in fact empty.
-        */
-       J_ASSERT (commit_transaction->t_sync_datalist == NULL);
-
-       jbd_debug (3, "JBD: commit phase 3\n");
-
        /*
         * Way to go: we have now written out all of the data for a
         * transaction!  Now comes the tricky part: we need to write out
@@ -577,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        J_ASSERT(commit_transaction->t_nr_buffers <=
                 commit_transaction->t_outstanding_credits);
 
+       err = 0;
        descriptor = NULL;
        bufs = 0;
        while (commit_transaction->t_buffers) {
@@ -751,15 +660,19 @@ start_journal_io:
                                                 &cbh, crc32_sum);
                if (err)
                        __jbd2_journal_abort_hard(journal);
-
-               spin_lock(&journal->j_list_lock);
-               err = journal_wait_on_locked_list(journal,
-                                               commit_transaction);
-               spin_unlock(&journal->j_list_lock);
-               if (err)
-                       __jbd2_journal_abort_hard(journal);
        }
 
+       /*
+        * This is the right place to wait for data buffers both for ASYNC
+        * and !ASYNC commit. If commit is ASYNC, we need to wait only after
+        * the commit block went to disk (which happens above). If commit is
+        * SYNC, we need to wait for data buffers before we start writing
+        * commit block, which happens below in such setting.
+        */
+       err = journal_finish_inode_data_buffers(journal, commit_transaction);
+       if (err)
+               jbd2_journal_abort(journal, err);
+
        /* Lo and behold: we have just managed to send a transaction to
            the log.  Before we can commit it, wait for the IO so far to
            complete.  Control buffers being written are on the
@@ -771,7 +684,7 @@ start_journal_io:
           so we incur less scheduling load.
        */
 
-       jbd_debug(3, "JBD: commit phase 4\n");
+       jbd_debug(3, "JBD: commit phase 3\n");
 
        /*
         * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -830,7 +743,7 @@ wait_for_iobuf:
 
        J_ASSERT (commit_transaction->t_shadow_list == NULL);
 
-       jbd_debug(3, "JBD: commit phase 5\n");
+       jbd_debug(3, "JBD: commit phase 4\n");
 
        /* Here we wait for the revoke record and descriptor record buffers */
  wait_for_ctlbuf:
@@ -857,7 +770,7 @@ wait_for_iobuf:
                /* AKPM: bforget here */
        }
 
-       jbd_debug(3, "JBD: commit phase 6\n");
+       jbd_debug(3, "JBD: commit phase 5\n");
 
        if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
                JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -877,9 +790,9 @@ wait_for_iobuf:
            transaction can be removed from any checkpoint list it was on
            before. */
 
-       jbd_debug(3, "JBD: commit phase 7\n");
+       jbd_debug(3, "JBD: commit phase 6\n");
 
-       J_ASSERT(commit_transaction->t_sync_datalist == NULL);
+       J_ASSERT(list_empty(&commit_transaction->t_inode_list));
        J_ASSERT(commit_transaction->t_buffers == NULL);
        J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
        J_ASSERT(commit_transaction->t_iobuf_list == NULL);
@@ -1000,7 +913,7 @@ restart_loop:
 
        /* Done with this transaction! */
 
-       jbd_debug(3, "JBD: commit phase 8\n");
+       jbd_debug(3, "JBD: commit phase 7\n");
 
        J_ASSERT(commit_transaction->t_state == T_COMMIT);