pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - fs/ntfs/aops.c
Merge davem@outer-richmond.davemloft.net:src/GIT/net-2.6/
[linux-2.6-omap-h63xx.git] / fs / ntfs / aops.c
index 78adad7a988d981a22f61b739cc082071ddc8513..b6cc8cf24626b0a1f15ec8deffddc704da797c0e 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/swap.h>
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
+#include <linux/bit_spinlock.h>
 
 #include "aops.h"
 #include "attrib.h"
@@ -55,9 +56,8 @@
  */
 static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 {
-       static DEFINE_SPINLOCK(page_uptodate_lock);
        unsigned long flags;
-       struct buffer_head *tmp;
+       struct buffer_head *first, *tmp;
        struct page *page;
        ntfs_inode *ni;
        int page_uptodate = 1;
@@ -89,11 +89,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
                }
        } else {
                clear_buffer_uptodate(bh);
+               SetPageError(page);
                ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
                                (unsigned long long)bh->b_blocknr);
-               SetPageError(page);
        }
-       spin_lock_irqsave(&page_uptodate_lock, flags);
+       first = page_buffers(page);
+       local_irq_save(flags);
+       bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
        clear_buffer_async_read(bh);
        unlock_buffer(bh);
        tmp = bh;
@@ -108,7 +110,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
                }
                tmp = tmp->b_this_page;
        } while (tmp != bh);
-       spin_unlock_irqrestore(&page_uptodate_lock, flags);
+       bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+       local_irq_restore(flags);
        /*
         * If none of the buffers had errors then we can set the page uptodate,
         * but we first have to perform the post read mst fixups, if the
@@ -141,7 +144,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
        unlock_page(page);
        return;
 still_busy:
-       spin_unlock_irqrestore(&page_uptodate_lock, flags);
+       bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+       local_irq_restore(flags);
        return;
 }
 
@@ -185,13 +189,15 @@ static int ntfs_read_block(struct page *page)
        blocksize_bits = VFS_I(ni)->i_blkbits;
        blocksize = 1 << blocksize_bits;
 
-       if (!page_has_buffers(page))
+       if (!page_has_buffers(page)) {
                create_empty_buffers(page, blocksize, 0);
-       bh = head = page_buffers(page);
-       if (unlikely(!bh)) {
-               unlock_page(page);
-               return -ENOMEM;
+               if (unlikely(!page_has_buffers(page))) {
+                       unlock_page(page);
+                       return -ENOMEM;
+               }
        }
+       bh = head = page_buffers(page);
+       BUG_ON(!bh);
 
        iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
        read_lock_irqsave(&ni->size_lock, flags);
@@ -204,6 +210,7 @@ static int ntfs_read_block(struct page *page)
        nr = i = 0;
        do {
                u8 *kaddr;
+               int err;
 
                if (unlikely(buffer_uptodate(bh)))
                        continue;
@@ -211,6 +218,7 @@ static int ntfs_read_block(struct page *page)
                        arr[nr++] = bh;
                        continue;
                }
+               err = 0;
                bh->b_bdev = vol->sb->s_bdev;
                /* Is the block within the allowed limits? */
                if (iblock < lblock) {
@@ -252,7 +260,6 @@ lock_retry_remap:
                                goto handle_hole;
                        /* If first try and runlist unmapped, map and retry. */
                        if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
-                               int err;
                                is_retry = TRUE;
                                /*
                                 * Attempt to map runlist, dropping lock for
@@ -263,20 +270,30 @@ lock_retry_remap:
                                if (likely(!err))
                                        goto lock_retry_remap;
                                rl = NULL;
-                               lcn = err;
                        } else if (!rl)
                                up_read(&ni->runlist.lock);
+                       /*
+                        * If buffer is outside the runlist, treat it as a
+                        * hole.  This can happen due to concurrent truncate
+                        * for example.
+                        */
+                       if (err == -ENOENT || lcn == LCN_ENOENT) {
+                               err = 0;
+                               goto handle_hole;
+                       }
                        /* Hard error, zero out region. */
+                       if (!err)
+                               err = -EIO;
                        bh->b_blocknr = -1;
                        SetPageError(page);
                        ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
                                        "attribute type 0x%x, vcn 0x%llx, "
                                        "offset 0x%x because its location on "
                                        "disk could not be determined%s "
-                                       "(error code %lli).", ni->mft_no,
+                                       "(error code %i).", ni->mft_no,
                                        ni->type, (unsigned long long)vcn,
                                        vcn_ofs, is_retry ? " even after "
-                                       "retrying" : "", (long long)lcn);
+                                       "retrying" : "", err);
                }
                /*
                 * Either iblock was outside lblock limits or
@@ -289,9 +306,10 @@ handle_hole:
 handle_zblock:
                kaddr = kmap_atomic(page, KM_USER0);
                memset(kaddr + i * blocksize, 0, blocksize);
-               flush_dcache_page(page);
                kunmap_atomic(kaddr, KM_USER0);
-               set_buffer_uptodate(bh);
+               flush_dcache_page(page);
+               if (likely(!err))
+                       set_buffer_uptodate(bh);
        } while (i++, iblock++, (bh = bh->b_this_page) != head);
 
        /* Release the lock if we took it. */
@@ -367,31 +385,38 @@ retry_readpage:
                return 0;
        }
        ni = NTFS_I(page->mapping->host);
-
+       /*
+        * Only $DATA attributes can be encrypted and only unnamed $DATA
+        * attributes can be compressed.  Index root can have the flags set but
+        * this means to create compressed/encrypted files, not that the
+        * attribute is compressed/encrypted.
+        */
+       if (ni->type != AT_INDEX_ROOT) {
+               /* If attribute is encrypted, deny access, just like NT4. */
+               if (NInoEncrypted(ni)) {
+                       BUG_ON(ni->type != AT_DATA);
+                       err = -EACCES;
+                       goto err_out;
+               }
+               /* Compressed data streams are handled in compress.c. */
+               if (NInoNonResident(ni) && NInoCompressed(ni)) {
+                       BUG_ON(ni->type != AT_DATA);
+                       BUG_ON(ni->name_len);
+                       return ntfs_read_compressed_block(page);
+               }
+       }
        /* NInoNonResident() == NInoIndexAllocPresent() */
        if (NInoNonResident(ni)) {
-               /*
-                * Only unnamed $DATA attributes can be compressed or
-                * encrypted.
-                */
-               if (ni->type == AT_DATA && !ni->name_len) {
-                       /* If file is encrypted, deny access, just like NT4. */
-                       if (NInoEncrypted(ni)) {
-                               err = -EACCES;
-                               goto err_out;
-                       }
-                       /* Compressed data streams are handled in compress.c. */
-                       if (NInoCompressed(ni))
-                               return ntfs_read_compressed_block(page);
-               }
-               /* Normal data stream. */
+               /* Normal, non-resident data stream. */
                return ntfs_read_block(page);
        }
        /*
         * Attribute is resident, implying it is not compressed or encrypted.
         * This also means the attribute is smaller than an mft record and
         * hence smaller than a page, so can simply zero out any pages with
-        * index above 0.
+        * index above 0.  Note the attribute can actually be marked compressed
+        * but if it is resident the actual data is not compressed so we are
+        * ok to ignore the compressed flag here.
         */
        if (unlikely(page->index > 0)) {
                kaddr = kmap_atomic(page, KM_USER0);
@@ -511,19 +536,21 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
                BUG_ON(!PageUptodate(page));
                create_empty_buffers(page, blocksize,
                                (1 << BH_Uptodate) | (1 << BH_Dirty));
+               if (unlikely(!page_has_buffers(page))) {
+                       ntfs_warning(vol->sb, "Error allocating page "
+                                       "buffers.  Redirtying page so we try "
+                                       "again later.");
+                       /*
+                        * Put the page back on mapping->dirty_pages, but leave
+                        * its buffers' dirty state as-is.
+                        */
+                       redirty_page_for_writepage(wbc, page);
+                       unlock_page(page);
+                       return 0;
+               }
        }
        bh = head = page_buffers(page);
-       if (unlikely(!bh)) {
-               ntfs_warning(vol->sb, "Error allocating page buffers. "
-                               "Redirtying page so we try again later.");
-               /*
-                * Put the page back on mapping->dirty_pages, but leave its
-                * buffer's dirty state as-is.
-                */
-               redirty_page_for_writepage(wbc, page);
-               unlock_page(page);
-               return 0;
-       }
+       BUG_ON(!bh);
 
        /* NOTE: Different naming scheme to ntfs_read_block()! */
 
@@ -670,6 +697,27 @@ lock_retry_remap:
                }
                /* It is a hole, need to instantiate it. */
                if (lcn == LCN_HOLE) {
+                       u8 *kaddr;
+                       unsigned long *bpos, *bend;
+
+                       /* Check if the buffer is zero. */
+                       kaddr = kmap_atomic(page, KM_USER0);
+                       bpos = (unsigned long *)(kaddr + bh_offset(bh));
+                       bend = (unsigned long *)((u8*)bpos + blocksize);
+                       do {
+                               if (unlikely(*bpos))
+                                       break;
+                       } while (likely(++bpos < bend));
+                       kunmap_atomic(kaddr, KM_USER0);
+                       if (bpos == bend) {
+                               /*
+                                * Buffer is zero and sparse, no need to write
+                                * it.
+                                */
+                               bh->b_blocknr = -1;
+                               clear_buffer_dirty(bh);
+                               continue;
+                       }
                        // TODO: Instantiate the hole.
                        // clear_buffer_new(bh);
                        // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
@@ -690,20 +738,37 @@ lock_retry_remap:
                        if (likely(!err))
                                goto lock_retry_remap;
                        rl = NULL;
-                       lcn = err;
                } else if (!rl)
                        up_read(&ni->runlist.lock);
+               /*
+                * If buffer is outside the runlist, truncate has cut it out
+                * of the runlist.  Just clean and clear the buffer and set it
+                * uptodate so it can get discarded by the VM.
+                */
+               if (err == -ENOENT || lcn == LCN_ENOENT) {
+                       u8 *kaddr;
+
+                       bh->b_blocknr = -1;
+                       clear_buffer_dirty(bh);
+                       kaddr = kmap_atomic(page, KM_USER0);
+                       memset(kaddr + bh_offset(bh), 0, blocksize);
+                       kunmap_atomic(kaddr, KM_USER0);
+                       flush_dcache_page(page);
+                       set_buffer_uptodate(bh);
+                       err = 0;
+                       continue;
+               }
                /* Failed to map the buffer, even after retrying. */
+               if (!err)
+                       err = -EIO;
                bh->b_blocknr = -1;
                ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
                                "attribute type 0x%x, vcn 0x%llx, offset 0x%x "
                                "because its location on disk could not be "
-                               "determined%s (error code %lli).", ni->mft_no,
+                               "determined%s (error code %i).", ni->mft_no,
                                ni->type, (unsigned long long)vcn,
                                vcn_ofs, is_retry ? " even after "
-                               "retrying" : "", (long long)lcn);
-               if (!err)
-                       err = -EIO;
+                               "retrying" : "", err);
                break;
        } while (block++, (bh = bh->b_this_page) != head);
 
@@ -714,7 +779,7 @@ lock_retry_remap:
        /* For the error case, need to reset bh to the beginning. */
        bh = head;
 
-       /* Just an optimization, so ->readpage() isn't called later. */
+       /* Just an optimization, so ->readpage() is not called later. */
        if (unlikely(!PageUptodate(page))) {
                int uptodate = 1;
                do {
@@ -730,7 +795,6 @@ lock_retry_remap:
 
        /* Setup all mapped, dirty buffers for async write i/o. */
        do {
-               get_bh(bh);
                if (buffer_mapped(bh) && buffer_dirty(bh)) {
                        lock_buffer(bh);
                        if (test_clear_buffer_dirty(bh)) {
@@ -768,14 +832,8 @@ lock_retry_remap:
 
        BUG_ON(PageWriteback(page));
        set_page_writeback(page);       /* Keeps try_to_free_buffers() away. */
-       unlock_page(page);
 
-       /*
-        * Submit the prepared buffers for i/o. Note the page is unlocked,
-        * and the async write i/o completion handler can end_page_writeback()
-        * at any time after the *first* submit_bh(). So the buffers can then
-        * disappear...
-        */
+       /* Submit the prepared buffers for i/o. */
        need_end_writeback = TRUE;
        do {
                struct buffer_head *next = bh->b_this_page;
@@ -783,9 +841,9 @@ lock_retry_remap:
                        submit_bh(WRITE, bh);
                        need_end_writeback = FALSE;
                }
-               put_bh(bh);
                bh = next;
        } while (bh != head);
+       unlock_page(page);
 
        /* If no i/o was started, need to end_page_writeback(). */
        if (unlikely(need_end_writeback))
@@ -860,7 +918,6 @@ static int ntfs_write_mst_block(struct page *page,
        sync = (wbc->sync_mode == WB_SYNC_ALL);
 
        /* Make sure we have mapped buffers. */
-       BUG_ON(!page_has_buffers(page));
        bh = head = page_buffers(page);
        BUG_ON(!bh);
 
@@ -1280,38 +1337,42 @@ retry_writepage:
                ntfs_debug("Write outside i_size - truncated?");
                return 0;
        }
+       /*
+        * Only $DATA attributes can be encrypted and only unnamed $DATA
+        * attributes can be compressed.  Index root can have the flags set but
+        * this means to create compressed/encrypted files, not that the
+        * attribute is compressed/encrypted.
+        */
+       if (ni->type != AT_INDEX_ROOT) {
+               /* If file is encrypted, deny access, just like NT4. */
+               if (NInoEncrypted(ni)) {
+                       unlock_page(page);
+                       BUG_ON(ni->type != AT_DATA);
+                       ntfs_debug("Denying write access to encrypted "
+                                       "file.");
+                       return -EACCES;
+               }
+               /* Compressed data streams are handled in compress.c. */
+               if (NInoNonResident(ni) && NInoCompressed(ni)) {
+                       BUG_ON(ni->type != AT_DATA);
+                       BUG_ON(ni->name_len);
+                       // TODO: Implement and replace this with
+                       // return ntfs_write_compressed_block(page);
+                       unlock_page(page);
+                       ntfs_error(vi->i_sb, "Writing to compressed files is "
+                                       "not supported yet.  Sorry.");
+                       return -EOPNOTSUPP;
+               }
+               // TODO: Implement and remove this check.
+               if (NInoNonResident(ni) && NInoSparse(ni)) {
+                       unlock_page(page);
+                       ntfs_error(vi->i_sb, "Writing to sparse files is not "
+                                       "supported yet.  Sorry.");
+                       return -EOPNOTSUPP;
+               }
+       }
        /* NInoNonResident() == NInoIndexAllocPresent() */
        if (NInoNonResident(ni)) {
-               /*
-                * Only unnamed $DATA attributes can be compressed, encrypted,
-                * and/or sparse.
-                */
-               if (ni->type == AT_DATA && !ni->name_len) {
-                       /* If file is encrypted, deny access, just like NT4. */
-                       if (NInoEncrypted(ni)) {
-                               unlock_page(page);
-                               ntfs_debug("Denying write access to encrypted "
-                                               "file.");
-                               return -EACCES;
-                       }
-                       /* Compressed data streams are handled in compress.c. */
-                       if (NInoCompressed(ni)) {
-                               // TODO: Implement and replace this check with
-                               // return ntfs_write_compressed_block(page);
-                               unlock_page(page);
-                               ntfs_error(vi->i_sb, "Writing to compressed "
-                                               "files is not supported yet. "
-                                               "Sorry.");
-                               return -EOPNOTSUPP;
-                       }
-                       // TODO: Implement and remove this check.
-                       if (NInoSparse(ni)) {
-                               unlock_page(page);
-                               ntfs_error(vi->i_sb, "Writing to sparse files "
-                                               "is not supported yet. Sorry.");
-                               return -EOPNOTSUPP;
-                       }
-               }
                /* We have to zero every time due to mmap-at-end-of-file. */
                if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
                        /* The page straddles i_size. */
@@ -1324,14 +1385,16 @@ retry_writepage:
                /* Handle mst protected attributes. */
                if (NInoMstProtected(ni))
                        return ntfs_write_mst_block(page, wbc);
-               /* Normal data stream. */
+               /* Normal, non-resident data stream. */
                return ntfs_write_block(page, wbc);
        }
        /*
-        * Attribute is resident, implying it is not compressed, encrypted,
-        * sparse, or mst protected.  This also means the attribute is smaller
-        * than an mft record and hence smaller than a page, so can simply
-        * return error on any pages with index above 0.
+        * Attribute is resident, implying it is not compressed, encrypted, or
+        * mst protected.  This also means the attribute is smaller than an mft
+        * record and hence smaller than a page, so can simply return error on
+        * any pages with index above 0.  Note the attribute can actually be
+        * marked compressed but if it is resident the actual data is not
+        * compressed so we are ok to ignore the compressed flag here.
         */
        BUG_ON(page_has_buffers(page));
        BUG_ON(!PageUptodate(page));
@@ -1380,30 +1443,14 @@ retry_writepage:
        BUG_ON(PageWriteback(page));
        set_page_writeback(page);
        unlock_page(page);
-
        /*
-        * Here, we don't need to zero the out of bounds area everytime because
-        * the below memcpy() already takes care of the mmap-at-end-of-file
-        * requirements. If the file is converted to a non-resident one, then
-        * the code path use is switched to the non-resident one where the
-        * zeroing happens on each ntfs_writepage() invocation.
-        *
-        * The above also applies nicely when i_size is decreased.
-        *
-        * When i_size is increased, the memory between the old and new i_size
-        * _must_ be zeroed (or overwritten with new data). Otherwise we will
-        * expose data to userspace/disk which should never have been exposed.
-        *
-        * FIXME: Ensure that i_size increases do the zeroing/overwriting and
-        * if we cannot guarantee that, then enable the zeroing below.  If the
-        * zeroing below is enabled, we MUST move the unlock_page() from above
-        * to after the kunmap_atomic(), i.e. just before the
-        * end_page_writeback().
-        * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
-        * increases for resident attributes so those are ok.
-        * TODO: ntfs_truncate(), others?
+        * Here, we do not need to zero the out of bounds area everytime
+        * because the below memcpy() already takes care of the
+        * mmap-at-end-of-file requirements.  If the file is converted to a
+        * non-resident one, then the code path use is switched to the
+        * non-resident one where the zeroing happens on each ntfs_writepage()
+        * invocation.
         */
-
        attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
        i_size = i_size_read(vi);
        if (unlikely(attr_len > i_size)) {
@@ -1681,27 +1728,25 @@ lock_retry_remap:
                                        if (likely(!err))
                                                goto lock_retry_remap;
                                        rl = NULL;
-                                       lcn = err;
                                } else if (!rl)
                                        up_read(&ni->runlist.lock);
                                /*
                                 * Failed to map the buffer, even after
                                 * retrying.
                                 */
+                               if (!err)
+                                       err = -EIO;
                                bh->b_blocknr = -1;
                                ntfs_error(vol->sb, "Failed to write to inode "
                                                "0x%lx, attribute type 0x%x, "
                                                "vcn 0x%llx, offset 0x%x "
                                                "because its location on disk "
                                                "could not be determined%s "
-                                               "(error code %lli).",
+                                               "(error code %i).",
                                                ni->mft_no, ni->type,
                                                (unsigned long long)vcn,
                                                vcn_ofs, is_retry ? " even "
-                                               "after retrying" : "",
-                                               (long long)lcn);
-                               if (!err)
-                                       err = -EIO;
+                                               "after retrying" : "", err);
                                goto err_out;
                        }
                        /* We now have a successful remap, i.e. lcn >= 0. */
@@ -2357,6 +2402,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
                        buffers_to_free = bh;
        }
        bh = head = page_buffers(page);
+       BUG_ON(!bh);
        do {
                bh_ofs = bh_offset(bh);
                if (bh_ofs + bh_size <= ofs)