pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - fs/xfs/linux-2.6/xfs_aops.c
Pull sem2mutex-ioc4 into release branch
[linux-2.6-omap-h63xx.git] / fs / xfs / linux-2.6 / xfs_aops.c
index e99d04d3fe82214a17e141fd6b86f32f006ff666..74d8be87f983d4995c32bc87ebcc89ff33b577bb 100644 (file)
@@ -40,6 +40,7 @@
 #include "xfs_rw.h"
 #include "xfs_iomap.h"
 #include <linux/mpage.h>
+#include <linux/pagevec.h>
 #include <linux/writeback.h>
 
 STATIC void xfs_count_page_state(struct page *, int *, int *, int *);
@@ -53,7 +54,6 @@ xfs_page_trace(
        int             mask)
 {
        xfs_inode_t     *ip;
-       bhv_desc_t      *bdp;
        vnode_t         *vp = LINVFS_GET_VP(inode);
        loff_t          isize = i_size_read(inode);
        loff_t          offset = page_offset(page);
@@ -62,8 +62,7 @@ xfs_page_trace(
        if (page_has_buffers(page))
                xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
 
-       bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
-       ip = XFS_BHVTOI(bdp);
+       ip = xfs_vtoi(vp);
        if (!ip->i_rwtrace)
                return;
 
@@ -227,29 +226,13 @@ xfs_map_blocks(
        return -error;
 }
 
-/*
- * Finds the corresponding mapping in block @map array of the
- * given @offset within a @page.
- */
-STATIC xfs_iomap_t *
-xfs_offset_to_map(
-       struct page             *page,
+STATIC inline int
+xfs_iomap_valid(
        xfs_iomap_t             *iomapp,
-       unsigned long           offset)
+       loff_t                  offset)
 {
-       xfs_off_t               full_offset;    /* offset from start of file */
-
-       ASSERT(offset < PAGE_CACHE_SIZE);
-
-       full_offset = page->index;              /* NB: using 64bit number */
-       full_offset <<= PAGE_CACHE_SHIFT;       /* offset from file start */
-       full_offset += offset;                  /* offset from page start */
-
-       if (full_offset < iomapp->iomap_offset)
-               return NULL;
-       if (iomapp->iomap_offset + (iomapp->iomap_bsize -1) >= full_offset)
-               return iomapp;
-       return NULL;
+       return offset >= iomapp->iomap_offset &&
+               offset < iomapp->iomap_offset + iomapp->iomap_bsize;
 }
 
 /*
@@ -353,24 +336,47 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 }
 
 /*
- * Submit all of the bios for all of the ioends we have saved up,
- * covering the initial writepage page and also any probed pages.
+ * Submit all of the bios for all of the ioends we have saved up, covering the
+ * initial writepage page and also any probed pages.
+ *
+ * Because we may have multiple ioends spanning a page, we need to start
+ * writeback on all the buffers before we submit them for I/O. If we mark the
+ * buffers as we go, then we can end up with a page that only has buffers
+ * marked async write and I/O completion can occur before we mark the other
+ * buffers async write.
+ *
+ * The end result of this is that we trip a bug in end_page_writeback() because
+ * we call it twice for the one page as the code in end_buffer_async_write()
+ * assumes that all buffers on the page are started at the same time.
+ *
+ * The fix is two passes across the ioend list - one to start writeback on the
+ * bufferheads, and then the second to submit them for I/O.
  */
 STATIC void
 xfs_submit_ioend(
        xfs_ioend_t             *ioend)
 {
+       xfs_ioend_t             *head = ioend;
        xfs_ioend_t             *next;
        struct buffer_head      *bh;
        struct bio              *bio;
        sector_t                lastblock = 0;
 
+       /* Pass 1 - start writeback */
+       do {
+               next = ioend->io_list;
+               for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+                       xfs_start_buffer_writeback(bh);
+               }
+       } while ((ioend = next) != NULL);
+
+       /* Pass 2 - submit I/O */
+       ioend = head;
        do {
                next = ioend->io_list;
                bio = NULL;
 
                for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-                       xfs_start_buffer_writeback(bh);
 
                        if (!bio) {
  retry:
@@ -429,7 +435,7 @@ STATIC void
 xfs_add_to_ioend(
        struct inode            *inode,
        struct buffer_head      *bh,
-       unsigned int            p_offset,
+       xfs_off_t               offset,
        unsigned int            type,
        xfs_ioend_t             **result,
        int                     need_ioend)
@@ -438,10 +444,7 @@ xfs_add_to_ioend(
 
        if (!ioend || need_ioend || type != ioend->io_type) {
                xfs_ioend_t     *previous = *result;
-               xfs_off_t       offset;
 
-               offset = (xfs_off_t)bh->b_page->index << PAGE_CACHE_SHIFT;
-               offset += p_offset;
                ioend = xfs_alloc_ioend(inode, type);
                ioend->io_offset = offset;
                ioend->io_buffer_head = bh;
@@ -460,31 +463,23 @@ xfs_add_to_ioend(
 
 STATIC void
 xfs_map_at_offset(
-       struct page             *page,
        struct buffer_head      *bh,
-       unsigned long           offset,
+       loff_t                  offset,
        int                     block_bits,
-       xfs_iomap_t             *iomapp,
-       xfs_ioend_t             *ioend)
+       xfs_iomap_t             *iomapp)
 {
        xfs_daddr_t             bn;
-       xfs_off_t               delta;
        int                     sector_shift;
 
        ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
        ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
        ASSERT(iomapp->iomap_bn != IOMAP_DADDR_NULL);
 
-       delta = page->index;
-       delta <<= PAGE_CACHE_SHIFT;
-       delta += offset;
-       delta -= iomapp->iomap_offset;
-       delta >>= block_bits;
-
        sector_shift = block_bits - BBSHIFT;
-       bn = iomapp->iomap_bn >> sector_shift;
-       bn += delta;
-       BUG_ON(!bn && !(iomapp->iomap_flags & IOMAP_REALTIME));
+       bn = (iomapp->iomap_bn >> sector_shift) +
+             ((offset - iomapp->iomap_offset) >> block_bits);
+
+       ASSERT(bn || (iomapp->iomap_flags & IOMAP_REALTIME));
        ASSERT((bn << sector_shift) >= iomapp->iomap_bn);
 
        lock_buffer(bh);
@@ -496,23 +491,18 @@ xfs_map_at_offset(
 }
 
 /*
- * Look for a page at index which is unlocked and not mapped
- * yet - clustering for mmap write case.
+ * Look for a page at index that is suitable for clustering.
  */
 STATIC unsigned int
-xfs_probe_unmapped_page(
-       struct address_space    *mapping,
-       pgoff_t                 index,
-       unsigned int            pg_offset)
+xfs_probe_page(
+       struct page             *page,
+       unsigned int            pg_offset,
+       int                     mapped)
 {
-       struct page             *page;
        int                     ret = 0;
 
-       page = find_trylock_page(mapping, index);
-       if (!page)
-               return 0;
        if (PageWriteback(page))
-               goto out;
+               return 0;
 
        if (page->mapping && PageDirty(page)) {
                if (page_has_buffers(page)) {
@@ -520,81 +510,101 @@ xfs_probe_unmapped_page(
 
                        bh = head = page_buffers(page);
                        do {
-                               if (buffer_mapped(bh) || !buffer_uptodate(bh))
+                               if (!buffer_uptodate(bh))
+                                       break;
+                               if (mapped != buffer_mapped(bh))
                                        break;
                                ret += bh->b_size;
                                if (ret >= pg_offset)
                                        break;
                        } while ((bh = bh->b_this_page) != head);
                } else
-                       ret = PAGE_CACHE_SIZE;
+                       ret = mapped ? 0 : PAGE_CACHE_SIZE;
        }
 
-out:
-       unlock_page(page);
        return ret;
 }
 
 STATIC size_t
-xfs_probe_unmapped_cluster(
+xfs_probe_cluster(
        struct inode            *inode,
        struct page             *startpage,
        struct buffer_head      *bh,
-       struct buffer_head      *head)
+       struct buffer_head      *head,
+       int                     mapped)
 {
-       size_t                  len, total = 0;
+       struct pagevec          pvec;
        pgoff_t                 tindex, tlast, tloff;
-       unsigned int            pg_offset;
-       struct address_space    *mapping = inode->i_mapping;
+       size_t                  total = 0;
+       int                     done = 0, i;
 
        /* First sum forwards in this page */
        do {
-               if (buffer_mapped(bh))
-                       break;
+               if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh)))
+                       return total;
                total += bh->b_size;
        } while ((bh = bh->b_this_page) != head);
 
-       /* If we reached the end of the page, sum forwards in
-        * following pages.
-        */
-       if (bh == head) {
-               tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-               /* Prune this back to avoid pathological behavior */
-               tloff = min(tlast, startpage->index + 64);
-               for (tindex = startpage->index + 1; tindex < tloff; tindex++) {
-                       len = xfs_probe_unmapped_page(mapping, tindex,
-                                                       PAGE_CACHE_SIZE);
-                       if (!len)
-                               return total;
+       /* if we reached the end of the page, sum forwards in following pages */
+       tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+       tindex = startpage->index + 1;
+
+       /* Prune this back to avoid pathological behavior */
+       tloff = min(tlast, startpage->index + 64);
+
+       pagevec_init(&pvec, 0);
+       while (!done && tindex <= tloff) {
+               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
+                       break;
+
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *page = pvec.pages[i];
+                       size_t pg_offset, len = 0;
+
+                       if (tindex == tlast) {
+                               pg_offset =
+                                   i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
+                               if (!pg_offset) {
+                                       done = 1;
+                                       break;
+                               }
+                       } else
+                               pg_offset = PAGE_CACHE_SIZE;
+
+                       if (page->index == tindex && !TestSetPageLocked(page)) {
+                               len = xfs_probe_page(page, pg_offset, mapped);
+                               unlock_page(page);
+                       }
+
+                       if (!len) {
+                               done = 1;
+                               break;
+                       }
+
                        total += len;
+                       tindex++;
                }
-               if (tindex == tlast &&
-                   (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
-                       total += xfs_probe_unmapped_page(mapping,
-                                                       tindex, pg_offset);
-               }
+
+               pagevec_release(&pvec);
+               cond_resched();
        }
+
        return total;
 }
 
 /*
- * Probe for a given page (index) in the inode and test if it is suitable
- * for writing as part of an unwritten or delayed allocate extent.
- * Returns page locked and with an extra reference count if so, else NULL.
+ * Test if a given page is suitable for writing as part of an unwritten
+ * or delayed allocate extent.
  */
-STATIC struct page *
-xfs_probe_delayed_page(
-       struct inode            *inode,
-       pgoff_t                 index,
+STATIC int
+xfs_is_delayed_page(
+       struct page             *page,
        unsigned int            type)
 {
-       struct page             *page;
-
-       page = find_trylock_page(inode->i_mapping, index);
-       if (!page)
-               return NULL;
        if (PageWriteback(page))
-               goto out;
+               return 0;
 
        if (page->mapping && page_has_buffers(page)) {
                struct buffer_head      *bh, *head;
@@ -606,17 +616,17 @@ xfs_probe_delayed_page(
                                acceptable = (type == IOMAP_UNWRITTEN);
                        else if (buffer_delay(bh))
                                acceptable = (type == IOMAP_DELAY);
+                       else if (buffer_mapped(bh))
+                               acceptable = (type == 0);
                        else
                                break;
                } while ((bh = bh->b_this_page) != head);
 
                if (acceptable)
-                       return page;
+                       return 1;
        }
 
-out:
-       unlock_page(page);
-       return NULL;
+       return 0;
 }
 
 /*
@@ -629,36 +639,59 @@ STATIC int
 xfs_convert_page(
        struct inode            *inode,
        struct page             *page,
-       xfs_iomap_t             *iomapp,
+       loff_t                  tindex,
+       xfs_iomap_t             *mp,
        xfs_ioend_t             **ioendp,
        struct writeback_control *wbc,
-       void                    *private,
        int                     startio,
        int                     all_bh)
 {
        struct buffer_head      *bh, *head;
-       xfs_iomap_t             *mp = iomapp, *tmp;
-       unsigned long           p_offset, end_offset;
+       xfs_off_t               end_offset;
+       unsigned long           p_offset;
        unsigned int            type;
        int                     bbits = inode->i_blkbits;
        int                     len, page_dirty;
        int                     count = 0, done = 0, uptodate = 1;
+       xfs_off_t               offset = page_offset(page);
 
-       end_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1));
+       if (page->index != tindex)
+               goto fail;
+       if (TestSetPageLocked(page))
+               goto fail;
+       if (PageWriteback(page))
+               goto fail_unlock_page;
+       if (page->mapping != inode->i_mapping)
+               goto fail_unlock_page;
+       if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
+               goto fail_unlock_page;
 
        /*
         * page_dirty is initially a count of buffers on the page before
         * EOF and is decrememted as we move each into a cleanable state.
+        *
+        * Derivation:
+        *
+        * End offset is the highest offset that this page should represent.
+        * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
+        * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
+        * hence give us the correct page_dirty count. On any other page,
+        * it will be zero and in that case we need page_dirty to be the
+        * count of buffers on the page.
         */
+       end_offset = min_t(unsigned long long,
+                       (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+                       i_size_read(inode));
+
        len = 1 << inode->i_blkbits;
-       end_offset = max(end_offset, PAGE_CACHE_SIZE);
-       end_offset = roundup(end_offset, len);
-       page_dirty = end_offset / len;
+       p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
+                                       PAGE_CACHE_SIZE);
+       p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
+       page_dirty = p_offset / len;
 
-       p_offset = 0;
        bh = head = page_buffers(page);
        do {
-               if (p_offset >= end_offset)
+               if (offset >= end_offset)
                        break;
                if (!buffer_uptodate(bh))
                        uptodate = 0;
@@ -667,54 +700,69 @@ xfs_convert_page(
                        continue;
                }
 
-               if (buffer_unwritten(bh))
-                       type = IOMAP_UNWRITTEN;
-               else if (buffer_delay(bh))
-                       type = IOMAP_DELAY;
-               else {
-                       type = 0;
-                       if (!(buffer_mapped(bh) && all_bh && startio)) {
+               if (buffer_unwritten(bh) || buffer_delay(bh)) {
+                       if (buffer_unwritten(bh))
+                               type = IOMAP_UNWRITTEN;
+                       else
+                               type = IOMAP_DELAY;
+
+                       if (!xfs_iomap_valid(mp, offset)) {
                                done = 1;
-                       } else if (startio) {
+                               continue;
+                       }
+
+                       ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
+                       ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+
+                       xfs_map_at_offset(bh, offset, bbits, mp);
+                       if (startio) {
+                               xfs_add_to_ioend(inode, bh, offset,
+                                               type, ioendp, done);
+                       } else {
+                               set_buffer_dirty(bh);
+                               unlock_buffer(bh);
+                               mark_buffer_dirty(bh);
+                       }
+                       page_dirty--;
+                       count++;
+               } else {
+                       type = 0;
+                       if (buffer_mapped(bh) && all_bh && startio) {
                                lock_buffer(bh);
-                               xfs_add_to_ioend(inode, bh, p_offset,
+                               xfs_add_to_ioend(inode, bh, offset,
                                                type, ioendp, done);
                                count++;
                                page_dirty--;
+                       } else {
+                               done = 1;
                        }
-                       continue;
-               }
-               tmp = xfs_offset_to_map(page, mp, p_offset);
-               if (!tmp) {
-                       done = 1;
-                       continue;
-               }
-               ASSERT(!(tmp->iomap_flags & IOMAP_HOLE));
-               ASSERT(!(tmp->iomap_flags & IOMAP_DELAY));
-
-               xfs_map_at_offset(page, bh, p_offset, bbits, tmp, *ioendp);
-               if (startio) {
-                       xfs_add_to_ioend(inode, bh, p_offset,
-                                       type, ioendp, done);
-                       count++;
-               } else {
-                       set_buffer_dirty(bh);
-                       unlock_buffer(bh);
-                       mark_buffer_dirty(bh);
                }
-               page_dirty--;
-       } while (p_offset += len, (bh = bh->b_this_page) != head);
+       } while (offset += len, (bh = bh->b_this_page) != head);
 
        if (uptodate && bh == head)
                SetPageUptodate(page);
 
        if (startio) {
-               if (count)
+               if (count) {
+                       struct backing_dev_info *bdi;
+
+                       bdi = inode->i_mapping->backing_dev_info;
                        wbc->nr_to_write--;
+                       if (bdi_write_congested(bdi)) {
+                               wbc->encountered_congestion = 1;
+                               done = 1;
+                       } else if (wbc->nr_to_write <= 0) {
+                               done = 1;
+                       }
+               }
                xfs_start_page_writeback(page, wbc, !page_dirty, count);
        }
 
        return done;
+ fail_unlock_page:
+       unlock_page(page);
+ fail:
+       return 1;
 }
 
 /*
@@ -732,16 +780,25 @@ xfs_cluster_write(
        int                     all_bh,
        pgoff_t                 tlast)
 {
-       struct page             *page;
-       unsigned int            type = (*ioendp)->io_type;
-       int                     done;
+       struct pagevec          pvec;
+       int                     done = 0, i;
 
-       for (done = 0; tindex <= tlast && !done; tindex++) {
-               page = xfs_probe_delayed_page(inode, tindex, type);
-               if (!page)
+       pagevec_init(&pvec, 0);
+       while (!done && tindex <= tlast) {
+               unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
+
+               if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
                        break;
-               done = xfs_convert_page(inode, page, iomapp, ioendp,
-                                               wbc, NULL, startio, all_bh);
+
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       done = xfs_convert_page(inode, pvec.pages[i], tindex++,
+                                       iomapp, ioendp, wbc, startio, all_bh);
+                       if (done)
+                               break;
+               }
+
+               pagevec_release(&pvec);
+               cond_resched();
        }
 }
 
@@ -773,19 +830,20 @@ xfs_page_state_convert(
        int             unmapped) /* also implies page uptodate */
 {
        struct buffer_head      *bh, *head;
-       xfs_iomap_t             *iomp, iomap;
+       xfs_iomap_t             iomap;
        xfs_ioend_t             *ioend = NULL, *iohead = NULL;
        loff_t                  offset;
        unsigned long           p_offset = 0;
        unsigned int            type;
        __uint64_t              end_offset;
        pgoff_t                 end_index, last_index, tlast;
-       int                     flags, len, err, done = 1;
-       int                     uptodate = 1;
+       ssize_t                 size, len;
+       int                     flags, err, iomap_valid = 0, uptodate = 1;
        int                     page_dirty, count = 0, trylock_flag = 0;
+       int                     all_bh = unmapped;
 
        /* wait for other IO threads? */
-       if (startio && wbc->sync_mode != WB_SYNC_NONE)
+       if (startio && (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking))
                trylock_flag |= BMAPI_TRYLOCK;
 
        /* Is this page beyond the end of the file? */
@@ -822,11 +880,11 @@ xfs_page_state_convert(
        p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
        page_dirty = p_offset / len;
 
-       iomp = NULL;
        bh = head = page_buffers(page);
        offset = page_offset(page);
+       flags = -1;
+       type = 0;
 
-       /* TODO: fix up "done" variable and iomap pointer (boolean) */
        /* TODO: cleanup count and page_dirty */
 
        do {
@@ -835,14 +893,16 @@ xfs_page_state_convert(
                if (!buffer_uptodate(bh))
                        uptodate = 0;
                if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
-                       done = 1;
+                       /*
+                        * the iomap is actually still valid, but the ioend
+                        * isn't.  shouldn't happen too often.
+                        */
+                       iomap_valid = 0;
                        continue;
                }
 
-               if (iomp) {
-                       iomp = xfs_offset_to_map(page, &iomap, p_offset);
-                       done = (iomp == NULL);
-               }
+               if (iomap_valid)
+                       iomap_valid = xfs_iomap_valid(&iomap, offset);
 
                /*
                 * First case, map an unwritten extent and prepare for
@@ -850,34 +910,53 @@ xfs_page_state_convert(
                 *
                 * Second case, allocate space for a delalloc buffer.
                 * We can return EAGAIN here in the release page case.
-                */
-               if (buffer_unwritten(bh) || buffer_delay(bh)) {
+                *
+                * Third case, an unmapped buffer was found, and we are
+                * in a path where we need to write the whole page out.
+                */
+               if (buffer_unwritten(bh) || buffer_delay(bh) ||
+                   ((buffer_uptodate(bh) || PageUptodate(page)) &&
+                    !buffer_mapped(bh) && (unmapped || startio))) {
+                       /*
+                        * Make sure we don't use a read-only iomap
+                        */
+                       if (flags == BMAPI_READ)
+                               iomap_valid = 0;
+
                        if (buffer_unwritten(bh)) {
                                type = IOMAP_UNWRITTEN;
                                flags = BMAPI_WRITE|BMAPI_IGNSTATE;
-                       } else {
+                       } else if (buffer_delay(bh)) {
                                type = IOMAP_DELAY;
                                flags = BMAPI_ALLOCATE;
                                if (!startio)
                                        flags |= trylock_flag;
+                       } else {
+                               type = IOMAP_NEW;
+                               flags = BMAPI_WRITE|BMAPI_MMAP;
                        }
 
-                       if (!iomp) {
-                               done = 1;
-                               err = xfs_map_blocks(inode, offset, len, &iomap,
-                                               flags);
+                       if (!iomap_valid) {
+                               if (type == IOMAP_NEW) {
+                                       size = xfs_probe_cluster(inode,
+                                                       page, bh, head, 0);
+                               } else {
+                                       size = len;
+                               }
+
+                               err = xfs_map_blocks(inode, offset, size,
+                                               &iomap, flags);
                                if (err)
                                        goto error;
-                               iomp = xfs_offset_to_map(page, &iomap,
-                                                               p_offset);
-                               done = (iomp == NULL);
+                               iomap_valid = xfs_iomap_valid(&iomap, offset);
                        }
-                       if (iomp) {
-                               xfs_map_at_offset(page, bh, p_offset,
-                                               inode->i_blkbits, iomp, ioend);
+                       if (iomap_valid) {
+                               xfs_map_at_offset(bh, offset,
+                                               inode->i_blkbits, &iomap);
                                if (startio) {
-                                       xfs_add_to_ioend(inode, bh, p_offset,
-                                               type, &ioend, done);
+                                       xfs_add_to_ioend(inode, bh, offset,
+                                                       type, &ioend,
+                                                       !iomap_valid);
                                } else {
                                        set_buffer_dirty(bh);
                                        unlock_buffer(bh);
@@ -885,68 +964,39 @@ xfs_page_state_convert(
                                }
                                page_dirty--;
                                count++;
-                       } else {
-                               done = 1;
                        }
-               } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
-                          (unmapped || startio)) {
+               } else if (buffer_uptodate(bh) && startio) {
+                       /*
+                        * we got here because the buffer is already mapped.
+                        * That means it must already have extents allocated
+                        * underneath it. Map the extent by reading it.
+                        */
+                       if (!iomap_valid || type != 0) {
+                               flags = BMAPI_READ;
+                               size = xfs_probe_cluster(inode, page, bh,
+                                                               head, 1);
+                               err = xfs_map_blocks(inode, offset, size,
+                                               &iomap, flags);
+                               if (err)
+                                       goto error;
+                               iomap_valid = xfs_iomap_valid(&iomap, offset);
+                       }
 
                        type = 0;
-                       if (!buffer_mapped(bh)) {
-
-                               /*
-                                * Getting here implies an unmapped buffer
-                                * was found, and we are in a path where we
-                                * need to write the whole page out.
-                                */
-                               if (!iomp) {
-                                       int     size;
-
-                                       size = xfs_probe_unmapped_cluster(
-                                                       inode, page, bh, head);
-                                       err = xfs_map_blocks(inode, offset,
-                                                       size, &iomap,
-                                                       BMAPI_WRITE|BMAPI_MMAP);
-                                       if (err) {
-                                               goto error;
-                                       }
-                                       iomp = xfs_offset_to_map(page, &iomap,
-                                                                    p_offset);
-                                       done = (iomp == NULL);
-                               }
-                               if (iomp) {
-                                       xfs_map_at_offset(page, bh, p_offset,
-                                                       inode->i_blkbits, iomp,
-                                                       ioend);
-                                       if (startio) {
-                                               xfs_add_to_ioend(inode,
-                                                       bh, p_offset, type,
-                                                       &ioend, done);
-                                       } else {
-                                               set_buffer_dirty(bh);
-                                               unlock_buffer(bh);
-                                               mark_buffer_dirty(bh);
-                                       }
-                                       page_dirty--;
-                                       count++;
-                               } else {
-                                       done = 1;
-                               }
-                       } else if (startio) {
-                               if (buffer_uptodate(bh) &&
-                                   !test_and_set_bit(BH_Lock, &bh->b_state)) {
-                                       ASSERT(buffer_mapped(bh));
-                                       xfs_add_to_ioend(inode,
-                                                       bh, p_offset, type,
-                                                       &ioend, done);
-                                       page_dirty--;
-                                       count++;
-                               } else {
-                                       done = 1;
-                               }
+                       if (!test_and_set_bit(BH_Lock, &bh->b_state)) {
+                               ASSERT(buffer_mapped(bh));
+                               if (iomap_valid)
+                                       all_bh = 1;
+                               xfs_add_to_ioend(inode, bh, offset, type,
+                                               &ioend, !iomap_valid);
+                               page_dirty--;
+                               count++;
                        } else {
-                               done = 1;
+                               iomap_valid = 0;
                        }
+               } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
+                          (unmapped || startio)) {
+                       iomap_valid = 0;
                }
 
                if (!iohead)
@@ -960,12 +1010,12 @@ xfs_page_state_convert(
        if (startio)
                xfs_start_page_writeback(page, wbc, 1, count);
 
-       if (ioend && iomp && !done) {
-               offset = (iomp->iomap_offset + iomp->iomap_bsize - 1) >>
+       if (ioend && iomap_valid) {
+               offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
                                        PAGE_CACHE_SHIFT;
                tlast = min_t(pgoff_t, offset, last_index);
-               xfs_cluster_write(inode, page->index + 1, iomp, &ioend,
-                                       wbc, startio, unmapped, tlast);
+               xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
+                                       wbc, startio, all_bh, tlast);
        }
 
        if (iohead)
@@ -1413,4 +1463,5 @@ struct address_space_operations linvfs_aops = {
        .commit_write           = generic_commit_write,
        .bmap                   = linvfs_bmap,
        .direct_IO              = linvfs_direct_IO,
+       .migratepage            = buffer_migrate_page,
 };