unsigned long size, pgoff;
        int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
+       BUG_ON(!(area->vm_flags & VM_CAN_INVALIDATE));
+
        pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
-retry_all:
        size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        if (pgoff >= size)
                goto outside_data_content;
         * Do we have something in the page cache already?
         */
 retry_find:
-       page = find_get_page(mapping, pgoff);
+       page = find_lock_page(mapping, pgoff);
        if (!page) {
                unsigned long ra_pages;
 
                                start = pgoff - ra_pages / 2;
                        do_page_cache_readahead(mapping, file, start, ra_pages);
                }
-               page = find_get_page(mapping, pgoff);
+               page = find_lock_page(mapping, pgoff);
                if (!page)
                        goto no_cached_page;
        }
                ra->mmap_hit++;
 
        /*
-        * Ok, found a page in the page cache, now we need to check
-        * that it's up-to-date.
+        * We have a locked page in the page cache; now we need to check
+        * that it's up-to-date. If not, it is going to be due to an error.
         */
-       if (!PageUptodate(page))
+       if (unlikely(!PageUptodate(page)))
                goto page_not_uptodate;
 
-success:
+       /* Must recheck i_size under page lock */
+       size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+       if (unlikely(pgoff >= size)) {
+               unlock_page(page);
+               goto outside_data_content;
+       }
+
        /*
         * Found the page and have a reference on it.
         */
        return NOPAGE_SIGBUS;
 
 page_not_uptodate:
+       /* IO error path */
        if (!did_readaround) {
                majmin = VM_FAULT_MAJOR;
                count_vm_event(PGMAJFAULT);
         * because there really aren't any performance issues here
         * and we need to check for errors.
         */
-       lock_page(page);
-
-       /* Somebody truncated the page on us? */
-       if (!page->mapping) {
-               unlock_page(page);
-               page_cache_release(page);
-               goto retry_all;
-       }
-
-       /* Somebody else successfully read it in? */
-       if (PageUptodate(page)) {
-               unlock_page(page);
-               goto success;
-       }
        ClearPageError(page);
        error = mapping->a_ops->readpage(file, page);
-       if (!error) {
-               wait_on_page_locked(page);
-               if (PageUptodate(page))
-                       goto success;
-       } else if (error == AOP_TRUNCATED_PAGE) {
-               page_cache_release(page);
+       page_cache_release(page);
+
+       if (!error || error == AOP_TRUNCATED_PAGE)
                goto retry_find;
-       }
 
-       /*
-        * Things didn't work out. Return zero to tell the
-        * mm layer so, possibly freeing the page cache page first.
-        */
+       /* Things didn't work out. Return zero to tell the mm layer so. */
        shrink_readahead_size_eio(file, ra);
-       page_cache_release(page);
        return NOPAGE_SIGBUS;
 }
 EXPORT_SYMBOL(filemap_nopage);
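
With find_lock_page() the page now comes back to the fault handler still
locked, and i_size is rechecked under the page lock, so a truncate racing
with the fault can no longer expose a page beyond the new end of file. A
minimal sketch of the pattern a VM_CAN_INVALIDATE-aware ->nopage is expected
to follow, assuming a hypothetical my_nopage() with readahead, readpage retry
and error handling stripped out (not part of the patch):

/* hypothetical sketch of the locked ->nopage protocol, not part of the patch */
static struct page *my_nopage(struct vm_area_struct *area,
			      unsigned long address, int *type)
{
	struct address_space *mapping = area->vm_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned long pgoff, size;
	struct page *page;

	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;

	page = find_lock_page(mapping, pgoff);	/* page is returned locked */
	if (!page)
		return NOPAGE_SIGBUS;		/* a real implementation reads it in */

	/* i_size must be (re)checked with the page lock held */
	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (pgoff >= size || !PageUptodate(page)) {
		unlock_page(page);
		page_cache_release(page);
		return NOPAGE_SIGBUS;
	}

	if (type)
		*type = VM_FAULT_MINOR;
	return page;	/* still locked; the fault handler unlocks after the pte is set */
}
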
                return -ENOEXEC;
        file_accessed(file);
        vma->vm_ops = &generic_file_vm_ops;
+       vma->vm_flags |= VM_CAN_INVALIDATE;
        return 0;
 }
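
generic_file_mmap() now advertises VM_CAN_INVALIDATE, so any mapping set up
through the generic code opts into the locked ->nopage protocol above. A
filesystem that installs its own vm_ops would have to do the same; a
hypothetical sketch (the myfs_* names are illustrative, not from the patch):

/* hypothetical filesystem ->mmap, illustrative only */
static int myfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &myfs_file_vm_ops;	/* its .nopage must return the page locked */
	vma->vm_flags |= VM_CAN_INVALIDATE;
	return 0;
}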
 
 
        unsigned long restart_addr;
        int need_break;
 
+       /*
+        * files that support invalidating or truncating portions of the
+        * file from under mmapped areas must set the VM_CAN_INVALIDATE flag,
+        * and have their .nopage function return the page locked.
+        */
+       BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+
 again:
        restart_addr = vma->vm_truncate_count;
        if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
 
        spin_lock(&mapping->i_mmap_lock);
 
-       /* serialize i_size write against truncate_count write */
-       smp_wmb();
-       /* Protect against page faults, and endless unmapping loops */
+       /* Protect against endless unmapping loops */
        mapping->truncate_count++;
-       /*
-        * For archs where spin_lock has inclusive semantics like ia64
-        * this smp_mb() will prevent to read pagetable contents
-        * before the truncate_count increment is visible to
-        * other cpus.
-        */
-       smp_mb();
        if (unlikely(is_restart_addr(mapping->truncate_count))) {
                if (mapping->truncate_count == 0)
                        reset_vma_truncate_counts(mapping);
        if (IS_SWAPFILE(inode))
                goto out_busy;
        i_size_write(inode, offset);
+
+       /*
+        * unmap_mapping_range is called twice, first simply for efficiency
+        * so that truncate_inode_pages does fewer single-page unmaps. However,
+        * after this first call and before truncate_inode_pages finishes, it is
+        * possible for private pages to be COWed; these remain after
+        * truncate_inode_pages finishes, so the second unmap_mapping_range call
+        * must be made for correctness.
+        */
        unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
        truncate_inode_pages(mapping, offset);
+       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
        goto out_truncate;
 
 do_expand:
        down_write(&inode->i_alloc_sem);
        unmap_mapping_range(mapping, offset, (end - offset), 1);
        truncate_inode_pages_range(mapping, offset, end);
+       unmap_mapping_range(mapping, offset, (end - offset), 1);
        inode->i_op->truncate_range(inode, offset, end);
        up_write(&inode->i_alloc_sem);
        mutex_unlock(&inode->i_mutex);
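
The comment added to vmtruncate() explains the ordering used both there and
in the truncate_range path above: unmap cheaply, truncate the pagecache, then
unmap again to catch anything established in between. As a schematic, assuming
a hypothetical helper taking the same arguments vmtruncate uses (not part of
the patch):

/* hypothetical helper showing the unmap/truncate/unmap bracketing */
static void sketch_truncate_pagecache(struct address_space *mapping, loff_t offset)
{
	loff_t holebegin = offset + PAGE_SIZE - 1;	/* same rounding as vmtruncate */

	/* the first pass is only an optimisation: fewer single-page unmaps below */
	unmap_mapping_range(mapping, holebegin, 0, 1);
	truncate_inode_pages(mapping, offset);
	/*
	 * the second pass is needed for correctness: private pages COWed after
	 * the first pass survive truncate_inode_pages and must be zapped here
	 */
	unmap_mapping_range(mapping, holebegin, 0, 1);
}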
 
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, address, pte);
-       lazy_mmu_prot_update(pte);
 unlock:
        pte_unmap_unlock(page_table, ptl);
 out:
                int write_access)
 {
        spinlock_t *ptl;
-       struct page *new_page;
-       struct address_space *mapping = NULL;
+       struct page *page, *nopage_page;
        pte_t entry;
-       unsigned int sequence = 0;
        int ret = VM_FAULT_MINOR;
        int anon = 0;
        struct page *dirty_page = NULL;
        pte_unmap(page_table);
        BUG_ON(vma->vm_flags & VM_PFNMAP);
 
-       if (vma->vm_file) {
-               mapping = vma->vm_file->f_mapping;
-               sequence = mapping->truncate_count;
-               smp_rmb(); /* serializes i_size against truncate_count */
-       }
-retry:
-       new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
-       /*
-        * No smp_rmb is needed here as long as there's a full
-        * spin_lock/unlock sequence inside the ->nopage callback
-        * (for the pagecache lookup) that acts as an implicit
-        * smp_mb() and prevents the i_size read to happen
-        * after the next truncate_count read.
-        */
-
+       nopage_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
        /* no page was available -- either SIGBUS, OOM or REFAULT */
-       if (unlikely(new_page == NOPAGE_SIGBUS))
+       if (unlikely(nopage_page == NOPAGE_SIGBUS))
                return VM_FAULT_SIGBUS;
-       else if (unlikely(new_page == NOPAGE_OOM))
+       else if (unlikely(nopage_page == NOPAGE_OOM))
                return VM_FAULT_OOM;
-       else if (unlikely(new_page == NOPAGE_REFAULT))
+       else if (unlikely(nopage_page == NOPAGE_REFAULT))
                return VM_FAULT_MINOR;
 
+       BUG_ON((vma->vm_flags & VM_CAN_INVALIDATE) && !PageLocked(nopage_page));
+       /*
+        * For consistency in the code below, make sure nopage_page is locked
+        * even when the vma does not use the VM_CAN_INVALIDATE protocol.
+        */
+       if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
+               lock_page(nopage_page);
+
        /*
         * Should we do an early C-O-W break?
         */
+       page = nopage_page;
        if (write_access) {
                if (!(vma->vm_flags & VM_SHARED)) {
-                       struct page *page;
-
-                       if (unlikely(anon_vma_prepare(vma)))
-                               goto oom;
-                       page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-                                               vma, address);
-                       if (!page)
-                               goto oom;
-                       copy_user_highpage(page, new_page, address, vma);
-                       page_cache_release(new_page);
-                       new_page = page;
+                       if (unlikely(anon_vma_prepare(vma))) {
+                               ret = VM_FAULT_OOM;
+                               goto out_error;
+                       }
+                       page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+                       if (!page) {
+                               ret = VM_FAULT_OOM;
+                               goto out_error;
+                       }
+                       copy_user_highpage(page, nopage_page, address, vma);
                        anon = 1;
-
                } else {
                        /* if the page will be shareable, see if the backing
                         * address space wants to know that the page is about
                         * to become writable */
                        if (vma->vm_ops->page_mkwrite &&
-                           vma->vm_ops->page_mkwrite(vma, new_page) < 0
-                           ) {
-                               page_cache_release(new_page);
-                               return VM_FAULT_SIGBUS;
+                           vma->vm_ops->page_mkwrite(vma, page) < 0) {
+                               ret = VM_FAULT_SIGBUS;
+                               goto out_error;
                        }
                }
        }
 
        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-       /*
-        * For a file-backed vma, someone could have truncated or otherwise
-        * invalidated this page.  If unmap_mapping_range got called,
-        * retry getting the page.
-        */
-       if (mapping && unlikely(sequence != mapping->truncate_count)) {
-               pte_unmap_unlock(page_table, ptl);
-               page_cache_release(new_page);
-               cond_resched();
-               sequence = mapping->truncate_count;
-               smp_rmb();
-               goto retry;
-       }
 
        /*
         * This silly early PAGE_DIRTY setting removes a race
         * handle that later.
         */
        /* Only go through if we didn't race with anybody else... */
-       if (pte_none(*page_table)) {
-               flush_icache_page(vma, new_page);
-               entry = mk_pte(new_page, vma->vm_page_prot);
+       if (likely(pte_none(*page_table))) {
+               flush_icache_page(vma, page);
+               entry = mk_pte(page, vma->vm_page_prot);
                if (write_access)
                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
                set_pte_at(mm, address, page_table, entry);
                if (anon) {
-                       inc_mm_counter(mm, anon_rss);
-                       lru_cache_add_active(new_page);
-                       page_add_new_anon_rmap(new_page, vma, address);
+                       inc_mm_counter(mm, anon_rss);
+                       lru_cache_add_active(page);
+                       page_add_new_anon_rmap(page, vma, address);
                } else {
                        inc_mm_counter(mm, file_rss);
-                       page_add_file_rmap(new_page);
+                       page_add_file_rmap(page);
                        if (write_access) {
-                               dirty_page = new_page;
+                               dirty_page = page;
                                get_page(dirty_page);
                        }
                }
+
+               /* no need to invalidate: a not-present page won't be cached */
+               update_mmu_cache(vma, address, entry);
+               lazy_mmu_prot_update(entry);
        } else {
-               /* One of our sibling threads was faster, back out. */
-               page_cache_release(new_page);
-               goto unlock;
+               if (anon)
+                       page_cache_release(page);
+               else
+                       anon = 1; /* not anon: reuse the flag so the out: path releases nopage_page */
        }
 
-       /* no need to invalidate: a not-present page shouldn't be cached */
-       update_mmu_cache(vma, address, entry);
-       lazy_mmu_prot_update(entry);
-unlock:
        pte_unmap_unlock(page_table, ptl);
-       if (dirty_page) {
+
+out:
+       unlock_page(nopage_page);
+       if (anon)
+               page_cache_release(nopage_page);
+       else if (dirty_page) {
                set_page_dirty_balance(dirty_page);
                put_page(dirty_page);
        }
+
        return ret;
-oom:
-       page_cache_release(new_page);
-       return VM_FAULT_OOM;
+
+out_error:
+       anon = 1; /* release nopage_page */
+       goto out;
 }
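
The net effect of the rework is that the page returned by ->nopage stays
locked from the pagecache lookup until after the pte has been installed, so
the page lock, rather than the removed truncate_count/sequence retry loop,
is what serialises the fault against truncation and invalidation. A condensed
sketch of the resulting ordering for a plain file-backed read fault, with
hypothetical names and the COW, page_mkwrite and dirty-balancing details
omitted (not part of the patch):

/* condensed, hypothetical sketch of the new fault ordering; illustrative only */
static int sketch_do_nopage(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long address, pte_t *page_table, pmd_t *pmd)
{
	int ret = VM_FAULT_MINOR;
	struct page *page;
	spinlock_t *ptl;
	pte_t entry;

	page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
	if (page == NOPAGE_SIGBUS)
		return VM_FAULT_SIGBUS;
	if (page == NOPAGE_OOM)
		return VM_FAULT_OOM;
	if (page == NOPAGE_REFAULT)
		return VM_FAULT_MINOR;
	/* with VM_CAN_INVALIDATE the page is returned locked */

	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
	if (likely(pte_none(*page_table))) {
		/* install the pte while the page lock excludes invalidation */
		flush_icache_page(vma, page);
		entry = mk_pte(page, vma->vm_page_prot);
		set_pte_at(mm, address, page_table, entry);
		inc_mm_counter(mm, file_rss);
		page_add_file_rmap(page);
		update_mmu_cache(vma, address, entry);
		pte_unmap_unlock(page_table, ptl);
		unlock_page(page);	/* pte in place; invalidation may proceed */
	} else {
		/* raced with another thread: drop our page and reference */
		pte_unmap_unlock(page_table, ptl);
		unlock_page(page);
		page_cache_release(page);
	}
	return ret;
}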
 
 /*