diff --git a/mm/memory.c b/mm/memory.c
index 745b3482e6c..7a11ddd5060 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -349,6 +349,11 @@ void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
 	dump_stack();
 }
 
+static inline int is_cow_mapping(unsigned int flags)
+{
+	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
 /*
  * This function gets the "struct page" associated with a pte.
  *
@@ -377,6 +382,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
 		unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
 		if (pfn == vma->vm_pgoff + off)
 			return NULL;
+		if (!is_cow_mapping(vma->vm_flags))
+			return NULL;
 	}
 
 	/*
@@ -437,7 +444,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * If it's a COW mapping, write protect it both
 	 * in the parent and the child
 	 */
-	if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
+	if (is_cow_mapping(vm_flags)) {
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = *src_pte;
 	}
@@ -567,7 +574,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * readonly mappings. The tradeoff is that copy_page_range is more
 	 * efficient than faulting.
 	 */
-	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP))) {
+	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
 		if (!vma->anon_vma)
 			return 0;
 	}
@@ -1002,7 +1009,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			continue;
 		}
 
-		if (!vma || (vma->vm_flags & VM_IO)
+		if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
 				|| !(vm_flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
@@ -1146,12 +1153,12 @@ int zeromap_page_range(struct vm_area_struct *vma,
 	return err;
 }
 
-pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
 {
 	pgd_t * pgd = pgd_offset(mm, addr);
 	pud_t * pud = pud_alloc(mm, pgd, addr);
 	if (pud) {
-		pmd_t * pmd = pmd_alloc(mm, pgd, addr);
+		pmd_t * pmd = pmd_alloc(mm, pud, addr);
 		if (pmd)
 			return pte_alloc_map_lock(mm, pmd, addr, ptl);
 	}
@@ -1172,7 +1179,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
 	spinlock_t *ptl;
 
 	retval = -EINVAL;
-	if (PageAnon(page) || !PageReserved(page))
+	if (PageAnon(page))
 		goto out;
 	retval = -ENOMEM;
 	flush_dcache_page(page);
@@ -1197,45 +1204,34 @@ out:
 }
 
 /*
- * Somebody does a pfn remapping that doesn't actually work as a vma.
+ * This allows drivers to insert individual pages they've allocated
+ * into a user vma.
+ *
+ * The page has to be a nice clean _individual_ kernel allocation.
+ * If you allocate a compound page, you need to have marked it as
+ * such (__GFP_COMP), or manually just split the page up yourself
+ * (which is mainly an issue of doing "set_page_count(page, 1)" for
+ * each sub-page, and then freeing them one by one when you free
+ * them rather than freeing it as a compound page).
  *
- * Do it as individual pages instead, and warn about it. It's bad form,
- * and very inefficient.
+ * NOTE! Traditionally this was done with "remap_pfn_range()" which
+ * took an arbitrary page protection parameter. This doesn't allow
+ * that. Your vma protection will have to be set up correctly, which
+ * means that if you want a shared writable mapping, you'd better
+ * ask for a shared writable mapping!
+ *
+ * The page does not need to be reserved.
  */
-static int incomplete_pfn_remap(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end,
-		unsigned long pfn, pgprot_t prot)
+int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
 {
-	static int warn = 10;
-	struct page *page;
-	int retval;
-
-	if (!(vma->vm_flags & VM_INCOMPLETE)) {
-		if (warn) {
-			warn--;
-			printk("%s does an incomplete pfn remapping", current->comm);
-			dump_stack();
-		}
-	}
-	vma->vm_flags |= VM_INCOMPLETE | VM_IO | VM_RESERVED;
-
-	if (start < vma->vm_start || end > vma->vm_end)
-		return -EINVAL;
-
-	if (!pfn_valid(pfn))
+	if (addr < vma->vm_start || addr >= vma->vm_end)
+		return -EFAULT;
+	if (!page_count(page))
 		return -EINVAL;
-
-	retval = 0;
-	page = pfn_to_page(pfn);
-	while (start < end) {
-		retval = insert_page(vma->vm_mm, start, page, prot);
-		if (retval < 0)
-			break;
-		start += PAGE_SIZE;
-		page++;
-	}
-	return retval;
+	vma->vm_flags |= VM_INSERTPAGE;
+	return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
 }
+EXPORT_SYMBOL(vm_insert_page);
 
 /*
  * maps a range of physical memory into the requested pages. the old
@@ -1311,9 +1307,6 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	struct mm_struct *mm = vma->vm_mm;
 	int err;
 
-	if (addr != vma->vm_start || end != vma->vm_end)
-		return incomplete_pfn_remap(vma, addr, end, pfn, prot);
-
 	/*
 	 * Physically remapped pages are special. Tell the
 	 * rest of the world about it:
@@ -1327,9 +1320,18 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * VM_PFNMAP tells the core MM that the base pages are just
 	 * raw PFN mappings, and do not have a "struct page" associated
 	 * with them.
+	 *
+	 * There's a horrible special case to handle copy-on-write
+	 * behaviour that some programs depend on. We mark the "original"
+	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
 	 */
+	if (is_cow_mapping(vma->vm_flags)) {
+		if (addr != vma->vm_start || end != vma->vm_end)
+			return -EINVAL;
+		vma->vm_pgoff = pfn;
+	}
+
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
-	vma->vm_pgoff = pfn;
 
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
@@ -1433,12 +1435,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		spinlock_t *ptl, pte_t orig_pte)
 {
-	struct page *old_page, *src_page, *new_page;
+	struct page *old_page, *new_page;
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
-	src_page = old_page;
 	if (!old_page)
 		goto gotten;
 
@@ -1466,7 +1467,7 @@ gotten:
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
 
-	if (src_page == ZERO_PAGE(address)) {
+	if (old_page == ZERO_PAGE(address)) {
 		new_page = alloc_zeroed_user_highpage(vma, address);
 		if (!new_page)
 			goto oom;
@@ -1474,7 +1475,7 @@ gotten:
 		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!new_page)
 			goto oom;
-		cow_user_page(new_page, src_page, address);
+		cow_user_page(new_page, old_page, address);
 	}
 
 	/*
@@ -1497,7 +1498,7 @@ gotten:
 		update_mmu_cache(vma, address, entry);
 		lazy_mmu_prot_update(entry);
 		lru_cache_add_active(new_page);
-		page_add_anon_rmap(new_page, vma, address);
+		page_add_new_anon_rmap(new_page, vma, address);
 
 		/* Free the old page.. */
 		new_page = old_page;
@@ -1769,9 +1770,32 @@ out_big:
 out_busy:
 	return -ETXTBSY;
 }
-
 EXPORT_SYMBOL(vmtruncate);
 
+int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
+{
+	struct address_space *mapping = inode->i_mapping;
+
+	/*
+	 * If the underlying filesystem is not going to provide
+	 * a way to truncate a range of blocks (punch a hole) -
+	 * we should return failure right now.
+	 */
+	if (!inode->i_op || !inode->i_op->truncate_range)
+		return -ENOSYS;
+
+	mutex_lock(&inode->i_mutex);
+	down_write(&inode->i_alloc_sem);
+	unmap_mapping_range(mapping, offset, (end - offset), 1);
+	truncate_inode_pages_range(mapping, offset, end);
+	inode->i_op->truncate_range(inode, offset, end);
+	up_write(&inode->i_alloc_sem);
+	mutex_unlock(&inode->i_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(vmtruncate_range);
+
 /*
  * Primitive swap readahead code. We simply read an aligned block of
  * (1 << page_cluster) entries in the swap area. This method is chosen
@@ -1953,8 +1977,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			goto release;
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(page);
-		SetPageReferenced(page);
-		page_add_anon_rmap(page, vma, address);
+		page_add_new_anon_rmap(page, vma, address);
 	} else {
 		/* Map the ZERO_PAGE - vm_page_prot is readonly */
 		page = ZERO_PAGE(address);
@@ -2010,6 +2033,8 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int anon = 0;
 
 	pte_unmap(page_table);
+	BUG_ON(vma->vm_flags & VM_PFNMAP);
+
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;
 		sequence = mapping->truncate_count;
@@ -2042,7 +2067,7 @@ retry:
 		page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!page)
 			goto oom;
-		cow_user_page(page, new_page, address);
+		copy_user_highpage(page, new_page, address);
 		page_cache_release(new_page);
 		new_page = page;
 		anon = 1;
@@ -2083,7 +2108,7 @@ retry:
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
 			lru_cache_add_active(new_page);
-			page_add_anon_rmap(new_page, vma, address);
+			page_add_new_anon_rmap(new_page, vma, address);
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
@@ -2242,6 +2267,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
 }
 
+EXPORT_SYMBOL_GPL(__handle_mm_fault);
+
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
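
As a usage illustration (not part of the patch): a minimal sketch of how a driver's mmap handler might call the newly exported vm_insert_page() on a kernel of this vintage. The mydrv structure, its buf_page field (a single page obtained from alloc_page(GFP_KERNEL)), and the function name are all hypothetical.

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct mydrv *drv = file->private_data;	/* hypothetical per-device state */

	/* One individually allocated (non-compound) kernel page is mapped. */
	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	/*
	 * vm_insert_page() checks that the address lies inside the vma and
	 * that the page has a non-zero refcount, sets VM_INSERTPAGE on the
	 * vma, and maps the page with vma->vm_page_prot, so the protection
	 * requested at mmap() time is what ends up in the page table.
	 */
	return vm_insert_page(vma, vma->vm_start, drv->buf_page);
}

Unlike the incomplete_pfn_remap() fallback this patch removes, the page does not need to be marked reserved, and the vma protection must already be what the caller wants.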
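
A second illustration (also not from the patch): remap_pfn_range() now accepts a copy-on-write mapping (private and writable, the is_cow_mapping() case) only when it covers the entire vma, since vma->vm_pgoff is reused to mark the original un-COWed pfns. A sketch under that assumption, with MYDRV_PHYS_BASE standing in for a hypothetical physical base address:

static int mydrv_mmap_phys(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/*
	 * Map the whole vma in one call. A partial remap of a COW mapping
	 * now fails with -EINVAL; shared (non-COW) mappings are unaffected.
	 */
	return remap_pfn_range(vma, vma->vm_start,
			       MYDRV_PHYS_BASE >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}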
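
Finally, on the new vmtruncate_range(): it returns -ENOSYS unless the inode supplies an i_op->truncate_range method; when one is present it takes i_mutex and i_alloc_sem, unmaps and truncates the affected page-cache range, and only then calls into the filesystem. A hypothetical hookup might look like the sketch below (the myfs_* names are invented; tmpfs is the kind of in-tree filesystem expected to provide this operation).

static struct inode_operations myfs_inode_ops = {
	.truncate	= myfs_truncate,	/* hypothetical */
	.truncate_range	= myfs_truncate_range,	/* hypothetical: frees blocks in [offset, end] */
};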