2 * linux/mm/filemap_xip.c
4 * Copyright (C) 2005 IBM Corporation
5 * Author: Carsten Otte <cotte@de.ibm.com>
7 * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
12 #include <linux/pagemap.h>
13 #include <linux/module.h>
14 #include <linux/uio.h>
15 #include <linux/rmap.h>
16 #include <asm/tlbflush.h>
/*
 * do_xip_mapping_read(): core read loop for execute-in-place (XIP) files.
 * Bypasses the page cache entirely: each file page is obtained through
 * mapping->a_ops->get_xip_page() and handed to the actor callback
 * (e.g. file_read_actor) which copies it out and updates desc.
 * NOTE(review): this is a gap-filled numbered listing; the margin
 * integers are original source line numbers and missing lines
 * (braces, gotos, loop header) are elided from this view.
 */
20 * This is a file read routine for execute in place files, and uses
21 * the mapping->a_ops->get_xip_page() function for the actual low-level
24 * Note the struct file* is not used at all. It may be NULL.
27 do_xip_mapping_read(struct address_space *mapping,
28 struct file_ra_state *_ra,
31 read_descriptor_t *desc,
34 struct inode *inode = mapping->host;
35 unsigned long index, end_index, offset;
38 BUG_ON(!mapping->a_ops->get_xip_page);
/* Split *ppos into a page index and a byte offset within that page. */
40 index = *ppos >> PAGE_CACHE_SHIFT;
41 offset = *ppos & ~PAGE_CACHE_MASK;
43 isize = i_size_read(inode);
47 end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
50 unsigned long nr, ret;
52 /* nr is the maximum number of bytes to copy from this page */
54 if (index >= end_index) {
55 if (index > end_index)
/* Last page: clamp nr to the bytes up to isize. */
57 nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
/* get_xip_page() takes a sector index, hence the *(PAGE_SIZE/512). */
64 page = mapping->a_ops->get_xip_page(mapping,
65 index*(PAGE_SIZE/512), 0);
68 if (unlikely(IS_ERR(page))) {
69 if (PTR_ERR(page) == -ENODATA) {
/* Sparse hole in the file: read as zeroes via the shared zero page. */
71 page = virt_to_page(empty_zero_page);
73 desc->error = PTR_ERR(page);
77 BUG_ON(!PageUptodate(page));
79 /* If users can be writing to this page using arbitrary
80 * virtual addresses, take care about potential aliasing
81 * before reading the page on the kernel side.
83 if (mapping_writably_mapped(mapping))
84 flush_dcache_page(page);
87 * Ok, we have the page, and it's up-to-date, so
88 * now we can copy it to user space...
90 * The actor routine returns how many bytes were actually used..
91 * NOTE! This may not be the same as how much of a user buffer
92 * we filled up (we may be padding etc), so we can only update
93 * "pos" here (the actor routine has to update the user buffer
94 * pointers and the remaining count).
96 ret = actor(desc, page, offset, nr);
/* Advance index/offset; `offset += ret` is on an elided line (97). */
98 index += offset >> PAGE_CACHE_SHIFT;
99 offset &= ~PAGE_CACHE_MASK;
/* Full page consumed and more requested: continue the loop. */
101 if (ret == nr && desc->count)
106 /* Did not get the page. Report it */
/* Write back the final file position for the caller. */
112 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
/*
 * __xip_file_aio_read(): validate the user iovec array, then run
 * do_xip_mapping_read() once per segment, accumulating desc.written
 * into the return value.
 * NOTE(review): gap-filled numbered listing; margin integers are
 * original source line numbers, intervening lines are elided.
 */
118 * This is the "read()" routine for all filesystems
119 * that uses the get_xip_page address space operation.
122 __xip_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
123 unsigned long nr_segs, loff_t *ppos)
125 struct file *filp = iocb->ki_filp;
/* First pass: sanity-check every segment before touching any data. */
131 for (seg = 0; seg < nr_segs; seg++) {
132 const struct iovec *iv = &iov[seg];
135 * If any segment has a negative length, or the cumulative
136 * length ever wraps negative then return -EINVAL.
138 count += iv->iov_len;
139 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
/* Segment must be writable from user space (we copy into it). */
141 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
146 count -= iv->iov_len; /* This segment is no good */
/* Second pass: read each segment via the XIP mapping-read loop. */
152 for (seg = 0; seg < nr_segs; seg++) {
153 read_descriptor_t desc;
156 desc.arg.buf = iov[seg].iov_base;
157 desc.count = iov[seg].iov_len;
161 do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
162 ppos, &desc, file_read_actor);
163 retval += desc.written;
/*
 * xip_file_aio_read(): single-buffer aio read entry point.  Wraps the
 * user buffer in a one-element iovec and defers to __xip_file_aio_read().
 * NOTE(review): partial numbered listing; some lines elided.
 */
174 xip_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count,
177 struct iovec local_iov = { .iov_base = buf, .iov_len = count };
/* aio callers must keep ki_pos in sync with the pos they pass. */
179 BUG_ON(iocb->ki_pos != pos);
180 return __xip_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
182 EXPORT_SYMBOL_GPL(xip_file_aio_read);
/*
 * xip_file_readv(): synchronous readv() entry point.  Builds a
 * synchronous kiocb on the stack and defers to __xip_file_aio_read().
 * NOTE(review): partial numbered listing; some lines elided.
 */
185 xip_file_readv(struct file *filp, const struct iovec *iov,
186 unsigned long nr_segs, loff_t *ppos)
190 init_sync_kiocb(&kiocb, filp);
191 return __xip_file_aio_read(&kiocb, iov, nr_segs, ppos);
193 EXPORT_SYMBOL_GPL(xip_file_readv);
/*
 * xip_file_sendfile(): sendfile() entry point for XIP files.  Sets up a
 * read descriptor targeting `target` and runs the common mapping-read
 * loop with the caller-supplied actor.
 * NOTE(review): partial numbered listing; desc initialization lines
 * other than arg.data are elided from this view.
 */
196 xip_file_sendfile(struct file *in_file, loff_t *ppos,
197 size_t count, read_actor_t actor, void *target)
199 read_descriptor_t desc;
206 desc.arg.data = target;
209 do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file,
215 EXPORT_SYMBOL_GPL(xip_file_sendfile);
/*
 * __xip_unmap(): rip the empty_zero_page mapping at `pgoff` out of every
 * vma of this address_space.  Called before a real block is installed at
 * that offset so no vma keeps reading stale zeroes.  Holds i_mmap_lock
 * around the prio-tree walk and each mm's page_table_lock around the
 * pte teardown.
 * NOTE(review): partial numbered listing; margin integers are original
 * source line numbers, intervening lines (mm assignment, lock
 * acquisition, page_remove_rmap etc.) are elided.
 */
218 * __xip_unmap is invoked from xip_unmap and
221 * This function walks all vmas of the address_space and unmaps the
222 * empty_zero_page when found at pgoff. Should it go in rmap.c?
225 __xip_unmap (struct address_space * mapping,
228 struct vm_area_struct *vma;
229 struct mm_struct *mm;
230 struct prio_tree_iter iter;
231 unsigned long address;
235 spin_lock(&mapping->i_mmap_lock);
236 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
/* Translate the file offset into this vma's virtual address. */
238 address = vma->vm_start +
239 ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
240 BUG_ON(address < vma->vm_start || address >= vma->vm_end);
242 * We need the page_table_lock to protect us from page faults,
243 * munmap, fork, etc...
/* Only proceed if the pte at `address` actually maps empty_zero_page. */
245 pte = page_check_address(virt_to_page(empty_zero_page), mm,
248 /* Nuke the page table entry. */
249 flush_cache_page(vma, address, pte_pfn(pte));
250 pteval = ptep_clear_flush(vma, address, pte);
/* The zero page is read-only shared; a dirty pte here is a bug. */
251 BUG_ON(pte_dirty(pteval));
253 spin_unlock(&mm->page_table_lock);
256 spin_unlock(&mapping->i_mmap_lock);
/*
 * xip_file_nopage(): page-fault handler for XIP mappings.  Looks up the
 * backing page with get_xip_page(); on -ENODATA (hole) either allocates
 * a new block (shared-writable mapping on a writable fs) or maps the
 * shared empty_zero_page for read-only faults.
 * NOTE(review): partial numbered listing; error paths, retry labels and
 * the final return are elided from this view.
 */
260 * xip_nopage() is invoked via the vma operations vector for a
261 * mapped memory region to read in file data during a page fault.
263 * This function is derived from filemap_nopage, but used for execute in place
266 xip_file_nopage(struct vm_area_struct * area,
267 unsigned long address,
270 struct file *file = area->vm_file;
271 struct address_space *mapping = file->f_mapping;
272 struct inode *inode = mapping->host;
274 unsigned long size, pgoff, endoff;
/* File page offset of the faulting address within this vma. */
276 pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
278 endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT)
/* File size in pages, rounded up; faults beyond it are rejected
 * (the bounds check itself is on an elided line). */
281 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
286 page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
288 BUG_ON(!PageUptodate(page));
/* Anything other than a hole is a hard error. */
291 if (PTR_ERR(page) != -ENODATA)
295 if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
296 (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
297 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
298 /* maybe shared writable, allocate new block */
299 page = mapping->a_ops->get_xip_page (mapping,
300 pgoff*(PAGE_SIZE/512), 1);
303 BUG_ON(!PageUptodate(page));
304 /* unmap page at pgoff from all other vmas */
305 __xip_unmap(mapping, pgoff);
307 /* not shared and writable, use empty_zero_page */
308 page = virt_to_page(empty_zero_page);
/* vma operations for XIP mappings: only the fault handler is overridden. */
314 static struct vm_operations_struct xip_file_vm_ops = {
315 .nopage = xip_file_nopage,
/*
 * xip_file_mmap(): mmap() entry point for XIP files.  Requires the
 * filesystem to provide get_xip_page and installs the XIP vm_ops.
 * NOTE(review): partial numbered listing; the return statement is elided.
 */
318 int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
320 BUG_ON(!file->f_mapping->a_ops->get_xip_page);
323 vma->vm_ops = &xip_file_vm_ops;
326 EXPORT_SYMBOL_GPL(xip_file_mmap);
/*
 * do_xip_file_write(): core write loop for XIP files.  Writes directly
 * to the backing pages obtained via get_xip_page() (no page cache, no
 * writeback): holes get a freshly allocated block whose zero-page
 * mappings are torn down first via __xip_unmap().  Caller is expected
 * to hold i_sem (see the i_size comment below).
 * NOTE(review): gap-filled numbered listing; the loop header, per-pass
 * bookkeeping (pos/written updates) and error exits are elided.
 */
329 do_xip_file_write(struct kiocb *iocb, const struct iovec *iov,
330 unsigned long nr_segs, loff_t pos, loff_t *ppos,
333 struct file *file = iocb->ki_filp;
334 struct address_space * mapping = file->f_mapping;
335 struct address_space_operations *a_ops = mapping->a_ops;
336 struct inode *inode = mapping->host;
340 const struct iovec *cur_iov = iov; /* current iovec */
341 size_t iov_base = 0; /* offset in the current iovec */
345 BUG_ON(!mapping->a_ops->get_xip_page);
350 unsigned long offset;
353 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
354 index = pos >> PAGE_CACHE_SHIFT;
/* bytes = how much of this page we can fill this pass (clamped to
 * the remaining count on an elided line). */
355 bytes = PAGE_CACHE_SIZE - offset;
360 * Bring in the user page that we will copy from _first_.
361 * Otherwise there's a nasty deadlock on copying from the
362 * same page as we're writing to, without it being marked
365 fault_in_pages_readable(buf, bytes);
367 page = a_ops->get_xip_page(mapping,
368 index*(PAGE_SIZE/512), 0);
369 if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) {
370 /* we allocate a new page unmap it */
371 page = a_ops->get_xip_page(mapping,
372 index*(PAGE_SIZE/512), 1);
374 /* unmap page at pgoff from all other vmas */
375 __xip_unmap(mapping, index);
/* Lookup/allocation failed: propagate the error code. */
380 status = PTR_ERR(page);
384 BUG_ON(!PageUptodate(page));
/* Fast path for a single segment, iovec walker otherwise. */
386 if (likely(nr_segs == 1))
387 copied = filemap_copy_from_user(page, offset,
390 copied = filemap_copy_from_user_iovec(page, offset,
391 cur_iov, iov_base, bytes);
392 flush_dcache_page(page);
393 if (likely(copied > 0)) {
401 if (unlikely(nr_segs > 1))
402 filemap_set_next_iovec(&cur_iov,
/* Short copy means the user buffer faulted: handled on elided lines. */
406 if (unlikely(copied != bytes))
414 * No need to use i_size_read() here, the i_size
415 * cannot change under us because we hold i_sem.
417 if (pos > inode->i_size) {
418 i_size_write(inode, pos);
419 mark_inode_dirty(inode);
/* Standard kernel convention: bytes written if any, else the error. */
422 return written ? written : status;
/*
 * xip_file_aio_write_nolock(): validate the iovec array and the write
 * request (limits, frozen fs, suid removal, mtime update), then hand
 * off to do_xip_file_write().  "nolock" = caller holds i_sem.
 * NOTE(review): partial numbered listing; margin integers are original
 * source line numbers, intervening lines are elided.
 */
426 xip_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
427 unsigned long nr_segs, loff_t *ppos)
429 struct file *file = iocb->ki_filp;
430 struct address_space * mapping = file->f_mapping;
431 size_t ocount; /* original count */
432 size_t count; /* after file limit checks */
433 struct inode *inode = mapping->host;
/* Validate every segment before writing anything. */
440 for (seg = 0; seg < nr_segs; seg++) {
441 const struct iovec *iv = &iov[seg];
444 * If any segment has a negative length, or the cumulative
445 * length ever wraps negative then return -EINVAL.
447 ocount += iv->iov_len;
448 if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
/* Segment must be readable from user space (we copy from it). */
450 if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
455 ocount -= iv->iov_len; /* This segment is no good */
/* Block writes while the superblock is frozen for snapshotting. */
462 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* May shrink count to respect rlimits / max file size. */
466 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
/* Clear suid/sgid bits on write, per POSIX. */
473 err = remove_suid(file->f_dentry);
477 inode_update_time(inode, 1);
479 /* use execute in place to copy directly to disk */
480 written = do_xip_file_write (iocb, iov,
481 nr_segs, pos, ppos, count);
483 return written ? written : err;
/*
 * __xip_file_write_nolock(): synchronous wrapper — builds a sync kiocb
 * and defers to xip_file_aio_write_nolock().  Caller holds i_sem.
 */
487 __xip_file_write_nolock(struct file *file, const struct iovec *iov,
488 unsigned long nr_segs, loff_t *ppos)
492 init_sync_kiocb(&kiocb, file);
493 return xip_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
/*
 * xip_file_aio_write(): single-buffer aio write entry point.  Wraps the
 * user buffer in a one-element iovec and calls the nolock variant;
 * i_sem locking happens on lines elided from this view.
 */
497 xip_file_aio_write(struct kiocb *iocb, const char __user *buf,
498 size_t count, loff_t pos)
500 struct file *file = iocb->ki_filp;
501 struct address_space *mapping = file->f_mapping;
502 struct inode *inode = mapping->host;
504 struct iovec local_iov = { .iov_base = (void __user *)buf,
/* aio callers must keep ki_pos in sync with the pos they pass. */
507 BUG_ON(iocb->ki_pos != pos);
510 ret = xip_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
514 EXPORT_SYMBOL_GPL(xip_file_aio_write);
/*
 * xip_file_writev(): synchronous writev() entry point; calls the
 * nolock sync wrapper.  i_sem acquisition/release is on lines elided
 * from this view.
 */
516 ssize_t xip_file_writev(struct file *file, const struct iovec *iov,
517 unsigned long nr_segs, loff_t *ppos)
519 struct address_space *mapping = file->f_mapping;
520 struct inode *inode = mapping->host;
524 ret = __xip_file_write_nolock(file, iov, nr_segs, ppos);
528 EXPORT_SYMBOL_GPL(xip_file_writev);
/*
 * xip_truncate_page(): zero the tail of the block containing `from`,
 * the XIP analogue of block_truncate_page().  Uses get_xip_page()
 * instead of the page cache; holes need no zeroing at all.
 * NOTE(review): partial numbered listing; early-return paths and the
 * final return are elided from this view.
 */
531 * truncate a page used for execute in place
532 * functionality is analog to block_truncate_page but does use get_xip_page
533 * to get the page instead of page cache
536 xip_truncate_page(struct address_space *mapping, loff_t from)
538 pgoff_t index = from >> PAGE_CACHE_SHIFT;
539 unsigned offset = from & (PAGE_CACHE_SIZE-1);
546 BUG_ON(!mapping->a_ops->get_xip_page);
548 blocksize = 1 << mapping->host->i_blkbits;
/* length = distance of `from` into its block; 0 means block-aligned. */
549 length = offset & (blocksize - 1);
551 /* Block boundary? Nothing to do */
/* Bytes to zero: from `from` to the end of its block. */
555 length = blocksize - length;
557 page = mapping->a_ops->get_xip_page(mapping,
558 index*(PAGE_SIZE/512), 0);
562 if (unlikely(IS_ERR(page))) {
563 if (PTR_ERR(page) == -ENODATA) {
564 /* Hole? No need to truncate */
571 BUG_ON(!PageUptodate(page));
/* Zero the partial-block tail through a temporary atomic mapping. */
572 kaddr = kmap_atomic(page, KM_USER0);
573 memset(kaddr + offset, 0, length);
574 kunmap_atomic(kaddr, KM_USER0);
576 flush_dcache_page(page);
581 EXPORT_SYMBOL_GPL(xip_truncate_page);