Driver core: add missing kset uevent

[linux-2.6-omap-h63xx.git] / mm / shmem.c
diff --git a/mm/shmem.c b/mm/shmem.c

index fcb07882c8e0224530de5046e016958c65ff7524..0493e4d0bcaab2d5b0281446171b4de57598b8ee 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -175,7 +175,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
                 vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
  }
  
-static struct super_operations shmem_ops;
+static const struct super_operations shmem_ops;
  static const struct address_space_operations shmem_aops;
  static const struct file_operations shmem_file_operations;
  static const struct inode_operations shmem_inode_operations;
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
  /*
   * shmem_free_swp - free some swap entries in a directory
   *
- * @dir:   pointer to the directory
- * @edir:  pointer after last entry of the directory
+ * @dir:        pointer to the directory
+ * @edir:       pointer after last entry of the directory
+ * @punch_lock: pointer to spinlock when needed for the holepunch case
   */
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
+static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
+                                               spinlock_t *punch_lock)
  {
+       spinlock_t *punch_unlock = NULL;
         swp_entry_t *ptr;
         int freed = 0;
  
         for (ptr = dir; ptr < edir; ptr++) {
                 if (ptr->val) {
+                       if (unlikely(punch_lock)) {
+                               punch_unlock = punch_lock;
+                               punch_lock = NULL;
+                               spin_lock(punch_unlock);
+                               if (!ptr->val)
+                                       continue;
+                       }
                         free_swap_and_cache(*ptr);
                         *ptr = (swp_entry_t){0};
                         freed++;
                 }
         }
+       if (punch_unlock)
+               spin_unlock(punch_unlock);
         return freed;
  }
  
-static int shmem_map_and_free_swp(struct page *subdir,
-               int offset, int limit, struct page ***dir)
+static int shmem_map_and_free_swp(struct page *subdir, int offset,
+               int limit, struct page ***dir, spinlock_t *punch_lock)
  {
         swp_entry_t *ptr;
         int freed = 0;
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
                 int size = limit - offset;
                 if (size > LATENCY_LIMIT)
                         size = LATENCY_LIMIT;
-               freed += shmem_free_swp(ptr+offset, ptr+offset+size);
+               freed += shmem_free_swp(ptr+offset, ptr+offset+size,
+                                                       punch_lock);
                 if (need_resched()) {
                         shmem_swp_unmap(ptr);
                         if (*dir) {
@@ -481,7 +494,10 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         long nr_swaps_freed = 0;
         int offset;
         int freed;
-       int punch_hole = 0;
+       int punch_hole;
+       spinlock_t *needs_lock;
+       spinlock_t *punch_lock;
+       unsigned long upper_limit;
  
         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
         idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -492,11 +508,20 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         info->flags |= SHMEM_TRUNCATE;
         if (likely(end == (loff_t) -1)) {
                 limit = info->next_index;
+               upper_limit = SHMEM_MAX_INDEX;
                 info->next_index = idx;
+               needs_lock = NULL;
+               punch_hole = 0;
         } else {
-               limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-               if (limit > info->next_index)
-                       limit = info->next_index;
+               if (end + 1 >= inode->i_size) { /* we may free a little more */
+                       limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
+                                                       PAGE_CACHE_SHIFT;
+                       upper_limit = SHMEM_MAX_INDEX;
+               } else {
+                       limit = (end + 1) >> PAGE_CACHE_SHIFT;
+                       upper_limit = limit;
+               }
+               needs_lock = &info->lock;
                 punch_hole = 1;
         }
  
@@ -513,17 +538,30 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 size = limit;
                 if (size > SHMEM_NR_DIRECT)
                         size = SHMEM_NR_DIRECT;
-               nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+               nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
         }
  
         /*
          * If there are no indirect blocks or we are punching a hole
          * below indirect blocks, nothing to be done.
          */
-       if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
+       if (!topdir || limit <= SHMEM_NR_DIRECT)
                 goto done2;
  
-       BUG_ON(limit <= SHMEM_NR_DIRECT);
+       /*
+        * The truncation case has already dropped info->lock, and we're safe
+        * because i_size and next_index have already been lowered, preventing
+        * access beyond.  But in the punch_hole case, we still need to take
+        * the lock when updating the swap directory, because there might be
+        * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
+        * shmem_writepage.  However, whenever we find we can remove a whole
+        * directory page (not at the misaligned start or end of the range),
+        * we first NULLify its pointer in the level above, and then have no
+        * need to take the lock when updating its contents: needs_lock and
+        * punch_lock (either pointing to info->lock or NULL) manage this.
+        */
+
+       upper_limit -= SHMEM_NR_DIRECT;
         limit -= SHMEM_NR_DIRECT;
         idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
         offset = idx % ENTRIES_PER_PAGE;
@@ -543,8 +581,14 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                 if (*dir) {
                         diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
                                 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
-                       if (!diroff && !offset) {
-                               *dir = NULL;
+                       if (!diroff && !offset && upper_limit >= stage) {
+                               if (needs_lock) {
+                                       spin_lock(needs_lock);
+                                       *dir = NULL;
+                                       spin_unlock(needs_lock);
+                                       needs_lock = NULL;
+                               } else
+                                       *dir = NULL;
                                 nr_pages_to_free++;
                                 list_add(&middir->lru, &pages_to_free);
                         }
@@ -570,39 +614,55 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
                         }
                         stage = idx + ENTRIES_PER_PAGEPAGE;
                         middir = *dir;
-                       *dir = NULL;
-                       nr_pages_to_free++;
-                       list_add(&middir->lru, &pages_to_free);
+                       if (punch_hole)
+                               needs_lock = &info->lock;
+                       if (upper_limit >= stage) {
+                               if (needs_lock) {
+                                       spin_lock(needs_lock);
+                                       *dir = NULL;
+                                       spin_unlock(needs_lock);
+                                       needs_lock = NULL;
+                               } else
+                                       *dir = NULL;
+                               nr_pages_to_free++;
+                               list_add(&middir->lru, &pages_to_free);
+                       }
                         shmem_dir_unmap(dir);
                         cond_resched();
                         dir = shmem_dir_map(middir);
                         diroff = 0;
                 }
+               punch_lock = needs_lock;
                 subdir = dir[diroff];
-               if (subdir && page_private(subdir)) {
+               if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
+                       if (needs_lock) {
+                               spin_lock(needs_lock);
+                               dir[diroff] = NULL;
+                               spin_unlock(needs_lock);
+                               punch_lock = NULL;
+                       } else
+                               dir[diroff] = NULL;
+                       nr_pages_to_free++;
+                       list_add(&subdir->lru, &pages_to_free);
+               }
+               if (subdir && page_private(subdir) /* has swap entries */) {
                         size = limit - idx;
                         if (size > ENTRIES_PER_PAGE)
                                 size = ENTRIES_PER_PAGE;
                         freed = shmem_map_and_free_swp(subdir,
-                                               offset, size, &dir);
+                                       offset, size, &dir, punch_lock);
                         if (!dir)
                                 dir = shmem_dir_map(middir);
                         nr_swaps_freed += freed;
-                       if (offset)
+                       if (offset || punch_lock) {
                                 spin_lock(&info->lock);
-                       set_page_private(subdir, page_private(subdir) - freed);
-                       if (offset)
+                               set_page_private(subdir,
+                                       page_private(subdir) - freed);
                                 spin_unlock(&info->lock);
-                       if (!punch_hole)
-                               BUG_ON(page_private(subdir) > offset);
-               }
-               if (offset)
-                       offset = 0;
-               else if (subdir && !page_private(subdir)) {
-                       dir[diroff] = NULL;
-                       nr_pages_to_free++;
-                       list_add(&subdir->lru, &pages_to_free);
+                       } else
+                               BUG_ON(page_private(subdir) != freed);
                 }
+               offset = 0;
         }
  done1:
         shmem_dir_unmap(dir);
@@ -614,8 +674,16 @@ done2:
                  * generic_delete_inode did it, before we lowered next_index.
                  * Also, though shmem_getpage checks i_size before adding to
                  * cache, no recheck after: so fix the narrow window there too.
+                *
+                * Recalling truncate_inode_pages_range and unmap_mapping_range
+                * every time for punch_hole (which never got a chance to clear
+                * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
+                * yet hardly ever necessary: try to optimize them out later.
                  */
                 truncate_inode_pages_range(inode->i_mapping, start, end);
+               if (punch_hole)
+                       unmap_mapping_range(inode->i_mapping, start,
+                                                       end - start, 1);
         }
  
         spin_lock(&info->lock);
@@ -899,6 +967,8 @@ static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_
                 *nodelist++ = '\0';
                 if (nodelist_parse(nodelist, *policy_nodes))
                         goto out;
+               if (!nodes_subset(*policy_nodes, node_online_map))
+                       goto out;
         }
         if (!strcmp(value, "default")) {
                 *policy = MPOL_DEFAULT;
@@ -1030,9 +1100,9 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
          * Normally, filepage is NULL on entry, and either found
          * uptodate immediately, or allocated and zeroed, or read
          * in under swappage, which is then assigned to filepage.
-        * But shmem_prepare_write passes in a locked filepage,
-        * which may be found not uptodate by other callers too,
-        * and may need to be copied from the swappage read in.
+        * But shmem_readpage and shmem_prepare_write pass in a locked
+        * filepage, which may be found not uptodate by other callers
+        * too, and may need to be copied from the swappage read in.
          */
  repeat:
         if (!filepage)
@@ -1415,9 +1485,18 @@ static const struct inode_operations shmem_symlink_inode_operations;
  static const struct inode_operations shmem_symlink_inline_operations;
  
  /*
- * Normally tmpfs makes no use of shmem_prepare_write, but it
- * lets a tmpfs file be used read-write below the loop driver.
+ * Normally tmpfs avoids the use of shmem_readpage and shmem_prepare_write;
+ * but providing them allows a tmpfs file to be used for splice, sendfile, and
+ * below the loop driver, in the generic fashion that many filesystems support.
   */
+static int shmem_readpage(struct file *file, struct page *page)
+{
+       struct inode *inode = page->mapping->host;
+       int error = shmem_getpage(inode, page->index, &page, SGP_CACHE, NULL);
+       unlock_page(page);
+       return error;
+}
+
  static int
  shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
  {
@@ -1641,25 +1720,6 @@ static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count
         return desc.error;
  }
  
-static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos,
-                        size_t count, read_actor_t actor, void *target)
-{
-       read_descriptor_t desc;
-
-       if (!count)
-               return 0;
-
-       desc.written = 0;
-       desc.count = count;
-       desc.arg.data = target;
-       desc.error = 0;
-
-       do_shmem_file_read(in_file, ppos, &desc, actor);
-       if (desc.written)
-               return desc.written;
-       return desc.error;
-}
-
  static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
  {
         struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2290,14 +2350,11 @@ static void init_once(void *foo, struct kmem_cache *cachep,
  {
         struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
  
-       if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-           SLAB_CTOR_CONSTRUCTOR) {
-               inode_init_once(&p->vfs_inode);
+       inode_init_once(&p->vfs_inode);
  #ifdef CONFIG_TMPFS_POSIX_ACL
-               p->i_acl = NULL;
-               p->i_default_acl = NULL;
+       p->i_acl = NULL;
+       p->i_default_acl = NULL;
  #endif
-       }
  }
  
  static int init_inodecache(void)
@@ -2319,6 +2376,7 @@ static const struct address_space_operations shmem_aops = {
         .writepage      = shmem_writepage,
         .set_page_dirty = __set_page_dirty_no_writeback,
  #ifdef CONFIG_TMPFS
+       .readpage       = shmem_readpage,
         .prepare_write  = shmem_prepare_write,
         .commit_write   = simple_commit_write,
  #endif
@@ -2332,7 +2390,8 @@ static const struct file_operations shmem_file_operations = {
         .read           = shmem_file_read,
         .write          = shmem_file_write,
         .fsync          = simple_sync_file,
-       .sendfile       = shmem_file_sendfile,
+       .splice_read    = generic_file_splice_read,
+       .splice_write   = generic_file_splice_write,
  #endif
  };
  
@@ -2383,7 +2442,7 @@ static const struct inode_operations shmem_special_inode_operations = {
  #endif
  };
  
-static struct super_operations shmem_ops = {
+static const struct super_operations shmem_ops = {
         .alloc_inode    = shmem_alloc_inode,
         .destroy_inode  = shmem_destroy_inode,
  #ifdef CONFIG_TMPFS