]> pilppa.org Git - linux-2.6-omap-h63xx.git/blobdiff - kernel/pid.c
Hibernation: Introduce begin() and end() callbacks
[linux-2.6-omap-h63xx.git] / kernel / pid.c
index a2b4cbbdd63dbe20d54e78c3ed6737d164070275..f815455431bff3c95855a674086e6c1d8322a7c4 100644 (file)
  * allocation scenario when all but one out of 1 million PIDs possible are
  * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
  * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
+ *
+ * Pid namespaces:
+ *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
+ *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
+ *     Many thanks to Oleg Nesterov for comments and help
+ *
  */
 
 #include <linux/mm.h>
 #include <linux/hash.h>
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
+#include <linux/syscalls.h>
 
 #define pid_hashfn(nr, ns)     \
        hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
 static struct hlist_head *pid_hash;
 static int pidhash_shift;
 struct pid init_struct_pid = INIT_STRUCT_PID;
+static struct kmem_cache *pid_ns_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
 
@@ -73,10 +81,20 @@ struct pid_namespace init_pid_ns = {
 };
 EXPORT_SYMBOL_GPL(init_pid_ns);
 
-int is_global_init(struct task_struct *tsk)
+int is_container_init(struct task_struct *tsk)
 {
-       return tsk == init_pid_ns.child_reaper;
+       int ret = 0;
+       struct pid *pid;
+
+       rcu_read_lock();
+       pid = task_pid(tsk);
+       if (pid != NULL && pid->numbers[pid->level].nr == 1)
+               ret = 1;
+       rcu_read_unlock();
+
+       return ret;
 }
+EXPORT_SYMBOL(is_container_init);
 
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
@@ -192,8 +210,7 @@ fastcall void put_pid(struct pid *pid)
        if ((atomic_read(&pid->count) == 1) ||
             atomic_dec_and_test(&pid->count)) {
                kmem_cache_free(ns->pid_cachep, pid);
-               if (ns != &init_pid_ns)
-                       put_pid_ns(ns);
+               put_pid_ns(ns);
        }
 }
 EXPORT_SYMBOL_GPL(put_pid);
@@ -244,11 +261,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
                tmp = tmp->parent;
        }
 
-       if (ns != &init_pid_ns)
-               get_pid_ns(ns);
-
+       get_pid_ns(ns);
        pid->level = ns->level;
-       pid->nr = pid->numbers[0].nr;
        atomic_set(&pid->count, 1);
        for (type = 0; type < PIDTYPE_MAX; ++type)
                INIT_HLIST_HEAD(&pid->tasks[type]);
@@ -288,6 +302,18 @@ struct pid * fastcall find_pid_ns(int nr, struct pid_namespace *ns)
 }
 EXPORT_SYMBOL_GPL(find_pid_ns);
 
+struct pid *find_vpid(int nr)
+{
+       return find_pid_ns(nr, current->nsproxy->pid_ns);
+}
+EXPORT_SYMBOL_GPL(find_vpid);
+
+struct pid *find_pid(int nr)
+{
+       return find_pid_ns(nr, &init_pid_ns);
+}
+EXPORT_SYMBOL_GPL(find_pid);
+
 /*
  * attach_pid() must be called with the tasklist_lock write-held.
  */
@@ -354,6 +380,25 @@ struct task_struct *find_task_by_pid_type_ns(int type, int nr,
 
 EXPORT_SYMBOL(find_task_by_pid_type_ns);
 
+struct task_struct *find_task_by_pid(pid_t nr)
+{
+       return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
+}
+EXPORT_SYMBOL(find_task_by_pid);
+
+struct task_struct *find_task_by_vpid(pid_t vnr)
+{
+       return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
+                       current->nsproxy->pid_ns);
+}
+EXPORT_SYMBOL(find_task_by_vpid);
+
+struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
+{
+       return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
+}
+EXPORT_SYMBOL(find_task_by_pid_ns);
+
 struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
 {
        struct pid *pid;
@@ -398,6 +443,30 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
        return nr;
 }
 
+pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+       return pid_nr_ns(task_pid(tsk), ns);
+}
+EXPORT_SYMBOL(task_pid_nr_ns);
+
+pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+       return pid_nr_ns(task_tgid(tsk), ns);
+}
+EXPORT_SYMBOL(task_tgid_nr_ns);
+
+pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+       return pid_nr_ns(task_pgrp(tsk), ns);
+}
+EXPORT_SYMBOL(task_pgrp_nr_ns);
+
+pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+       return pid_nr_ns(task_session(tsk), ns);
+}
+EXPORT_SYMBOL(task_session_nr_ns);
+
 /*
  * Used by proc to find the first pid that is greater then or equal to nr.
  *
@@ -449,8 +518,8 @@ static struct kmem_cache *create_pid_cachep(int nr_ids)
 
        snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
        cachep = kmem_cache_create(pcache->name,
-                       /* FIXME add numerical ids here */
-                       sizeof(struct pid), 0, SLAB_HWCACHE_ALIGN, NULL);
+                       sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
+                       0, SLAB_HWCACHE_ALIGN, NULL);
        if (cachep == NULL)
                goto err_cachep;
 
@@ -468,19 +537,128 @@ err_alloc:
        return NULL;
 }
 
+#ifdef CONFIG_PID_NS
+static struct pid_namespace *create_pid_namespace(int level)
+{
+       struct pid_namespace *ns;
+       int i;
+
+       ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
+       if (ns == NULL)
+               goto out;
+
+       ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!ns->pidmap[0].page)
+               goto out_free;
+
+       ns->pid_cachep = create_pid_cachep(level + 1);
+       if (ns->pid_cachep == NULL)
+               goto out_free_map;
+
+       kref_init(&ns->kref);
+       ns->last_pid = 0;
+       ns->child_reaper = NULL;
+       ns->level = level;
+
+       set_bit(0, ns->pidmap[0].page);
+       atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
+
+       for (i = 1; i < PIDMAP_ENTRIES; i++) {
+               ns->pidmap[i].page = 0;
+               atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
+       }
+
+       return ns;
+
+out_free_map:
+       kfree(ns->pidmap[0].page);
+out_free:
+       kmem_cache_free(pid_ns_cachep, ns);
+out:
+       return ERR_PTR(-ENOMEM);
+}
+
+static void destroy_pid_namespace(struct pid_namespace *ns)
+{
+       int i;
+
+       for (i = 0; i < PIDMAP_ENTRIES; i++)
+               kfree(ns->pidmap[i].page);
+       kmem_cache_free(pid_ns_cachep, ns);
+}
+
 struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
 {
+       struct pid_namespace *new_ns;
+
        BUG_ON(!old_ns);
-       get_pid_ns(old_ns);
-       return old_ns;
+       new_ns = get_pid_ns(old_ns);
+       if (!(flags & CLONE_NEWPID))
+               goto out;
+
+       new_ns = ERR_PTR(-EINVAL);
+       if (flags & CLONE_THREAD)
+               goto out_put;
+
+       new_ns = create_pid_namespace(old_ns->level + 1);
+       if (!IS_ERR(new_ns))
+               new_ns->parent = get_pid_ns(old_ns);
+
+out_put:
+       put_pid_ns(old_ns);
+out:
+       return new_ns;
 }
 
 void free_pid_ns(struct kref *kref)
 {
-       struct pid_namespace *ns;
+       struct pid_namespace *ns, *parent;
 
        ns = container_of(kref, struct pid_namespace, kref);
-       kfree(ns);
+
+       parent = ns->parent;
+       destroy_pid_namespace(ns);
+
+       if (parent != NULL)
+               put_pid_ns(parent);
+}
+#endif /* CONFIG_PID_NS */
+
+void zap_pid_ns_processes(struct pid_namespace *pid_ns)
+{
+       int nr;
+       int rc;
+
+       /*
+        * The last thread in the cgroup-init thread group is terminating.
+        * Find remaining pid_ts in the namespace, signal and wait for them
+        * to exit.
+        *
+        * Note:  This signals each threads in the namespace - even those that
+        *        belong to the same thread group, To avoid this, we would have
+        *        to walk the entire tasklist looking a processes in this
+        *        namespace, but that could be unnecessarily expensive if the
+        *        pid namespace has just a few processes. Or we need to
+        *        maintain a tasklist for each pid namespace.
+        *
+        */
+       read_lock(&tasklist_lock);
+       nr = next_pidmap(pid_ns, 1);
+       while (nr > 0) {
+               kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
+               nr = next_pidmap(pid_ns, nr);
+       }
+       read_unlock(&tasklist_lock);
+
+       do {
+               clear_thread_flag(TIF_SIGPENDING);
+               rc = sys_wait4(-1, NULL, __WALL, NULL);
+       } while (rc != -ECHILD);
+
+
+       /* Child reaper for the pid namespace is going away */
+       pid_ns->child_reaper = NULL;
+       return;
 }
 
 /*
@@ -518,4 +696,6 @@ void __init pidmap_init(void)
        init_pid_ns.pid_cachep = create_pid_cachep(1);
        if (init_pid_ns.pid_cachep == NULL)
                panic("Can't create pid_1 cachep\n");
+
+       pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
 }