include/linux/cgroup.h

   1 #ifndef _LINUX_CGROUP_H
   2 #define _LINUX_CGROUP_H
   3 /*
   4  *  cgroup interface
   5  *
   6  *  Copyright (C) 2003 BULL SA
   7  *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
   8  *
   9  */
  10
  11 #include <linux/sched.h>
  12 #include <linux/cpumask.h>
  13 #include <linux/nodemask.h>
  14 #include <linux/rcupdate.h>
  15 #include <linux/cgroupstats.h>
  16 #include <linux/prio_heap.h>
  17 #include <linux/rwsem.h>
  18
  19 #ifdef CONFIG_CGROUPS
  20
  21 struct cgroupfs_root;
  22 struct cgroup_subsys;
  23 struct inode;
  24 struct cgroup;
  25
  26 extern int cgroup_init_early(void);
  27 extern int cgroup_init(void);
  28 extern void cgroup_lock(void);
  29 extern bool cgroup_lock_live_group(struct cgroup *cgrp);
  30 extern void cgroup_unlock(void);
  31 extern void cgroup_fork(struct task_struct *p);
  32 extern void cgroup_fork_callbacks(struct task_struct *p);
  33 extern void cgroup_post_fork(struct task_struct *p);
  34 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
  35 extern int cgroupstats_build(struct cgroupstats *stats,
  36                                 struct dentry *dentry);
  37
  38 extern struct file_operations proc_cgroup_operations;
  39
  40 /* Define the enumeration of all cgroup subsystems */
  41 #define SUBSYS(_x) _x ## _subsys_id,
  42 enum cgroup_subsys_id {
  43 #include <linux/cgroup_subsys.h>
  44         CGROUP_SUBSYS_COUNT
  45 };
  46 #undef SUBSYS
  47
  48 /* Per-subsystem/per-cgroup state maintained by the system. */
  49 struct cgroup_subsys_state {
  50         /* The cgroup that this subsystem is attached to. Useful
  51          * for subsystems that want to know about the cgroup
  52          * hierarchy structure */
  53         struct cgroup *cgroup;
  54
  55         /* State maintained by the cgroup system to allow
  56          * subsystems to be "busy". Should be accessed via css_get()
  57          * and css_put() */
  58
  59         atomic_t refcnt;
  60
  61         unsigned long flags;
  62 };
  63
  64 /* bits in struct cgroup_subsys_state flags field */
  65 enum {
  66         CSS_ROOT, /* This CSS is the root of the subsystem */
  67 };
  68
  69 /*
  70  * Call css_get() to hold a reference on the cgroup;
  71  *
  72  */
  73
  74 static inline void css_get(struct cgroup_subsys_state *css)
  75 {
  76         /* We don't need to reference count the root state */
  77         if (!test_bit(CSS_ROOT, &css->flags))
  78                 atomic_inc(&css->refcnt);
  79 }
  80 /*
  81  * css_put() should be called to release a reference taken by
  82  * css_get()
  83  */
  84
  85 extern void __css_put(struct cgroup_subsys_state *css);
  86 static inline void css_put(struct cgroup_subsys_state *css)
  87 {
  88         if (!test_bit(CSS_ROOT, &css->flags))
  89                 __css_put(css);
  90 }
  91
  92 /* bits in struct cgroup flags field */
  93 enum {
  94         /* Control Group is dead */
  95         CGRP_REMOVED,
  96         /* Control Group has previously had a child cgroup or a task,
  97          * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */
  98         CGRP_RELEASABLE,
  99         /* Control Group requires release notifications to userspace */
 100         CGRP_NOTIFY_ON_RELEASE,
 101 };
 102
 103 struct cgroup {
 104         unsigned long flags;            /* "unsigned long" so bitops work */
 105
 106         /* count users of this cgroup. >0 means busy, but doesn't
 107          * necessarily indicate the number of tasks in the
 108          * cgroup */
 109         atomic_t count;
 110
 111         /*
 112          * We link our 'sibling' struct into our parent's 'children'.
 113          * Our children link their 'sibling' into our 'children'.
 114          */
 115         struct list_head sibling;       /* my parent's children */
 116         struct list_head children;      /* my children */
 117
 118         struct cgroup *parent;  /* my parent */
 119         struct dentry *dentry;          /* cgroup fs entry */
 120
 121         /* Private pointers for each registered subsystem */
 122         struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
 123
 124         struct cgroupfs_root *root;
 125         struct cgroup *top_cgroup;
 126
 127         /*
 128          * List of cg_cgroup_links pointing at css_sets with
 129          * tasks in this cgroup. Protected by css_set_lock
 130          */
 131         struct list_head css_sets;
 132
 133         /*
 134          * Linked list running through all cgroups that can
 135          * potentially be reaped by the release agent. Protected by
 136          * release_list_lock
 137          */
 138         struct list_head release_list;
 139
 140         /* pids_mutex protects the fields below */
 141         struct rw_semaphore pids_mutex;
 142         /* Array of process ids in the cgroup */
 143         pid_t *tasks_pids;
 144         /* How many files are using the current tasks_pids array */
 145         int pids_use_count;
 146         /* Length of the current tasks_pids array */
 147         int pids_length;
 148 };
 149
 150 /* A css_set is a structure holding pointers to a set of
 151  * cgroup_subsys_state objects. This saves space in the task struct
 152  * object and speeds up fork()/exit(), since a single inc/dec and a
 153  * list_add()/del() can bump the reference count on the entire
 154  * cgroup set for a task.
 155  */
 156
 157 struct css_set {
 158
 159         /* Reference count */
 160         atomic_t refcount;
 161
 162         /*
 163          * List running through all cgroup groups in the same hash
 164          * slot. Protected by css_set_lock
 165          */
 166         struct hlist_node hlist;
 167
 168         /*
 169          * List running through all tasks using this cgroup
 170          * group. Protected by css_set_lock
 171          */
 172         struct list_head tasks;
 173
 174         /*
 175          * List of cg_cgroup_link objects on link chains from
 176          * cgroups referenced from this css_set. Protected by
 177          * css_set_lock
 178          */
 179         struct list_head cg_links;
 180
 181         /*
 182          * Set of subsystem states, one for each subsystem. This array
 183          * is immutable after creation apart from the init_css_set
 184          * during subsystem registration (at boot time).
 185          */
 186         struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
 187 };
 188
 189 /*
 190  * cgroup_map_cb is an abstract callback API for reporting map-valued
 191  * control files
 192  */
 193
 194 struct cgroup_map_cb {
 195         int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value);
 196         void *state;
 197 };
 198
 199 /* struct cftype:
 200  *
 201  * The files in the cgroup filesystem mostly have a very simple read/write
 202  * handling, some common function will take care of it. Nevertheless some cases
 203  * (read tasks) are special and therefore I define this structure for every
 204  * kind of file.
 205  *
 206  *
 207  * When reading/writing to a file:
 208  *      - the cgroup to use is file->f_dentry->d_parent->d_fsdata
 209  *      - the 'cftype' of the file is file->f_dentry->d_fsdata
 210  */
 211
 212 #define MAX_CFTYPE_NAME 64
 213 struct cftype {
 214         /* By convention, the name should begin with the name of the
 215          * subsystem, followed by a period */
 216         char name[MAX_CFTYPE_NAME];
 217         int private;
 218
 219         /*
 220          * If non-zero, defines the maximum length of string that can
 221          * be passed to write_string; defaults to 64
 222          */
 223         size_t max_write_len;
 224
 225         int (*open)(struct inode *inode, struct file *file);
 226         ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
 227                         struct file *file,
 228                         char __user *buf, size_t nbytes, loff_t *ppos);
 229         /*
 230          * read_u64() is a shortcut for the common case of returning a
 231          * single integer. Use it in place of read()
 232          */
 233         u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
 234         /*
 235          * read_s64() is a signed version of read_u64()
 236          */
 237         s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
 238         /*
 239          * read_map() is used for defining a map of key/value
 240          * pairs. It should call cb->fill(cb, key, value) for each
 241          * entry. The key/value pairs (and their ordering) should not
 242          * change between reboots.
 243          */
 244         int (*read_map)(struct cgroup *cont, struct cftype *cft,
 245                         struct cgroup_map_cb *cb);
 246         /*
 247          * read_seq_string() is used for outputting a simple sequence
 248          * using seqfile.
 249          */
 250         int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
 251                                struct seq_file *m);
 252
 253         ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
 254                          struct file *file,
 255                          const char __user *buf, size_t nbytes, loff_t *ppos);
 256
 257         /*
 258          * write_u64() is a shortcut for the common case of accepting
 259          * a single integer (as parsed by simple_strtoull) from
 260          * userspace. Use in place of write(); return 0 or error.
 261          */
 262         int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
 263         /*
 264          * write_s64() is a signed version of write_u64()
 265          */
 266         int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
 267
 268         /*
 269          * write_string() is passed a nul-terminated kernelspace
 270          * buffer of maximum length determined by max_write_len.
 271          * Returns 0 or -ve error code.
 272          */
 273         int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
 274                             const char *buffer);
 275         /*
 276          * trigger() callback can be used to get some kick from the
 277          * userspace, when the actual string written is not important
 278          * at all. The private field can be used to determine the
 279          * kick type for multiplexing.
 280          */
 281         int (*trigger)(struct cgroup *cgrp, unsigned int event);
 282
 283         int (*release)(struct inode *inode, struct file *file);
 284 };
 285
 286 struct cgroup_scanner {
 287         struct cgroup *cg;
 288         int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
 289         void (*process_task)(struct task_struct *p,
 290                         struct cgroup_scanner *scan);
 291         struct ptr_heap *heap;
 292 };
 293
 294 /* Add a new file to the given cgroup directory. Should only be
 295  * called by subsystems from within a populate() method */
 296 int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 297                        const struct cftype *cft);
 298
 299 /* Add a set of new files to the given cgroup directory. Should
 300  * only be called by subsystems from within a populate() method */
 301 int cgroup_add_files(struct cgroup *cgrp,
 302                         struct cgroup_subsys *subsys,
 303                         const struct cftype cft[],
 304                         int count);
 305
 306 int cgroup_is_removed(const struct cgroup *cgrp);
 307
 308 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
 309
 310 int cgroup_task_count(const struct cgroup *cgrp);
 311
 312 /* Return true if the cgroup is a descendant of the current cgroup */
 313 int cgroup_is_descendant(const struct cgroup *cgrp);
 314
 315 /* Control Group subsystem type. See Documentation/cgroups.txt for details */
 316
 317 struct cgroup_subsys {
 318         struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
 319                                                   struct cgroup *cgrp);
 320         void (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 321         void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 322         int (*can_attach)(struct cgroup_subsys *ss,
 323                           struct cgroup *cgrp, struct task_struct *tsk);
 324         void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 325                         struct cgroup *old_cgrp, struct task_struct *tsk);
 326         void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
 327         void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
 328         int (*populate)(struct cgroup_subsys *ss,
 329                         struct cgroup *cgrp);
 330         void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 331         void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
 332         /*
 333          * This routine is called with the task_lock of mm->owner held
 334          */
 335         void (*mm_owner_changed)(struct cgroup_subsys *ss,
 336                                         struct cgroup *old,
 337                                         struct cgroup *new,
 338                                         struct task_struct *p);
 339         int subsys_id;
 340         int active;
 341         int disabled;
 342         int early_init;
 343 #define MAX_CGROUP_TYPE_NAMELEN 32
 344         const char *name;
 345
 346         /* Protected by RCU */
 347         struct cgroupfs_root *root;
 348
 349         struct list_head sibling;
 350 };
 351
 352 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
 353 #include <linux/cgroup_subsys.h>
 354 #undef SUBSYS
 355
 356 static inline struct cgroup_subsys_state *cgroup_subsys_state(
 357         struct cgroup *cgrp, int subsys_id)
 358 {
 359         return cgrp->subsys[subsys_id];
 360 }
 361
 362 static inline struct cgroup_subsys_state *task_subsys_state(
 363         struct task_struct *task, int subsys_id)
 364 {
 365         return rcu_dereference(task->cgroups->subsys[subsys_id]);
 366 }
 367
 368 static inline struct cgroup* task_cgroup(struct task_struct *task,
 369                                                int subsys_id)
 370 {
 371         return task_subsys_state(task, subsys_id)->cgroup;
 372 }
 373
 374 int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
 375                                                         char *nodename);
 376
 377 /* A cgroup_iter should be treated as an opaque object */
 378 struct cgroup_iter {
 379         struct list_head *cg_link;
 380         struct list_head *task;
 381 };
 382
 383 /* To iterate across the tasks in a cgroup:
 384  *
 385  * 1) call cgroup_iter_start to intialize an iterator
 386  *
 387  * 2) call cgroup_iter_next() to retrieve member tasks until it
 388  *    returns NULL or until you want to end the iteration
 389  *
 390  * 3) call cgroup_iter_end() to destroy the iterator.
 391  *
 392  * Or, call cgroup_scan_tasks() to iterate through every task in a cpuset.
 393  *    - cgroup_scan_tasks() holds the css_set_lock when calling the test_task()
 394  *      callback, but not while calling the process_task() callback.
 395  */
 396 void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it);
 397 struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
 398                                         struct cgroup_iter *it);
 399 void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 400 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 401 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 402
 403 void cgroup_mm_owner_callbacks(struct task_struct *old,
 404                                struct task_struct *new);
 405
 406 #else /* !CONFIG_CGROUPS */
 407
 408 static inline int cgroup_init_early(void) { return 0; }
 409 static inline int cgroup_init(void) { return 0; }
 410 static inline void cgroup_fork(struct task_struct *p) {}
 411 static inline void cgroup_fork_callbacks(struct task_struct *p) {}
 412 static inline void cgroup_post_fork(struct task_struct *p) {}
 413 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
 414
 415 static inline void cgroup_lock(void) {}
 416 static inline void cgroup_unlock(void) {}
 417 static inline int cgroupstats_build(struct cgroupstats *stats,
 418                                         struct dentry *dentry)
 419 {
 420         return -EINVAL;
 421 }
 422
 423 static inline void cgroup_mm_owner_callbacks(struct task_struct *old,
 424                                              struct task_struct *new) {}
 425
 426 #endif /* !CONFIG_CGROUPS */
 427
 428 #endif /* _LINUX_CGROUP_H */