X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fcpuset.c;h=86ea9e34e3260138401663be21dd22d1892c4ce3;hb=5083e56326208f9a1d4e597529912004968c77d7;hp=fe5407ca2f1ebd90cde99a2620c2d9d5b68fde1e;hpb=addf2c739d9015d3e9c0500b58a3af051cd58ea7;p=linux-2.6-omap-h63xx.git

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fe5407ca2f1..86ea9e34e32 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -127,6 +127,7 @@ struct cpuset_hotplug_scanner {
 typedef enum {
 	CS_CPU_EXCLUSIVE,
 	CS_MEM_EXCLUSIVE,
+	CS_MEM_HARDWALL,
 	CS_MEMORY_MIGRATE,
 	CS_SCHED_LOAD_BALANCE,
 	CS_SPREAD_PAGE,
@@ -144,6 +145,11 @@ static inline int is_mem_exclusive(const struct cpuset *cs)
 	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
 }
 
+static inline int is_mem_hardwall(const struct cpuset *cs)
+{
+	return test_bit(CS_MEM_HARDWALL, &cs->flags);
+}
+
 static inline int is_sched_load_balance(const struct cpuset *cs)
 {
 	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
@@ -1025,11 +1031,9 @@ int current_cpuset_is_being_rebound(void)
 	return task_cs(current) == cpuset_being_rebound;
 }
 
-static int update_relax_domain_level(struct cpuset *cs, char *buf)
+static int update_relax_domain_level(struct cpuset *cs, s64 val)
 {
-	int val = simple_strtol(buf, NULL, 10);
-
-	if (val < 0)
+	if ((int)val < 0)
 		val = -1;
 
 	if (val != cs->relax_domain_level) {
@@ -1042,12 +1046,9 @@ static int update_relax_domain_level(struct cpuset *cs, char *buf)
 
 /*
  * update_flag - read a 0 or a 1 in a file and update associated flag
- * bit:	the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
- *				CS_SCHED_LOAD_BALANCE,
- *				CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
- *				CS_SPREAD_PAGE, CS_SPREAD_SLAB)
- * cs:	the cpuset to update
- * buf:	the buffer where we read the 0 or 1
+ * bit:		the bit to update (see cpuset_flagbits_t)
+ * cs:		the cpuset to update
+ * turning_on: 	whether the flag is being set or cleared
  *
  * Call with cgroup_mutex held.
  */
@@ -1228,6 +1229,7 @@ typedef enum {
 	FILE_MEMLIST,
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
+	FILE_MEM_HARDWALL,
 	FILE_SCHED_LOAD_BALANCE,
 	FILE_SCHED_RELAX_DOMAIN_LEVEL,
 	FILE_MEMORY_PRESSURE_ENABLED,
@@ -1276,9 +1278,6 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
 	case FILE_MEMLIST:
 		retval = update_nodemask(cs, buffer);
 		break;
-	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
-		retval = update_relax_domain_level(cs, buffer);
-		break;
 	default:
 		retval = -EINVAL;
 		goto out2;
@@ -1313,6 +1312,9 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
 	case FILE_MEM_EXCLUSIVE:
 		retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
 		break;
+	case FILE_MEM_HARDWALL:
+		retval = update_flag(CS_MEM_HARDWALL, cs, val);
+		break;
 	case FILE_SCHED_LOAD_BALANCE:
 		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
 		break;
@@ -1341,6 +1343,30 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
 	return retval;
 }
 
+static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
+{
+	int retval = 0;
+	struct cpuset *cs = cgroup_cs(cgrp);
+	cpuset_filetype_t type = cft->private;
+
+	cgroup_lock();
+
+	if (cgroup_is_removed(cgrp)) {
+		cgroup_unlock();
+		return -ENODEV;
+	}
+	switch (type) {
+	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+		retval = update_relax_domain_level(cs, val);
+		break;
+	default:
+		retval = -EINVAL;
+		break;
+	}
+	cgroup_unlock();
+	return retval;
+}
+
 /*
  * These ascii lists should be read in a single call, by using a user
  * buffer large enough to hold the entire map.  If read in smaller
@@ -1399,9 +1425,6 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
 	case FILE_MEMLIST:
 		s += cpuset_sprintf_memlist(s, cs);
 		break;
-	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
-		s += sprintf(s, "%d", cs->relax_domain_level);
-		break;
 	default:
 		retval = -EINVAL;
 		goto out;
@@ -1423,6 +1446,8 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
 		return is_cpu_exclusive(cs);
 	case FILE_MEM_EXCLUSIVE:
 		return is_mem_exclusive(cs);
+	case FILE_MEM_HARDWALL:
+		return is_mem_hardwall(cs);
 	case FILE_SCHED_LOAD_BALANCE:
 		return is_sched_load_balance(cs);
 	case FILE_MEMORY_MIGRATE:
@@ -1440,6 +1465,18 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
 	}
 }
 
+static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuset *cs = cgroup_cs(cont);
+	cpuset_filetype_t type = cft->private;
+	switch (type) {
+	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+		return cs->relax_domain_level;
+	default:
+		BUG();
+	}
+}
+
 
 /*
  * for the common functions, 'private' gives the type of file
@@ -1474,6 +1511,13 @@ static struct cftype files[] = {
 		.private = FILE_MEM_EXCLUSIVE,
 	},
 
+	{
+		.name = "mem_hardwall",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_MEM_HARDWALL,
+	},
+
 	{
 		.name = "sched_load_balance",
 		.read_u64 = cpuset_read_u64,
@@ -1483,8 +1527,8 @@ static struct cftype files[] = {
 
 	{
 		.name = "sched_relax_domain_level",
-		.read_u64 = cpuset_read_u64,
-		.write_u64 = cpuset_write_u64,
+		.read_s64 = cpuset_read_s64,
+		.write_s64 = cpuset_write_s64,
 		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
 	},
 
@@ -1963,14 +2007,14 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 }
 
 /*
- * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
- * ancestor to the specified cpuset.  Call holding callback_mutex.
- * If no ancestor is mem_exclusive (an unusual configuration), then
- * returns the root cpuset.
+ * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
+ * mem_hardwall ancestor to the specified cpuset.  Call holding
+ * callback_mutex.  If no ancestor is mem_exclusive or mem_hardwall
+ * (an unusual configuration), then returns the root cpuset.
  */
-static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
+static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
 {
-	while (!is_mem_exclusive(cs) && cs->parent)
+	while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent)
 		cs = cs->parent;
 	return cs;
 }
@@ -1984,7 +2028,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * __GFP_THISNODE is set, yes, we can always allocate.  If zone
  * z's node is in our tasks mems_allowed, yes.  If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
- * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * hardwalled cpuset ancestor to this tasks cpuset, yes.
  * If the task has been OOM killed and has access to memory reserves
  * as specified by the TIF_MEMDIE flag, yes.
  * Otherwise, no.
@@ -2007,7 +2051,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * and do not allow allocations outside the current tasks cpuset
  * unless the task has been OOM killed as is marked TIF_MEMDIE.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest enclosing mem_exclusive ancestor cpuset.
+ * nearest enclosing hardwalled ancestor cpuset.
  *
  * Scanning up parent cpusets requires callback_mutex.  The
  * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
@@ -2030,7 +2074,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
 * in_interrupt - any node ok (current task context irrelevant)
 * GFP_ATOMIC   - any node ok
 * TIF_MEMDIE   - any node ok
- * GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
+ * GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
 * GFP_USER     - only nodes in current tasks mems allowed ok.
 *
 * Rule:
@@ -2067,7 +2111,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 	mutex_lock(&callback_mutex);
 
 	task_lock(current);
-	cs = nearest_exclusive_ancestor(task_cs(current));
+	cs = nearest_hardwall_ancestor(task_cs(current));
 	task_unlock(current);
 
 	allowed = node_isset(node, cs->mems_allowed);
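The hunks above add a "mem_hardwall" flag file to the cpuset cgroup and switch "sched_relax_domain_level" over to the s64 read/write handlers. The sketch below is a rough userspace illustration only, not part of the patch: it assumes the cpuset filesystem is mounted at /dev/cpuset, uses a hypothetical child cpuset named "example", and uses unprefixed file names (on some kernel/cgroup configurations the files appear as cpuset.mem_hardwall and cpuset.sched_relax_domain_level instead).

/*
 * Illustrative sketch (assumptions noted above): toggle the new
 * mem_hardwall flag and set sched_relax_domain_level from userspace.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

static int write_file(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	if (fputs(val, f) == EOF)
		perror(path);
	return fclose(f);
}

int main(void)
{
	const char *cs = "/dev/cpuset/example";	/* assumed mount point and name */
	char path[128];

	if (mkdir(cs, 0755) && errno != EEXIST) {
		perror(cs);
		return 1;
	}

	/* Mark the cpuset as a hardwall: GFP_KERNEL allocations by its tasks
	 * stop escaping to ancestors (see nearest_hardwall_ancestor() above). */
	snprintf(path, sizeof(path), "%s/mem_hardwall", cs);
	write_file(path, "1");

	/* Now an s64 file; per update_relax_domain_level(), any negative
	 * value is normalized to -1 (use the default relax level). */
	snprintf(path, sizeof(path), "%s/sched_relax_domain_level", cs);
	write_file(path, "-1");

	return 0;
}

The design point visible in the renamed nearest_hardwall_ancestor() is that hardwall confinement of GFP_KERNEL allocations no longer requires marking a cpuset mem_exclusive; either flag now stops the upward scan.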