X-Git-Url: http://pilppa.org/gitweb/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fsched.c;h=dd1a1466c1e6a7412ca43022185c8aed0f76c88d;hb=0a6d4e1dc9154c4376358663d74060d1e33d203e;hp=930bf2e6d714251db28fa423e818e34abd3d21b4;hpb=ad6b646fe55c40ec193240b974561f0a3775d68d;p=linux-2.6-omap-h63xx.git diff --git a/kernel/sched.c b/kernel/sched.c index 930bf2e6d71..dd1a1466c1e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -464,11 +464,15 @@ struct rt_rq { struct rt_prio_array active; unsigned long rt_nr_running; #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED - int highest_prio; /* highest queued rt task prio */ + struct { + int curr; /* highest queued rt task prio */ + int next; /* next highest */ + } highest_prio; #endif #ifdef CONFIG_SMP unsigned long rt_nr_migratory; int overloaded; + struct plist_head pushable_tasks; #endif int rt_throttled; u64 rt_time; @@ -1607,21 +1611,42 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) #endif +#ifdef CONFIG_PREEMPT + /* - * double_lock_balance - lock the busiest runqueue, this_rq is locked already. + * fair double_lock_balance: Safely acquires both rq->locks in a fair + * way at the expense of forcing extra atomic operations in all + * invocations. This assures that the double_lock is acquired using the + * same underlying policy as the spinlock_t on this architecture, which + * reduces latency compared to the unfair variant below. However, it + * also adds more overhead and therefore may reduce throughput. */ -static int double_lock_balance(struct rq *this_rq, struct rq *busiest) +static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) + __releases(this_rq->lock) + __acquires(busiest->lock) + __acquires(this_rq->lock) +{ + spin_unlock(&this_rq->lock); + double_rq_lock(this_rq, busiest); + + return 1; +} + +#else +/* + * Unfair double_lock_balance: Optimizes throughput at the expense of + * latency by eliminating extra atomic operations when the locks are + * already in proper order on entry. This favors lower cpu-ids and will + * grant the double lock to lower cpus over higher ids under contention, + * regardless of entry order into the function. + */ +static int _double_lock_balance(struct rq *this_rq, struct rq *busiest) __releases(this_rq->lock) __acquires(busiest->lock) __acquires(this_rq->lock) { int ret = 0; - if (unlikely(!irqs_disabled())) { - /* printk() doesn't work good under rq->lock */ - spin_unlock(&this_rq->lock); - BUG_ON(1); - } if (unlikely(!spin_trylock(&busiest->lock))) { if (busiest < this_rq) { spin_unlock(&this_rq->lock); @@ -1634,6 +1659,22 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) return ret; } +#endif /* CONFIG_PREEMPT */ + +/* + * double_lock_balance - lock the busiest runqueue, this_rq is locked already. + */ +static int double_lock_balance(struct rq *this_rq, struct rq *busiest) +{ + if (unlikely(!irqs_disabled())) { + /* printk() doesn't work good under rq->lock */ + spin_unlock(&this_rq->lock); + BUG_ON(1); + } + + return _double_lock_balance(this_rq, busiest); +} + static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) __releases(busiest->lock) { @@ -2445,6 +2486,8 @@ void sched_fork(struct task_struct *p, int clone_flags) /* Want to start with kernel preemption disabled. */ task_thread_info(p)->preempt_count = 1; #endif + plist_node_init(&p->pushable_tasks, MAX_PRIO); + put_cpu(); } @@ -2585,6 +2628,12 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) { struct mm_struct *mm = rq->prev_mm; long prev_state; +#ifdef CONFIG_SMP + int post_schedule = 0; + + if (current->sched_class->needs_post_schedule) + post_schedule = current->sched_class->needs_post_schedule(rq); +#endif rq->prev_mm = NULL; @@ -2603,7 +2652,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) finish_arch_switch(prev); finish_lock_switch(rq, prev); #ifdef CONFIG_SMP - if (current->sched_class->post_schedule) + if (post_schedule) current->sched_class->post_schedule(rq); #endif @@ -2984,6 +3033,16 @@ next: pulled++; rem_load_move -= p->se.load.weight; +#ifdef CONFIG_PREEMPT + /* + * NEWIDLE balancing is a source of latency, so preemptible kernels + * will stop after the first task is pulled to minimize the critical + * section. + */ + if (idle == CPU_NEWLY_IDLE) + goto out; +#endif + /* * We only want to steal up to the prescribed amount of weighted load. */ @@ -3030,9 +3089,15 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, sd, idle, all_pinned, &this_best_prio); class = class->next; +#ifdef CONFIG_PREEMPT + /* + * NEWIDLE balancing is a source of latency, so preemptible + * kernels will stop after the first task is pulled to minimize + * the critical section. + */ if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) break; - +#endif } while (class && max_load_move > total_load_moved); return total_load_moved > 0; @@ -3715,7 +3780,7 @@ redo: * don't kick the migration_thread, if the curr * task on busiest cpu can't be moved to this_cpu */ - if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { + if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { double_unlock_balance(this_rq, busiest); all_pinned = 1; return ld_moved; @@ -3728,8 +3793,13 @@ redo: } double_unlock_balance(this_rq, busiest); + /* + * Should not call ttwu while holding a rq->lock + */ + spin_unlock(&this_rq->lock); if (active_balance) wake_up_process(busiest->migration_thread); + spin_lock(&this_rq->lock); } else sd->nr_balance_failed = 0; @@ -6257,9 +6327,7 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { int dest_cpu; - /* FIXME: Use cpumask_of_node here. */ - cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu)); - const struct cpumask *nodemask = &_nodemask; + const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu)); again: /* Look for allowed, online CPU in same node. */ @@ -6959,7 +7027,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) spin_unlock_irqrestore(&rq->lock, flags); } -static int init_rootdomain(struct root_domain *rd, bool bootmem) +static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) { memset(rd, 0, sizeof(*rd)); @@ -6972,7 +7040,7 @@ static int init_rootdomain(struct root_domain *rd, bool bootmem) } if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) - goto free_rd; + goto out; if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) goto free_span; if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) @@ -6988,8 +7056,7 @@ free_online: free_cpumask_var(rd->online); free_span: free_cpumask_var(rd->span); -free_rd: - kfree(rd); +out: return -ENOMEM; } @@ -7170,21 +7237,18 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) static void sched_domain_node_span(int node, struct cpumask *span) { nodemask_t used_nodes; - /* FIXME: use cpumask_of_node() */ - node_to_cpumask_ptr(nodemask, node); int i; - cpus_clear(*span); + cpumask_clear(span); nodes_clear(used_nodes); - cpus_or(*span, *span, *nodemask); + cpumask_or(span, span, cpumask_of_node(node)); node_set(node, used_nodes); for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { int next_node = find_next_best_node(node, &used_nodes); - node_to_cpumask_ptr_next(nodemask, next_node); - cpus_or(*span, *span, *nodemask); + cpumask_or(span, span, cpumask_of_node(next_node)); } } #endif /* CONFIG_NUMA */ @@ -7264,9 +7328,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, { int group; #ifdef CONFIG_SCHED_MC - /* FIXME: Use cpu_coregroup_mask. */ - *mask = cpu_coregroup_map(cpu); - cpus_and(*mask, *mask, *cpu_map); + cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); group = cpumask_first(mask); #elif defined(CONFIG_SCHED_SMT) cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); @@ -7296,10 +7358,8 @@ static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, struct cpumask *nodemask) { int group; - /* FIXME: use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu)); - cpumask_and(nodemask, pnodemask, cpu_map); + cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map); group = cpumask_first(nodemask); if (sg) @@ -7350,10 +7410,8 @@ static void free_sched_groups(const struct cpumask *cpu_map, for (i = 0; i < nr_node_ids; i++) { struct sched_group *oldsg, *sg = sched_group_nodes[i]; - /* FIXME: Use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, i); - cpus_and(*nodemask, *pnodemask, *cpu_map); + cpumask_and(nodemask, cpumask_of_node(i), cpu_map); if (cpumask_empty(nodemask)) continue; @@ -7562,9 +7620,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, for_each_cpu(i, cpu_map) { struct sched_domain *sd = NULL, *p; - /* FIXME: use cpumask_of_node */ - *nodemask = node_to_cpumask(cpu_to_node(i)); - cpus_and(*nodemask, *nodemask, *cpu_map); + cpumask_and(nodemask, cpumask_of_node(cpu_to_node(i)), cpu_map); #ifdef CONFIG_NUMA if (cpumask_weight(cpu_map) > @@ -7605,9 +7661,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, sd = &per_cpu(core_domains, i).sd; SD_INIT(sd, MC); set_domain_attribute(sd, attr); - *sched_domain_span(sd) = cpu_coregroup_map(i); - cpumask_and(sched_domain_span(sd), - sched_domain_span(sd), cpu_map); + cpumask_and(sched_domain_span(sd), cpu_map, + cpu_coregroup_mask(i)); sd->parent = p; p->child = sd; cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); @@ -7643,9 +7698,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, #ifdef CONFIG_SCHED_MC /* Set up multi-core groups */ for_each_cpu(i, cpu_map) { - /* FIXME: Use cpu_coregroup_mask */ - *this_core_map = cpu_coregroup_map(i); - cpus_and(*this_core_map, *this_core_map, *cpu_map); + cpumask_and(this_core_map, cpu_coregroup_mask(i), cpu_map); if (i != cpumask_first(this_core_map)) continue; @@ -7657,9 +7710,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, /* Set up physical groups */ for (i = 0; i < nr_node_ids; i++) { - /* FIXME: Use cpumask_of_node */ - *nodemask = node_to_cpumask(i); - cpus_and(*nodemask, *nodemask, *cpu_map); + cpumask_and(nodemask, cpumask_of_node(i), cpu_map); if (cpumask_empty(nodemask)) continue; @@ -7681,11 +7732,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, struct sched_group *sg, *prev; int j; - /* FIXME: Use cpumask_of_node */ - *nodemask = node_to_cpumask(i); cpumask_clear(covered); - - cpus_and(*nodemask, *nodemask, *cpu_map); + cpumask_and(nodemask, cpumask_of_node(i), cpu_map); if (cpumask_empty(nodemask)) { sched_group_nodes[i] = NULL; continue; @@ -7716,8 +7764,6 @@ static int __build_sched_domains(const struct cpumask *cpu_map, for (j = 0; j < nr_node_ids; j++) { int n = (i + j) % nr_node_ids; - /* FIXME: Use cpumask_of_node */ - node_to_cpumask_ptr(pnodemask, n); cpumask_complement(notcovered, covered); cpumask_and(tmpmask, notcovered, cpu_map); @@ -7725,7 +7771,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map, if (cpumask_empty(tmpmask)) break; - cpumask_and(tmpmask, tmpmask, pnodemask); + cpumask_and(tmpmask, tmpmask, cpumask_of_node(n)); if (cpumask_empty(tmpmask)) continue; @@ -8010,7 +8056,7 @@ match2: } #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -int arch_reinit_sched_domains(void) +static void arch_reinit_sched_domains(void) { get_online_cpus(); @@ -8019,13 +8065,10 @@ int arch_reinit_sched_domains(void) rebuild_sched_domains(); put_online_cpus(); - - return 0; } static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) { - int ret; unsigned int level = 0; if (sscanf(buf, "%u", &level) != 1) @@ -8046,9 +8089,9 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) else sched_mc_power_savings = level; - ret = arch_reinit_sched_domains(); + arch_reinit_sched_domains(); - return ret ? ret : count; + return count; } #ifdef CONFIG_SCHED_MC @@ -8083,7 +8126,7 @@ static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_store); #endif -int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) +int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) { int err = 0; @@ -8223,11 +8266,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) __set_bit(MAX_RT_PRIO, array->bitmap); #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED - rt_rq->highest_prio = MAX_RT_PRIO; + rt_rq->highest_prio.curr = MAX_RT_PRIO; + rt_rq->highest_prio.next = MAX_RT_PRIO; #endif #ifdef CONFIG_SMP rt_rq->rt_nr_migratory = 0; rt_rq->overloaded = 0; + plist_head_init(&rq->rt.pushable_tasks, &rq->lock); #endif rt_rq->rt_time = 0;