scheduler: genetic library plugin

This patch attempts to tune the CPU scheduler using the genetic library.

Under SpecJBB on a 4x PowerPC server the 95% confidence interval for
throughput performance increase is -0.2% to 1.1% using the average of
four runs.

Signed-off-by: Brandon Philips

---
Review notes (not part of the changelog):
 - The header names on the #include lines were lost when this patch was
   extracted; restore them before applying.
 - cpu_take_stats_snapshot(): clear the snapshot with its own size, store
   the summed run_delay and stop dividing by rq->nr_running.
 - cpu_gene_param[]: drop the duplicated mutate callback in the
   "min_timeslice" entry.
 - CHECK_PHENOTYPE(): wrap in do { } while (0).
 - Hunk offsets and the diffstat are adjusted for the added lines.

 init/Kconfig      |    9 +
 kernel/sched.c    |  409 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/Kconfig.debug |    1 
 3 files changed, 418 insertions(+), 1 deletion(-)

Index: linux-rc/kernel/sched.c
===================================================================
--- linux-rc.orig/kernel/sched.c
+++ linux-rc/kernel/sched.c
@@ -16,6 +16,7 @@
  *		by Davide Libenzi, preemptible kernel bits by Robert Love.
  *  2003-09-03	Interactivity tuning by Con Kolivas.
  *  2004-04-02	Scheduler domains code by Nick Piggin
+ *  2006-06-28	Genetic library plugin by Brandon Philips
  */
 
 #include
@@ -57,6 +58,12 @@
 
 #include
 
+#ifdef CONFIG_GENETIC_CPU_SCHED
+#include
+#include
+#endif
+
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -140,6 +147,125 @@ unsigned long starvation_limit;
 
 #define NS_MAX_SLEEP_AVG	(JIFFIES_TO_NS(MAX_SLEEP_AVG))
 
+
+#ifdef CONFIG_GENETIC_CPU_SCHED
+#define CPU_NUM_CHILDREN 8
+
+/*
+ * Phenotype UIDs are bit flags: the "general" phenotype is the OR of the
+ * phenotypes whose child rankings are combined into its fitness.
+ */
+#define CPU_THROUGHPUT_UID 1
+#define CPU_THROUGHPUT_NUM_GENES 0
+
+#define CPU_LATENCY_UID 2
+#define CPU_LATENCY_NUM_GENES 0
+
+#define CPU_GENERAL_UID (CPU_THROUGHPUT_UID | CPU_LATENCY_UID)
+#define CPU_GENERAL_NUM_GENES 3
+
+#define GENETIC_SCHED_TUNABLE(_name)			\
+unsigned long _name;					\
+GENETIC_TUNABLE(_name, _name);
+
+/* Genes of a "general" child, in the same order as cpu_gene_param[]. */
+struct cpu_genes {
+	unsigned long def_timeslice;
+	unsigned long min_timeslice;
+	unsigned long prio_bonus_ratio;
+#if 0
+	unsigned long child_penalty;
+	unsigned long parent_penalty;
+	unsigned long on_runqueue_weight;
+	unsigned long exit_weight;
+	unsigned long max_bonus;
+	unsigned long starvation_limit;
+	unsigned long ns_max_sleep_avg;
+	unsigned long max_sleep_avg;
+	unsigned long interactive_delta;
+#endif
+};
+
+/*
+ * Tunable genes of the "general" phenotype.  Entries presumably read
+ * { name, min, max, initial, mutate } -- confirm against gene_param_t.
+ */
+gene_param_t cpu_gene_param[CPU_GENERAL_NUM_GENES] = {
+	{ "def_timeslice", DEFAULT_DEF_TIMESLICE/3,
+	  DEFAULT_DEF_TIMESLICE*3, DEFAULT_DEF_TIMESLICE, genetic_generic_iterative_mutate_gene },
+
+	{ "min_timeslice", 1, 30, 1, genetic_generic_iterative_mutate_gene },
+	{ "prio_bonus_ratio", DEFAULT_PRIO_BONUS_RATIO/3,
+	  DEFAULT_PRIO_BONUS_RATIO*3, DEFAULT_PRIO_BONUS_RATIO, genetic_generic_iterative_mutate_gene },
+
+#if 0
+	{ "def_timeslice", DEFAULT_DEF_TIMESLICE/3,
+	  DEFAULT_DEF_TIMESLICE*3, DEFAULT_DEF_TIMESLICE, genetic_generic_iterative_mutate_gene },
+	{ "child_penalty", DEFAULT_CHILD_PENALTY/3,
+	  DEFAULT_CHILD_PENALTY*3, DEFAULT_CHILD_PENALTY, 0 },
+	{ "parent_penalty", DEFAULT_PARENT_PENALTY/3,
+	  DEFAULT_PARENT_PENALTY*3, DEFAULT_PARENT_PENALTY, 0 },
+	{ "on_runqueue_weight", DEFAULT_ON_RUNQUEUE_WEIGHT/3,
+	  DEFAULT_ON_RUNQUEUE_WEIGHT*3, DEFAULT_ON_RUNQUEUE_WEIGHT, 0 },
+	{ "exit_weight", DEFAULT_EXIT_WEIGHT/3,
+	  DEFAULT_EXIT_WEIGHT*3, DEFAULT_EXIT_WEIGHT, 0 },
+	{ "prio_bonus_ratio", DEFAULT_PRIO_BONUS_RATIO/3,
+	  DEFAULT_PRIO_BONUS_RATIO*3, DEFAULT_PRIO_BONUS_RATIO, 0 },
+	{ "max_bonus", DEFAULT_MAX_BONUS/3,
+	  DEFAULT_MAX_BONUS*3, DEFAULT_MAX_BONUS, 0 },
+	{ "starvation_limit", DEFAULT_STARVATION_LIMIT/3,
+	  DEFAULT_STARVATION_LIMIT*3, DEFAULT_STARVATION_LIMIT, 0 },
+	{ "max_sleep_avg", DEFAULT_MAX_SLEEP_AVG/3,
+	  DEFAULT_MAX_SLEEP_AVG*3, DEFAULT_MAX_SLEEP_AVG, 0 },
+	{ "ns_max_sleep_avg", DEFAULT_NS_MAX_SLEEP_AVG/2,
+	  (DEFAULT_NS_MAX_SLEEP_AVG/2) + DEFAULT_NS_MAX_SLEEP_AVG,
+	  DEFAULT_NS_MAX_SLEEP_AVG, 0 },
+	{ "interactive_delta", 1,
+	  DEFAULT_INTERACTIVE_DELTA*3, DEFAULT_INTERACTIVE_DELTA, 0 },
+#endif
+};
+
+static void cpu_take_stats_snapshot(phenotype_t * pt);
+static void cpu_general_create_child(genetic_child_t * child);
+static void cpu_general_set_child_genes(void * in_genes);
+static void cpu_throughput_create_child(genetic_child_t * child);
+static void cpu_latency_create_child(genetic_child_t * child);
+static void cpu_throughput_calc_fitness(genetic_child_t * child);
+static void cpu_latency_calc_fitness(genetic_child_t * child);
+static void cpu_general_calc_post_fitness(phenotype_t * in_pt);
+static void cpu_shift_mutation_rate(phenotype_t * in_pt);
+
+/* Counters saved at snapshot time and diffed by the fitness hooks. */
+struct genetic_cpu_stats {
+	unsigned long long nr_switches;
+	unsigned long long run_delay;
+};
+
+/* One buffer shared by all children; every create_child hook points at it. */
+struct genetic_cpu_stats * cpu_stats_snapshot;
+
+struct genetic_ops cpu_general_genetic_ops = {
+	.create_child = cpu_general_create_child,
+	.set_child_genes = cpu_general_set_child_genes,
+	.combine_genes = genetic_generic_combine_genes,
+	.mutate_child = genetic_generic_mutate_child,
+	.calc_post_fitness = cpu_general_calc_post_fitness,
+	.take_snapshot = cpu_take_stats_snapshot,
+	.shift_mutation_rate = cpu_shift_mutation_rate,
+};
+
+struct genetic_ops cpu_throughput_genetic_ops = {
+	.create_child = cpu_throughput_create_child,
+	.calc_fitness = cpu_throughput_calc_fitness,
+};
+
+struct genetic_ops cpu_latency_genetic_ops = {
+	.create_child = cpu_latency_create_child,
+	.calc_fitness = cpu_latency_calc_fitness,
+};
+
+#endif
+
 /*
  * If a task is 'interactive' then we reinsert it in the active
  * array after it has expired its current timeslice. (it will not
@@ -329,6 +455,8 @@ static DEFINE_PER_CPU(struct rq, runqueu
 # define finish_arch_switch(prev)	do { } while (0)
 #endif
 
+
+
 #ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline int task_running(struct rq *rq, struct task_struct *p)
 {
@@ -6790,7 +6918,7 @@ int __init init_debugfs(void)
 	debugfs_sched_create(starvation_limit, root);
 
 err_root:
-	return;
+	return 0;
 }
 postcore_initcall(init_debugfs);
 #endif
@@ -6975,3 +7103,282 @@ void set_curr_task(int cpu, struct task_
 }
 
 #endif
+
+#ifdef CONFIG_GENETIC_CPU_SCHED
+#define CPU_REGISTER_GENE(_gp, _name, _min, _max, _init, _mutate)\
+sysfs_create_file(&_gp.kobj, &sched_attr_##_name.attr);
+
+/* Panic if a phenotype failed to register (NULL result). */
+#define CHECK_PHENOTYPE(_pt) do {					\
+	if ((_pt) == NULL)						\
+		panic("%s: failed to register phenotype", __FUNCTION__); \
+} while (0)
+
+/*
+ * Postcore initcall: allocate the shared stats buffer, initialise the
+ * genetic library, register the throughput, latency and general phenotypes,
+ * attach the tunable genes to the general one and start the library.
+ */
+static int genetic_cpu_sched_init(void)
+{
+	int ret, i = 0;
+	genetic_t * genetic = NULL;
+	phenotype_t * pt;
+
+	cpu_stats_snapshot = kmalloc(sizeof(*cpu_stats_snapshot), GFP_KERNEL);
+
+	if (!cpu_stats_snapshot)
+		panic("%s: failed to malloc enough space", __FUNCTION__);
+
+	ret = genetic_init(&genetic,
+			   CPU_NUM_CHILDREN,
+			   2 * HZ,
+			   0,
+			   "ohone-cpuscheduler");
+	if (ret)
+		panic("%s: failed to init genetic lib",
+		      __FUNCTION__);
+
+
+	pt = genetic_register_phenotype(genetic,
+					&cpu_throughput_genetic_ops,
+					CPU_NUM_CHILDREN,
+					"throughput",
+					CPU_THROUGHPUT_NUM_GENES,
+					CPU_THROUGHPUT_UID);
+
+	CHECK_PHENOTYPE(pt);
+
+	pt = genetic_register_phenotype(genetic,
+					&cpu_latency_genetic_ops,
+					CPU_NUM_CHILDREN,
+					"latency",
+					CPU_LATENCY_NUM_GENES,
+					CPU_LATENCY_UID);
+
+	CHECK_PHENOTYPE(pt);
+
+	pt = genetic_register_phenotype(genetic,
+					&cpu_general_genetic_ops,
+					CPU_NUM_CHILDREN,
+					"general",
+					CPU_GENERAL_NUM_GENES,
+					CPU_GENERAL_UID);
+
+	CHECK_PHENOTYPE(pt);
+
+	for (i = 0; i < CPU_GENERAL_NUM_GENES; i++) {
+		genetic_gene_obj_create(&cpu_gene_param[i], genetic,
+					&pt->genes);
+	}
+
+	genetic_start(genetic);
+
+	return 0;
+}
+postcore_initcall(genetic_cpu_sched_init);
+
+
+/* A child's stats_snapshot pointer, cast to struct genetic_cpu_stats. */
+#define child_stats(child) ((struct genetic_cpu_stats *)(child)->stats_snapshot)
+
+/*
+ * Record the current context-switch count and the run_delay summed over
+ * online CPUs; the fitness hooks later subtract these baselines.
+ */
+static void cpu_take_stats_snapshot(phenotype_t * pt)
+{
+	int cpu;
+	unsigned long long run_delay = 0;
+	struct genetic_cpu_stats * ss = child_stats(pt->child_ranking[0]);
+
+	memset(ss, 0, sizeof(*ss));
+
+	for_each_online_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+
+		run_delay += rq->rq_sched_info.run_delay;
+	}
+
+	ss->run_delay = run_delay;
+	ss->nr_switches = nr_context_switches();
+}
+
+/*
+ * Allocate gene storage for a "general" child, bind it to the gene table and
+ * the shared stats buffer, then call genetic_create_child_defaults().
+ */
+static void cpu_general_create_child(genetic_child_t * child)
+{
+	BUG_ON(!child);
+
+	child->genes = kmalloc(sizeof(struct cpu_genes), GFP_KERNEL);
+	if (!child->genes)
+		panic("cpu_general_create_child: error mallocing space");
+
+	child->num_genes = CPU_GENERAL_NUM_GENES;
+	child->gene_param = cpu_gene_param;
+	child->stats_snapshot = cpu_stats_snapshot;
+
+	genetic_create_child_defaults(child);
+}
+
+/*
+ * Apply a child's genes to the live scheduler tunables.  def_timeslice is
+ * clamped so it never drops below min_timeslice, and the dependent
+ * max_sleep_avg/starvation_limit values are recomputed.
+ */
+static void cpu_general_set_child_genes(void * in_genes)
+{
+	struct cpu_genes * genes = (struct cpu_genes *)in_genes;
+
+	prio_bonus_ratio = genes->prio_bonus_ratio;
+
+	min_timeslice = genes->min_timeslice;
+
+	if (genes->def_timeslice < min_timeslice)
+		def_timeslice = genes->min_timeslice;
+	else
+		def_timeslice = genes->def_timeslice;
+
+#if 0
+	def_timeslice = genes->def_timeslice;
+	on_runqueue_weight = genes->on_runqueue_weight;
+	exit_weight = genes->exit_weight;
+	max_bonus = genes->max_bonus;
+
+
+	interactive_delta = genes->interactive_delta;
+	child_penalty = genes->child_penalty;
+	parent_penalty = genes->parent_penalty;
+#endif
+	max_sleep_avg = def_timeslice * max_bonus;
+	starvation_limit = max_sleep_avg;
+}
+
+/* Common setup for the gene-less phenotypes: no genes, shared stats buffer. */
+static inline void cpu_create_child(genetic_child_t * child)
+{
+	BUG_ON(!child);
+
+	child->genes = 0;
+	child->gene_param = 0;
+	child->num_genes = 0;
+	child->stats_snapshot = cpu_stats_snapshot;
+}
+
+static void cpu_throughput_create_child(genetic_child_t * child)
+{
+	cpu_create_child(child);
+}
+
+static void cpu_latency_create_child(genetic_child_t * child)
+{
+	cpu_create_child(child);
+}
+
+/* Fitness is the negated number of context switches since the snapshot. */
+static void cpu_throughput_calc_fitness(genetic_child_t * child)
+{
+	struct genetic_cpu_stats * ss = child_stats(child);
+
+	child->fitness = (nr_context_switches() - ss->nr_switches);
+	child->fitness = -child->fitness;
+}
+
+/* Fitness is the negated growth in summed run_delay since the snapshot. */
+static void cpu_latency_calc_fitness(genetic_child_t * child)
+{
+	int cpu;
+	unsigned long long run_delay = 0;
+	struct genetic_cpu_stats * ss = child_stats(child);
+
+	for_each_online_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+
+		run_delay += rq->rq_sched_info.run_delay;
+	}
+
+	child->fitness = (run_delay - ss->run_delay);
+	child->fitness = -child->fitness;
+}
+
+/*
+ * Derive the "general" fitness from the contributing phenotypes: ranking[],
+ * indexed by child id, accumulates each child's rank position in every
+ * contributor (throughput weighted 2, latency 1).
+ */
+static void cpu_general_calc_post_fitness(phenotype_t * in_pt)
+{
+	struct list_head * entry;
+	phenotype_t * pt;
+	genetic_t * genetic = to_phenotype_genetic(in_pt);
+	int ranking[CPU_NUM_CHILDREN];
+	int weight = 1;
+	int i;
+
+	memset(ranking, 0, sizeof(ranking));
+
+	list_for_each(entry, &genetic->phenotypes.list) {
+		pt = to_phenotype(to_kobj(entry));
+
+		/* Look at everyone else that contributes to this
+		   phenotype */
+		if (pt->uid & CPU_GENERAL_UID && pt->uid != CPU_GENERAL_UID) {
+
+			switch (pt->uid) {
+			case CPU_THROUGHPUT_UID:
+				weight = 2;
+				break;
+			case CPU_LATENCY_UID:
+				weight = 1;
+				break;
+			default:
+				BUG();
+			}
+
+			for (i = 0; i < pt->num_children; i++)
+				ranking[pt->child_ranking[i]->id] += (i * weight);
+		}
+	}
+
+	for (i = 0; i < in_pt->num_children; i++)
+		in_pt->child_ranking[i]->fitness = ranking[i];
+}
+
+/* Set the "general" mutation rate to the mean of its contributors' rates. */
+static void cpu_shift_mutation_rate(phenotype_t * in_pt)
+{
+	struct list_head * entry;
+	struct genetic_s * g = to_phenotype_genetic(in_pt);
+	phenotype_t * pt;
+	int count = 0;
+	long rate = 0;
+
+	list_for_each(entry, &g->phenotypes.list) {
+		pt = to_phenotype(to_kobj(entry));
+
+		/* Look at everyone else that contributes to this
+		   phenotype */
+		if (pt->uid & CPU_GENERAL_UID && pt->uid != CPU_GENERAL_UID) {
+
+			switch (pt->uid) {
+			case CPU_THROUGHPUT_UID:
+			case CPU_LATENCY_UID:
+				rate += pt->mutation_rate;
+				count++;
+				break;
+			default:
+				BUG();
+			}
+		}
+	}
+
+	/* If we are a general phenotype that is made up of other
+	   phenotypes then we take the average */
+	if (count)
+		in_pt->mutation_rate = (rate / count);
+	else
+		BUG();
+}
+#endif
Index: linux-rc/init/Kconfig
===================================================================
--- linux-rc.orig/init/Kconfig
+++ linux-rc/init/Kconfig
@@ -182,6 +182,14 @@ config TASK_DELAY_ACCT
 
 	  Say N if unsure.
 
+config GENETIC_CPU_SCHED
+	bool "Genetic CPU scheduler (EXPERIMENTAL)"
+	default y
+	depends on GENETIC_LIB && SCHEDSTATS && EXPERIMENTAL
+	help
+	  This will tune the O(1) CPU Scheduler tunables dynamically based on
+	  workload statistics.
+
 config SYSCTL
 	bool "Sysctl support" if EMBEDDED
 	default y
@@ -509,6 +517,7 @@ config STOP_MACHINE
 	depends on (SMP && MODULE_UNLOAD) || HOTPLUG_CPU
 	help
 	  Need stop_machine() primitive.
+
 endmenu
 
 menu "Block layer"
Index: linux-rc/lib/Kconfig.debug
===================================================================
--- linux-rc.orig/lib/Kconfig.debug
+++ linux-rc/lib/Kconfig.debug
@@ -84,6 +84,7 @@ config DETECT_SOFTLOCKUP
 config SCHEDSTATS
 	bool "Collect scheduler statistics"
 	depends on DEBUG_KERNEL && PROC_FS
+	default y
 	help
 	  If you say Y here, additional code will be inserted into the
 	  scheduler and related routines to collect statistics about