Index: linux-2.6.36/include/linux/sched.h
===================================================================
--- linux-2.6.36.orig/include/linux/sched.h
+++ linux-2.6.36/include/linux/sched.h
@@ -506,6 +506,8 @@ struct thread_group_cputimer {
 	spinlock_t lock;
 };
 
+struct autogroup;
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -573,6 +575,9 @@ struct signal_struct {
 
 	struct tty_struct *tty; /* NULL if no tty */
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+	struct autogroup *autogroup;
+#endif
 	/*
 	 * Cumulative resource counters for dead threads in the group,
 	 * and for reaped dead child processes forked by this group.
@@ -1900,6 +1905,20 @@ int sched_rt_handler(struct ctl_table *t
 
 extern unsigned int sysctl_sched_compat_yield;
 
+#ifdef CONFIG_SCHED_AUTOGROUP
+extern unsigned int sysctl_sched_autogroup_enabled;
+
+extern void sched_autogroup_create_attach(struct task_struct *p);
+extern void sched_autogroup_detach(struct task_struct *p);
+extern void sched_autogroup_fork(struct signal_struct *sig);
+extern void sched_autogroup_exit(struct signal_struct *sig);
+#else
+static inline void sched_autogroup_create_attach(struct task_struct *p) { }
+static inline void sched_autogroup_detach(struct task_struct *p) { }
+static inline void sched_autogroup_fork(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);
Index: linux-2.6.36/kernel/sched.c
===================================================================
--- linux-2.6.36.orig/kernel/sched.c
+++ linux-2.6.36/kernel/sched.c
@@ -78,6 +78,7 @@
 
 #include "sched_cpupri.h"
 #include "workqueue_sched.h"
+#include "sched_autogroup.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -268,6 +269,10 @@ struct task_group {
 	struct task_group *parent;
 	struct list_head siblings;
 	struct list_head children;
+
+#if defined(CONFIG_SCHED_AUTOGROUP)
+	struct autogroup *autogroup;
+#endif
 };
 
 #define root_task_group init_task_group
@@ -612,11 +617,14 @@ static inline int cpu_of(struct rq *rq)
  */
 static inline struct task_group *task_group(struct task_struct *p)
 {
+	struct task_group *tg;
 	struct cgroup_subsys_state *css;
 
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
 			lockdep_is_held(&task_rq(p)->lock));
-	return container_of(css, struct task_group, css);
+	tg = container_of(css, struct task_group, css);
+
+	return autogroup_task_group(p, tg);
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -1920,6 +1928,7 @@ static void deactivate_task(struct rq *r
 #include "sched_idletask.c"
 #include "sched_fair.c"
 #include "sched_rt.c"
+#include "sched_autogroup.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
 #endif
@@ -7749,7 +7758,7 @@ void __init sched_init(void)
 #ifdef CONFIG_CGROUP_SCHED
 	list_add(&init_task_group.list, &task_groups);
 	INIT_LIST_HEAD(&init_task_group.children);
-
+	autogroup_init(&init_task);
 #endif /* CONFIG_CGROUP_SCHED */
 
 #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
Index: linux-2.6.36/kernel/fork.c
===================================================================
--- linux-2.6.36.orig/kernel/fork.c
+++ linux-2.6.36/kernel/fork.c
@@ -173,8 +173,10 @@ static inline void free_signal_struct(st
 
 static inline void put_signal_struct(struct signal_struct *sig)
 {
-	if (atomic_dec_and_test(&sig->sigcnt))
+	if (atomic_dec_and_test(&sig->sigcnt)) {
+		sched_autogroup_exit(sig);
 		free_signal_struct(sig);
+	}
 }
 
 void __put_task_struct(struct task_struct *tsk)
@@ -900,6 +902,7 @@ static int copy_signal(unsigned long clo
 	posix_cpu_timers_init_group(sig);
 
 	tty_audit_fork(sig);
+	sched_autogroup_fork(sig);
 
 	sig->oom_adj = current->signal->oom_adj;
 	sig->oom_score_adj = current->signal->oom_score_adj;
Index: linux-2.6.36/kernel/sys.c
===================================================================
--- linux-2.6.36.orig/kernel/sys.c
+++ linux-2.6.36/kernel/sys.c
@@ -1080,8 +1080,10 @@ SYSCALL_DEFINE0(setsid)
 	err = session;
 out:
 	write_unlock_irq(&tasklist_lock);
-	if (err > 0)
+	if (err > 0) {
 		proc_sid_connector(group_leader);
+		sched_autogroup_create_attach(group_leader);
+	}
 	return err;
 }
 
Index: linux-2.6.36/kernel/sched_debug.c
===================================================================
--- linux-2.6.36.orig/kernel/sched_debug.c
+++ linux-2.6.36/kernel/sched_debug.c
@@ -87,6 +87,20 @@ static void print_cfs_group_stats(struct
 }
 #endif
 
+#if defined(CONFIG_CGROUP_SCHED) && \
+	(defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
+static void task_group_path(struct task_group *tg, char *buf, int buflen)
+{
+	/* may be NULL if the underlying cgroup isn't fully-created yet */
+	if (!tg->css.cgroup) {
+		buf[0] = '\0';
+		autogroup_path(tg, buf, buflen);
+		return;
+	}
+	cgroup_path(tg->css.cgroup, buf, buflen);
+}
+#endif
+
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
@@ -115,7 +129,7 @@ print_task(struct seq_file *m, struct rq
 		char path[64];
 
 		rcu_read_lock();
-		cgroup_path(task_group(p)->css.cgroup, path, sizeof(path));
+		task_group_path(task_group(p), path, sizeof(path));
 		rcu_read_unlock();
 		SEQ_printf(m, " %s", path);
 	}
@@ -147,19 +161,6 @@ static void print_rq(struct seq_file *m,
 	read_unlock_irqrestore(&tasklist_lock, flags);
 }
 
-#if defined(CONFIG_CGROUP_SCHED) && \
-	(defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
-static void task_group_path(struct task_group *tg, char *buf, int buflen)
-{
-	/* may be NULL if the underlying cgroup isn't fully-created yet */
-	if (!tg->css.cgroup) {
-		buf[0] = '\0';
-		return;
-	}
-	cgroup_path(tg->css.cgroup, buf, buflen);
-}
-#endif
-
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
 	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
Index: linux-2.6.36/kernel/sched_autogroup.h
===================================================================
--- /dev/null
+++ linux-2.6.36/kernel/sched_autogroup.h
@@ -0,0 +1,23 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg);
+
+#else /* !CONFIG_SCHED_AUTOGROUP */
+
+static inline void autogroup_init(struct task_struct *init_task) { }
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+	return tg;
+}
+
+#ifdef CONFIG_SCHED_DEBUG
+static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+{
+	return 0;
+}
+#endif
+
+#endif /* CONFIG_SCHED_AUTOGROUP */
Index: linux-2.6.36/kernel/sched_autogroup.c
===================================================================
--- /dev/null
+++ linux-2.6.36/kernel/sched_autogroup.c
@@ -0,0 +1,170 @@
+#ifdef CONFIG_SCHED_AUTOGROUP
+
+unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
+
+struct autogroup {
+	struct task_group	*tg;
+	struct kref		kref;
+	unsigned long		id;
+};
+
+static struct autogroup autogroup_default;
+static atomic_t autogroup_seq_nr;
+
+static void autogroup_init(struct task_struct *init_task)
+{
+	autogroup_default.tg = &init_task_group;
+	autogroup_default.id = 0;
+	atomic_set(&autogroup_seq_nr, 1);
+	kref_init(&autogroup_default.kref);
+	init_task->signal->autogroup = &autogroup_default;
+}
+
+static inline void autogroup_destroy(struct kref *kref)
+{
+	struct autogroup *ag = container_of(kref, struct autogroup, kref);
+	struct task_group *tg = ag->tg;
+
+	kfree(ag);
+	sched_destroy_group(tg);
+}
+
+static inline void autogroup_kref_put(struct autogroup *ag)
+{
+	kref_put(&ag->kref, autogroup_destroy);
+}
+
+static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
+{
+	kref_get(&ag->kref);
+	return ag;
+}
+
+static inline struct autogroup *autogroup_create(void)
+{
+	struct autogroup *ag = kmalloc(sizeof(*ag), GFP_KERNEL);
+
+	if (!ag)
+		goto out_fail;
+
+	ag->tg = sched_create_group(&init_task_group);
+
+	if (IS_ERR(ag->tg))
+		goto out_fail;
+
+	kref_init(&ag->kref);
+	ag->tg->autogroup = ag;
+	ag->id = atomic_inc_return(&autogroup_seq_nr);
+
+	return ag;
+
+out_fail:
+	if (ag) {
+		kfree(ag);
+		WARN_ON(1);
+	} else
+		WARN_ON(1);
+
+	return autogroup_kref_get(&autogroup_default);
+}
+
+static inline bool
+task_wants_autogroup(struct task_struct *p, struct task_group *tg)
+{
+	if (tg != &root_task_group)
+		return false;
+
+	if (p->sched_class != &fair_sched_class)
+		return false;
+
+	if (p->flags & PF_EXITING)
+		return false;
+
+	return true;
+}
+
+static inline struct task_group *
+autogroup_task_group(struct task_struct *p, struct task_group *tg)
+{
+	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+	if (enabled && task_wants_autogroup(p, tg))
+		return p->signal->autogroup->tg;
+
+	return tg;
+}
+
+static void
+autogroup_move_group(struct task_struct *p, struct autogroup *ag)
+{
+	struct autogroup *prev;
+	struct task_struct *t;
+
+	spin_lock(&p->sighand->siglock);
+
+	prev = p->signal->autogroup;
+	if (prev == ag) {
+		spin_unlock(&p->sighand->siglock);
+		return;
+	}
+
+	p->signal->autogroup = autogroup_kref_get(ag);
+	t = p;
+
+	do {
+		sched_move_task(t);
+	} while_each_thread(p, t);
+
+	spin_unlock(&p->sighand->siglock);
+
+	autogroup_kref_put(prev);
+}
+
+/* Allocates GFP_KERNEL, cannot be called under any spinlock */
+void sched_autogroup_create_attach(struct task_struct *p)
+{
+	struct autogroup *ag = autogroup_create();
+
+	autogroup_move_group(p, ag);
+	/* drop extra reference added by autogroup_create() */
+	autogroup_kref_put(ag);
+}
+EXPORT_SYMBOL(sched_autogroup_create_attach);
+
+/* Cannot be called under siglock.  Currently has no users */
+void sched_autogroup_detach(struct task_struct *p)
+{
+	autogroup_move_group(p, &autogroup_default);
+}
+EXPORT_SYMBOL(sched_autogroup_detach);
+
+void sched_autogroup_fork(struct signal_struct *sig)
+{
+	struct sighand_struct *sighand = current->sighand;
+
+	spin_lock(&sighand->siglock);
+	sig->autogroup = autogroup_kref_get(current->signal->autogroup);
+	spin_unlock(&sighand->siglock);
+}
+
+void sched_autogroup_exit(struct signal_struct *sig)
+{
+	autogroup_kref_put(sig->autogroup);
+}
+
+static int __init setup_autogroup(char *str)
+{
+	sysctl_sched_autogroup_enabled = 0;
+
+	return 1;
+}
+
+__setup("noautogroup", setup_autogroup);
+
+#ifdef CONFIG_SCHED_DEBUG
+static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
+{
+	return snprintf(buf, buflen, "%s-%ld", "autogroup", tg->autogroup->id);
+}
+#endif
+#endif /* CONFIG_SCHED_AUTOGROUP */
Index: linux-2.6.36/kernel/sysctl.c
===================================================================
--- linux-2.6.36.orig/kernel/sysctl.c
+++ linux-2.6.36/kernel/sysctl.c
@@ -384,6 +384,17 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_SCHED_AUTOGROUP
+	{
+		.procname	= "sched_autogroup_enabled",
+		.data		= &sysctl_sched_autogroup_enabled,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",
Index: linux-2.6.36/init/Kconfig
===================================================================
--- linux-2.6.36.orig/init/Kconfig
+++ linux-2.6.36/init/Kconfig
@@ -652,6 +652,18 @@ config DEBUG_BLK_CGROUP
 
 endif # CGROUPS
 
+config SCHED_AUTOGROUP
+	bool "Automatic process group scheduling"
+	select CGROUPS
+	select CGROUP_SCHED
+	select FAIR_GROUP_SCHED
+	help
+	  This option optimizes the scheduler for common desktop workloads by
+	  automatically creating and populating task groups.  This separation
+	  of workloads isolates aggressive CPU burners (like build jobs) from
+	  desktop applications.  Task group autogeneration is currently based
+	  upon task session.
+
 config MM_OWNER
 	bool
 
Index: linux-2.6.36/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.36.orig/Documentation/kernel-parameters.txt
+++ linux-2.6.36/Documentation/kernel-parameters.txt
@@ -1610,6 +1610,8 @@ and is between 256 and 4096 characters.
 	noapic		[SMP,APIC] Tells the kernel to not make use of any
 			IOAPICs that may be present in the system.
 
+	noautogroup	Disable scheduler automatic task group creation.
+
 	nobats		[PPC] Do not use BATs for mapping kernel lowmem
 			on "Classic" PPC cores.
 
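Taken together, the hooks above mean that a task which starts a new session via setsid(2) is pulled into a freshly created task group, and the feature can be turned off at boot time ("noautogroup") or at run time through the kernel.sched_autogroup_enabled sysctl. The userspace sketch below is illustrative only and not part of the patch; it assumes the patch is applied and simply reads the sysctl, then forks a child that calls setsid(), which is exactly the path that ends up in sched_autogroup_create_attach().

/* illustrative only: exercise the setsid() path added by this patch */
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_autogroup_enabled", "r");
	int enabled = 0;
	pid_t pid;

	if (f) {
		if (fscanf(f, "%d", &enabled) != 1)
			enabled = 0;
		fclose(f);
	}
	printf("sched_autogroup_enabled: %d\n", enabled);

	pid = fork();
	if (pid < 0) {
		perror("fork");
		return 1;
	}
	if (pid == 0) {
		/*
		 * The forked child is not a process group leader, so
		 * setsid() succeeds and creates a new session -- with
		 * autogroup, that session gets its own task group.
		 */
		if (setsid() < 0) {
			perror("setsid");
			_exit(1);
		}
		printf("child %d: new session started\n", (int)getpid());
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	return 0;
}

This per-session grouping is what the Kconfig help text is getting at: a parallel build started from one terminal session competes for CPU as a single group against an interactive session, rather than as dozens of individual tasks against a handful.
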
Index: linux-2.6.36.git/kernel/sched.c
===================================================================
--- linux-2.6.36.git.orig/kernel/sched.c
+++ linux-2.6.36.git/kernel/sched.c
@@ -8297,12 +8297,12 @@ void sched_move_task(struct task_struct
 	if (unlikely(running))
 		tsk->sched_class->put_prev_task(rq, tsk);
 
-	set_task_rq(tsk, task_cpu(tsk));
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	if (tsk->sched_class->moved_group)
-		tsk->sched_class->moved_group(tsk, on_rq);
+	if (tsk->sched_class->task_move_group)
+		tsk->sched_class->task_move_group(tsk, on_rq);
+	else
 #endif
+		set_task_rq(tsk, task_cpu(tsk));
 
 	if (unlikely(running))
 		tsk->sched_class->set_curr_task(rq);
Index: linux-2.6.36.git/include/linux/sched.h
===================================================================
--- linux-2.6.36.git.orig/include/linux/sched.h
+++ linux-2.6.36.git/include/linux/sched.h
@@ -1072,7 +1072,7 @@ struct sched_class {
 					 struct task_struct *task);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	void (*moved_group) (struct task_struct *p, int on_rq);
+	void (*task_move_group) (struct task_struct *p, int on_rq);
 #endif
 };
 
Index: linux-2.6.36.git/kernel/sched_fair.c
===================================================================
--- linux-2.6.36.git.orig/kernel/sched_fair.c
+++ linux-2.6.36.git/kernel/sched_fair.c
@@ -3824,13 +3824,26 @@ static void set_curr_task_fair(struct rq
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void moved_group_fair(struct task_struct *p, int on_rq)
+static void task_move_group_fair(struct task_struct *p, int on_rq)
 {
-	struct cfs_rq *cfs_rq = task_cfs_rq(p);
-
-	update_curr(cfs_rq);
+	/*
+	 * If the task was not on the rq at the time of this cgroup movement
+	 * it must have been asleep, sleeping tasks keep their ->vruntime
+	 * absolute on their old rq until wakeup (needed for the fair sleeper
+	 * bonus in place_entity()).
+	 *
+	 * If it was on the rq, we've just 'preempted' it, which does convert
+	 * ->vruntime to a relative base.
+	 *
+	 * Make sure both cases convert their relative position when migrating
+	 * to another cgroup's rq. This does somewhat interfere with the
+	 * fair sleeper stuff for the first placement, but who cares.
+	 */
+	if (!on_rq)
+		p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
+	set_task_rq(p, task_cpu(p));
 	if (!on_rq)
-		place_entity(cfs_rq, &p->se, 1);
+		p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
 }
 #endif
 
@@ -3882,7 +3895,7 @@ static const struct sched_class fair_sch
 	.get_rr_interval	= get_rr_interval_fair,
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	.moved_group		= moved_group_fair,
+	.task_move_group	= task_move_group_fair,
 #endif
 };
 
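The comment in task_move_group_fair() is the heart of the second patch: a queued task holds a vruntime relative to its cfs_rq's min_vruntime, a sleeping task holds an absolute one, so moving a sleeping task to another group's queue has to re-base it against the destination cfs_rq or the task would inherit a bogus boost or penalty. A standalone sketch of that arithmetic with made-up numbers follows; it is not kernel code, only the -=/+= steps mirror the patch.

/* illustrative only: vruntime re-basing across cfs_rqs, made-up values */
#include <stdio.h>

typedef unsigned long long u64;

int main(void)
{
	u64 old_min_vruntime = 1000000;	/* old cfs_rq->min_vruntime */
	u64 new_min_vruntime = 5000000;	/* new cfs_rq->min_vruntime */
	u64 vruntime = 1002500;		/* sleeping task, absolute on old rq */

	vruntime -= old_min_vruntime;	/* convert to relative: 2500 */
	/* ... set_task_rq() switches the task to the new cfs_rq here ... */
	vruntime += new_min_vruntime;	/* absolute again on the new rq */

	printf("rebased vruntime: %llu\n", vruntime);	/* 5002500 */
	return 0;
}

Without the re-basing, the task would keep 1002500 on a queue whose minimum is already 5000000 and would look massively behind, hogging the CPU until it caught up.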