diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index b727f5f7b8f9..041f0b97c45b 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -28,6 +28,7 @@ extern int boot_cpuid; extern int spinning_secondaries; extern u32 *cpu_to_phys_id; +extern bool coregroup_enabled; extern void cpu_die(void); extern int cpu_to_chip_id(int cpu); diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f0b6300e7dd3..6609174918ab 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -88,12 +88,22 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int find_and_online_cpu_nid(int cpu); +extern int cpu_to_coregroup_id(int cpu); #else static inline int find_and_online_cpu_nid(int cpu) { return 0; } +static inline int cpu_to_coregroup_id(int cpu) +{ +#ifdef CONFIG_SMP + return cpu_to_core_id(cpu); +#else + return 0; +#endif +} + #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8261999c7d52..3d96752d6570 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -75,17 +75,28 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; struct task_struct *secondary_current; bool has_big_cores; +bool coregroup_enabled; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map); DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); EXPORT_SYMBOL_GPL(has_big_cores); +enum { +#ifdef CONFIG_SCHED_SMT + smt_idx, +#endif + cache_idx, + mc_idx, + die_idx, +}; + #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 struct thread_groups { @@ -789,10 +800,6 @@ static int init_cpu_l1_cache_map(int cpu) if (err) goto out; - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), - GFP_KERNEL, - cpu_to_node(cpu)); - cpu_group_start = get_cpu_thread_group_start(cpu, &tg); if (unlikely(cpu_group_start == -1)) { @@ -801,6 +808,9 @@ static int init_cpu_l1_cache_map(int cpu) goto out; } + zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + for (i = first_thread; i < first_thread + threads_per_core; i++) { int i_group_start = get_cpu_thread_group_start(i, &tg); @@ -819,6 +829,74 @@ out: return err; } +static bool shared_caches; + +#ifdef CONFIG_SCHED_SMT +/* cpumask of CPUs with asymmetric SMT dependency */ +static int powerpc_smt_flags(void) +{ + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + flags |= SD_ASYM_PACKING; + } + return flags; +} +#endif + +/* + * P9 has a slightly odd architecture where pairs of cores share an L2 cache. + * This topology makes it *much* cheaper to migrate tasks between adjacent cores + * since the migrated task remains cache hot. We want to take advantage of this + * at the scheduler level so an extra topology level is required. + */ +static int powerpc_shared_cache_flags(void) +{ + return SD_SHARE_PKG_RESOURCES; +} + +/* + * We can't just pass cpu_l2_cache_mask() directly because + * returns a non-const pointer and the compiler barfs on that. + */ +static const struct cpumask *shared_cache_mask(int cpu) +{ + return per_cpu(cpu_l2_cache_map, cpu); +} + +#ifdef CONFIG_SCHED_SMT +static const struct cpumask *smallcore_smt_mask(int cpu) +{ + return cpu_smallcore_mask(cpu); +} +#endif + +static struct cpumask *cpu_coregroup_mask(int cpu) +{ + return per_cpu(cpu_coregroup_map, cpu); +} + +static bool has_coregroup_support(void) +{ + return coregroup_enabled; +} + +static const struct cpumask *cpu_mc_mask(int cpu) +{ + return cpu_coregroup_mask(cpu); +} + +static struct sched_domain_topology_level powerpc_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, +#endif + { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, + { cpu_mc_mask, SD_INIT_NAME(MC) }, + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + static int init_big_cores(void) { int cpu; @@ -861,6 +939,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) GFP_KERNEL, cpu_to_node(cpu)); zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), GFP_KERNEL, cpu_to_node(cpu)); + if (has_coregroup_support()) + zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu), + GFP_KERNEL, cpu_to_node(cpu)); + +#ifdef CONFIG_NEED_MULTIPLE_NODES /* * numa_node_id() works after this. */ @@ -869,6 +952,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu])); } +#endif } /* Init the cpumasks so the boot CPU is related to itself */ @@ -876,6 +960,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid)); cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); + if (has_coregroup_support()) + cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid)); + init_big_cores(); if (has_big_cores) { cpumask_set_cpu(boot_cpuid, @@ -1132,9 +1219,23 @@ static bool update_mask_by_l2(int cpu, struct cpumask *(*mask_fn)(int)) int i; l2_cache = cpu_to_l2cache(cpu); - if (!l2_cache) - return false; + if (!l2_cache) { + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; + /* + * If no l2cache for this CPU, assume all siblings to share + * cache with this CPU. + */ + if (has_big_cores) + sibling_mask = cpu_smallcore_mask; + + for_each_cpu(i, sibling_mask(cpu)) + set_cpus_related(cpu, i, cpu_l2_cache_mask); + + return false; + } + + cpumask_set_cpu(cpu, mask_fn(cpu)); for_each_cpu(i, cpu_online_mask) { /* * when updating the marks the current CPU has not been marked @@ -1166,6 +1267,8 @@ static void remove_cpu_from_masks(int cpu) set_cpus_unrelated(cpu, i, cpu_sibling_mask); if (has_big_cores) set_cpus_unrelated(cpu, i, cpu_smallcore_mask); + if (has_coregroup_support()) + set_cpus_unrelated(cpu, i, cpu_coregroup_mask); } } #endif @@ -1217,42 +1320,54 @@ static void add_cpu_to_masks(int cpu) * add it to it's own thread sibling mask. */ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) if (cpu_online(i)) set_cpus_related(i, cpu, cpu_sibling_mask); add_cpu_to_smallcore_masks(cpu); - /* - * Copy the thread sibling mask into the cache sibling mask - * and mark any CPUs that share an L2 with this CPU. - */ - for_each_cpu(i, cpu_sibling_mask(cpu)) - set_cpus_related(cpu, i, cpu_l2_cache_mask); update_mask_by_l2(cpu, cpu_l2_cache_mask); - /* - * Copy the cache sibling mask into core sibling mask and mark - * any CPUs on the same chip as this CPU. - */ - for_each_cpu(i, cpu_l2_cache_mask(cpu)) - set_cpus_related(cpu, i, cpu_core_mask); + if (has_coregroup_support()) { + int coregroup_id = cpu_to_coregroup_id(cpu); + + cpumask_set_cpu(cpu, cpu_coregroup_mask(cpu)); + for_each_cpu_and(i, cpu_online_mask, cpu_cpu_mask(cpu)) { + int fcpu = cpu_first_thread_sibling(i); + + if (fcpu == first_thread) + set_cpus_related(cpu, i, cpu_coregroup_mask); + else if (coregroup_id == cpu_to_coregroup_id(i)) + set_cpus_related(cpu, i, cpu_coregroup_mask); + } + } + + if (pkg_id == -1) { + struct cpumask *(*mask)(int) = cpu_sibling_mask; + + /* + * Copy the sibling mask into core sibling mask and + * mark any CPUs on the same chip as this CPU. + */ + if (shared_caches) + mask = cpu_l2_cache_mask; + + for_each_cpu(i, mask(cpu)) + set_cpus_related(cpu, i, cpu_core_mask); - if (pkg_id == -1) return; + } for_each_cpu(i, cpu_online_mask) if (get_physical_package_id(i) == pkg_id) set_cpus_related(cpu, i, cpu_core_mask); } -static bool shared_caches; - /* Activate a secondary processor. */ void start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); - struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; mmgrab(&init_mm); current->active_mm = &init_mm; @@ -1278,14 +1393,20 @@ void start_secondary(void *unused) /* Update topology CPU masks */ add_cpu_to_masks(cpu); - if (has_big_cores) - sibling_mask = cpu_smallcore_mask; /* * Check for any shared caches. Note that this must be done on a * per-core basis because one core in the pair might be disabled. */ - if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) - shared_caches = true; + if (!shared_caches) { + struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask; + struct cpumask *mask = cpu_l2_cache_mask(cpu); + + if (has_big_cores) + sibling_mask = cpu_smallcore_mask; + + if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu))) + shared_caches = true; + } set_numa_node(numa_cpu_lookup_table[cpu]); set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); @@ -1311,64 +1432,19 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } -#ifdef CONFIG_SCHED_SMT -/* cpumask of CPUs with asymetric SMT dependancy */ -static int powerpc_smt_flags(void) +static void fixup_topology(void) { - int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; - - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); - flags |= SD_ASYM_PACKING; +#ifdef CONFIG_SCHED_SMT + if (has_big_cores) { + pr_info("Big cores detected but using small core scheduling\n"); + powerpc_topology[smt_idx].mask = smallcore_smt_mask; } - return flags; -} #endif -static struct sched_domain_topology_level powerpc_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; - -/* - * P9 has a slightly odd architecture where pairs of cores share an L2 cache. - * This topology makes it *much* cheaper to migrate tasks between adjacent cores - * since the migrated task remains cache hot. We want to take advantage of this - * at the scheduler level so an extra topology level is required. - */ -static int powerpc_shared_cache_flags(void) -{ - return SD_SHARE_PKG_RESOURCES; + if (!has_coregroup_support()) + powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask; } -/* - * We can't just pass cpu_l2_cache_mask() directly because - * returns a non-const pointer and the compiler barfs on that. - */ -static const struct cpumask *shared_cache_mask(int cpu) -{ - return cpu_l2_cache_mask(cpu); -} - -#ifdef CONFIG_SCHED_SMT -static const struct cpumask *smallcore_smt_mask(int cpu) -{ - return cpu_smallcore_mask(cpu); -} -#endif - -static struct sched_domain_topology_level power9_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; - void __init smp_cpus_done(unsigned int max_cpus) { /* @@ -1382,24 +1458,8 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); -#ifdef CONFIG_SCHED_SMT - if (has_big_cores) { - pr_info("Big cores detected but using small core scheduling\n"); - power9_topology[0].mask = smallcore_smt_mask; - powerpc_topology[0].mask = smallcore_smt_mask; - } -#endif - /* - * If any CPU detects that it's sharing a cache with another CPU then - * use the deeper topology that is aware of this sharing. - */ - if (shared_caches) { - pr_info("Using shared cache scheduler topology\n"); - set_sched_topology(power9_topology); - } else { - pr_info("Using standard scheduler topology\n"); - set_sched_topology(powerpc_topology); - } + fixup_topology(); + set_sched_topology(powerpc_topology); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 481951ac3e55..b725fb66e913 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -897,7 +897,9 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) static void __init find_possible_nodes(void) { struct device_node *rtas; - u32 numnodes, i; + const __be32 *domains; + int prop_length, max_nodes; + u32 i; if (!numa_enabled) return; @@ -906,25 +908,31 @@ static void __init find_possible_nodes(void) if (!rtas) return; - if (of_property_read_u32_index(rtas, "ibm,current-associativity-domains", - min_common_depth, &numnodes)) { - /* - * ibm,current-associativity-domains is a fairly recent - * property. If it doesn't exist, then fallback on - * ibm,max-associativity-domains. Current denotes what the - * platform can support compared to max which denotes what the - * Hypervisor can support. - */ - if (of_property_read_u32_index(rtas, "ibm,max-associativity-domains", - min_common_depth, &numnodes)) + /* + * ibm,current-associativity-domains is a fairly recent property. If + * it doesn't exist, then fallback on ibm,max-associativity-domains. + * Current denotes what the platform can support compared to max + * which denotes what the Hypervisor can support. + */ + domains = of_get_property(rtas, "ibm,current-associativity-domains", + &prop_length); + if (!domains) { + domains = of_get_property(rtas, "ibm,max-associativity-domains", + &prop_length); + if (!domains) goto out; } - for (i = 0; i < numnodes; i++) { + max_nodes = of_read_number(&domains[min_common_depth], 1); + for (i = 0; i < max_nodes; i++) { if (!node_possible(i)) node_set(i, node_possible_map); } + prop_length /= sizeof(int); + if (prop_length > min_common_depth + 2) + coregroup_enabled = 1; + out: of_node_put(rtas); } @@ -1237,6 +1245,31 @@ int find_and_online_cpu_nid(int cpu) return new_nid; } +int cpu_to_coregroup_id(int cpu) +{ + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0}; + int index; + + if (cpu < 0 || cpu > nr_cpu_ids) + return -1; + + if (!coregroup_enabled) + goto out; + + if (!firmware_has_feature(FW_FEATURE_VPHN)) + goto out; + + if (vphn_get_associativity(cpu, associativity)) + goto out; + + index = of_read_number(associativity, 1); + if (index > min_common_depth + 1) + return of_read_number(&associativity[index - 1], 1); + +out: + return cpu_to_core_id(cpu); +} + static int topology_update_init(void) { topology_inited = 1;