From ea624248c72ea94d7f37afc85df6a9a8386c69d6 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 11 Dec 2023 11:48:54 +0100 Subject: [PATCH 1/2] cpufreq/cppc: Set the frequency used for computing the capacity mainline inclusion from mainline-v6.8-rc1 category: bugfix Save the frequency associated to the performance that has been used when initializing the capacity of CPUs. Also, cppc cpufreq driver can register an artificial energy model. In such case, it needs the frequency for this compute capacity. Signed-off-by: Vincent Guittot Signed-off-by: Ingo Molnar Tested-by: Pierre Gondois Acked-by: Sudeep Holla Acked-by: Viresh Kumar Link: https://lore.kernel.org/r/20231211104855.558096-7-vincent.guittot@linaro.org (cherry picked from commit 5477fa249b56c59c3baa1b237bf083cffa64c84a) Signed-off-by: Wentao Guan --- drivers/base/arch_topology.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 73e409be37ea1..087a30b73b0bf 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -349,6 +349,7 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) void topology_init_cpu_capacity_cppc(void) { + u64 capacity, capacity_scale = 0; struct cppc_perf_caps perf_caps; int cpu; @@ -365,6 +366,10 @@ void topology_init_cpu_capacity_cppc(void) (perf_caps.highest_perf >= perf_caps.nominal_perf) && (perf_caps.highest_perf >= perf_caps.lowest_perf)) { raw_capacity[cpu] = perf_caps.highest_perf; + capacity_scale = max_t(u64, capacity_scale, raw_capacity[cpu]); + + per_cpu(capacity_freq_ref, cpu) = cppc_perf_to_khz(&perf_caps, raw_capacity[cpu]); + pr_debug("cpu_capacity: CPU%d cpu_capacity=%u (raw).\n", cpu, raw_capacity[cpu]); continue; @@ -375,7 +380,15 @@ void topology_init_cpu_capacity_cppc(void) goto exit; } - topology_normalize_cpu_scale(); + for_each_possible_cpu(cpu) { + capacity = raw_capacity[cpu]; + capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, + capacity_scale); + topology_set_cpu_scale(cpu, capacity); + pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n", + cpu, topology_get_cpu_scale(cpu)); + } + schedule_work(&update_topology_flags_work); pr_debug("cpu_capacity: cpu_capacity initialization done\n"); From fcc3ca171022608c645f5a3baf4992ddc9465d0a Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Mon, 11 Dec 2023 11:48:55 +0100 Subject: [PATCH 2/2] arm64/amu: Use capacity_ref_freq() to set AMU ratio mainline inclusion from mainline-v6.8-rc1 category: bugfix Use the new capacity_ref_freq() method to set the ratio that is used by AMU for computing the arch_scale_freq_capacity(). This helps to keep everything aligned using the same reference for computing CPUs capacity. The default value of the ratio (stored in per_cpu(arch_max_freq_scale)) ensures that arch_scale_freq_capacity() returns max capacity until it is set to its correct value with the cpu capacity and capacity_ref_freq(). Signed-off-by: Vincent Guittot Signed-off-by: Ingo Molnar Acked-by: Sudeep Holla Acked-by: Will Deacon Link: https://lore.kernel.org/r/20231211104855.558096-8-vincent.guittot@linaro.org (cherry picked from commit 1f023007f5e782bda19ad9104830c404fd622c5d) Signed-off-by: Wentao Guan --- arch/arm64/kernel/topology.c | 26 +++++++++++++------------- drivers/base/arch_topology.c | 12 +++++++++++- include/linux/arch_topology.h | 1 + 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 817d788cd8666..1a2c72f3e7f80 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -82,7 +82,12 @@ int __init parse_acpi_topology(void) #undef pr_fmt #define pr_fmt(fmt) "AMU: " fmt -static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale); +/* + * Ensure that amu_scale_freq_tick() will return SCHED_CAPACITY_SCALE until + * the CPU capacity and its associated frequency have been correctly + * initialized. + */ +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT); static DEFINE_PER_CPU(u64, arch_const_cycles_prev); static DEFINE_PER_CPU(u64, arch_core_cycles_prev); static cpumask_var_t amu_fie_cpus; @@ -112,14 +117,14 @@ static inline bool freq_counters_valid(int cpu) return true; } -static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) +void freq_inv_set_max_ratio(int cpu, u64 max_rate) { - u64 ratio; + u64 ratio, ref_rate = arch_timer_get_rate(); if (unlikely(!max_rate || !ref_rate)) { - pr_debug("CPU%d: invalid maximum or reference frequency.\n", + WARN_ONCE(1, "CPU%d: invalid maximum or reference frequency.\n", cpu); - return -EINVAL; + return; } /* @@ -139,12 +144,10 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) ratio = div64_u64(ratio, max_rate); if (!ratio) { WARN_ONCE(1, "Reference frequency too low.\n"); - return -EINVAL; + return; } - per_cpu(arch_max_freq_scale, cpu) = (unsigned long)ratio; - - return 0; + WRITE_ONCE(per_cpu(arch_max_freq_scale, cpu), (unsigned long)ratio); } static void amu_scale_freq_tick(void) @@ -195,10 +198,7 @@ static void amu_fie_setup(const struct cpumask *cpus) return; for_each_cpu(cpu, cpus) { - if (!freq_counters_valid(cpu) || - freq_inv_set_max_ratio(cpu, - cpufreq_get_hw_max_freq(cpu) * 1000ULL, - arch_timer_get_rate())) + if (!freq_counters_valid(cpu)) return; } diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 087a30b73b0bf..2aa0c64252902 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -344,6 +344,10 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) return !ret; } +void __weak freq_inv_set_max_ratio(int cpu, u64 max_rate) +{ +} + #ifdef CONFIG_ACPI_CPPC_LIB #include @@ -381,6 +385,9 @@ void topology_init_cpu_capacity_cppc(void) } for_each_possible_cpu(cpu) { + freq_inv_set_max_ratio(cpu, + per_cpu(capacity_freq_ref, cpu) * HZ_PER_KHZ); + capacity = raw_capacity[cpu]; capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT, capacity_scale); @@ -419,8 +426,11 @@ init_cpu_capacity_callback(struct notifier_block *nb, cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus); - for_each_cpu(cpu, policy->related_cpus) + for_each_cpu(cpu, policy->related_cpus) { per_cpu(capacity_freq_ref, cpu) = policy->cpuinfo.max_freq; + freq_inv_set_max_ratio(cpu, + per_cpu(capacity_freq_ref, cpu) * HZ_PER_KHZ); + } if (cpumask_empty(cpus_to_visit)) { if (raw_capacity) { diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 32c24ff4f2a80..a63d61ca55afc 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -99,6 +99,7 @@ void update_siblings_masks(unsigned int cpu); void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); int parse_acpi_topology(void); +void freq_inv_set_max_ratio(int cpu, u64 max_rate); #endif #endif /* _LINUX_ARCH_TOPOLOGY_H_ */