diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index e9e4c52f368543..4cd8128c6ebc68 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -154,7 +154,8 @@
 #define OPAL_FLASH_WRITE			111
 #define OPAL_FLASH_ERASE			112
 #define OPAL_PRD_MSG				113
-#define OPAL_LAST				113
+#define OPAL_NEST_IMA_CONTROL			116
+#define OPAL_LAST				116
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 958e941c0cda88..b46c4b791a9c29 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -203,6 +203,8 @@ int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf,
 int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size,
 		uint64_t token);
 
+int64_t opal_nest_ima_control(uint32_t value);
+
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
 				   int depth, void *data);
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a5652a4c..6da656b50e3c0f 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_PERF_EVENTS)	+= callchain.o
 obj-$(CONFIG_PPC_PERF_CTRS)	+= core-book3s.o bhrb.o
 obj64-$(CONFIG_PPC_PERF_CTRS)	+= power4-pmu.o ppc970-pmu.o power5-pmu.o \
 				   power5+-pmu.o power6-pmu.o power7-pmu.o \
-				   power8-pmu.o
+				   power8-pmu.o nest-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
 
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
diff --git a/arch/powerpc/perf/nest-pmu.c b/arch/powerpc/perf/nest-pmu.c
new file mode 100644
index 00000000000000..31943c56d90528
--- /dev/null
+++ b/arch/powerpc/perf/nest-pmu.c
@@ -0,0 +1,714 @@
+/*
+ * Nest Performance Monitor counter support for POWER8 processors.
+ *
+ * Copyright (C) 2015 Madhavan Srinivasan, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include "nest-pmu.h"
+
+/* Counter memory region of each chip, indexed by chip id. */
+static struct perchip_nest_info p8_nest_perchip_info[P8_NEST_MAX_CHIPS];
+/* Registered nest pmus, filled from index 0; unused slots are NULL. */
+static struct nest_pmu *per_nest_pmu_arr[P8_NEST_MAX_PMUS];
+/* Cpus designated for nest counter collection, one per node. */
+static cpumask_t nest_pmu_cpu_mask;
+
+PMU_FORMAT_ATTR(event, "config:0-20");
+struct attribute *p8_nest_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+struct attribute_group p8_nest_format_group = {
+	.name = "format",
+	.attrs = p8_nest_format_attrs,
+};
+
+/* sysfs "cpumask" attribute: print the designated cpus as a list. */
+static ssize_t nest_pmu_cpumask_get_attr(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &nest_pmu_cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, nest_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *nest_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group nest_pmu_cpumask_attr_group = {
+	.attrs = nest_pmu_cpumask_attrs,
+};
+
+/* Runs on each designated cpu: ask OPAL to start the nest engine. */
+static void nest_init(void *dummy)
+{
+	opal_nest_ima_control(P8_NEST_ENGINE_START);
+}
+
+/* Migrate the perf context of every registered nest pmu to new_cpu. */
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+	int i;
+
+	/* Bounded: a completely full array has no NULL terminator */
+	for (i = 0; i < P8_NEST_MAX_PMUS && per_nest_pmu_arr[i]; i++)
+		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
+						old_cpu, new_cpu);
+}
+
+/*
+ * Hotplug (CPU_DOWN_PREPARE): if @cpu is a designated cpu, hand its
+ * role and its perf contexts over to another cpu of the same node.
+ */
+static void nest_exit_cpu(int cpu)
+{
+	const struct cpumask *l_cpumask;
+	int i, target = -1;
+
+	/*
+	 * Check in the designated list for this cpu. Don't bother
+	 * if not one of them.
+	 */
+	if (!cpumask_test_and_clear_cpu(cpu, &nest_pmu_cpu_mask))
+		return;
+
+	/*
+	 * Now that this cpu is one of the designated, find another
+	 * cpu which is a) online and b) in the same node. The whole
+	 * node is searched, not only the cpus numbered after @cpu.
+	 */
+	l_cpumask = cpumask_of_node(cpu_to_node(cpu));
+	for_each_cpu_and(i, l_cpumask, cpu_online_mask) {
+		if (i != cpu) {
+			target = i;
+			break;
+		}
+	}
+
+	/*
+	 * Update the cpumask with the target cpu and
+	 * migrate the context if a target was found.
+	 */
+	if (target >= 0) {
+		cpumask_set_cpu(target, &nest_pmu_cpu_mask);
+		nest_change_cpu_context(cpu, target);
+	}
+}
+
+/*
+ * Hotplug (CPU_ONLINE): make @cpu the designated cpu of its node when
+ * the node has none, or when @cpu is the first cpu of the node and the
+ * cpu following it currently holds the role.
+ */
+static void nest_init_cpu(int cpu)
+{
+	const struct cpumask *l_cpumask;
+	int fcpu, ncpu;
+
+	l_cpumask = cpumask_of_node(cpu_to_node(cpu));
+
+	/*
+	 * If no cpu of this node is designated yet, just add the
+	 * incoming cpu and move on.
+	 */
+	if (!cpumask_intersects(l_cpumask, &nest_pmu_cpu_mask)) {
+		cpumask_set_cpu(cpu, &nest_pmu_cpu_mask);
+		return;
+	}
+
+	/*
+	 * Always have the first online cpu of a chip as designated one.
+	 */
+	fcpu = cpumask_first(l_cpumask);
+	if (cpu != fcpu)
+		return;
+
+	/* cpumask_next() returns >= nr_cpu_ids when no cpu follows */
+	ncpu = cpumask_next(cpu, l_cpumask);
+	if (ncpu >= nr_cpu_ids)
+		return;
+
+	if (cpumask_test_and_clear_cpu(ncpu, &nest_pmu_cpu_mask)) {
+		cpumask_set_cpu(cpu, &nest_pmu_cpu_mask);
+		nest_change_cpu_context(ncpu, cpu);
+	}
+}
+
+/* CPU hotplug callback keeping nest_pmu_cpu_mask up to date. */
+static int nest_pmu_cpu_notifier(struct notifier_block *self,
+				unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+		nest_init_cpu(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		nest_exit_cpu(cpu);
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block nest_pmu_cpu_nb = {
+	.notifier_call = nest_pmu_cpu_notifier,
+	.priority = CPU_PRI_PERF + 1,
+};
+
+/*
+ * Pick the designated cpus, start the nest engine on them and
+ * register for cpu hotplug notifications.
+ */
+void nest_pmu_cpumask_init(void)
+{
+	const struct cpumask *l_cpumask;
+	int cpu, nid;
+
+	cpu_notifier_register_begin();
+
+	/*
+	 * Nest PMUs are per-chip counters. So designate a cpu
+	 * from each chip for counter collection.
+	 */
+	for_each_online_node(nid) {
+		l_cpumask = cpumask_of_node(nid);
+
+		/* designate first online cpu in this node */
+		cpu = cpumask_first(l_cpumask);
+
+		/* A node without cpus gives cpu >= nr_cpu_ids: skip it */
+		if (cpu >= nr_cpu_ids)
+			continue;
+
+		cpumask_set_cpu(cpu, &nest_pmu_cpu_mask);
+	}
+
+	/* Initialize Nest PMUs in each node using designated cpus */
+	on_each_cpu_mask(&nest_pmu_cpu_mask, nest_init, NULL, 1);
+
+	__register_cpu_notifier(&nest_pmu_cpu_nb);
+
+	cpu_notifier_register_done();
+}
+
+/*
+ * pmu::event_init: validate the event and compute the address of its
+ * counter inside the counter region of the chip that owns event->cpu.
+ */
+static int p8_nest_event_init(struct perf_event *event)
+{
+	struct perchip_nest_info *p8ni;
+	int chip_id;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported yet */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv ||
+	    event->attr.exclude_idle ||
+	    event->attr.exclude_host ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	/* The chip id indexes a fixed-size table: reject anything else */
+	chip_id = topology_physical_package_id(event->cpu);
+	if (chip_id < 0 || chip_id >= P8_NEST_MAX_CHIPS)
+		return -ENODEV;
+
+	/* No counter region was recorded for this chip */
+	p8ni = &p8_nest_perchip_info[chip_id];
+	if (!p8ni->vbase)
+		return -ENODEV;
+
+	/*
+	 * attr.config comes from user space and is used as a byte offset
+	 * into the counter region; the 64-bit counter must fit inside it.
+	 */
+	if (p8ni->size < sizeof(u64) ||
+	    event->attr.config > p8ni->size - sizeof(u64))
+		return -EINVAL;
+
+	event->hw.event_base = event->attr.config + p8ni->vbase;
+
+	return 0;
+}
+
+/* Snapshot the current big-endian counter value as the new baseline. */
+static void p8_nest_read_counter(struct perf_event *event)
+{
+	const __be64 *addr = (const __be64 *)event->hw.event_base;
+
+	local64_set(&event->hw.prev_count, be64_to_cpu(*addr));
+}
+
+/* pmu::read: add the counter delta since the last snapshot to the event. */
+static void p8_nest_perf_event_update(struct perf_event *event)
+{
+	const __be64 *addr = (const __be64 *)event->hw.event_base;
+	u64 counter_prev, counter_new;
+
+	counter_prev = local64_read(&event->hw.prev_count);
+	counter_new = be64_to_cpu(*addr);
+
+	local64_set(&event->hw.prev_count, counter_new);
+	local64_add(counter_new - counter_prev, &event->count);
+}
+
+/* pmu::start: take the baseline the next update is measured against. */
+static void p8_nest_event_start(struct perf_event *event, int flags)
+{
+	event->hw.state = 0;
+	p8_nest_read_counter(event);
+}
+
+/* pmu::stop: fold in the outstanding delta when PERF_EF_UPDATE is set. */
+static void p8_nest_event_stop(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_UPDATE)
+		p8_nest_perf_event_update(event);
+}
+
+/* pmu::add: start counting right away when PERF_EF_START is set. */
+static int p8_nest_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		p8_nest_event_start(event, flags);
+
+	return 0;
+}
+
+/*
+ * pmu::del: always request the final update. The perf core is expected
+ * to call ->del() without PERF_EF_UPDATE, which would drop the last delta
+ * if the flags were passed through to p8_nest_event_stop() unchanged.
+ */
+static void p8_nest_event_del(struct perf_event *event, int flags)
+{
+	p8_nest_event_stop(event, PERF_EF_UPDATE);
+}
+
+/*
+ * Populate pmu ops in the structure
+ */
+static int update_pmu_ops(struct nest_pmu *pmu)
+{
+	if (!pmu)
+		return -EINVAL;
+
+	pmu->pmu.task_ctx_nr = perf_invalid_context;
+	pmu->pmu.event_init = p8_nest_event_init;
+	pmu->pmu.add = p8_nest_event_add;
+	pmu->pmu.del = p8_nest_event_del;
+	pmu->pmu.start = p8_nest_event_start;
+	pmu->pmu.stop = p8_nest_event_stop;
+	pmu->pmu.read = p8_nest_perf_event_update;
+	pmu->pmu.attr_groups = pmu->attr_groups;
+
+	return 0;
+}
+
+/*
+ * Fill one event slot from a device tree property.
+ *
+ * @pp:        property describing the event
+ * @start:     event name (property name with any prefix skipped)
+ * @p8_events: slot to fill; on success it owns two allocated strings
+ * @flg:       non-zero: the property value is a string, copied as is;
+ *             zero: the event string is built from @val
+ * @val:       event code, only used when @flg is zero
+ *
+ * Returns 0 on success, -EINVAL for a missing or unterminated string
+ * value, -ENOMEM on allocation failure. On failure nothing stays
+ * allocated and the slot is left untouched.
+ */
+static int nest_event_info(struct property *pp, char *start,
+		struct nest_ima_events *p8_events, int flg, u32 val)
+{
+	char *name, *value;
+
+	/* string content must be NUL terminated within pp->length */
+	if (flg && (!pp->value ||
+		    (strnlen(pp->value, pp->length) == pp->length)))
+		return -EINVAL;
+
+	/* memory for event name, sized to fit instead of a fixed buffer */
+	name = kstrdup(start, GFP_KERNEL);
+	if (!name)
+		return -ENOMEM;
+
+	/* memory for content */
+	if (flg)
+		value = kstrdup(pp->value, GFP_KERNEL);
+	else
+		value = kasprintf(GFP_KERNEL, "event=0x%x", val);
+	if (!value) {
+		kfree(name);
+		return -ENOMEM;
+	}
+
+	p8_events->ev_name = name;
+	p8_events->ev_value = value;
+	return 0;
+}
+
+/*
+ * Populate event name and string in attribute.
+ * Returns NULL when the attribute cannot be allocated.
+ */
+struct attribute *dev_str_attr(const char *name, const char *str)
+{
+	struct perf_pmu_events_attr *attr;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return NULL;
+
+	/* Needed for dynamically allocated attributes (lockdep key) */
+	sysfs_attr_init(&attr->attr.attr);
+
+	attr->event_str = str;
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = perf_event_sysfs_show;
+
+	return &attr->attr.attr;
+}
+
+/* Free an "events" group built by update_events_in_group(). */
+static void nest_free_event_group(const struct attribute_group *group)
+{
+	struct attribute **attr;
+
+	if (!group)
+		return;
+
+	/* Each attribute is embedded in its own perf_pmu_events_attr */
+	for (attr = group->attrs; *attr; attr++)
+		kfree(container_of(*attr, struct perf_pmu_events_attr,
+				   attr.attr));
+	kfree(group);
+}
+
+/*
+ * Build the "events" attribute group of @pmu from the first @idx
+ * entries of @p8_events. The attributes point at the ev_name and
+ * ev_value strings, which must therefore stay allocated on success.
+ *
+ * Returns 0 on success or -ENOMEM, in which case nothing allocated
+ * here is left behind.
+ */
+int update_events_in_group(
+	struct nest_ima_events *p8_events, int idx, struct nest_pmu *pmu)
+{
+	struct attribute_group *attr_group;
+	struct attribute **attrs;
+	int i;
+
+	/*
+	 * Allocate memory for event attribute group: the group itself
+	 * followed by a NULL-terminated array of attribute pointers.
+	 */
+	attr_group = kzalloc(((sizeof(struct attribute *) * (idx + 1)) +
+					sizeof(*attr_group)), GFP_KERNEL);
+	if (!attr_group)
+		return -ENOMEM;
+
+	attrs = (struct attribute **)(attr_group + 1);
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+
+	for (i = 0; i < idx; i++, p8_events++) {
+		attrs[i] = dev_str_attr(p8_events->ev_name,
+					p8_events->ev_value);
+		if (!attrs[i]) {
+			/* attrs[] is still NULL-terminated at index i */
+			nest_free_event_group(attr_group);
+			return -ENOMEM;
+		}
+	}
+
+	pmu->attr_groups[0] = attr_group;
+	return 0;
+}
+
+/* Free the strings owned by the first @nr entries of @events. */
+static void nest_free_events(struct nest_ima_events *events, int nr)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		kfree(events[i].ev_name);
+		kfree(events[i].ev_value);
+	}
+}
+
+/*
+ * Create and register one nest pmu from a "nest-ima-unit" node.
+ *
+ * @dev:       device tree node of the unit; its "name" property names
+ *             the pmu, the remaining properties describe its events
+ * @pmu_index: slot of per_nest_pmu_arr the pmu is recorded in
+ *
+ * Returns 0 on success or a negative errno. On failure everything
+ * allocated here is released and the pmu is not recorded.
+ */
+static int nest_pmu_create(struct device_node *dev, int pmu_index)
+{
+	struct nest_ima_events *p8_events, *ev;
+	struct nest_pmu *pmu_ptr;
+	struct property *pp;
+	char *start;
+	u32 val;
+	int idx = 0, ret;
+
+	if (!dev)
+		return -EINVAL;
+
+	/* per_nest_pmu_arr has a fixed number of slots */
+	if (pmu_index < 0 || pmu_index >= P8_NEST_MAX_PMUS)
+		return -ENOSPC;
+
+	/* memory for nest pmus */
+	pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
+	if (!pmu_ptr)
+		return -ENOMEM;
+
+	/* memory for nest pmu events */
+	p8_events = kcalloc(P8_NEST_MAX_EVENTS_SUPPORTED, sizeof(*p8_events),
+			    GFP_KERNEL);
+	if (!p8_events) {
+		ret = -ENOMEM;
+		goto err_free_pmu;
+	}
+
+	/*
+	 * Loop through each property
+	 */
+	for_each_property_of_node(dev, pp) {
+		start = pp->name;
+
+		if (!strcmp(pp->name, "name")) {
+			if (!pp->value ||
+			    (strnlen(pp->value, pp->length) == pp->length)) {
+				ret = -EINVAL;
+				goto err_free_events;
+			}
+
+			/* Save the name to register it later */
+			pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "Nest_%s",
+						(char *)pp->value);
+			if (!pmu_ptr->pmu.name) {
+				ret = -ENOMEM;
+				goto err_free_events;
+			}
+			pmu_ptr->attr_groups[1] = &p8_nest_format_group;
+			pmu_ptr->attr_groups[2] = &nest_pmu_cpumask_attr_group;
+			continue;
+		}
+
+		/* Skip these, we don't need them */
+		if (!strcmp(pp->name, "phandle") ||
+		    !strcmp(pp->name, "device_type") ||
+		    !strcmp(pp->name, "linux,phandle"))
+			continue;
+
+		/* Do not run past the end of the event array */
+		if (idx >= P8_NEST_MAX_EVENTS_SUPPORTED) {
+			ret = -ENOSPC;
+			goto err_free_events;
+		}
+		ev = &p8_events[idx];
+
+		if (strncmp(pp->name, "unit.", 5) == 0) {
+			/* Skip first few chars in the name */
+			start += 5;
+			ret = nest_event_info(pp, start, ev, 1, 0);
+		} else if (strncmp(pp->name, "scale.", 6) == 0) {
+			/* Skip first few chars in the name */
+			start += 6;
+			ret = nest_event_info(pp, start, ev, 1, 0);
+		} else {
+			/* The event code is read as one big-endian u32 */
+			if (!pp->value ||
+			    pp->length < (int)sizeof(__be32)) {
+				ret = -EINVAL;
+				goto err_free_events;
+			}
+			val = be32_to_cpup(pp->value);
+
+			ret = nest_event_info(pp, start, ev, 0, val);
+		}
+
+		if (ret)
+			goto err_free_events;
+
+		/* book keeping */
+		idx++;
+	}
+
+	/* A unit without a "name" property cannot be registered */
+	if (!pmu_ptr->pmu.name) {
+		ret = -EINVAL;
+		goto err_free_events;
+	}
+
+	ret = update_events_in_group(p8_events, idx, pmu_ptr);
+	if (ret)
+		goto err_free_events;
+
+	update_pmu_ops(pmu_ptr);
+	/* Register the pmu */
+	ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
+	if (ret) {
+		pr_err("Nest PMU %s Register failed\n", pmu_ptr->pmu.name);
+		goto err_free_group;
+	}
+
+	/* Needed for hotplug/migration; only registered pmus are recorded */
+	per_nest_pmu_arr[pmu_index] = pmu_ptr;
+
+	/* The attributes keep using the strings; only the array can go */
+	kfree(p8_events);
+
+	pr_info("%s performance monitor hardware support registered\n",
+							pmu_ptr->pmu.name);
+	return 0;
+
+err_free_group:
+	nest_free_event_group(pmu_ptr->attr_groups[0]);
+err_free_events:
+	nest_free_events(p8_events, idx);
+	kfree(p8_events);
+	kfree(pmu_ptr->pmu.name);
+err_free_pmu:
+	kfree(pmu_ptr);
+	return ret;
+}
+
+/*
+ * Parse the device tree: record the counter memory region of every
+ * chip, then create one pmu per "nest-ima-unit" node.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int nest_ima_dt_parser(void)
+{
+	struct device_node *dev;
+	struct perchip_nest_info *p8ni;
+	u64 chip_ima_reg, chip_ima_size;
+	u32 gcid;
+	int idx, ret;
+
+	/*
+	 * "nest-ima" folder contains two things,
+	 * a) per-chip reserved memory region for Nest PMU Counter data
+	 * b) Support Nest PMU units and their event files
+	 */
+	for_each_node_with_property(dev, "ibm,ima-chip") {
+		/* These helpers also reject properties that are too short */
+		if (of_property_read_u32(dev, "ibm,chip-id", &gcid) ||
+		    of_property_read_u64(dev, "reg", &chip_ima_reg) ||
+		    of_property_read_u64(dev, "size", &chip_ima_size)) {
+			pr_err("Nest_PMU: device %s missing property\n",
+							dev->full_name);
+			ret = -ENODEV;
+			goto err_put_node;
+		}
+
+		/* chip id is the index into a fixed-size table */
+		if (gcid >= P8_NEST_MAX_CHIPS) {
+			pr_err("Nest_PMU: device %s: unsupported chip id %u\n",
+						dev->full_name, gcid);
+			ret = -EINVAL;
+			goto err_put_node;
+		}
+
+		/*
+		 * Using a local variable to make it compact and
+		 * easier to read
+		 */
+		p8ni = &p8_nest_perchip_info[gcid];
+		p8ni->chip_id = gcid;
+		p8ni->pbase = chip_ima_reg;
+		p8ni->size = chip_ima_size;
+		p8ni->vbase = (uint64_t) phys_to_virt(p8ni->pbase);
+	}
+
+	/* Look for supported Nest PMU units */
+	idx = 0;
+	for_each_node_by_type(dev, "nest-ima-unit") {
+		ret = nest_pmu_create(dev, idx);
+		if (ret)
+			goto err_put_node;
+		idx++;
+	}
+
+	return 0;
+
+err_put_node:
+	/* Leaving a node iterator early keeps a reference on the node */
+	of_node_put(dev);
+	return ret;
+}
+
+/*
+ * Detect Nest PMU support and register the pmus found in the device tree.
+ */
+static int __init nest_pmu_init(void)
+{
+	struct device_node *np;
+	int ret = -ENODEV;
+
+	/*
+	 * Lets do this only if we are hypervisor
+	 */
+	if (!cur_cpu_spec->oprofile_cpu_type ||
+	    !(strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8") == 0) ||
+	    !cpu_has_feature(CPU_FTR_HVMODE))
+		return ret;
+
+	/*
+	 * Nest PMU information is grouped under "nest-ima" node
+	 * of the top-level device-tree directory. Detect Nest PMU
+	 * by the "ibm,ima-chip" property.
+	 */
+	np = of_find_node_with_property(NULL, "ibm,ima-chip");
+	if (!np)
+		return ret;
+
+	/* Only presence matters: drop the reference the lookup took */
+	of_node_put(np);
+
+	/*
+	 * Parse device-tree for Nest PMU information
+	 */
+	ret = nest_ima_dt_parser();
+	if (ret)
+		return ret;
+
+	/* Add cpumask and register for hotplug notification */
+	nest_pmu_cpumask_init();
+
+	return 0;
+}
+device_initcall(nest_pmu_init);
diff --git a/arch/powerpc/perf/nest-pmu.h b/arch/powerpc/perf/nest-pmu.h
new file mode 100644
index 00000000000000..ecb5d266d5a28f
--- /dev/null
+++ b/arch/powerpc/perf/nest-pmu.h
@@ -0,0 +1,53 @@
+/*
+ * Nest Performance Monitor counter support for POWER8 processors.
+ *
+ * Copyright (C) 2015 Madhavan Srinivasan, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/io.h>
+#include <asm/opal.h>
+
+#define P8_NEST_MAX_CHIPS		32
+#define P8_NEST_MAX_PMUS		32
+#define P8_NEST_MAX_PMU_NAME_LEN	256
+#define P8_NEST_MAX_EVENTS_SUPPORTED	256
+#define P8_NEST_ENGINE_START		1
+#define P8_NEST_ENGINE_STOP		0
+
+/*
+ * Structure to hold per chip specific memory address
+ * information for nest pmus. Nest Counter data are exported
+ * in per-chip reserved memory region by the PORE Engine.
+ */
+struct perchip_nest_info {
+	uint32_t chip_id;
+	uint64_t pbase;		/* physical base of the counter region */
+	uint64_t vbase;		/* kernel virtual address of pbase */
+	uint64_t size;		/* region size ("size" property, 64-bit) */
+};
+
+/*
+ * Place holder for nest pmu events and values.
+ */
+struct nest_ima_events {
+	const char *ev_name;
+	const char *ev_value;
+};
+
+/*
+ * Device tree parser code detects nest pmu support and
+ * registers new nest pmus. This structure will
+ * hold the pmu functions and attrs for each nest pmu and
+ * will be referenced at the time of pmu registration.
+ */
+struct nest_pmu {
+	struct pmu pmu;
+	const struct attribute_group *attr_groups[4];
+};
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index d6a7b8252e4da2..c475c04468fb6d 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -297,3 +297,4 @@ OPAL_CALL(opal_flash_read,			OPAL_FLASH_READ);
 OPAL_CALL(opal_flash_write,			OPAL_FLASH_WRITE);
 OPAL_CALL(opal_flash_erase,			OPAL_FLASH_ERASE);
 OPAL_CALL(opal_prd_msg,				OPAL_PRD_MSG);
+OPAL_CALL(opal_nest_ima_control,		OPAL_NEST_IMA_CONTROL);